diff options
author | Luke Chen <luke.chen@mongodb.com> | 2020-10-12 15:00:03 +1100 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-10-12 04:32:55 +0000 |
commit | e2c19613fdd410c2710222c727870ac295996d5c (patch) | |
tree | d38369dea5cd100b10191cfcc87afecc722ebf0f /src/third_party/wiredtiger | |
parent | 393eef534e7f8ba9f880f5dd46196ce2e6dec23f (diff) | |
download | mongo-e2c19613fdd410c2710222c727870ac295996d5c.tar.gz |
Import wiredtiger: f4b62301193f8180890351a892baa067e3e7c27b from branch mongodb-4.4
ref: bb1cc65a63..f4b6230119
for: 4.4.2
WT-6410 Remove WT_SESSION.rebalance
WT-6643 Explicitly set the 64-bit uint part of the LSN for atomic assignment
WT-6690 Add support for table import when the exported configuration is provided
WT-6692 Handle scenario where imported timestamps are newer than current DB's timestamps
WT-6719 Split hs.c to multiple files
WT-6720 Add new hs open() and close() methods
WT-6723 Remove lookaside comments
WT-6731 Prevent WT_RESTART from being returned to API calls
WT-6732 Fix post-task command noises in Evergreen task logs
WT-6745 Rollback the global durable timestamp after rollback to stable
WT-6747 Add prepare support in checkpoint test
WT-6748 Support testing different durable timestamp and commit timestamp in timestamp abort
WT-6749 Write a python test for update restore of prepared updates
WT-6756 Documentation: reorganize top level
WT-6762 Use stats instead of files to check consumption
WT-6765 Add more debugging and earlier detection of missing file
Diffstat (limited to 'src/third_party/wiredtiger')
100 files changed, 2726 insertions, 2938 deletions
diff --git a/src/third_party/wiredtiger/SConstruct b/src/third_party/wiredtiger/SConstruct index 5eddc4e356b..23756c2b36a 100644 --- a/src/third_party/wiredtiger/SConstruct +++ b/src/third_party/wiredtiger/SConstruct @@ -294,7 +294,6 @@ wtbin = env.Program("wt", [ "src/utilities/util_misc.c", "src/utilities/util_printlog.c", "src/utilities/util_read.c", - "src/utilities/util_rebalance.c", "src/utilities/util_rename.c", "src/utilities/util_salvage.c", "src/utilities/util_stat.c", diff --git a/src/third_party/wiredtiger/build_posix/Make.base b/src/third_party/wiredtiger/build_posix/Make.base index 252a9b7f7cc..af38b894fff 100644 --- a/src/third_party/wiredtiger/build_posix/Make.base +++ b/src/third_party/wiredtiger/build_posix/Make.base @@ -28,7 +28,6 @@ wt_SOURCES =\ src/utilities/util_misc.c \ src/utilities/util_printlog.c \ src/utilities/util_read.c \ - src/utilities/util_rebalance.c \ src/utilities/util_rename.c \ src/utilities/util_salvage.c \ src/utilities/util_stat.c \ diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py index 4356af87e87..53590016097 100644 --- a/src/third_party/wiredtiger/dist/api_data.py +++ b/src/third_party/wiredtiger/dist/api_data.py @@ -725,7 +725,6 @@ connection_runtime_config = [ 'mutex', 'overflow', 'read', - 'rebalance', 'reconcile', 'recovery', 'recovery_progress', @@ -1376,7 +1375,6 @@ methods = { choices=['commit', 'first_commit', 'prepare', 'read']), ]), -'WT_SESSION.rebalance' : Method([]), 'WT_SESSION.rename' : Method([]), 'WT_SESSION.reset' : Method([]), 'WT_SESSION.salvage' : Method([ diff --git a/src/third_party/wiredtiger/dist/filelist b/src/third_party/wiredtiger/dist/filelist index 139347514d3..00ce38d68ef 100644 --- a/src/third_party/wiredtiger/dist/filelist +++ b/src/third_party/wiredtiger/dist/filelist @@ -31,7 +31,6 @@ src/btree/bt_ovfl.c src/btree/bt_page.c src/btree/bt_random.c src/btree/bt_read.c -src/btree/bt_rebalance.c src/btree/bt_ret.c src/btree/bt_slvg.c 
src/btree/bt_split.c @@ -82,6 +81,7 @@ src/cursor/cur_config.c src/cursor/cur_ds.c src/cursor/cur_dump.c src/cursor/cur_file.c +src/cursor/cur_hs.c src/cursor/cur_index.c src/cursor/cur_join.c src/cursor/cur_json.c @@ -94,7 +94,10 @@ src/evict/evict_file.c src/evict/evict_lru.c src/evict/evict_page.c src/evict/evict_stat.c -src/history/hs.c +src/history/hs_conn.c +src/history/hs_cursor.c +src/history/hs_verify.c +src/history/hs_rec.c src/log/log.c src/log/log_auto.c src/log/log_slot.c diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index 9faf673afcc..6364398f6ff 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -323,7 +323,6 @@ RCS RDNOLOCK RDONLY READONLY -REBALANCE RECNO REF's REFs @@ -341,7 +340,6 @@ RXB Radu ReadFile Readonly -Rebalance RedHat Redistributions Refactor @@ -1174,7 +1172,6 @@ readunlock readv realloc rebalance -rebalanced rebalancing recno recnos diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py index 2669dd0c970..a2106be8242 100644 --- a/src/third_party/wiredtiger/dist/stat_data.py +++ b/src/third_party/wiredtiger/dist/stat_data.py @@ -265,6 +265,7 @@ connection_stats = [ CacheStat('cache_eviction_walk_from_root', 'eviction walks started from root of tree'), CacheStat('cache_eviction_walk_leaf_notfound', 'eviction server waiting for a leaf page'), CacheStat('cache_eviction_walk_passes', 'eviction passes of a file'), + CacheStat('cache_eviction_walk_restart', 'eviction walks restarted'), CacheStat('cache_eviction_walk_saved_pos', 'eviction walks started from saved location in tree'), CacheStat('cache_eviction_walks_abandoned', 'eviction walks abandoned'), CacheStat('cache_eviction_walks_active', 'files with active eviction walks', 'no_clear,no_scale'), @@ -580,8 +581,6 @@ connection_stats = [ SessionOpStat('session_table_drop_success', 'table drop successful calls', 
'no_clear,no_scale'), SessionOpStat('session_table_import_fail', 'table import failed calls', 'no_clear,no_scale'), SessionOpStat('session_table_import_success', 'table import successful calls', 'no_clear,no_scale'), - SessionOpStat('session_table_rebalance_fail', 'table rebalance failed calls', 'no_clear,no_scale'), - SessionOpStat('session_table_rebalance_success', 'table rebalance successful calls', 'no_clear,no_scale'), SessionOpStat('session_table_rename_fail', 'table rename failed calls', 'no_clear,no_scale'), SessionOpStat('session_table_rename_success', 'table rename successful calls', 'no_clear,no_scale'), SessionOpStat('session_table_salvage_fail', 'table salvage failed calls', 'no_clear,no_scale'), @@ -762,6 +761,7 @@ dsrc_stats = [ CacheStat('cache_eviction_target_page_lt64', 'eviction walk target pages histogram - 32-63'), CacheStat('cache_eviction_walk_from_root', 'eviction walks started from root of tree'), CacheStat('cache_eviction_walk_passes', 'eviction walk passes of a file'), + CacheStat('cache_eviction_walk_restart', 'eviction walks restarted'), CacheStat('cache_eviction_walk_saved_pos', 'eviction walks started from saved location in tree'), CacheStat('cache_eviction_walks_abandoned', 'eviction walks abandoned'), CacheStat('cache_eviction_walks_ended', 'eviction walks reached end of tree'), diff --git a/src/third_party/wiredtiger/examples/c/ex_all.c b/src/third_party/wiredtiger/examples/c/ex_all.c index 163c9cfddda..a6c6c5f8c43 100644 --- a/src/third_party/wiredtiger/examples/c/ex_all.c +++ b/src/third_party/wiredtiger/examples/c/ex_all.c @@ -692,10 +692,6 @@ session_ops(WT_SESSION *session) /*! [Import a file] */ #endif - /*! [Rebalance a table] */ - error_check(session->rebalance(session, "table:mytable", NULL)); - /*! [Rebalance a table] */ - error_check( session->create(session, "table:old", "key_format=r,value_format=S,cache_resident=true")); /*! 
[Rename a table] */ diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index fa00c3ab98d..e5cb765060f 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-4.4", - "commit": "bb1cc65a63a4fb081cb5969f0728ff2b74fc7c8b" + "commit": "f4b62301193f8180890351a892baa067e3e7c27b" } diff --git a/src/third_party/wiredtiger/lang/java/java_doc.i b/src/third_party/wiredtiger/lang/java/java_doc.i index df9bc4fb609..ef63c5ff614 100644 --- a/src/third_party/wiredtiger/lang/java/java_doc.i +++ b/src/third_party/wiredtiger/lang/java/java_doc.i @@ -31,7 +31,6 @@ COPYDOC(__wt_session, WT_SESSION, drop) COPYDOC(__wt_session, WT_SESSION, join) COPYDOC(__wt_session, WT_SESSION, log_flush) COPYDOC(__wt_session, WT_SESSION, log_printf) -COPYDOC(__wt_session, WT_SESSION, rebalance) COPYDOC(__wt_session, WT_SESSION, rename) COPYDOC(__wt_session, WT_SESSION, reset) COPYDOC(__wt_session, WT_SESSION, salvage) diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c index d9b30f9483a..2dac56ee485 100644 --- a/src/third_party/wiredtiger/src/btree/bt_handle.c +++ b/src/third_party/wiredtiger/src/btree/bt_handle.c @@ -149,20 +149,13 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) else { WT_ERR(__wt_btree_tree_open(session, root_addr, root_addr_size)); - /* - * Rebalance uses the cache, but only wants the root page, nothing else. - */ - if (!F_ISSET(btree, WT_BTREE_REBALANCE)) { - /* Warm the cache, if possible. */ - WT_WITH_PAGE_INDEX(session, ret = __btree_preload(session)); - WT_ERR(ret); - - /* - * Get the last record number in a column-store file. - */ - if (btree->type != BTREE_ROW) - WT_ERR(__btree_get_last_recno(session)); - } + /* Warm the cache, if possible. 
*/ + WT_WITH_PAGE_INDEX(session, ret = __btree_preload(session)); + WT_ERR(ret); + + /* Get the last record number in a column-store file. */ + if (btree->type != BTREE_ROW) + WT_ERR(__btree_get_last_recno(session)); } } @@ -178,9 +171,7 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) * configuration when finished so that handle close behaves correctly. */ if (btree->original || - F_ISSET(btree, - WT_BTREE_IN_MEMORY | WT_BTREE_REBALANCE | WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | - WT_BTREE_VERIFY)) { + F_ISSET(btree, WT_BTREE_IN_MEMORY | WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY)) { WT_ERR(__wt_evict_file_exclusive_on(session)); btree->evict_disabled_open = true; } diff --git a/src/third_party/wiredtiger/src/btree/bt_rebalance.c b/src/third_party/wiredtiger/src/btree/bt_rebalance.c deleted file mode 100644 index 44b379ba804..00000000000 --- a/src/third_party/wiredtiger/src/btree/bt_rebalance.c +++ /dev/null @@ -1,438 +0,0 @@ -/*- - * Copyright (c) 2014-2020 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * Shared rebalance information. - */ -typedef struct { - WT_REF **leaf; /* List of leaf pages */ - size_t leaf_next; /* Next entry */ - size_t leaf_allocated; /* Allocated bytes */ - - WT_ADDR *fl; /* List of objects to free */ - size_t fl_next; /* Next entry */ - size_t fl_allocated; /* Allocated bytes */ - - WT_PAGE *root; /* Created root page */ - - uint8_t type; /* Internal page type */ - -#define WT_REBALANCE_PROGRESS_INTERVAL 100 - uint64_t progress; /* Progress counter */ - - WT_ITEM *tmp1; /* Temporary buffers */ - WT_ITEM *tmp2; -} WT_REBALANCE_STUFF; - -/* - * __rebalance_discard -- - * Free the allocated information. 
- */ -static void -__rebalance_discard(WT_SESSION_IMPL *session, WT_REBALANCE_STUFF *rs) -{ - while (rs->leaf_next > 0) { - --rs->leaf_next; - __wt_free_ref(session, rs->leaf[rs->leaf_next], rs->type, false); - } - __wt_free(session, rs->leaf); - - while (rs->fl_next > 0) { - --rs->fl_next; - __wt_free(session, rs->fl[rs->fl_next].addr); - } - __wt_free(session, rs->fl); -} - -/* - * __rebalance_leaf_append -- - * Add a new entry to the list of leaf pages. - */ -static int -__rebalance_leaf_append(WT_SESSION_IMPL *session, const uint8_t *key, size_t key_len, - WT_CELL_UNPACK_ADDR *unpack, WT_REBALANCE_STUFF *rs) -{ - WT_ADDR *copy_addr; - WT_REF *copy; - - __wt_verbose(session, WT_VERB_REBALANCE, "rebalance leaf-list append %s, %s", - __wt_buf_set_printable(session, key, key_len, rs->tmp2), - __wt_addr_string(session, unpack->data, unpack->size, rs->tmp1)); - - /* Allocate and initialize a new leaf page reference. */ - WT_RET(__wt_realloc_def(session, &rs->leaf_allocated, rs->leaf_next + 1, &rs->leaf)); - WT_RET(__wt_calloc_one(session, &copy)); - rs->leaf[rs->leaf_next++] = copy; - - F_SET(copy, WT_REF_FLAG_LEAF); - copy->state = WT_REF_DISK; - - WT_RET(__wt_calloc_one(session, &copy_addr)); - copy->addr = copy_addr; - WT_TIME_AGGREGATE_COPY(&copy_addr->ta, &unpack->ta); - WT_RET(__wt_memdup(session, unpack->data, unpack->size, &copy_addr->addr)); - copy_addr->size = (uint8_t)unpack->size; - copy_addr->type = unpack->type == WT_CELL_ADDR_LEAF ? WT_ADDR_LEAF : WT_ADDR_LEAF_NO; - - if (key == NULL) - copy->ref_recno = unpack->v; - else - WT_RET(__wt_row_ikey(session, 0, key, key_len, copy)); - - return (0); -} - -/* - * __rebalance_fl_append -- - * Add a new entry to the free list. 
- */ -static int -__rebalance_fl_append( - WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_len, WT_REBALANCE_STUFF *rs) -{ - WT_ADDR *copy; - - WT_RET(__wt_realloc_def(session, &rs->fl_allocated, rs->fl_next + 1, &rs->fl)); - copy = &rs->fl[rs->fl_next++]; - - WT_RET(__wt_memdup(session, addr, addr_len, &copy->addr)); - copy->size = (uint8_t)addr_len; - copy->type = 0; - - return (0); -} - -/* - * __rebalance_internal -- - * Build an in-memory page that references all of the leaf pages we've found. - */ -static int -__rebalance_internal(WT_SESSION_IMPL *session, WT_REBALANCE_STUFF *rs) -{ - WT_BTREE *btree; - WT_DECL_RET; - WT_PAGE *page; - WT_PAGE_INDEX *pindex; - WT_REF **refp; - uint32_t i, leaf_next; - - btree = S2BT(session); - - /* - * There's a limit to the number of pages we can rebalance: the number of elements on a page is - * a 4B quantity and it's technically possible there could be more pages than that in a tree. - */ - if (rs->leaf_next > UINT32_MAX) - WT_RET_MSG(session, ENOTSUP, - "too many leaf pages to rebalance, %" WT_SIZET_FMT - " pages exceeds the maximum of %" PRIu32, - rs->leaf_next, UINT32_MAX); - leaf_next = (uint32_t)rs->leaf_next; - - /* Allocate a row-store root (internal) page and fill it in. */ - WT_RET(__wt_page_alloc(session, rs->type, leaf_next, false, &page)); - page->pg_intl_parent_ref = &btree->root; - WT_ERR(__wt_page_modify_init(session, page)); - __wt_page_modify_set(session, page); - - pindex = WT_INTL_INDEX_GET_SAFE(page); - for (refp = pindex->index, i = 0; i < leaf_next; ++i) { - rs->leaf[i]->home = page; - *refp++ = rs->leaf[i]; - rs->leaf[i] = NULL; - } - - rs->root = page; - return (0); - -err: - __wt_page_out(session, &page); - return (ret); -} - -/* - * __rebalance_free_original -- - * Free the tracked internal pages and overflow keys. 
- */ -static int -__rebalance_free_original(WT_SESSION_IMPL *session, WT_REBALANCE_STUFF *rs) -{ - WT_ADDR *addr; - uint64_t i; - - for (i = 0; i < rs->fl_next; ++i) { - addr = &rs->fl[i]; - - __wt_verbose(session, WT_VERB_REBALANCE, "rebalance discarding %s", - __wt_addr_string(session, addr->addr, addr->size, rs->tmp1)); - - WT_RET(__wt_btree_block_free(session, addr->addr, addr->size)); - } - return (0); -} - -/* - * __rebalance_col_walk -- - * Walk a column-store page and its descendants. - */ -static int -__rebalance_col_walk(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_REBALANCE_STUFF *rs) -{ - WT_CELL_UNPACK_ADDR unpack; - WT_DECL_ITEM(buf); - WT_DECL_RET; - - WT_ERR(__wt_scr_alloc(session, 0, &buf)); - - /* Report progress periodically. */ - if (++rs->progress % WT_REBALANCE_PROGRESS_INTERVAL == 0) - WT_ERR(__wt_progress(session, NULL, rs->progress)); - - /* - * Walk the page, instantiating keys: the page contains sorted key and location cookie pairs. - * Keys are on-page/overflow items and location cookies are WT_CELL_ADDR_XXX items. - */ - WT_CELL_FOREACH_ADDR (session, dsk, unpack) { - switch (unpack.type) { - case WT_CELL_ADDR_INT: - /* An internal page: read it and recursively walk it. */ - WT_ERR(__wt_bt_read(session, buf, unpack.data, unpack.size)); - WT_ERR(__rebalance_col_walk(session, buf->data, rs)); - __wt_verbose(session, WT_VERB_REBALANCE, "free-list append internal page: %s", - __wt_addr_string(session, unpack.data, unpack.size, rs->tmp1)); - WT_ERR(__rebalance_fl_append(session, unpack.data, unpack.size, rs)); - break; - case WT_CELL_ADDR_LEAF: - case WT_CELL_ADDR_LEAF_NO: - WT_ERR(__rebalance_leaf_append(session, NULL, 0, &unpack, rs)); - break; - default: - WT_ERR(__wt_illegal_value(session, unpack.type)); - } - } - WT_CELL_FOREACH_END; - -err: - __wt_scr_free(session, &buf); - return (ret); -} - -/* - * __rebalance_row_leaf_key -- - * Acquire a copy of the key for a leaf page. 
- */ -static int -__rebalance_row_leaf_key(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_len, - WT_ITEM *key, WT_REBALANCE_STUFF *rs) -{ - WT_DECL_RET; - WT_PAGE *page; - - /* - * We need the first key from a leaf page. Leaf pages are relatively complex (Huffman encoding, - * prefix compression, and so on), do the work to instantiate the page and copy the first key to - * the buffer. - * - * Page flags are 0 because we aren't releasing the memory used to read the page into memory and - * we don't want page discard to free it. - */ - WT_RET(__wt_bt_read(session, rs->tmp1, addr, addr_len)); - WT_RET(__wt_page_inmem(session, NULL, rs->tmp1->data, 0, &page)); - ret = __wt_row_leaf_key_copy(session, page, &page->pg_row[0], key); - __wt_page_out(session, &page); - return (ret); -} - -/* - * __rebalance_row_walk -- - * Walk a row-store page and its descendants. - */ -static int -__rebalance_row_walk(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_REBALANCE_STUFF *rs) -{ - WT_CELL_UNPACK_ADDR key, unpack; - WT_DECL_ITEM(buf); - WT_DECL_ITEM(leafkey); - WT_DECL_RET; - size_t len; - bool first_cell; - const void *p; - - WT_CLEAR(key); /* [-Werror=maybe-uninitialized] */ - - WT_ERR(__wt_scr_alloc(session, 0, &buf)); - WT_ERR(__wt_scr_alloc(session, 0, &leafkey)); - - /* Report progress periodically. */ - if (++rs->progress % WT_REBALANCE_PROGRESS_INTERVAL == 0) - WT_ERR(__wt_progress(session, NULL, rs->progress)); - - /* - * Walk the page, instantiating keys: the page contains sorted key and location cookie pairs. - * Keys are on-page/overflow items and location cookies are WT_CELL_ADDR_XXX items. - */ - first_cell = true; - WT_CELL_FOREACH_ADDR (session, dsk, unpack) { - switch (unpack.type) { - case WT_CELL_KEY: - key = unpack; - break; - case WT_CELL_KEY_OVFL: - /* - * Any overflow key that references an internal page is of no further use, schedule its - * blocks to be freed. 
- * - * We could potentially use the same overflow key being freed here for the internal page - * we're creating, but that's more work to get reconciliation to understand and overflow - * keys are (well, should be), uncommon. - */ - __wt_verbose(session, WT_VERB_REBALANCE, "free-list append overflow key: %s", - __wt_addr_string(session, unpack.data, unpack.size, rs->tmp1)); - - WT_ERR(__rebalance_fl_append(session, unpack.data, unpack.size, rs)); - - key = unpack; - break; - case WT_CELL_ADDR_DEL: - /* - * A deleted leaf page: we're rebalancing this tree, which means no transaction can be - * active in it, which means no deleted leaf page is interesting, ignore it. - */ - first_cell = false; - break; - case WT_CELL_ADDR_INT: - /* An internal page, schedule its blocks to be freed. */ - __wt_verbose(session, WT_VERB_REBALANCE, "free-list append internal page: %s", - __wt_addr_string(session, unpack.data, unpack.size, rs->tmp1)); - WT_ERR(__rebalance_fl_append(session, unpack.data, unpack.size, rs)); - - /* Read and recursively walk the page. */ - WT_ERR(__wt_bt_read(session, buf, unpack.data, unpack.size)); - WT_ERR(__rebalance_row_walk(session, buf->data, rs)); - break; - case WT_CELL_ADDR_LEAF: - case WT_CELL_ADDR_LEAF_NO: - /* - * A leaf page. We can't trust the 0th key on an internal page (we often don't store - * them in reconciliation because it saves space), get it from the underlying leaf page. - * Else, if the internal page key is an overflow key, instantiate it and use it. Else, - * we can use the internal page's key as is, it's sufficient for the page. 
- */ - if (first_cell) { - WT_ERR(__rebalance_row_leaf_key(session, unpack.data, unpack.size, leafkey, rs)); - p = leafkey->data; - len = leafkey->size; - } else if (key.type == WT_CELL_KEY_OVFL) { - WT_ERR(__wt_dsk_cell_data_ref(session, WT_PAGE_ROW_INT, &key, leafkey)); - p = leafkey->data; - len = leafkey->size; - } else { - p = key.data; - len = key.size; - } - WT_ERR(__rebalance_leaf_append(session, p, len, &unpack, rs)); - - first_cell = false; - break; - default: - WT_ERR(__wt_illegal_value(session, unpack.type)); - } - } - WT_CELL_FOREACH_END; - -err: - __wt_scr_free(session, &buf); - __wt_scr_free(session, &leafkey); - return (ret); -} - -/* - * __wt_bt_rebalance -- - * Rebalance the last checkpoint in the file. - */ -int -__wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) -{ - WT_BTREE *btree; - WT_DECL_RET; - WT_REBALANCE_STUFF *rs, _rstuff; - WT_REF *ref; - - WT_UNUSED(cfg); - - btree = S2BT(session); - ref = &btree->root; - - /* - * If the tree has never been written to disk, we're done, rebalance walks disk images, not - * in-memory pages. For the same reason, the tree has to be clean. - */ - if (ref->page->dsk == NULL) - return (0); - if (btree->modified) - WT_RET_MSG(session, EINVAL, "tree is modified, only clean trees may be rebalanced"); - - WT_CLEAR(_rstuff); - rs = &_rstuff; - - WT_ERR(__wt_scr_alloc(session, 0, &rs->tmp1)); - WT_ERR(__wt_scr_alloc(session, 0, &rs->tmp2)); - - /* Set the internal page tree type. */ - rs->type = ref->page->type; - - /* - * Recursively walk the tree. We start with a durable timestamp, but it should never be used - * (we'll accumulate durable timestamps from all the internal pages in our final write), so set - * it to something impossible. 
- */ - switch (rs->type) { - case WT_PAGE_ROW_INT: - WT_ERR(__rebalance_row_walk(session, ref->page->dsk, rs)); - break; - case WT_PAGE_COL_INT: - WT_ERR(__rebalance_col_walk(session, ref->page->dsk, rs)); - break; - default: - WT_ERR(__wt_illegal_value(session, rs->type)); - } - - /* Build a new root page. */ - WT_ERR(__rebalance_internal(session, rs)); - - /* - * Schedule the free of the original blocks (they shouldn't actually be freed until the next - * checkpoint completes). - */ - WT_ERR(__rebalance_free_original(session, rs)); - - /* - * Swap the old root page for our newly built root page, writing the new root page as part of a - * checkpoint will finish the rebalance. - */ - __wt_page_out(session, &ref->page); - ref->page = rs->root; - rs->root = NULL; - -err: - /* Discard any leftover root page we created. */ - if (rs->root != NULL) { - __wt_page_modify_clear(session, rs->root); - __wt_page_out(session, &rs->root); - } - - /* Discard any leftover leaf and internal page information. 
*/ - __rebalance_discard(session, rs); - - __wt_scr_free(session, &rs->tmp1); - __wt_scr_free(session, &rs->tmp2); - - return (ret); -} diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c index e72009b98da..3a5b3c52a73 100644 --- a/src/third_party/wiredtiger/src/config/config_def.c +++ b/src/third_party/wiredtiger/src/config/config_def.c @@ -135,8 +135,8 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = { ",\"evictserver\",\"fileops\",\"handleops\",\"log\"," "\"history_store\",\"history_store_activity\",\"lsm\"," "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\"," - "\"rts\",\"salvage\",\"shared_cache\",\"split\",\"temporary\"," + "\"reconcile\",\"recovery\",\"recovery_progress\",\"rts\"," + "\"salvage\",\"shared_cache\",\"split\",\"temporary\"," "\"thread_group\",\"timestamp\",\"transaction\",\"verify\"," "\"version\",\"write\"]", NULL, 0}, @@ -596,8 +596,8 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { ",\"evictserver\",\"fileops\",\"handleops\",\"log\"," "\"history_store\",\"history_store_activity\",\"lsm\"," "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\"," - "\"rts\",\"salvage\",\"shared_cache\",\"split\",\"temporary\"," + "\"reconcile\",\"recovery\",\"recovery_progress\",\"rts\"," + "\"salvage\",\"shared_cache\",\"split\",\"temporary\"," "\"thread_group\",\"timestamp\",\"transaction\",\"verify\"," "\"version\",\"write\"]", NULL, 0}, @@ -672,8 +672,8 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = { ",\"evictserver\",\"fileops\",\"handleops\",\"log\"," "\"history_store\",\"history_store_activity\",\"lsm\"," "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\"," - 
"\"rts\",\"salvage\",\"shared_cache\",\"split\",\"temporary\"," + "\"reconcile\",\"recovery\",\"recovery_progress\",\"rts\"," + "\"salvage\",\"shared_cache\",\"split\",\"temporary\"," "\"thread_group\",\"timestamp\",\"transaction\",\"verify\"," "\"version\",\"write\"]", NULL, 0}, @@ -743,8 +743,8 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = { ",\"evictserver\",\"fileops\",\"handleops\",\"log\"," "\"history_store\",\"history_store_activity\",\"lsm\"," "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\"," - "\"rts\",\"salvage\",\"shared_cache\",\"split\",\"temporary\"," + "\"reconcile\",\"recovery\",\"recovery_progress\",\"rts\"," + "\"salvage\",\"shared_cache\",\"split\",\"temporary\"," "\"thread_group\",\"timestamp\",\"transaction\",\"verify\"," "\"version\",\"write\"]", NULL, 0}, @@ -814,8 +814,8 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = { ",\"evictserver\",\"fileops\",\"handleops\",\"log\"," "\"history_store\",\"history_store_activity\",\"lsm\"," "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\"," - "\"rts\",\"salvage\",\"shared_cache\",\"split\",\"temporary\"," + "\"reconcile\",\"recovery\",\"recovery_progress\",\"rts\"," + "\"salvage\",\"shared_cache\",\"split\",\"temporary\"," "\"thread_group\",\"timestamp\",\"transaction\",\"verify\"," "\"version\",\"write\"]", NULL, 0}, @@ -933,7 +933,6 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator", {"WT_SESSION.prepare_transaction", "prepare_timestamp=", confchk_WT_SESSION_prepare_transaction, 1}, {"WT_SESSION.query_timestamp", "get=read", confchk_WT_SESSION_query_timestamp, 1}, - {"WT_SESSION.rebalance", "", NULL, 0}, {"WT_SESSION.reconfigure", "cache_cursors=true,ignore_cache_size=false," "isolation=read-committed", diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c 
b/src/third_party/wiredtiger/src/conn/conn_api.c index 4fe1086f339..99ec1ed72cb 100644 --- a/src/third_party/wiredtiger/src/conn/conn_api.c +++ b/src/third_party/wiredtiger/src/conn/conn_api.c @@ -1852,9 +1852,9 @@ __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]) {"history_store_activity", WT_VERB_HS_ACTIVITY}, {"lsm", WT_VERB_LSM}, {"lsm_manager", WT_VERB_LSM_MANAGER}, {"metadata", WT_VERB_METADATA}, {"mutex", WT_VERB_MUTEX}, {"overflow", WT_VERB_OVERFLOW}, {"read", WT_VERB_READ}, - {"rebalance", WT_VERB_REBALANCE}, {"reconcile", WT_VERB_RECONCILE}, - {"recovery", WT_VERB_RECOVERY}, {"recovery_progress", WT_VERB_RECOVERY_PROGRESS}, - {"rts", WT_VERB_RTS}, {"salvage", WT_VERB_SALVAGE}, {"shared_cache", WT_VERB_SHARED_CACHE}, + {"reconcile", WT_VERB_RECONCILE}, {"recovery", WT_VERB_RECOVERY}, + {"recovery_progress", WT_VERB_RECOVERY_PROGRESS}, {"rts", WT_VERB_RTS}, + {"salvage", WT_VERB_SALVAGE}, {"shared_cache", WT_VERB_SHARED_CACHE}, {"split", WT_VERB_SPLIT}, {"temporary", WT_VERB_TEMPORARY}, {"thread_group", WT_VERB_THREAD_GROUP}, {"timestamp", WT_VERB_TIMESTAMP}, {"transaction", WT_VERB_TRANSACTION}, {"verify", WT_VERB_VERIFY}, diff --git a/src/third_party/wiredtiger/src/conn/conn_dhandle.c b/src/third_party/wiredtiger/src/conn/conn_dhandle.c index 3f506bec283..16a9d7812b5 100644 --- a/src/third_party/wiredtiger/src/conn/conn_dhandle.c +++ b/src/third_party/wiredtiger/src/conn/conn_dhandle.c @@ -819,7 +819,7 @@ restart: } /* Shut down the history store table after all eviction is complete. 
*/ - __wt_hs_destroy(session); + __wt_hs_close(session); /* * Closing the files may have resulted in entries on our default session's list of open data diff --git a/src/third_party/wiredtiger/src/conn/conn_log.c b/src/third_party/wiredtiger/src/conn/conn_log.c index c22be09e3db..acba9ebb12c 100644 --- a/src/third_party/wiredtiger/src/conn/conn_log.c +++ b/src/third_party/wiredtiger/src/conn/conn_log.c @@ -406,7 +406,7 @@ __log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file) */ if ((dbg_val + 1) >= log->fileid) return (0); - if (log->ckpt_lsn.l.file == 1 && log->ckpt_lsn.l.offset == 0) + if (WT_IS_INIT_LSN(&log->ckpt_lsn)) min_lognum = log->fileid - (dbg_val + 1); else min_lognum = WT_MIN(log->fileid - (dbg_val + 1), min_lognum); @@ -584,7 +584,7 @@ __log_file_server(void *arg) * to be set again. Copy the LSN before clearing the file handle. Use a barrier to * make sure the compiler does not reorder the following two statements. */ - close_end_lsn = log->log_close_lsn; + WT_ASSIGN_LSN(&close_end_lsn, &log->log_close_lsn); WT_FULL_BARRIER(); log->log_close_fh = NULL; /* @@ -612,7 +612,7 @@ __log_file_server(void *arg) locked = true; WT_ERR(__wt_close(session, &close_fh)); WT_ASSERT(session, __wt_log_cmp(&close_end_lsn, &log->sync_lsn) >= 0); - log->sync_lsn = close_end_lsn; + WT_ASSIGN_LSN(&log->sync_lsn, &close_end_lsn); __wt_cond_signal(session, log->log_sync_cond); locked = false; __wt_spin_unlock(session, &log->log_sync_lock); @@ -625,7 +625,7 @@ __log_file_server(void *arg) /* * Save the latest write LSN which is the minimum we will have written to disk. */ - min_lsn = log->write_lsn; + WT_ASSIGN_LSN(&min_lsn, &log->write_lsn); /* * We have to wait until the LSN we asked for is written. If it isn't signal the wrlsn * thread to get it written. 
@@ -650,7 +650,7 @@ __log_file_server(void *arg) */ if (__wt_log_cmp(&log->sync_lsn, &min_lsn) <= 0) { WT_ASSERT(session, min_lsn.l.file == log->sync_lsn.l.file); - log->sync_lsn = min_lsn; + WT_ASSIGN_LSN(&log->sync_lsn, &min_lsn); __wt_cond_signal(session, log->log_sync_cond); } locked = false; @@ -733,7 +733,7 @@ restart: if (slot->slot_state != WT_LOG_SLOT_WRITTEN) continue; written[written_i].slot_index = save_i; - written[written_i++].lsn = slot->slot_release_lsn; + WT_ASSIGN_LSN(&written[written_i++].lsn, &slot->slot_release_lsn); } /* * If we found any written slots process them. We sort them based on the release LSN, and then @@ -773,7 +773,7 @@ restart: * If we get here we have a slot to coalesce and free. */ coalescing->slot_last_offset = slot->slot_last_offset; - coalescing->slot_end_lsn = slot->slot_end_lsn; + WT_ASSIGN_LSN(&coalescing->slot_end_lsn, &slot->slot_end_lsn); WT_STAT_CONN_INCR(session, log_slot_coalesced); /* * Copy the flag for later closing. @@ -786,7 +786,7 @@ restart: * slots. A synchronous write may update write_lsn so save the last one we saw to * check when coalescing slots. 
*/ - save_lsn = log->write_lsn; + WT_ASSIGN_LSN(&save_lsn, &log->write_lsn); if (__wt_log_cmp(&log->write_lsn, &written[i].lsn) != 0) { coalescing = slot; continue; @@ -802,8 +802,8 @@ restart: */ if (slot->slot_start_lsn.l.offset != slot->slot_last_offset) slot->slot_start_lsn.l.offset = (uint32_t)slot->slot_last_offset; - log->write_start_lsn = slot->slot_start_lsn; - log->write_lsn = slot->slot_end_lsn; + WT_ASSIGN_LSN(&log->write_start_lsn, &slot->slot_start_lsn); + WT_ASSIGN_LSN(&log->write_lsn, &slot->slot_end_lsn); __wt_cond_signal(session, log->log_write_cond); WT_STAT_CONN_INCR(session, log_write_lsn); /* diff --git a/src/third_party/wiredtiger/src/conn/conn_open.c b/src/third_party/wiredtiger/src/conn/conn_open.c index 3c0ee7fe74c..8d45b5eb678 100644 --- a/src/third_party/wiredtiger/src/conn/conn_open.c +++ b/src/third_party/wiredtiger/src/conn/conn_open.c @@ -217,15 +217,10 @@ __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[]) WT_RET(__wt_meta_track_init(session)); /* - * Drop the lookaside file if it still exists. - */ - WT_RET(__wt_hs_cleanup_las(session)); - - /* * Create the history store file. This will only actually create it on a clean upgrade or when * creating a new database. */ - WT_RET(__wt_hs_create(session, cfg)); + WT_RET(__wt_hs_open(session, cfg)); /* * Start the optional logging/archive threads. NOTE: The log manager must be started before diff --git a/src/third_party/wiredtiger/src/cursor/cur_hs.c b/src/third_party/wiredtiger/src/cursor/cur_hs.c new file mode 100644 index 00000000000..8aaca515f11 --- /dev/null +++ b/src/third_party/wiredtiger/src/cursor/cur_hs.c @@ -0,0 +1,91 @@ + +/*- + * Copyright (c) 2014-2020 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __wt_hs_cursor_open -- + * Open a new history store table cursor. 
+ */ +int +__wt_hs_cursor_open(WT_SESSION_IMPL *session) +{ + WT_CURSOR *cursor; + WT_DECL_RET; + const char *open_cursor_cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL}; + + /* Not allowed to open a cursor if you already have one */ + WT_ASSERT(session, session->hs_cursor == NULL); + + WT_WITHOUT_DHANDLE( + session, ret = __wt_open_cursor(session, WT_HS_URI, NULL, open_cursor_cfg, &cursor)); + WT_RET(ret); + + /* History store cursors should always ignore tombstones. */ + F_SET(cursor, WT_CURSTD_IGNORE_TOMBSTONE); + + session->hs_cursor = cursor; + return (0); +} + +/* + * __wt_hs_cursor_close -- + * Discard a history store cursor. + */ +int +__wt_hs_cursor_close(WT_SESSION_IMPL *session) +{ + /* Should only be called when session has an open history store cursor */ + WT_ASSERT(session, session->hs_cursor != NULL); + + WT_RET(session->hs_cursor->close(session->hs_cursor)); + session->hs_cursor = NULL; + return (0); +} + +/* + * __wt_hs_cursor_next -- + * Execute a next operation on a history store cursor with the appropriate isolation level. + */ +int +__wt_hs_cursor_next(WT_SESSION_IMPL *session, WT_CURSOR *cursor) +{ + WT_DECL_RET; + + WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED, ret = cursor->next(cursor)); + return (ret); +} + +/* + * __wt_hs_cursor_prev -- + * Execute a prev operation on a history store cursor with the appropriate isolation level. + */ +int +__wt_hs_cursor_prev(WT_SESSION_IMPL *session, WT_CURSOR *cursor) +{ + WT_DECL_RET; + + WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED, ret = cursor->prev(cursor)); + return (ret); +} + +/* + * __wt_hs_cursor_search_near -- + * Execute a search near operation on a history store cursor with the appropriate isolation + * level. 
+ */ +int +__wt_hs_cursor_search_near(WT_SESSION_IMPL *session, WT_CURSOR *cursor, int *exactp) +{ + WT_DECL_RET; + + WT_WITH_TXN_ISOLATION( + session, WT_ISO_READ_UNCOMMITTED, ret = cursor->search_near(cursor, exactp)); + return (ret); +} diff --git a/src/third_party/wiredtiger/src/cursor/cur_log.c b/src/third_party/wiredtiger/src/cursor/cur_log.c index 5d7d82805ba..01564f3fb34 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_log.c +++ b/src/third_party/wiredtiger/src/cursor/cur_log.c @@ -22,8 +22,8 @@ __curlog_logrec(WT_SESSION_IMPL *session, WT_ITEM *logrec, WT_LSN *lsnp, WT_LSN WT_UNUSED(firstrecord); /* Set up the LSNs and take a copy of the log record for the cursor. */ - *cl->cur_lsn = *lsnp; - *cl->next_lsn = *next_lsnp; + WT_ASSIGN_LSN(cl->cur_lsn, lsnp); + WT_ASSIGN_LSN(cl->next_lsn, next_lsnp); WT_RET(__wt_buf_set(session, cl->logrec, logrec->data, logrec->size)); /* diff --git a/src/third_party/wiredtiger/src/docs/devdoc-dhandle-lifecycle.dox b/src/third_party/wiredtiger/src/docs/arch-dhandle-lifecycle.dox index 8f79a0da22e..0a151eb41bc 100644 --- a/src/third_party/wiredtiger/src/docs/devdoc-dhandle-lifecycle.dox +++ b/src/third_party/wiredtiger/src/docs/arch-dhandle-lifecycle.dox @@ -1,4 +1,4 @@ -/*! @page devdoc-dhandle-lifecycle Data Handle Lifecycle +/*! @page arch-dhandle-lifecycle Data Handle Lifecycle A WiredTiger Data Handle (dhandle) is a generic representation of any named data source. This representation contains information such as its name, diff --git a/src/third_party/wiredtiger/src/docs/devdoc-glossary.dox b/src/third_party/wiredtiger/src/docs/arch-glossary.dox index 4cb762cbaa7..3ae6a90eb81 100644 --- a/src/third_party/wiredtiger/src/docs/devdoc-glossary.dox +++ b/src/third_party/wiredtiger/src/docs/arch-glossary.dox @@ -1,10 +1,10 @@ -/*! @page devdoc-glossary Developer Documentation Glossary of Terms +/*! 
@page arch-glossary Architecture Guide Glossary of Terms WiredTiger has a lot of domain specific nomenclature - this page attempts to decode it. This is intended for those navigating the WiredTiger source tree - it describes terms internal to the storage engine. -@section devdoc-glossary-general General Terms Used in WiredTiger +@section arch-glossary-general General Terms Used in WiredTiger <table> <caption id="general_terms">General Term Table</caption> @@ -12,14 +12,14 @@ tree - it describes terms internal to the storage engine. <tr><td>Hello<td>The typical next word is World </table> -@section devdoc-glossary-checkpoint Terms Related to Checkpoints +@section arch-glossary-checkpoint Terms Related to Checkpoints <table> <caption id="checkpoint_terms">Checkpoint Term Table</caption> <tr><th>Term <th>Definition <tr><td>Hello<td>The typical next word is World </table> -@section devdoc-glossary-transactions Terms Related to Transactions +@section arch-glossary-transactions Terms Related to Transactions <table> <caption id="transaction_terms">Transaction Term Table</caption> diff --git a/src/third_party/wiredtiger/src/docs/arch-index.dox b/src/third_party/wiredtiger/src/docs/arch-index.dox new file mode 100644 index 00000000000..53a49de7ed3 --- /dev/null +++ b/src/third_party/wiredtiger/src/docs/arch-index.dox @@ -0,0 +1,21 @@ +/*! @page arch-index WiredTiger Architecture Guide + +@subpage arch-glossary + +WiredTiger assigns specific meanings to certain words. Here we decode them. + +@subpage arch-schema + +Most applications begin to make use of WiredTiger by creating a table (or other +data object) to store their data in. Create is one of several schema operations +available in WiredTiger. + +@subpage arch-dhandle-lifecycle + +An internal structure called Data Handle (dhandle) is used to represent and +access a table in WiredTiger. A dhandle gets created when a table is accessed +for the first time. It is kept in a global list and is shared across the +sessions. 
When a dhandle is not needed anymore and has been idle for a while, +it is closed and destroyed, releasing all the resources associated with it. + +*/ diff --git a/src/third_party/wiredtiger/src/docs/devdoc-schema.dox b/src/third_party/wiredtiger/src/docs/arch-schema.dox index dd59aa2535b..f6ead3ac824 100644 --- a/src/third_party/wiredtiger/src/docs/devdoc-schema.dox +++ b/src/third_party/wiredtiger/src/docs/arch-schema.dox @@ -1,4 +1,4 @@ -/*! @page devdoc-schema Schema Operations +/*! @page arch-schema Schema Operations A schema defines the format of the application data in WiredTiger. WiredTiger supports various types of schemas (See @ref schema for more diff --git a/src/third_party/wiredtiger/src/docs/eviction.dox b/src/third_party/wiredtiger/src/docs/eviction.dox index 37d400f31cc..3519e4ca7f4 100644 --- a/src/third_party/wiredtiger/src/docs/eviction.dox +++ b/src/third_party/wiredtiger/src/docs/eviction.dox @@ -11,7 +11,7 @@ accessed out of the memory to free up enough space to read data that are requested by the user but currently reside on the disk back into memory. This process is called eviction. -@section overview Eviction overview +@section eviction_overview Eviction overview The WiredTiger eviction runs in the background with one eviction server thread and several eviction worker threads. diff --git a/src/third_party/wiredtiger/src/docs/introduction.dox b/src/third_party/wiredtiger/src/docs/introduction.dox index a3719e77fa3..52d1921996a 100644 --- a/src/third_party/wiredtiger/src/docs/introduction.dox +++ b/src/third_party/wiredtiger/src/docs/introduction.dox @@ -21,7 +21,7 @@ the ANSI C99, POSIX 1003.1 and POSIX 1003.1c (threads extension) standards. 
For more information on the WiredTiger architecture and why it might be right for your project, see: -- @subpage architecture +- @subpage overview For more information about building and installing WiredTiger, see: @@ -35,6 +35,14 @@ For more information about writing WiredTiger applications, see: (The Java API is not available on Windows.) - @ref wt "WiredTiger API reference manual" +For more information about the architecture and internals of WiredTiger, see: + +- @subpage arch-index + +For more information on tools and techniques used by WiredTiger developers, see: + +- @subpage tool-index + For more information about administrating WiredTiger databases, see: - @subpage command_line @@ -58,8 +66,4 @@ To browse the WiredTiger source code repository or contact us, see: - @subpage community -For more information on WiredTiger internals, see: - -- @subpage devdoc-index - */ diff --git a/src/third_party/wiredtiger/src/docs/architecture.dox b/src/third_party/wiredtiger/src/docs/overview.dox index 840910139c9..e4b8953cd41 100644 --- a/src/third_party/wiredtiger/src/docs/architecture.dox +++ b/src/third_party/wiredtiger/src/docs/overview.dox @@ -1,4 +1,4 @@ -/*! @page architecture WiredTiger Architecture +/*! @page overview WiredTiger Overview and Features The WiredTiger data engine is a high performance, scalable, transactional, production quality, open source, NoSQL data engine, created to maximize the diff --git a/src/third_party/wiredtiger/src/docs/programming.dox b/src/third_party/wiredtiger/src/docs/programming.dox index d2b9e244b79..ae3d3337860 100644 --- a/src/third_party/wiredtiger/src/docs/programming.dox +++ b/src/third_party/wiredtiger/src/docs/programming.dox @@ -44,7 +44,6 @@ each of which is ordered by one or more columns. 
- @subpage in_memory - @subpage cursor_join - @subpage cursor_log -- @subpage rebalance - @subpage operation_tracking - @subpage shared_cache - @subpage statistics diff --git a/src/third_party/wiredtiger/src/docs/rebalance.dox b/src/third_party/wiredtiger/src/docs/rebalance.dox deleted file mode 100644 index a6acfe07ef5..00000000000 --- a/src/third_party/wiredtiger/src/docs/rebalance.dox +++ /dev/null @@ -1,14 +0,0 @@ -/*! @m_page{{c,java},rebalance,Rebalance} - -The WT_SESSION::rebalance method can be used to rebalance data sources' -underlying btrees. If a tree has become unbalanced (that is, one part of -the tree is excessively deep), WT_SESSION::rebalance rewrites the tree -as a balanced tree. - -The data source must be quiescent. - -The WT_SESSION::rebalance method should never be needed, as WiredTiger -btrees are maintained as balanced trees. It is only provided as a tool -to handle the unexpected. - - */ diff --git a/src/third_party/wiredtiger/src/docs/spell.ok b/src/third_party/wiredtiger/src/docs/spell.ok index af8fbff10a8..26f40b5e099 100644 --- a/src/third_party/wiredtiger/src/docs/spell.ok +++ b/src/third_party/wiredtiger/src/docs/spell.ok @@ -85,7 +85,6 @@ PMU PPC PRELOAD README -Rebalance RedHat RepMgr Riak @@ -205,7 +204,6 @@ desc destructor destructors dev -devdoc dhandle dhandle's dhandles @@ -450,7 +448,6 @@ readonly realclean realloc realloc'd -rebalance recno recnoN recnum diff --git a/src/third_party/wiredtiger/src/docs/devdoc-index.dox b/src/third_party/wiredtiger/src/docs/tool-index.dox index bcae15c1f48..afcad075022 100644 --- a/src/third_party/wiredtiger/src/docs/devdoc-index.dox +++ b/src/third_party/wiredtiger/src/docs/tool-index.dox @@ -1,38 +1,20 @@ -/*! @page devdoc-index Developer Documentation +/*! @page tool-index Tools and Techniques -@subpage devdoc-glossary - -WiredTiger assigns specific meanings to certain words. Here we decode them. 
- -@subpage devdoc-schema - -Most applications begin to make use of WiredTiger by creating a table (or other -data object) to store their data in. Create is one of several schema operations -available in WiredTiger. - -@subpage devdoc-dhandle-lifecycle - -An internal structure called Data Handle (dhandle) is used to represent and -access a table in WiredTiger. A dhandle gets created when a table is accessed -for the first time. It is kept in a global list and is shared across the -sessions. When a dhandle is not needed anymore and has been idle for a while, -it is closed and destroyed, releasing all the resources associated with it. - -@subpage devdoc-statistics +@subpage tool-statistics WiredTiger can generate statistics that are useful for providing information necessary when performance tuning your WiredTiger application. Here we focus on analyzing and reviewing the data generated by the statistics logging functionality. -@subpage devdoc-optrack +@subpage tool-optrack The purpose of operation tracking is to visualize WiredTiger's execution so that correlations between performance anomalies are easily spotted. This operation tracking tutorial provides a general overview of operation tracking and describes ways to visualize the data in fine detail. -@subpage devdoc-perf +@subpage tool-perf Linux `perf` is a tool that allows counting and sampling of various events in the hardware and in the kernel. Hardware events are available via performance @@ -40,17 +22,22 @@ monitoring units (PMU); they measure CPU cycles, cache misses, branches, etc. Kernel events include scheduling context switches, page faults and block I/O. Here we provide a quick cheat sheet of how to use `perf` with WiredTiger. -@subpage devdoc-xray +@subpage tool-xray XRay is a tool, originally developed at Google and now integrated in LLVM, that instruments the program such that when it runs it produces a trace of executed functions and their timestamps. 
This article explains how to instrument WiredTiger, collect the XRay traces, and analyze them. -@subpage devdoc-perf-flamegraphs +@subpage tool-perf-flamegraphs Why is my CPU busy? FlameGraphs help visually summarize on-CPU call stacks and allow for the quick identification of hot code paths. Here we explain how to generate FlameGraphs from WiredTiger `perf` data. +@section tool-other Other Resources + +The WiredTiger @ref command_line has facilities for examining tables +and metadata, and has various other administrative functions. + */ diff --git a/src/third_party/wiredtiger/src/docs/devdoc-optrack.dox b/src/third_party/wiredtiger/src/docs/tool-optrack.dox index 8d1253dd903..c13dd31ae84 100644 --- a/src/third_party/wiredtiger/src/docs/devdoc-optrack.dox +++ b/src/third_party/wiredtiger/src/docs/tool-optrack.dox @@ -1,4 +1,4 @@ -/*! @page devdoc-optrack Operation Tracking +/*! @page tool-optrack Operation Tracking # Overview diff --git a/src/third_party/wiredtiger/src/docs/devdoc-perf-flamegraphs.dox b/src/third_party/wiredtiger/src/docs/tool-perf-flamegraphs.dox index 1e5bc1ad4fd..5d632b809b7 100644 --- a/src/third_party/wiredtiger/src/docs/devdoc-perf-flamegraphs.dox +++ b/src/third_party/wiredtiger/src/docs/tool-perf-flamegraphs.dox @@ -1,4 +1,4 @@ -/*! @page devdoc-perf-flamegraphs CPU Flame Graphs +/*! @page tool-perf-flamegraphs CPU Flame Graphs # Introduction @@ -41,7 +41,7 @@ their full names. # Generating FlameGraphs To generate FlameGraphs, you first need to run perf and generate a perf.data -file using the `perf record -g` command. [Click here](@ref devdoc-perf) for +file using the `perf record -g` command. [Click here](@ref tool-perf) for instructions explaining how to do this with WiredTiger. Don't forget the `-g` option: you need it in order to record the call stacks! 
diff --git a/src/third_party/wiredtiger/src/docs/devdoc-perf.dox b/src/third_party/wiredtiger/src/docs/tool-perf.dox index 4739fba897f..62b9d4cc2d9 100644 --- a/src/third_party/wiredtiger/src/docs/devdoc-perf.dox +++ b/src/third_party/wiredtiger/src/docs/tool-perf.dox @@ -1,4 +1,4 @@ -/*! @page devdoc-perf Performance Monitoring +/*! @page tool-perf Performance Monitoring # Overview @@ -23,7 +23,7 @@ blocking](https://perf.wiki.kernel.org/index.php/Tutorial#Profiling_sleep_times) I was not able to get it to do so after many attempts. For alternative ways to perform off-CPU analysis, read [this post by Brendan Gregg](http://www.brendangregg.com/offcpuanalysis.html) or use WiredTiger's -@ref devdoc-optrack. +@ref tool-optrack. What follows is a quick cheat sheet of how to use `perf` with WiredTiger. Most of the information in this cheat sheet comes from [this excellent tutorial by diff --git a/src/third_party/wiredtiger/src/docs/devdoc-statistics.dox b/src/third_party/wiredtiger/src/docs/tool-statistics.dox index d8befa61136..5fee38c8fcf 100644 --- a/src/third_party/wiredtiger/src/docs/devdoc-statistics.dox +++ b/src/third_party/wiredtiger/src/docs/tool-statistics.dox @@ -1,4 +1,4 @@ -/*! @page devdoc-statistics Statistics Logging +/*! @page tool-statistics Statistics Logging # Introduction diff --git a/src/third_party/wiredtiger/src/docs/devdoc-xray.dox b/src/third_party/wiredtiger/src/docs/tool-xray.dox index 145c3ac0a19..176fcaca7d8 100644 --- a/src/third_party/wiredtiger/src/docs/devdoc-xray.dox +++ b/src/third_party/wiredtiger/src/docs/tool-xray.dox @@ -1,4 +1,4 @@ -/*! @page devdoc-xray Instrumentation and introspection with XRay +/*! @page tool-xray Instrumentation and introspection with XRay # Instrumenting with XRay @@ -75,7 +75,7 @@ of Brendan Gregg's FlameGraph script (\c flamegraph.pl). After running a program instrumented with XRay, a log file will be produced containing performance information. 
There is a tool called \c xray_to_optrack which is designed to convert this log to the [Operation Tracking] -(@ref devdoc-optrack) format. +(@ref tool-optrack) format. ## Step 1: Take a copy of the wtperf binary and the XRay log @@ -146,6 +146,6 @@ our case, and \c xray_log is the log file. The script will produce one or more files with a prefix \c optrack. You can view these files with optrack tools, described in the [optrack documentation] -(@ref devdoc-optrack). +(@ref tool-optrack). */ diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c index bf5b10c6bb6..b016332338c 100644 --- a/src/third_party/wiredtiger/src/evict/evict_lru.c +++ b/src/third_party/wiredtiger/src/evict/evict_lru.c @@ -1791,9 +1791,15 @@ __evict_walk_tree(WT_SESSION_IMPL *session, WT_EVICT_QUEUE *queue, u_int max_ent read_flags = WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_GEN | WT_READ_NO_WAIT | WT_READ_NOTFOUND_OK | WT_READ_RESTART_OK; if (btree->evict_ref == NULL) { - /* Ensure internal pages indexes remain valid */ - WT_WITH_PAGE_INDEX( - session, ret = __wt_random_descent(session, &btree->evict_ref, read_flags)); + for (;;) { + /* Ensure internal pages indexes remain valid */ + WT_WITH_PAGE_INDEX( + session, ret = __wt_random_descent(session, &btree->evict_ref, read_flags)); + if (ret != WT_RESTART) + break; + WT_STAT_CONN_INCR(session, cache_eviction_walk_restart); + WT_STAT_DATA_INCR(session, cache_eviction_walk_restart); + } WT_RET_NOTFOUND_OK(ret); } break; @@ -2224,7 +2230,6 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server) WT_REF *ref; WT_TRACK_OP_DECL; uint64_t time_start, time_stop; - uint32_t flags; uint8_t previous_state; bool app_timer; @@ -2237,8 +2242,6 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server) cache = S2C(session)->cache; time_start = time_stop = 0; - flags = 0; - /* * An internal session flags either the server itself or an eviction worker thread. 
*/ @@ -2257,10 +2260,6 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server) } } - /* Set a flag to indicate that either eviction server or worker thread is evicting the page. */ - if (F_ISSET(session, WT_SESSION_INTERNAL)) - LF_SET(WT_REC_EVICTION_THREAD); - /* * In case something goes wrong, don't pick the same set of pages every time. * @@ -2270,7 +2269,7 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server) */ __wt_cache_read_gen_bump(session, ref->page); - WT_WITH_BTREE(session, btree, ret = __wt_evict(session, ref, previous_state, flags)); + WT_WITH_BTREE(session, btree, ret = __wt_evict(session, ref, previous_state, 0)); (void)__wt_atomic_subv32(&btree->evict_busy, 1); diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c index c6fbfcb860d..7b432308b5c 100644 --- a/src/third_party/wiredtiger/src/evict/evict_page.c +++ b/src/third_party/wiredtiger/src/evict/evict_page.c @@ -546,7 +546,6 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t evict_flags, bool WT_PAGE *page; uint32_t flags; bool closing, modified; - bool snapshot_acquired; *inmem_splitp = false; @@ -554,7 +553,8 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t evict_flags, bool page = ref->page; flags = WT_REC_EVICT; closing = FLD_ISSET(evict_flags, WT_EVICT_CALL_CLOSING); - snapshot_acquired = false; + if (!WT_SESSION_BTREE_SYNC(session)) + LF_SET(WT_REC_VISIBLE_ALL); /* * Fail if an internal has active children, the children must be evicted first. The test is @@ -675,64 +675,12 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t evict_flags, bool LF_SET(WT_REC_SCRUB); } - /* Acquire a snapshot if coming through eviction thread route. */ - - /* - * TODO: We are deliberately not using a snapshot when checkpoint is active. This will ensure - * that point-in-time checkpoints have a consistent version of data. 
Remove this condition once - * fuzzy transaction ID based checkpoints work is merged. - */ - if (FLD_ISSET(evict_flags, WT_REC_EVICTION_THREAD) && !WT_IS_HS(S2BT(session)) && - !conn->txn_global.checkpoint_running) { - /* - * We rely on the fact that the eviction threads are created with read committed isolation - * by default. If this fact doesn't hold anymore in future, we have to force isolation - * level. - */ - WT_ASSERT(session, session->txn->isolation == WT_ISO_READ_COMMITTED); - - /* - * Eviction threads do not need to pin anything in the cache. We have a exclusive lock for - * the page being evicted so we are sure that the page will always be there while it is - * being processed. Therefore, we use snapshot API that doesn't publish shared IDs to the - * outside world. - */ - __wt_txn_bump_snapshot(session); - - /* - * Make sure once more that there is no checkpoint running. A new checkpoint might have - * started between previous check and acquiring snapshot. If there is a checkpoint running, - * release the checkpoint and fallback to global visibility checks. - */ - if (conn->txn_global.checkpoint_running) { - __wt_txn_release_snapshot(session); - LF_SET(WT_REC_VISIBLE_ALL); - } else { - /* - * If we acquired a snapshot for eviction, force the isolation to ensure the snapshot - * isn't released when history store cursors are closed. - */ - ++session->txn->forced_iso; - snapshot_acquired = true; - } - } else if (!WT_SESSION_BTREE_SYNC(session)) - LF_SET(WT_REC_VISIBLE_ALL); - - WT_ASSERT(session, LF_ISSET(WT_REC_VISIBLE_ALL) || F_ISSET(session->txn, WT_TXN_HAS_SNAPSHOT)); - - /* - * Reconcile the page. - */ + /* Reconcile the page. 
*/ ret = __wt_reconcile(session, ref, NULL, flags); if (ret != 0) WT_STAT_CONN_INCR(session, cache_eviction_fail_in_reconciliation); - if (snapshot_acquired) { - --session->txn->forced_iso; - __wt_txn_release_snapshot(session); - } - WT_RET(ret); /* diff --git a/src/third_party/wiredtiger/src/history/hs_conn.c b/src/third_party/wiredtiger/src/history/hs_conn.c new file mode 100644 index 00000000000..2917aec815c --- /dev/null +++ b/src/third_party/wiredtiger/src/history/hs_conn.c @@ -0,0 +1,175 @@ +/*- + * Copyright (c) 2014-2020 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __hs_start_internal_session -- + * Create a temporary internal session to retrieve history store. + */ +static int +__hs_start_internal_session(WT_SESSION_IMPL *session, WT_SESSION_IMPL **int_sessionp) +{ + WT_ASSERT(session, !F_ISSET(session, WT_CONN_HS_OPEN)); + return (__wt_open_internal_session(S2C(session), "hs_access", true, 0, int_sessionp)); +} + +/* + * __hs_release_internal_session -- + * Release the temporary internal session started to retrieve history store. + */ +static int +__hs_release_internal_session(WT_SESSION_IMPL *int_session) +{ + return (__wt_session_close_internal(int_session)); +} + +/* + * __hs_cleanup_las -- + * Drop the lookaside file if it exists. + */ +static int +__hs_cleanup_las(WT_SESSION_IMPL *session) +{ + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + const char *drop_cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_drop), "force=true", NULL}; + + conn = S2C(session); + + /* Read-only and in-memory configurations won't drop the lookaside. */ + if (F_ISSET(conn, WT_CONN_IN_MEMORY | WT_CONN_READONLY)) + return (0); + + /* The LAS table may exist on upgrade. Discard it. 
*/ + WT_WITH_SCHEMA_LOCK( + session, ret = __wt_schema_drop(session, "file:WiredTigerLAS.wt", drop_cfg)); + + return (ret); +} + +/* + * __wt_hs_get_btree -- + * Get the history store btree. Open a history store cursor if needed to get the btree. + */ +int +__wt_hs_get_btree(WT_SESSION_IMPL *session, WT_BTREE **hs_btreep) +{ + WT_DECL_RET; + + *hs_btreep = NULL; + + WT_RET(__wt_hs_cursor_open(session)); + + *hs_btreep = CUR2BT(session->hs_cursor); + WT_ASSERT(session, *hs_btreep != NULL); + + WT_TRET(__wt_hs_cursor_close(session)); + + return (ret); +} + +/* + * __wt_hs_config -- + * Configure the history store table. + */ +int +__wt_hs_config(WT_SESSION_IMPL *session, const char **cfg) +{ + WT_BTREE *btree; + WT_CONFIG_ITEM cval; + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + WT_SESSION_IMPL *tmp_setup_session; + + conn = S2C(session); + tmp_setup_session = NULL; + + WT_ERR(__wt_config_gets(session, cfg, "history_store.file_max", &cval)); + if (cval.val != 0 && cval.val < WT_HS_FILE_MIN) + WT_ERR_MSG(session, EINVAL, "max history store size %" PRId64 " below minimum %d", cval.val, + WT_HS_FILE_MIN); + + /* in-memory or readonly configurations do not have a history store. */ + if (F_ISSET(conn, WT_CONN_IN_MEMORY | WT_CONN_READONLY)) + return (0); + + WT_ERR(__hs_start_internal_session(session, &tmp_setup_session)); + + /* + * Retrieve the btree from the history store cursor. + */ + WT_ERR(__wt_hs_get_btree(tmp_setup_session, &btree)); + + /* Track the history store file ID. */ + if (conn->cache->hs_fileid == 0) + conn->cache->hs_fileid = btree->id; + + /* + * Set special flags for the history store table: the history store flag (used, for example, to + * avoid writing records during reconciliation), also turn off checkpoints and logging. + * + * Test flags before setting them so updates can't race in subsequent opens (the first update is + * safe because it's single-threaded from wiredtiger_open). 
+ */ + if (!F_ISSET(btree, WT_BTREE_HS)) + F_SET(btree, WT_BTREE_HS); + if (!F_ISSET(btree, WT_BTREE_NO_LOGGING)) + F_SET(btree, WT_BTREE_NO_LOGGING); + + /* + * We need to set file_max on the btree associated with one of the history store sessions. + */ + btree->file_max = (uint64_t)cval.val; + WT_STAT_CONN_SET(session, cache_hs_ondisk_max, btree->file_max); + +err: + if (tmp_setup_session != NULL) + WT_TRET(__hs_release_internal_session(tmp_setup_session)); + return (ret); +} + +/* + * __wt_hs_open -- + * Initialize the database's history store. + */ +int +__wt_hs_open(WT_SESSION_IMPL *session, const char **cfg) +{ + WT_CONNECTION_IMPL *conn; + + conn = S2C(session); + + /* Read-only and in-memory configurations don't need the history store table. */ + if (F_ISSET(conn, WT_CONN_IN_MEMORY | WT_CONN_READONLY)) + return (0); + + /* Drop the lookaside file if it still exists. */ + WT_RET(__hs_cleanup_las(session)); + + /* Create the table. */ + WT_RET(__wt_session_create(session, WT_HS_URI, WT_HS_CONFIG)); + + WT_RET(__wt_hs_config(session, cfg)); + + /* The statistics server is already running, make sure we don't race. */ + WT_WRITE_BARRIER(); + F_SET(conn, WT_CONN_HS_OPEN); + + return (0); +} + +/* + * __wt_hs_close -- + * Destroy the database's history store. + */ +void +__wt_hs_close(WT_SESSION_IMPL *session) +{ + F_CLR(S2C(session), WT_CONN_HS_OPEN); +} diff --git a/src/third_party/wiredtiger/src/history/hs_cursor.c b/src/third_party/wiredtiger/src/history/hs_cursor.c new file mode 100644 index 00000000000..02de1fc5a52 --- /dev/null +++ b/src/third_party/wiredtiger/src/history/hs_cursor.c @@ -0,0 +1,421 @@ +/*- + * Copyright (c) 2014-2020 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __wt_hs_row_search -- + * Search the history store for a given key and position the cursor on it. 
+ */ +int +__wt_hs_row_search(WT_CURSOR_BTREE *hs_cbt, WT_ITEM *srch_key, bool insert) +{ + WT_CURSOR *hs_cursor; + WT_DECL_RET; + bool leaf_found; + + hs_cursor = &hs_cbt->iface; + leaf_found = false; + + /* + * Check whether the search key can be found in the provided leaf page, if one exists. Otherwise + * perform a full search. + */ + if (hs_cbt->ref != NULL) { + WT_WITH_BTREE(CUR2S(hs_cbt), CUR2BT(hs_cbt), + ret = __wt_row_search(hs_cbt, srch_key, insert, hs_cbt->ref, false, &leaf_found)); + WT_RET(ret); + + /* + * Only use the pinned page search results if search returns an exact match or a slot other + * than the page's boundary slots; if that's not the case, the record might belong on an + * entirely different page. + */ + if (leaf_found && + (hs_cbt->compare != 0 && + (hs_cbt->slot == 0 || hs_cbt->slot == hs_cbt->ref->page->entries - 1))) + leaf_found = false; + if (!leaf_found) + hs_cursor->reset(hs_cursor); + } + + if (!leaf_found) + WT_WITH_BTREE(CUR2S(hs_cbt), CUR2BT(hs_cbt), + ret = __wt_row_search(hs_cbt, srch_key, insert, NULL, false, NULL)); + +#ifdef HAVE_DIAGNOSTIC + WT_TRET(__wt_cursor_key_order_init(hs_cbt)); +#endif + return (ret); +} + +/* + * __wt_hs_modify -- + * Make an update to the history store. + * + * History store updates don't use transactions as those updates should be immediately visible and + * don't follow normal transaction semantics. For this reason, history store updates are + * directly modified using the low level api instead of the ordinary cursor api. + */ +int +__wt_hs_modify(WT_CURSOR_BTREE *hs_cbt, WT_UPDATE *hs_upd) +{ + WT_DECL_RET; + + /* + * We don't have exclusive access to the history store page so we need to pass "false" here to + * ensure that we're locking when inserting new keys to an insert list. 
+ */ + WT_WITH_BTREE(CUR2S(hs_cbt), CUR2BT(hs_cbt), + ret = __wt_row_modify(hs_cbt, &hs_cbt->iface.key, NULL, hs_upd, WT_UPDATE_INVALID, false)); + return (ret); +} + +/* + * __hs_cursor_position_int -- + * Internal function to position a history store cursor at the end of a set of updates for a + * given btree id, record key and timestamp. + */ +static int +__hs_cursor_position_int(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id, + const WT_ITEM *key, wt_timestamp_t timestamp, WT_ITEM *user_srch_key) +{ + WT_DECL_ITEM(srch_key); + WT_DECL_RET; + int cmp, exact; + + if (user_srch_key == NULL) + WT_RET(__wt_scr_alloc(session, 0, &srch_key)); + else + srch_key = user_srch_key; + + /* + * Because of the special visibility rules for the history store, a new key can appear in + * between our search and the set of updates that we're interested in. Keep trying until we find + * it. + * + * There may be no history store entries for the given btree id and record key if they have been + * removed by WT_CONNECTION::rollback_to_stable. + * + * Note that we need to compare the raw key off the cursor to determine where we are in the + * history store as opposed to comparing the embedded data store key since the ordering is not + * guaranteed to be the same. + */ + cursor->set_key( + cursor, btree_id, key, timestamp != WT_TS_NONE ? timestamp : WT_TS_MAX, UINT64_MAX); + /* Copy the raw key before searching as a basis for comparison. */ + WT_ERR(__wt_buf_set(session, srch_key, cursor->key.data, cursor->key.size)); + WT_ERR(cursor->search_near(cursor, &exact)); + if (exact > 0) { + /* + * It's possible that we may race with a history store insert for another key. So we may be + * more than one record away from the end of our target key/timestamp range. Keep iterating + * backwards until the cursor is at or before our search key. 
+ */ + while ((ret = cursor->prev(cursor)) == 0) { + WT_STAT_CONN_INCR(session, cursor_skip_hs_cur_position); + WT_STAT_DATA_INCR(session, cursor_skip_hs_cur_position); + + WT_ERR(__wt_compare(session, NULL, &cursor->key, srch_key, &cmp)); + if (cmp <= 0) + break; + } + } +#ifdef HAVE_DIAGNOSTIC + if (ret == 0) { + WT_ERR(__wt_compare(session, NULL, &cursor->key, srch_key, &cmp)); + WT_ASSERT(session, cmp <= 0); + } +#endif +err: + if (user_srch_key == NULL) + __wt_scr_free(session, &srch_key); + return (ret); +} + +/* + * __wt_hs_cursor_position -- + * Position a history store cursor at the end of a set of updates for a given btree id, record + * key and timestamp. There may be no history store entries for the given btree id and record + * key if they have been removed by WT_CONNECTION::rollback_to_stable. There is an optional + * argument to store the key that we used to position the cursor which can be used to assess + * where the cursor is relative to it. The function executes with isolation level set as + * WT_ISO_READ_UNCOMMITTED. + */ +int +__wt_hs_cursor_position(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id, + const WT_ITEM *key, wt_timestamp_t timestamp, WT_ITEM *user_srch_key) +{ + WT_DECL_RET; + WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED, + ret = __hs_cursor_position_int(session, cursor, btree_id, key, timestamp, user_srch_key)); + return (ret); +} + +/* + * __wt_hs_find_upd -- + * Scan the history store for a record the btree cursor wants to position on. Create an update + * for the record and return to the caller. The caller may choose to optionally allow prepared + * updates to be returned regardless of whether prepare is being ignored globally. Otherwise, a + * prepare conflict will be returned upon reading a prepared update. 
+ */ +int +__wt_hs_find_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_format, uint64_t recno, + WT_UPDATE_VALUE *upd_value, bool allow_prepare, WT_ITEM *on_disk_buf, WT_TIME_WINDOW *on_disk_tw) +{ + WT_CURSOR *hs_cursor; + WT_CURSOR_BTREE *hs_cbt; + WT_DECL_ITEM(hs_value); + WT_DECL_ITEM(orig_hs_value_buf); + WT_DECL_RET; + WT_ITEM hs_key, recno_key; + WT_MODIFY_VECTOR modifies; + WT_TXN *txn; + WT_TXN_SHARED *txn_shared; + WT_UPDATE *mod_upd, *upd; + wt_timestamp_t durable_timestamp, durable_timestamp_tmp, hs_start_ts, hs_start_ts_tmp; + wt_timestamp_t hs_stop_durable_ts, hs_stop_durable_ts_tmp, read_timestamp; + uint64_t hs_counter, hs_counter_tmp, upd_type_full; + uint32_t hs_btree_id; + uint8_t *p, recno_key_buf[WT_INTPACK64_MAXSIZE], upd_type; + int cmp; + bool modify; + + hs_cursor = NULL; + mod_upd = upd = NULL; + orig_hs_value_buf = NULL; + WT_CLEAR(hs_key); + __wt_modify_vector_init(session, &modifies); + txn = session->txn; + txn_shared = WT_SESSION_TXN_SHARED(session); + hs_btree_id = S2BT(session)->id; + WT_NOT_READ(modify, false); + + WT_STAT_CONN_INCR(session, cursor_search_hs); + WT_STAT_DATA_INCR(session, cursor_search_hs); + + /* Row-store key is as passed to us, create the column-store key as needed. */ + WT_ASSERT( + session, (key == NULL && recno != WT_RECNO_OOB) || (key != NULL && recno == WT_RECNO_OOB)); + if (key == NULL) { + p = recno_key_buf; + WT_RET(__wt_vpack_uint(&p, 0, recno)); + memset(&recno_key, 0, sizeof(recno_key)); + key = &recno_key; + key->data = recno_key_buf; + key->size = WT_PTRDIFF(p, recno_key_buf); + } + + /* Allocate buffer for the history store value. */ + WT_ERR(__wt_scr_alloc(session, 0, &hs_value)); + + /* Open a history store table cursor. */ + WT_ERR(__wt_hs_cursor_open(session)); + hs_cursor = session->hs_cursor; + hs_cbt = (WT_CURSOR_BTREE *)hs_cursor; + + /* + * After positioning our cursor, we're stepping backwards to find the correct update. 
Since the + * timestamp is part of the key, our cursor needs to go from the newest record (further in the + * history store) to the oldest (earlier in the history store) for a given key. + */ + read_timestamp = allow_prepare ? txn->prepare_timestamp : txn_shared->read_timestamp; + WT_ERR_NOTFOUND_OK( + __wt_hs_cursor_position(session, hs_cursor, hs_btree_id, key, read_timestamp, NULL), true); + if (ret == WT_NOTFOUND) { + ret = 0; + goto done; + } + for (;; ret = __wt_hs_cursor_prev(session, hs_cursor)) { + WT_ERR_NOTFOUND_OK(ret, true); + /* If we hit the end of the table, let's get out of here. */ + if (ret == WT_NOTFOUND) { + ret = 0; + goto done; + } + WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_start_ts, &hs_counter)); + + /* Stop before crossing over to another btree. */ + if (hs_btree_id != S2BT(session)->id) + goto done; + + /* + * Keys are sorted in order and we are stepping backwards from the end of the desired key's + * updates, so bail out once we see a different key: the record we want isn't here. + */ + WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp)); + if (cmp != 0) + goto done; + + /* + * If the stop time pair on the tombstone in the history store is already globally visible + * we can skip it. + */ + if (__wt_txn_tw_stop_visible_all(session, &hs_cbt->upd_value->tw)) { + WT_STAT_CONN_INCR(session, cursor_prev_hs_tombstone); + continue; + } + /* + * If the stop time point of a record is visible to us, we won't be able to see anything for + * this entire key. Just jump straight to the end. + */ + if (__wt_txn_tw_stop_visible(session, &hs_cbt->upd_value->tw)) + goto done; + /* If the start time point is visible to us, let's return that record. 
*/ + if (__wt_txn_tw_start_visible(session, &hs_cbt->upd_value->tw)) + break; + } + + WT_ERR(hs_cursor->get_value( + hs_cursor, &hs_stop_durable_ts, &durable_timestamp, &upd_type_full, hs_value)); + upd_type = (uint8_t)upd_type_full; + + /* We do not have tombstones in the history store anymore. */ + WT_ASSERT(session, upd_type != WT_UPDATE_TOMBSTONE); + + /* + * If the caller has signalled they don't need the value buffer, don't bother reconstructing a + * modify update or copying the contents into the value buffer. + */ + if (upd_value->skip_buf) + goto skip_buf; + + /* + * Keep walking until we get a non-modify update. Once we get to that point, squash the updates + * together. + */ + if (upd_type == WT_UPDATE_MODIFY) { + WT_NOT_READ(modify, true); + /* Store this so that we don't have to make a special case for the first modify. */ + hs_stop_durable_ts_tmp = hs_stop_durable_ts; + + /* + * Resolving update chains of reverse deltas requires the current transaction to look beyond + * its current snapshot in certain scenarios. This flag allows us to ignore transaction + * visibility checks when reading in order to construct the modify chain, so we can create + * the value we expect. + */ + while (upd_type == WT_UPDATE_MODIFY) { + WT_ERR(__wt_upd_alloc(session, hs_value, upd_type, &mod_upd, NULL)); + WT_ERR(__wt_modify_vector_push(&modifies, mod_upd)); + mod_upd = NULL; + + /* + * Find the base update to apply the reverse deltas. If our cursor next fails to find an + * update here we fall back to the datastore version. If its timestamp doesn't match our + * timestamp then we return not found. + */ + WT_ERR_NOTFOUND_OK(__wt_hs_cursor_next(session, hs_cursor), true); + if (ret == WT_NOTFOUND) { + /* Fall back to the on-disk value as the base value. */ + orig_hs_value_buf = hs_value; + hs_value = on_disk_buf; + upd_type = WT_UPDATE_STANDARD; + break; + } + hs_start_ts_tmp = WT_TS_NONE; + /* + * Make sure we use the temporary variants of these variables. 
We need to retain the + * timestamps of the original modify we saw. + * + * We keep stepping forward in the history store until we find a base update to apply + * the reverse deltas on top of. + */ + WT_ERR(hs_cursor->get_key( + hs_cursor, &hs_btree_id, &hs_key, &hs_start_ts_tmp, &hs_counter_tmp)); + + if (hs_btree_id != S2BT(session)->id) { + /* Fall back to the on-disk value as the base value. */ + orig_hs_value_buf = hs_value; + hs_value = on_disk_buf; + upd_type = WT_UPDATE_STANDARD; + break; + } + + WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp)); + + if (cmp != 0) { + /* Fall back to the on-disk value as the base value. */ + orig_hs_value_buf = hs_value; + hs_value = on_disk_buf; + upd_type = WT_UPDATE_STANDARD; + break; + } + + /* + * If we find a history store record that either corresponds to the on-disk value or is + * newer than it then we should use the on-disk value as the base value and apply our + * modifies on top of it. + */ + if (on_disk_tw->start_ts < hs_start_ts_tmp || + (on_disk_tw->start_ts == hs_start_ts_tmp && + on_disk_tw->start_txn <= hs_cbt->upd_value->tw.start_txn)) { + /* Fall back to the on-disk value as the base value. */ + orig_hs_value_buf = hs_value; + hs_value = on_disk_buf; + upd_type = WT_UPDATE_STANDARD; + break; + } + + WT_ERR(hs_cursor->get_value(hs_cursor, &hs_stop_durable_ts_tmp, &durable_timestamp_tmp, + &upd_type_full, hs_value)); + upd_type = (uint8_t)upd_type_full; + } + WT_ASSERT(session, upd_type == WT_UPDATE_STANDARD); + while (modifies.size > 0) { + __wt_modify_vector_pop(&modifies, &mod_upd); + WT_ERR(__wt_modify_apply_item(session, value_format, hs_value, mod_upd->data)); + __wt_free_update_list(session, &mod_upd); + mod_upd = NULL; + } + WT_STAT_CONN_INCR(session, cache_hs_read_squash); + } + + /* + * Potential optimization: We can likely get rid of this copy and the update allocation above. 
+ * We already have buffers containing the modify values so there's no good reason to allocate an + * update other than to work with our modify vector implementation. + */ + WT_ERR(__wt_buf_set(session, &upd_value->buf, hs_value->data, hs_value->size)); +skip_buf: + upd_value->tw.durable_start_ts = durable_timestamp; + upd_value->tw.start_txn = WT_TXN_NONE; + upd_value->type = upd_type; + +done: +err: + if (orig_hs_value_buf != NULL) + __wt_scr_free(session, &orig_hs_value_buf); + else + __wt_scr_free(session, &hs_value); + WT_ASSERT(session, hs_key.mem == NULL && hs_key.memsize == 0); + + WT_TRET(__wt_hs_cursor_close(session)); + + __wt_free_update_list(session, &mod_upd); + while (modifies.size > 0) { + __wt_modify_vector_pop(&modifies, &upd); + __wt_free_update_list(session, &upd); + } + __wt_modify_vector_free(&modifies); + + if (ret == 0) { + /* Couldn't find a record. */ + if (upd == NULL) { + ret = WT_NOTFOUND; + WT_STAT_CONN_INCR(session, cache_hs_read_miss); + } else { + WT_STAT_CONN_INCR(session, cache_hs_read); + WT_STAT_DATA_INCR(session, cache_hs_read); + } + } + + WT_ASSERT(session, upd != NULL || ret != 0); + + return (ret); +} diff --git a/src/third_party/wiredtiger/src/history/hs.c b/src/third_party/wiredtiger/src/history/hs_rec.c index f7588ca35c0..a03b8e31739 100644 --- a/src/third_party/wiredtiger/src/history/hs.c +++ b/src/third_party/wiredtiger/src/history/hs_rec.c @@ -25,277 +25,6 @@ static int __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR const WT_ITEM *srch_key); /* - * __hs_start_internal_session -- - * Create a temporary internal session to retrieve history store. 
- */ -static int -__hs_start_internal_session(WT_SESSION_IMPL *session, WT_SESSION_IMPL **int_sessionp) -{ - WT_ASSERT(session, !F_ISSET(session, WT_CONN_HS_OPEN)); - return (__wt_open_internal_session(S2C(session), "hs_access", true, 0, int_sessionp)); -} - -/* - * __hs_release_internal_session -- - * Release the temporary internal session started to retrieve history store. - */ -static int -__hs_release_internal_session(WT_SESSION_IMPL *int_session) -{ - return (__wt_session_close_internal(int_session)); -} - -/* - * __wt_hs_get_btree -- - * Get the history store btree. Open a history store cursor if needed to get the btree. - */ -int -__wt_hs_get_btree(WT_SESSION_IMPL *session, WT_BTREE **hs_btreep) -{ - WT_DECL_RET; - - *hs_btreep = NULL; - - WT_RET(__wt_hs_cursor_open(session)); - - *hs_btreep = CUR2BT(session->hs_cursor); - WT_ASSERT(session, *hs_btreep != NULL); - - WT_TRET(__wt_hs_cursor_close(session)); - - return (ret); -} - -/* - * __wt_hs_config -- - * Configure the history store table. - */ -int -__wt_hs_config(WT_SESSION_IMPL *session, const char **cfg) -{ - WT_BTREE *btree; - WT_CONFIG_ITEM cval; - WT_CONNECTION_IMPL *conn; - WT_DECL_RET; - WT_SESSION_IMPL *tmp_setup_session; - - conn = S2C(session); - tmp_setup_session = NULL; - - WT_ERR(__wt_config_gets(session, cfg, "history_store.file_max", &cval)); - if (cval.val != 0 && cval.val < WT_HS_FILE_MIN) - WT_ERR_MSG(session, EINVAL, "max history store size %" PRId64 " below minimum %d", cval.val, - WT_HS_FILE_MIN); - - /* in-memory or readonly configurations do not have a history store. */ - if (F_ISSET(conn, WT_CONN_IN_MEMORY | WT_CONN_READONLY)) - return (0); - - WT_ERR(__hs_start_internal_session(session, &tmp_setup_session)); - - /* - * Retrieve the btree from the history store cursor. - */ - WT_ERR(__wt_hs_get_btree(tmp_setup_session, &btree)); - - /* Track the history store file ID. 
*/ - if (conn->cache->hs_fileid == 0) - conn->cache->hs_fileid = btree->id; - - /* - * Set special flags for the history store table: the history store flag (used, for example, to - * avoid writing records during reconciliation), also turn off checkpoints and logging. - * - * Test flags before setting them so updates can't race in subsequent opens (the first update is - * safe because it's single-threaded from wiredtiger_open). - */ - if (!F_ISSET(btree, WT_BTREE_HS)) - F_SET(btree, WT_BTREE_HS); - if (!F_ISSET(btree, WT_BTREE_NO_LOGGING)) - F_SET(btree, WT_BTREE_NO_LOGGING); - - /* - * We need to set file_max on the btree associated with one of the history store sessions. - */ - btree->file_max = (uint64_t)cval.val; - WT_STAT_CONN_SET(session, cache_hs_ondisk_max, btree->file_max); - -err: - if (tmp_setup_session != NULL) - WT_TRET(__hs_release_internal_session(tmp_setup_session)); - return (ret); -} - -/* - * __wt_hs_cleanup_las -- - * Drop the lookaside file if it exists. - */ -int -__wt_hs_cleanup_las(WT_SESSION_IMPL *session) -{ - WT_CONNECTION_IMPL *conn; - WT_DECL_RET; - const char *drop_cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_drop), "force=true", NULL}; - - conn = S2C(session); - - /* Read-only and in-memory configurations won't drop the lookaside. */ - if (F_ISSET(conn, WT_CONN_IN_MEMORY | WT_CONN_READONLY)) - return (0); - - /* The LAS table may exist on upgrade. Discard it. */ - WT_WITH_SCHEMA_LOCK( - session, ret = __wt_schema_drop(session, "file:WiredTigerLAS.wt", drop_cfg)); - - return (ret); -} - -/* - * __wt_hs_create -- - * Initialize the database's history store. - */ -int -__wt_hs_create(WT_SESSION_IMPL *session, const char **cfg) -{ - WT_CONNECTION_IMPL *conn; - - conn = S2C(session); - - /* Read-only and in-memory configurations don't need the history store table. */ - if (F_ISSET(conn, WT_CONN_IN_MEMORY | WT_CONN_READONLY)) - return (0); - - /* Create the table. 
*/ - WT_RET(__wt_session_create(session, WT_HS_URI, WT_HS_CONFIG)); - - WT_RET(__wt_hs_config(session, cfg)); - - /* The statistics server is already running, make sure we don't race. */ - WT_WRITE_BARRIER(); - F_SET(conn, WT_CONN_HS_OPEN); - - return (0); -} - -/* - * __wt_hs_destroy -- - * Destroy the database's history store. - */ -void -__wt_hs_destroy(WT_SESSION_IMPL *session) -{ - F_CLR(S2C(session), WT_CONN_HS_OPEN); -} - -/* - * __wt_hs_cursor_open -- - * Open a new history store table cursor. - */ -int -__wt_hs_cursor_open(WT_SESSION_IMPL *session) -{ - WT_CURSOR *cursor; - WT_DECL_RET; - const char *open_cursor_cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL}; - - /* Not allowed to open a cursor if you already have one */ - WT_ASSERT(session, session->hs_cursor == NULL); - - WT_WITHOUT_DHANDLE( - session, ret = __wt_open_cursor(session, WT_HS_URI, NULL, open_cursor_cfg, &cursor)); - WT_RET(ret); - - /* History store cursors should always ignore tombstones. */ - F_SET(cursor, WT_CURSTD_IGNORE_TOMBSTONE); - - session->hs_cursor = cursor; - return (0); -} - -/* - * __wt_hs_cursor_close -- - * Discard a history store cursor. - */ -int -__wt_hs_cursor_close(WT_SESSION_IMPL *session) -{ - /* Should only be called when session has an open history store cursor */ - WT_ASSERT(session, session->hs_cursor != NULL); - - WT_RET(session->hs_cursor->close(session->hs_cursor)); - session->hs_cursor = NULL; - return (0); -} - -/* - * __hs_row_search -- - * Search the history store for a given key and position the cursor on it. - */ -static int -__hs_row_search(WT_CURSOR_BTREE *hs_cbt, WT_ITEM *srch_key, bool insert) -{ - WT_CURSOR *hs_cursor; - WT_DECL_RET; - bool leaf_found; - - hs_cursor = &hs_cbt->iface; - leaf_found = false; - - /* - * Check whether the search key can be find in the provided leaf page, if exists. Otherwise - * perform a full search. 
- */ - if (hs_cbt->ref != NULL) { - WT_WITH_BTREE(CUR2S(hs_cbt), CUR2BT(hs_cbt), - ret = __wt_row_search(hs_cbt, srch_key, insert, hs_cbt->ref, false, &leaf_found)); - WT_RET(ret); - - /* - * Only use the pinned page search results if search returns an exact match or a slot other - * than the page's boundary slots, if that's not the case, the record might belong on an - * entirely different page. - */ - if (leaf_found && - (hs_cbt->compare != 0 && - (hs_cbt->slot == 0 || hs_cbt->slot == hs_cbt->ref->page->entries - 1))) - leaf_found = false; - if (!leaf_found) - hs_cursor->reset(hs_cursor); - } - - if (!leaf_found) - WT_WITH_BTREE(CUR2S(hs_cbt), CUR2BT(hs_cbt), - ret = __wt_row_search(hs_cbt, srch_key, insert, NULL, false, NULL)); - -#ifdef HAVE_DIAGNOSTIC - WT_TRET(__wt_cursor_key_order_init(hs_cbt)); -#endif - return (ret); -} - -/* - * __wt_hs_modify -- - * Make an update to the history store. - * - * History store updates don't use transactions as those updates should be immediately visible and - * don't follow normal transaction semantics. For this reason, history store updates are - * directly modified using the low level api instead of the ordinary cursor api. - */ -int -__wt_hs_modify(WT_CURSOR_BTREE *hs_cbt, WT_UPDATE *hs_upd) -{ - WT_DECL_RET; - - /* - * We don't have exclusive access to the history store page so we need to pass "false" here to - * ensure that we're locking when inserting new keys to an insert list. - */ - WT_WITH_BTREE(CUR2S(hs_cbt), CUR2BT(hs_cbt), - ret = __wt_row_modify(hs_cbt, &hs_cbt->iface.key, NULL, hs_upd, WT_UPDATE_INVALID, false)); - return (ret); -} - -/* * __hs_insert_updates_verbose -- * Display a verbose message once per checkpoint with details about the cache state when * performing a history store table write. @@ -394,7 +123,7 @@ __hs_insert_record_with_btree_int(WT_SESSION_IMPL *session, WT_CURSOR *cursor, W hs_upd = upd_local; /* Search the page and insert the updates. 
*/ - WT_WITH_PAGE_INDEX(session, ret = __hs_row_search(cbt, &cursor->key, true)); + WT_WITH_PAGE_INDEX(session, ret = __wt_hs_row_search(cbt, &cursor->key, true)); WT_ERR(ret); WT_ERR(__wt_hs_modify(cbt, hs_upd)); @@ -1023,391 +752,6 @@ err: } /* - * __hs_cursor_position_int -- - * Internal function to position a history store cursor at the end of a set of updates for a - * given btree id, record key and timestamp. - */ -static int -__hs_cursor_position_int(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id, - const WT_ITEM *key, wt_timestamp_t timestamp, WT_ITEM *user_srch_key) -{ - WT_DECL_ITEM(srch_key); - WT_DECL_RET; - int cmp, exact; - - if (user_srch_key == NULL) - WT_RET(__wt_scr_alloc(session, 0, &srch_key)); - else - srch_key = user_srch_key; - - /* - * Because of the special visibility rules for the history store, a new key can appear in - * between our search and the set of updates that we're interested in. Keep trying until we find - * it. - * - * There may be no history store entries for the given btree id and record key if they have been - * removed by WT_CONNECTION::rollback_to_stable. - * - * Note that we need to compare the raw key off the cursor to determine where we are in the - * history store as opposed to comparing the embedded data store key since the ordering is not - * guaranteed to be the same. - */ - cursor->set_key( - cursor, btree_id, key, timestamp != WT_TS_NONE ? timestamp : WT_TS_MAX, UINT64_MAX); - /* Copy the raw key before searching as a basis for comparison. */ - WT_ERR(__wt_buf_set(session, srch_key, cursor->key.data, cursor->key.size)); - WT_ERR(cursor->search_near(cursor, &exact)); - if (exact > 0) { - /* - * It's possible that we may race with a history store insert for another key. So we may be - * more than one record away the end of our target key/timestamp range. Keep iterating - * backwards until we land on our key. 
- */ - while ((ret = cursor->prev(cursor)) == 0) { - WT_STAT_CONN_INCR(session, cursor_skip_hs_cur_position); - WT_STAT_DATA_INCR(session, cursor_skip_hs_cur_position); - - WT_ERR(__wt_compare(session, NULL, &cursor->key, srch_key, &cmp)); - if (cmp <= 0) - break; - } - } -#ifdef HAVE_DIAGNOSTIC - if (ret == 0) { - WT_ERR(__wt_compare(session, NULL, &cursor->key, srch_key, &cmp)); - WT_ASSERT(session, cmp <= 0); - } -#endif -err: - if (user_srch_key == NULL) - __wt_scr_free(session, &srch_key); - return (ret); -} - -/* - * __wt_hs_cursor_position -- - * Position a history store cursor at the end of a set of updates for a given btree id, record - * key and timestamp. There may be no history store entries for the given btree id and record - * key if they have been removed by WT_CONNECTION::rollback_to_stable. There is an optional - * argument to store the key that we used to position the cursor which can be used to assess - * where the cursor is relative to it. The function executes with isolation level set as - * WT_ISO_READ_UNCOMMITTED. - */ -int -__wt_hs_cursor_position(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id, - const WT_ITEM *key, wt_timestamp_t timestamp, WT_ITEM *user_srch_key) -{ - WT_DECL_RET; - WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED, - ret = __hs_cursor_position_int(session, cursor, btree_id, key, timestamp, user_srch_key)); - return (ret); -} - -/* - * __wt_hs_cursor_prev -- - * Execute a prev operation on a history store cursor with the appropriate isolation level. - */ -int -__wt_hs_cursor_prev(WT_SESSION_IMPL *session, WT_CURSOR *cursor) -{ - WT_DECL_RET; - - WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED, ret = cursor->prev(cursor)); - return (ret); -} - -/* - * __wt_hs_cursor_next -- - * Execute a next operation on a history store cursor with the appropriate isolation level. 
- */ -int -__wt_hs_cursor_next(WT_SESSION_IMPL *session, WT_CURSOR *cursor) -{ - WT_DECL_RET; - - WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED, ret = cursor->next(cursor)); - return (ret); -} - -/* - * __wt_hs_cursor_search_near -- - * Execute a search near operation on a history store cursor with the appropriate isolation - * level. - */ -int -__wt_hs_cursor_search_near(WT_SESSION_IMPL *session, WT_CURSOR *cursor, int *exactp) -{ - WT_DECL_RET; - - WT_WITH_TXN_ISOLATION( - session, WT_ISO_READ_UNCOMMITTED, ret = cursor->search_near(cursor, exactp)); - return (ret); -} - -/* - * __wt_hs_find_upd -- - * Scan the history store for a record the btree cursor wants to position on. Create an update - * for the record and return to the caller. The caller may choose to optionally allow prepared - * updates to be returned regardless of whether prepare is being ignored globally. Otherwise, a - * prepare conflict will be returned upon reading a prepared update. - */ -int -__wt_hs_find_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_format, uint64_t recno, - WT_UPDATE_VALUE *upd_value, bool allow_prepare, WT_ITEM *on_disk_buf, WT_TIME_WINDOW *on_disk_tw) -{ - WT_CURSOR *hs_cursor; - WT_CURSOR_BTREE *hs_cbt; - WT_DECL_ITEM(hs_value); - WT_DECL_ITEM(orig_hs_value_buf); - WT_DECL_RET; - WT_ITEM hs_key, recno_key; - WT_MODIFY_VECTOR modifies; - WT_TXN *txn; - WT_TXN_SHARED *txn_shared; - WT_UPDATE *mod_upd, *upd; - wt_timestamp_t durable_timestamp, durable_timestamp_tmp, hs_start_ts, hs_start_ts_tmp; - wt_timestamp_t hs_stop_durable_ts, hs_stop_durable_ts_tmp, read_timestamp; - uint64_t hs_counter, hs_counter_tmp, upd_type_full; - uint32_t hs_btree_id; - uint8_t *p, recno_key_buf[WT_INTPACK64_MAXSIZE], upd_type; - int cmp; - bool modify; - - hs_cursor = NULL; - mod_upd = upd = NULL; - orig_hs_value_buf = NULL; - WT_CLEAR(hs_key); - __wt_modify_vector_init(session, &modifies); - txn = session->txn; - txn_shared = WT_SESSION_TXN_SHARED(session); - hs_btree_id 
= S2BT(session)->id; - WT_NOT_READ(modify, false); - - WT_STAT_CONN_INCR(session, cursor_search_hs); - WT_STAT_DATA_INCR(session, cursor_search_hs); - - /* Row-store key is as passed to us, create the column-store key as needed. */ - WT_ASSERT( - session, (key == NULL && recno != WT_RECNO_OOB) || (key != NULL && recno == WT_RECNO_OOB)); - if (key == NULL) { - p = recno_key_buf; - WT_RET(__wt_vpack_uint(&p, 0, recno)); - memset(&recno_key, 0, sizeof(recno_key)); - key = &recno_key; - key->data = recno_key_buf; - key->size = WT_PTRDIFF(p, recno_key_buf); - } - - /* Allocate buffer for the history store value. */ - WT_ERR(__wt_scr_alloc(session, 0, &hs_value)); - - /* Open a history store table cursor. */ - WT_ERR(__wt_hs_cursor_open(session)); - hs_cursor = session->hs_cursor; - hs_cbt = (WT_CURSOR_BTREE *)hs_cursor; - - /* - * After positioning our cursor, we're stepping backwards to find the correct update. Since the - * timestamp is part of the key, our cursor needs to go from the newest record (further in the - * history store) to the oldest (earlier in the history store) for a given key. - */ - read_timestamp = allow_prepare ? txn->prepare_timestamp : txn_shared->read_timestamp; - WT_ERR_NOTFOUND_OK( - __wt_hs_cursor_position(session, hs_cursor, hs_btree_id, key, read_timestamp, NULL), true); - if (ret == WT_NOTFOUND) { - ret = 0; - goto done; - } - for (;; ret = __wt_hs_cursor_prev(session, hs_cursor)) { - WT_ERR_NOTFOUND_OK(ret, true); - /* If we hit the end of the table, let's get out of here. */ - if (ret == WT_NOTFOUND) { - ret = 0; - goto done; - } - WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_start_ts, &hs_counter)); - - /* Stop before crossing over to the next btree */ - if (hs_btree_id != S2BT(session)->id) - goto done; - - /* - * Keys are sorted in an order, skip the ones before the desired key, and bail out if we - * have crossed over the desired key and not found the record we are looking for. 
- */ - WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp)); - if (cmp != 0) - goto done; - - /* - * If the stop time pair on the tombstone in the history store is already globally visible - * we can skip it. - */ - if (__wt_txn_tw_stop_visible_all(session, &hs_cbt->upd_value->tw)) { - WT_STAT_CONN_INCR(session, cursor_prev_hs_tombstone); - continue; - } - /* - * If the stop time point of a record is visible to us, we won't be able to see anything for - * this entire key. Just jump straight to the end. - */ - if (__wt_txn_tw_stop_visible(session, &hs_cbt->upd_value->tw)) - goto done; - /* If the start time point is visible to us, let's return that record. */ - if (__wt_txn_tw_start_visible(session, &hs_cbt->upd_value->tw)) - break; - } - - WT_ERR(hs_cursor->get_value( - hs_cursor, &hs_stop_durable_ts, &durable_timestamp, &upd_type_full, hs_value)); - upd_type = (uint8_t)upd_type_full; - - /* We do not have tombstones in the history store anymore. */ - WT_ASSERT(session, upd_type != WT_UPDATE_TOMBSTONE); - - /* - * If the caller has signalled they don't need the value buffer, don't bother reconstructing a - * modify update or copying the contents into the value buffer. - */ - if (upd_value->skip_buf) - goto skip_buf; - - /* - * Keep walking until we get a non-modify update. Once we get to that point, squash the updates - * together. - */ - if (upd_type == WT_UPDATE_MODIFY) { - WT_NOT_READ(modify, true); - /* Store this so that we don't have to make a special case for the first modify. */ - hs_stop_durable_ts_tmp = hs_stop_durable_ts; - - /* - * Resolving update chains of reverse deltas requires the current transaction to look beyond - * its current snapshot in certain scenarios. This flag allows us to ignore transaction - * visibility checks when reading in order to construct the modify chain, so we can create - * the value we expect. 
- */ - while (upd_type == WT_UPDATE_MODIFY) { - WT_ERR(__wt_upd_alloc(session, hs_value, upd_type, &mod_upd, NULL)); - WT_ERR(__wt_modify_vector_push(&modifies, mod_upd)); - mod_upd = NULL; - - /* - * Find the base update to apply the reverse deltas. If our cursor next fails to find an - * update here we fall back to the datastore version. If its timestamp doesn't match our - * timestamp then we return not found. - */ - WT_ERR_NOTFOUND_OK(__wt_hs_cursor_next(session, hs_cursor), true); - if (ret == WT_NOTFOUND) { - /* Fallback to the onpage value as the base value. */ - orig_hs_value_buf = hs_value; - hs_value = on_disk_buf; - upd_type = WT_UPDATE_STANDARD; - break; - } - hs_start_ts_tmp = WT_TS_NONE; - /* - * Make sure we use the temporary variants of these variables. We need to retain the - * timestamps of the original modify we saw. - * - * We keep looking back into history store until we find a base update to apply the - * reverse deltas on top of. - */ - WT_ERR(hs_cursor->get_key( - hs_cursor, &hs_btree_id, &hs_key, &hs_start_ts_tmp, &hs_counter_tmp)); - - if (hs_btree_id != S2BT(session)->id) { - /* Fallback to the onpage value as the base value. */ - orig_hs_value_buf = hs_value; - hs_value = on_disk_buf; - upd_type = WT_UPDATE_STANDARD; - break; - } - - WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp)); - - if (cmp != 0) { - /* Fallback to the onpage value as the base value. */ - orig_hs_value_buf = hs_value; - hs_value = on_disk_buf; - upd_type = WT_UPDATE_STANDARD; - break; - } - - /* - * If we find a history store record that either corresponds to the on-disk value or is - * newer than it then we should use the on-disk value as the base value and apply our - * modifies on top of it. - */ - if (on_disk_tw->start_ts < hs_start_ts_tmp || - (on_disk_tw->start_ts == hs_start_ts_tmp && - on_disk_tw->start_txn <= hs_cbt->upd_value->tw.start_txn)) { - /* Fallback to the onpage value as the base value. 
*/ - orig_hs_value_buf = hs_value; - hs_value = on_disk_buf; - upd_type = WT_UPDATE_STANDARD; - break; - } - - WT_ERR(hs_cursor->get_value(hs_cursor, &hs_stop_durable_ts_tmp, &durable_timestamp_tmp, - &upd_type_full, hs_value)); - upd_type = (uint8_t)upd_type_full; - } - WT_ASSERT(session, upd_type == WT_UPDATE_STANDARD); - while (modifies.size > 0) { - __wt_modify_vector_pop(&modifies, &mod_upd); - WT_ERR(__wt_modify_apply_item(session, value_format, hs_value, mod_upd->data)); - __wt_free_update_list(session, &mod_upd); - mod_upd = NULL; - } - WT_STAT_CONN_INCR(session, cache_hs_read_squash); - } - - /* - * Potential optimization: We can likely get rid of this copy and the update allocation above. - * We already have buffers containing the modify values so there's no good reason to allocate an - * update other than to work with our modify vector implementation. - */ - WT_ERR(__wt_buf_set(session, &upd_value->buf, hs_value->data, hs_value->size)); -skip_buf: - upd_value->tw.durable_start_ts = durable_timestamp; - upd_value->tw.start_txn = WT_TXN_NONE; - upd_value->type = upd_type; - -done: -err: - if (orig_hs_value_buf != NULL) - __wt_scr_free(session, &orig_hs_value_buf); - else - __wt_scr_free(session, &hs_value); - WT_ASSERT(session, hs_key.mem == NULL && hs_key.memsize == 0); - - WT_TRET(__wt_hs_cursor_close(session)); - - __wt_free_update_list(session, &mod_upd); - while (modifies.size > 0) { - __wt_modify_vector_pop(&modifies, &upd); - __wt_free_update_list(session, &upd); - } - __wt_modify_vector_free(&modifies); - - if (ret == 0) { - /* Couldn't find a record. */ - if (upd == NULL) { - ret = WT_NOTFOUND; - WT_STAT_CONN_INCR(session, cache_hs_read_miss); - } else { - WT_STAT_CONN_INCR(session, cache_hs_read); - WT_STAT_DATA_INCR(session, cache_hs_read); - } - } - - WT_ASSERT(session, upd != NULL || ret != 0); - - return (ret); -} - -/* * __hs_delete_key_from_ts_int -- * Internal helper for deleting history store content of a given key from a timestamp. 
*/ @@ -1721,208 +1065,3 @@ err: __wt_free(session, upd); return (ret); } - -/* - * __verify_history_store_id -- - * Verify the history store for a single btree. Given a cursor to the tree, walk all history - * store keys. This function assumes any caller has already opened a cursor to the history - * store. - */ -static int -__verify_history_store_id(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *ds_cbt, uint32_t this_btree_id) -{ - WT_CURSOR *hs_cursor; - WT_CURSOR_BTREE *hs_cbt; - WT_DECL_ITEM(prev_key); - WT_DECL_RET; - WT_ITEM key; - wt_timestamp_t hs_start_ts; - uint64_t hs_counter; - uint32_t btree_id; - int cmp; - - hs_cursor = session->hs_cursor; - hs_cbt = (WT_CURSOR_BTREE *)hs_cursor; - WT_CLEAR(key); - - WT_ERR(__wt_scr_alloc(session, 0, &prev_key)); - - /* - * If using standard cursors, we need to skip the non-globally visible tombstones in the data - * table to verify the corresponding entries in the history store are too present in the data - * store. Though this is not required currently as we are directly searching btree cursors, - * leave it here in case we switch to standard cursors. - */ - F_SET(&ds_cbt->iface, WT_CURSTD_IGNORE_TOMBSTONE); - - /* - * The caller is responsible for positioning the history store cursor at the first record to - * verify. When we return after moving to a new key the caller is responsible for keeping the - * cursor there or deciding they're done. - */ - for (; ret == 0; ret = __wt_hs_cursor_next(session, hs_cursor)) { - WT_ERR(hs_cursor->get_key(hs_cursor, &btree_id, &key, &hs_start_ts, &hs_counter)); - - /* - * If the btree id does not match the preview one, we're done. It is up to the caller to set - * up for the next tree and call us, if they choose. For a full history store walk, the - * caller sends in WT_BTREE_ID_INVALID and this function will set and use the first btree id - * it finds and will return once it walks off that tree, leaving the cursor set to the first - * key of that new tree. 
- */ - if (btree_id != this_btree_id) - break; - - /* - * If the stop time pair on the tombstone in the history store is already globally visible - * we can skip it. - */ - if (__wt_txn_tw_stop_visible_all(session, &hs_cbt->upd_value->tw)) { - WT_STAT_CONN_INCR(session, cursor_next_hs_tombstone); - continue; - } - - /* - * If we have already checked against this key, keep going to the next key. We only need to - * check the key once. - */ - WT_ERR(__wt_compare(session, NULL, &key, prev_key, &cmp)); - if (cmp == 0) - continue; - - WT_WITH_PAGE_INDEX(session, ret = __wt_row_search(ds_cbt, &key, false, NULL, false, NULL)); - WT_ERR(ret); - - if (ds_cbt->compare != 0) { - F_SET(S2C(session), WT_CONN_DATA_CORRUPTION); - WT_ERR_PANIC(session, WT_PANIC, - "the associated history store key %s was not found in the data store %s", - __wt_buf_set_printable(session, key.data, key.size, prev_key), - session->dhandle->name); - } - - WT_ERR(__cursor_reset(ds_cbt)); - - /* - * Copy the key memory into our scratch buffer. The key will get invalidated on our next - * cursor iteration. - */ - WT_ERR(__wt_buf_set(session, prev_key, key.data, key.size)); - } - WT_ERR_NOTFOUND_OK(ret, true); -err: - F_CLR(&ds_cbt->iface, WT_CURSTD_IGNORE_TOMBSTONE); - WT_ASSERT(session, key.mem == NULL && key.memsize == 0); - __wt_scr_free(session, &prev_key); - return (ret); -} - -/* - * __wt_history_store_verify_one -- - * Verify the history store for the btree that is set up in this session. This must be called - * when we are known to have exclusive access to the btree. - */ -int -__wt_history_store_verify_one(WT_SESSION_IMPL *session) -{ - WT_CURSOR *hs_cursor; - WT_CURSOR_BTREE ds_cbt; - WT_DECL_RET; - WT_ITEM hs_key; - uint32_t btree_id; - int exact; - - hs_cursor = session->hs_cursor; - btree_id = S2BT(session)->id; - - /* - * We are required to position the history store cursor. Set it to the first record of our btree - * in the history store. 
- */ - memset(&hs_key, 0, sizeof(hs_key)); - hs_cursor->set_key(hs_cursor, btree_id, &hs_key, 0, 0); - ret = __wt_hs_cursor_search_near(session, hs_cursor, &exact); - if (ret == 0 && exact < 0) - ret = __wt_hs_cursor_next(session, hs_cursor); - - /* - * If we positioned the cursor there is something to verify. - * - * We are in verify and we are not able to open a standard cursor because the btree is flagged - * as WT_BTREE_VERIFY. However, we have exclusive access to the btree so we can directly open - * the btree cursor to work around it. - */ - if (ret == 0) { - __wt_btcur_init(session, &ds_cbt); - __wt_btcur_open(&ds_cbt); - ret = __verify_history_store_id(session, &ds_cbt, btree_id); - WT_TRET(__wt_btcur_close(&ds_cbt, false)); - } - return (ret == WT_NOTFOUND ? 0 : ret); -} - -/* - * __wt_history_store_verify -- - * Verify the history store. There can't be an entry in the history store without having the - * latest value for the respective key in the data store. - */ -int -__wt_history_store_verify(WT_SESSION_IMPL *session) -{ - WT_CURSOR *ds_cursor, *hs_cursor; - WT_DECL_ITEM(buf); - WT_DECL_RET; - WT_ITEM key; - wt_timestamp_t hs_start_ts; - uint64_t hs_counter; - uint32_t btree_id; - char *uri_data; - bool stop; - - /* We should never reach here if working in context of the default session. */ - WT_ASSERT(session, S2C(session)->default_session != session); - - ds_cursor = hs_cursor = NULL; - WT_CLEAR(key); - btree_id = WT_BTREE_ID_INVALID; - uri_data = NULL; - - WT_ERR(__wt_scr_alloc(session, 0, &buf)); - WT_ERR(__wt_hs_cursor_open(session)); - hs_cursor = session->hs_cursor; - WT_ERR_NOTFOUND_OK(__wt_hs_cursor_next(session, hs_cursor), true); - stop = ret == WT_NOTFOUND ? true : false; - ret = 0; - - /* - * We have the history store cursor positioned at the first record that we want to verify. The - * internal function is expecting a btree cursor, so open and initialize that. 
- */ - while (!stop) { - /* - * The cursor is positioned either from above or left over from the internal call on the - * first key of a new btree id. - */ - WT_ERR(hs_cursor->get_key(hs_cursor, &btree_id, &key, &hs_start_ts, &hs_counter)); - if ((ret = __wt_metadata_btree_id_to_uri(session, btree_id, &uri_data)) != 0) { - F_SET(S2C(session), WT_CONN_DATA_CORRUPTION); - WT_ERR_PANIC(session, WT_PANIC, - "Unable to find btree id %" PRIu32 " in the metadata file for the associated key %s", - btree_id, __wt_buf_set_printable(session, key.data, key.size, buf)); - } - WT_ERR(__wt_open_cursor(session, uri_data, NULL, NULL, &ds_cursor)); - F_SET(ds_cursor, WT_CURSOR_RAW_OK); - ret = __verify_history_store_id(session, (WT_CURSOR_BTREE *)ds_cursor, btree_id); - if (ret == WT_NOTFOUND) - stop = true; - WT_TRET(ds_cursor->close(ds_cursor)); - WT_ERR_NOTFOUND_OK(ret, false); - } -err: - WT_TRET(__wt_hs_cursor_close(session)); - - __wt_scr_free(session, &buf); - WT_ASSERT(session, key.mem == NULL && key.memsize == 0); - __wt_free(session, uri_data); - return (ret); -} diff --git a/src/third_party/wiredtiger/src/history/hs_verify.c b/src/third_party/wiredtiger/src/history/hs_verify.c new file mode 100644 index 00000000000..d31c0c51afd --- /dev/null +++ b/src/third_party/wiredtiger/src/history/hs_verify.c @@ -0,0 +1,214 @@ +/*- + * Copyright (c) 2014-2020 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __verify_history_store_id -- + * Verify the history store for a single btree. Given a cursor to the tree, walk all history + * store keys. This function assumes any caller has already opened a cursor to the history + * store. 
+ */ +static int +__verify_history_store_id(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *ds_cbt, uint32_t this_btree_id) +{ + WT_CURSOR *hs_cursor; + WT_CURSOR_BTREE *hs_cbt; + WT_DECL_ITEM(prev_key); + WT_DECL_RET; + WT_ITEM key; + wt_timestamp_t hs_start_ts; + uint64_t hs_counter; + uint32_t btree_id; + int cmp; + + hs_cursor = session->hs_cursor; + hs_cbt = (WT_CURSOR_BTREE *)hs_cursor; + WT_CLEAR(key); + + WT_ERR(__wt_scr_alloc(session, 0, &prev_key)); + + /* + * If using standard cursors, we need to skip the non-globally visible tombstones in the data + * table to verify the corresponding entries in the history store are too present in the data + * store. Though this is not required currently as we are directly searching btree cursors, + * leave it here in case we switch to standard cursors. + */ + F_SET(&ds_cbt->iface, WT_CURSTD_IGNORE_TOMBSTONE); + + /* + * The caller is responsible for positioning the history store cursor at the first record to + * verify. When we return after moving to a new key the caller is responsible for keeping the + * cursor there or deciding they're done. + */ + for (; ret == 0; ret = __wt_hs_cursor_next(session, hs_cursor)) { + WT_ERR(hs_cursor->get_key(hs_cursor, &btree_id, &key, &hs_start_ts, &hs_counter)); + + /* + * If the btree id does not match the preview one, we're done. It is up to the caller to set + * up for the next tree and call us, if they choose. For a full history store walk, the + * caller sends in WT_BTREE_ID_INVALID and this function will set and use the first btree id + * it finds and will return once it walks off that tree, leaving the cursor set to the first + * key of that new tree. + */ + if (btree_id != this_btree_id) + break; + + /* + * If the stop time pair on the tombstone in the history store is already globally visible + * we can skip it. 
+ */ + if (__wt_txn_tw_stop_visible_all(session, &hs_cbt->upd_value->tw)) { + WT_STAT_CONN_INCR(session, cursor_next_hs_tombstone); + continue; + } + + /* + * If we have already checked against this key, keep going to the next key. We only need to + * check the key once. + */ + WT_ERR(__wt_compare(session, NULL, &key, prev_key, &cmp)); + if (cmp == 0) + continue; + + WT_WITH_PAGE_INDEX(session, ret = __wt_row_search(ds_cbt, &key, false, NULL, false, NULL)); + WT_ERR(ret); + + if (ds_cbt->compare != 0) { + F_SET(S2C(session), WT_CONN_DATA_CORRUPTION); + WT_ERR_PANIC(session, WT_PANIC, + "the associated history store key %s was not found in the data store %s", + __wt_buf_set_printable(session, key.data, key.size, prev_key), + session->dhandle->name); + } + + WT_ERR(__cursor_reset(ds_cbt)); + + /* + * Copy the key memory into our scratch buffer. The key will get invalidated on our next + * cursor iteration. + */ + WT_ERR(__wt_buf_set(session, prev_key, key.data, key.size)); + } + WT_ERR_NOTFOUND_OK(ret, true); +err: + F_CLR(&ds_cbt->iface, WT_CURSTD_IGNORE_TOMBSTONE); + WT_ASSERT(session, key.mem == NULL && key.memsize == 0); + __wt_scr_free(session, &prev_key); + return (ret); +} + +/* + * __wt_history_store_verify_one -- + * Verify the history store for the btree that is set up in this session. This must be called + * when we are known to have exclusive access to the btree. + */ +int +__wt_history_store_verify_one(WT_SESSION_IMPL *session) +{ + WT_CURSOR *hs_cursor; + WT_CURSOR_BTREE ds_cbt; + WT_DECL_RET; + WT_ITEM hs_key; + uint32_t btree_id; + int exact; + + hs_cursor = session->hs_cursor; + btree_id = S2BT(session)->id; + + /* + * We are required to position the history store cursor. Set it to the first record of our btree + * in the history store. 
+ */ + memset(&hs_key, 0, sizeof(hs_key)); + hs_cursor->set_key(hs_cursor, btree_id, &hs_key, 0, 0); + ret = __wt_hs_cursor_search_near(session, hs_cursor, &exact); + if (ret == 0 && exact < 0) + ret = __wt_hs_cursor_next(session, hs_cursor); + + /* + * If we positioned the cursor there is something to verify. + * + * We are in verify and we are not able to open a standard cursor because the btree is flagged + * as WT_BTREE_VERIFY. However, we have exclusive access to the btree so we can directly open + * the btree cursor to work around it. + */ + if (ret == 0) { + __wt_btcur_init(session, &ds_cbt); + __wt_btcur_open(&ds_cbt); + ret = __verify_history_store_id(session, &ds_cbt, btree_id); + WT_TRET(__wt_btcur_close(&ds_cbt, false)); + } + return (ret == WT_NOTFOUND ? 0 : ret); +} + +/* + * __wt_history_store_verify -- + * Verify the history store. There can't be an entry in the history store without having the + * latest value for the respective key in the data store. + */ +int +__wt_history_store_verify(WT_SESSION_IMPL *session) +{ + WT_CURSOR *ds_cursor, *hs_cursor; + WT_DECL_ITEM(buf); + WT_DECL_RET; + WT_ITEM key; + wt_timestamp_t hs_start_ts; + uint64_t hs_counter; + uint32_t btree_id; + char *uri_data; + bool stop; + + /* We should never reach here if working in context of the default session. */ + WT_ASSERT(session, S2C(session)->default_session != session); + + ds_cursor = hs_cursor = NULL; + WT_CLEAR(key); + btree_id = WT_BTREE_ID_INVALID; + uri_data = NULL; + + WT_ERR(__wt_scr_alloc(session, 0, &buf)); + WT_ERR(__wt_hs_cursor_open(session)); + hs_cursor = session->hs_cursor; + WT_ERR_NOTFOUND_OK(__wt_hs_cursor_next(session, hs_cursor), true); + stop = ret == WT_NOTFOUND ? true : false; + ret = 0; + + /* + * We have the history store cursor positioned at the first record that we want to verify. The + * internal function is expecting a btree cursor, so open and initialize that. 
+ */ + while (!stop) { + /* + * The cursor is positioned either from above or left over from the internal call on the + * first key of a new btree id. + */ + WT_ERR(hs_cursor->get_key(hs_cursor, &btree_id, &key, &hs_start_ts, &hs_counter)); + if ((ret = __wt_metadata_btree_id_to_uri(session, btree_id, &uri_data)) != 0) { + F_SET(S2C(session), WT_CONN_DATA_CORRUPTION); + WT_ERR_PANIC(session, WT_PANIC, + "Unable to find btree id %" PRIu32 " in the metadata file for the associated key %s", + btree_id, __wt_buf_set_printable(session, key.data, key.size, buf)); + } + WT_ERR(__wt_open_cursor(session, uri_data, NULL, NULL, &ds_cursor)); + F_SET(ds_cursor, WT_CURSOR_RAW_OK); + ret = __verify_history_store_id(session, (WT_CURSOR_BTREE *)ds_cursor, btree_id); + if (ret == WT_NOTFOUND) + stop = true; + WT_TRET(ds_cursor->close(ds_cursor)); + WT_ERR_NOTFOUND_OK(ret, false); + } +err: + WT_TRET(__wt_hs_cursor_close(session)); + + __wt_scr_free(session, &buf); + WT_ASSERT(session, key.mem == NULL && key.memsize == 0); + __wt_free(session, uri_data); + return (ret); +} diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index 49586073665..baa8fbbdb50 100644 --- a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h @@ -29,12 +29,11 @@ #define WT_REC_CHECKPOINT 0x002u #define WT_REC_CLEAN_AFTER_REC 0x004u #define WT_REC_EVICT 0x008u -#define WT_REC_EVICTION_THREAD 0x010u -#define WT_REC_HS 0x020u -#define WT_REC_IN_MEMORY 0x040u -#define WT_REC_SCRUB 0x080u -#define WT_REC_VISIBILITY_ERR 0x100u -#define WT_REC_VISIBLE_ALL 0x200u +#define WT_REC_HS 0x010u +#define WT_REC_IN_MEMORY 0x020u +#define WT_REC_SCRUB 0x040u +#define WT_REC_VISIBILITY_ERR 0x080u +#define WT_REC_VISIBLE_ALL 0x100u /* AUTOMATIC FLAG VALUE GENERATION STOP */ /* diff --git a/src/third_party/wiredtiger/src/include/btree.h b/src/third_party/wiredtiger/src/include/btree.h index 8ec0953c378..11d05c1f327 100644 
--- a/src/third_party/wiredtiger/src/include/btree.h +++ b/src/third_party/wiredtiger/src/include/btree.h @@ -264,18 +264,16 @@ struct __wt_btree { #define WT_BTREE_NO_CHECKPOINT 0x004000u /* Disable checkpoints */ #define WT_BTREE_NO_LOGGING 0x008000u /* Disable logging */ #define WT_BTREE_READONLY 0x010000u /* Handle is readonly */ -#define WT_BTREE_REBALANCE 0x020000u /* Handle is for rebalance */ -#define WT_BTREE_SALVAGE 0x040000u /* Handle is for salvage */ -#define WT_BTREE_SKIP_CKPT 0x080000u /* Handle skipped checkpoint */ -#define WT_BTREE_UPGRADE 0x100000u /* Handle is for upgrade */ -#define WT_BTREE_VERIFY 0x200000u /* Handle is for verify */ +#define WT_BTREE_SALVAGE 0x020000u /* Handle is for salvage */ +#define WT_BTREE_SKIP_CKPT 0x040000u /* Handle skipped checkpoint */ +#define WT_BTREE_UPGRADE 0x080000u /* Handle is for upgrade */ +#define WT_BTREE_VERIFY 0x100000u /* Handle is for verify */ uint32_t flags; }; /* Flags that make a btree handle special (not for normal use). 
*/ -#define WT_BTREE_SPECIAL_FLAGS \ - (WT_BTREE_ALTER | WT_BTREE_BULK | WT_BTREE_REBALANCE | WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | \ - WT_BTREE_VERIFY) +#define WT_BTREE_SPECIAL_FLAGS \ + (WT_BTREE_ALTER | WT_BTREE_BULK | WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY) /* * WT_SALVAGE_COOKIE -- diff --git a/src/third_party/wiredtiger/src/include/config.h b/src/third_party/wiredtiger/src/include/config.h index a297ea6eb4c..b549da1de64 100644 --- a/src/third_party/wiredtiger/src/include/config.h +++ b/src/third_party/wiredtiger/src/include/config.h @@ -82,28 +82,27 @@ struct __wt_config_parser_impl { #define WT_CONFIG_ENTRY_WT_SESSION_open_cursor 28 #define WT_CONFIG_ENTRY_WT_SESSION_prepare_transaction 29 #define WT_CONFIG_ENTRY_WT_SESSION_query_timestamp 30 -#define WT_CONFIG_ENTRY_WT_SESSION_rebalance 31 -#define WT_CONFIG_ENTRY_WT_SESSION_reconfigure 32 -#define WT_CONFIG_ENTRY_WT_SESSION_rename 33 -#define WT_CONFIG_ENTRY_WT_SESSION_reset 34 -#define WT_CONFIG_ENTRY_WT_SESSION_rollback_transaction 35 -#define WT_CONFIG_ENTRY_WT_SESSION_salvage 36 -#define WT_CONFIG_ENTRY_WT_SESSION_strerror 37 -#define WT_CONFIG_ENTRY_WT_SESSION_timestamp_transaction 38 -#define WT_CONFIG_ENTRY_WT_SESSION_transaction_sync 39 -#define WT_CONFIG_ENTRY_WT_SESSION_truncate 40 -#define WT_CONFIG_ENTRY_WT_SESSION_upgrade 41 -#define WT_CONFIG_ENTRY_WT_SESSION_verify 42 -#define WT_CONFIG_ENTRY_colgroup_meta 43 -#define WT_CONFIG_ENTRY_file_config 44 -#define WT_CONFIG_ENTRY_file_meta 45 -#define WT_CONFIG_ENTRY_index_meta 46 -#define WT_CONFIG_ENTRY_lsm_meta 47 -#define WT_CONFIG_ENTRY_table_meta 48 -#define WT_CONFIG_ENTRY_wiredtiger_open 49 -#define WT_CONFIG_ENTRY_wiredtiger_open_all 50 -#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 51 -#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 52 +#define WT_CONFIG_ENTRY_WT_SESSION_reconfigure 31 +#define WT_CONFIG_ENTRY_WT_SESSION_rename 32 +#define WT_CONFIG_ENTRY_WT_SESSION_reset 33 +#define 
WT_CONFIG_ENTRY_WT_SESSION_rollback_transaction 34 +#define WT_CONFIG_ENTRY_WT_SESSION_salvage 35 +#define WT_CONFIG_ENTRY_WT_SESSION_strerror 36 +#define WT_CONFIG_ENTRY_WT_SESSION_timestamp_transaction 37 +#define WT_CONFIG_ENTRY_WT_SESSION_transaction_sync 38 +#define WT_CONFIG_ENTRY_WT_SESSION_truncate 39 +#define WT_CONFIG_ENTRY_WT_SESSION_upgrade 40 +#define WT_CONFIG_ENTRY_WT_SESSION_verify 41 +#define WT_CONFIG_ENTRY_colgroup_meta 42 +#define WT_CONFIG_ENTRY_file_config 43 +#define WT_CONFIG_ENTRY_file_meta 44 +#define WT_CONFIG_ENTRY_index_meta 45 +#define WT_CONFIG_ENTRY_lsm_meta 46 +#define WT_CONFIG_ENTRY_table_meta 47 +#define WT_CONFIG_ENTRY_wiredtiger_open 48 +#define WT_CONFIG_ENTRY_wiredtiger_open_all 49 +#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 50 +#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 51 /* * configuration section: END * DO NOT EDIT: automatically built by dist/flags.py. diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h index a4befa30322..54220d07268 100644 --- a/src/third_party/wiredtiger/src/include/connection.h +++ b/src/third_party/wiredtiger/src/include/connection.h @@ -481,21 +481,20 @@ struct __wt_connection_impl { #define WT_VERB_MUTEX 0x0000100000u #define WT_VERB_OVERFLOW 0x0000200000u #define WT_VERB_READ 0x0000400000u -#define WT_VERB_REBALANCE 0x0000800000u -#define WT_VERB_RECONCILE 0x0001000000u -#define WT_VERB_RECOVERY 0x0002000000u -#define WT_VERB_RECOVERY_PROGRESS 0x0004000000u -#define WT_VERB_RTS 0x0008000000u -#define WT_VERB_SALVAGE 0x0010000000u -#define WT_VERB_SHARED_CACHE 0x0020000000u -#define WT_VERB_SPLIT 0x0040000000u -#define WT_VERB_TEMPORARY 0x0080000000u -#define WT_VERB_THREAD_GROUP 0x0100000000u -#define WT_VERB_TIMESTAMP 0x0200000000u -#define WT_VERB_TRANSACTION 0x0400000000u -#define WT_VERB_VERIFY 0x0800000000u -#define WT_VERB_VERSION 0x1000000000u -#define WT_VERB_WRITE 0x2000000000u +#define WT_VERB_RECONCILE 
0x0000800000u +#define WT_VERB_RECOVERY 0x0001000000u +#define WT_VERB_RECOVERY_PROGRESS 0x0002000000u +#define WT_VERB_RTS 0x0004000000u +#define WT_VERB_SALVAGE 0x0008000000u +#define WT_VERB_SHARED_CACHE 0x0010000000u +#define WT_VERB_SPLIT 0x0020000000u +#define WT_VERB_TEMPORARY 0x0040000000u +#define WT_VERB_THREAD_GROUP 0x0080000000u +#define WT_VERB_TIMESTAMP 0x0100000000u +#define WT_VERB_TRANSACTION 0x0200000000u +#define WT_VERB_VERIFY 0x0400000000u +#define WT_VERB_VERSION 0x0800000000u +#define WT_VERB_WRITE 0x1000000000u /* AUTOMATIC FLAG VALUE GENERATION STOP */ uint64_t verbose; diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index 9e61d1ad53d..4abca6c4a0c 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -237,8 +237,6 @@ extern int __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, const size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_bt_read(WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) - WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, size_t *compressed_sizep, bool checkpoint, bool checkpoint_io, bool compressed) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -734,12 +732,8 @@ extern int __wt_history_store_verify(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_history_store_verify_one(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_hs_cleanup_las(WT_SESSION_IMPL *session) - WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_hs_config(WT_SESSION_IMPL *session, const char **cfg) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_hs_create(WT_SESSION_IMPL *session, const char **cfg) - WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_hs_cursor_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_hs_cursor_next(WT_SESSION_IMPL *session, WT_CURSOR *cursor) @@ -764,6 +758,10 @@ extern int __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MU WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_hs_modify(WT_CURSOR_BTREE *hs_cbt, WT_UPDATE *hs_upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_hs_open(WT_SESSION_IMPL *session, const char **cfg) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_hs_row_search(WT_CURSOR_BTREE *hs_cbt, WT_ITEM *srch_key, bool insert) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_huffman_decode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_huffman_encode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, @@ -1031,6 +1029,8 @@ extern int __wt_meta_checkpoint_last_name(WT_SESSION_IMPL *session, const char * const char **namep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_meta_ckptlist_get(WT_SESSION_IMPL *session, const char *fname, bool update, WT_CKPT **ckptbasep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_ckptlist_get_with_config(WT_SESSION_IMPL *session, bool update, + WT_CKPT **ckptbasep, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_meta_ckptlist_set(WT_SESSION_IMPL *session, const char *fname, WT_CKPT *ckptbase, WT_LSN *ckptlsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_meta_ckptlist_to_meta(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, WT_ITEM *buf) @@ -1669,7 +1669,7 @@ extern void 
__wt_gen_drain(WT_SESSION_IMPL *session, int which, uint64_t generat extern void __wt_gen_init(WT_SESSION_IMPL *session); extern void __wt_gen_next_drain(WT_SESSION_IMPL *session, int which); extern void __wt_hazard_close(WT_SESSION_IMPL *session); -extern void __wt_hs_destroy(WT_SESSION_IMPL *session); +extern void __wt_hs_close(WT_SESSION_IMPL *session); extern void __wt_huffman_close(WT_SESSION_IMPL *session, void *huffman_arg); extern void __wt_json_close(WT_SESSION_IMPL *session, WT_CURSOR *cursor); extern void __wt_log_background(WT_SESSION_IMPL *session, WT_LSN *lsn); diff --git a/src/third_party/wiredtiger/src/include/log.h b/src/third_party/wiredtiger/src/include/log.h index 26dbdc51d57..63921e21194 100644 --- a/src/third_party/wiredtiger/src/include/log.h +++ b/src/third_party/wiredtiger/src/include/log.h @@ -50,8 +50,11 @@ union __wt_lsn { #define WT_LOG_ALIGN 128 /* - * Atomically set the two components of the LSN. + * Atomically set the LSN. There are two forms. We need WT_ASSIGN_LSN because some compilers (at + * least clang address sanitizer) does not do atomic 64-bit structure assignment so we need to + * explicitly assign the 64-bit field. And WT_SET_LSN atomically sets the LSN given a file/offset. 
*/ +#define WT_ASSIGN_LSN(dstl, srcl) (dstl)->file_offset = (srcl)->file_offset #define WT_SET_LSN(l, f, o) (l)->file_offset = (((uint64_t)(f) << 32) + (o)) #define WT_INIT_LSN(l) WT_SET_LSN((l), 1, 0) diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h index 2b80c498fa8..c812a9de71c 100644 --- a/src/third_party/wiredtiger/src/include/stat.h +++ b/src/third_party/wiredtiger/src/include/stat.h @@ -359,6 +359,7 @@ struct __wt_connection_stats { int64_t cache_eviction_walks_gave_up_no_targets; int64_t cache_eviction_walks_gave_up_ratio; int64_t cache_eviction_walks_ended; + int64_t cache_eviction_walk_restart; int64_t cache_eviction_walk_from_root; int64_t cache_eviction_walk_saved_pos; int64_t cache_eviction_active_workers; @@ -670,8 +671,6 @@ struct __wt_connection_stats { int64_t session_table_drop_success; int64_t session_table_import_fail; int64_t session_table_import_success; - int64_t session_table_rebalance_fail; - int64_t session_table_rebalance_success; int64_t session_table_rename_fail; int64_t session_table_rename_success; int64_t session_table_salvage_fail; @@ -835,6 +834,7 @@ struct __wt_dsrc_stats { int64_t cache_eviction_walks_gave_up_no_targets; int64_t cache_eviction_walks_gave_up_ratio; int64_t cache_eviction_walks_ended; + int64_t cache_eviction_walk_restart; int64_t cache_eviction_walk_from_root; int64_t cache_eviction_walk_saved_pos; int64_t cache_eviction_hazard; diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index 087b2a9f0c6..d1603e0014a 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -1367,21 +1367,6 @@ struct __wt_session { int __F(log_printf)(WT_SESSION *session, const char *format, ...); /*! - * Rebalance a table or file, see @ref rebalance. 
- * - * @exclusive - * - * @snippet ex_all.c Rebalance a table - * - * @param session the session handle - * @param uri the current URI of the object, such as \c "table:mytable" - * @configempty{WT_SESSION.rebalance, see dist/api_data.py} - * @ebusy_errors - */ - int __F(rebalance)( - WT_SESSION *session, const char *uri, const char *config); - - /*! * Rename an object. * * @not_transactional @@ -2151,10 +2136,10 @@ struct __wt_connection { * "checkpoint_cleanup"\, \c "checkpoint_progress"\, \c "compact"\, \c "compact_progress"\, * \c "error_returns"\, \c "evict"\, \c "evict_stuck"\, \c "evictserver"\, \c "fileops"\, \c * "handleops"\, \c "log"\, \c "history_store"\, \c "history_store_activity"\, \c "lsm"\, \c - * "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c "read"\, \c "rebalance"\, - * \c "reconcile"\, \c "recovery"\, \c "recovery_progress"\, \c "rts"\, \c "salvage"\, \c - * "shared_cache"\, \c "split"\, \c "temporary"\, \c "thread_group"\, \c "timestamp"\, \c - * "transaction"\, \c "verify"\, \c "version"\, \c "write"; default empty.} + * "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c "read"\, \c "reconcile"\, + * \c "recovery"\, \c "recovery_progress"\, \c "rts"\, \c "salvage"\, \c "shared_cache"\, \c + * "split"\, \c "temporary"\, \c "thread_group"\, \c "timestamp"\, \c "transaction"\, \c + * "verify"\, \c "version"\, \c "write"; default empty.} * @configend * @errors */ @@ -2867,10 +2852,9 @@ struct __wt_connection { * "checkpoint_progress"\, \c "compact"\, \c "compact_progress"\, \c "error_returns"\, \c "evict"\, * \c "evict_stuck"\, \c "evictserver"\, \c "fileops"\, \c "handleops"\, \c "log"\, \c * "history_store"\, \c "history_store_activity"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c - * "mutex"\, \c "overflow"\, \c "read"\, \c "rebalance"\, \c "reconcile"\, \c "recovery"\, \c - * "recovery_progress"\, \c "rts"\, \c "salvage"\, \c "shared_cache"\, \c "split"\, \c "temporary"\, - * \c "thread_group"\, \c 
"timestamp"\, \c "transaction"\, \c "verify"\, \c "version"\, \c "write"; - * default empty.} + * "mutex"\, \c "overflow"\, \c "read"\, \c "reconcile"\, \c "recovery"\, \c "recovery_progress"\, + * \c "rts"\, \c "salvage"\, \c "shared_cache"\, \c "split"\, \c "temporary"\, \c "thread_group"\, + * \c "timestamp"\, \c "transaction"\, \c "verify"\, \c "version"\, \c "write"; default empty.} * @config{verify_metadata, open connection and verify any WiredTiger metadata. This API allows * verification and detection of corruption in WiredTiger metadata., a boolean flag; default \c * false.} @@ -4831,994 +4815,992 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_CACHE_EVICTION_WALKS_GAVE_UP_RATIO 1062 /*! cache: eviction walks reached end of tree */ #define WT_STAT_CONN_CACHE_EVICTION_WALKS_ENDED 1063 +/*! cache: eviction walks restarted */ +#define WT_STAT_CONN_CACHE_EVICTION_WALK_RESTART 1064 /*! cache: eviction walks started from root of tree */ -#define WT_STAT_CONN_CACHE_EVICTION_WALK_FROM_ROOT 1064 +#define WT_STAT_CONN_CACHE_EVICTION_WALK_FROM_ROOT 1065 /*! cache: eviction walks started from saved location in tree */ -#define WT_STAT_CONN_CACHE_EVICTION_WALK_SAVED_POS 1065 +#define WT_STAT_CONN_CACHE_EVICTION_WALK_SAVED_POS 1066 /*! cache: eviction worker thread active */ -#define WT_STAT_CONN_CACHE_EVICTION_ACTIVE_WORKERS 1066 +#define WT_STAT_CONN_CACHE_EVICTION_ACTIVE_WORKERS 1067 /*! cache: eviction worker thread created */ -#define WT_STAT_CONN_CACHE_EVICTION_WORKER_CREATED 1067 +#define WT_STAT_CONN_CACHE_EVICTION_WORKER_CREATED 1068 /*! cache: eviction worker thread evicting pages */ -#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1068 +#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1069 /*! cache: eviction worker thread removed */ -#define WT_STAT_CONN_CACHE_EVICTION_WORKER_REMOVED 1069 +#define WT_STAT_CONN_CACHE_EVICTION_WORKER_REMOVED 1070 /*! 
cache: eviction worker thread stable number */ -#define WT_STAT_CONN_CACHE_EVICTION_STABLE_STATE_WORKERS 1070 +#define WT_STAT_CONN_CACHE_EVICTION_STABLE_STATE_WORKERS 1071 /*! cache: files with active eviction walks */ -#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1071 +#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1072 /*! cache: files with new eviction walks started */ -#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1072 +#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1073 /*! cache: force re-tuning of eviction workers once in a while */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_RETUNE 1073 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_RETUNE 1074 /*! * cache: forced eviction - history store pages failed to evict while * session has history store cursor open */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_HS_FAIL 1074 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_HS_FAIL 1075 /*! * cache: forced eviction - history store pages selected while session * has history store cursor open */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_HS 1075 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_HS 1076 /*! * cache: forced eviction - history store pages successfully evicted * while session has history store cursor open */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_HS_SUCCESS 1076 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_HS_SUCCESS 1077 /*! cache: forced eviction - pages evicted that were clean count */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_CLEAN 1077 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_CLEAN 1078 /*! cache: forced eviction - pages evicted that were clean time (usecs) */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_CLEAN_TIME 1078 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_CLEAN_TIME 1079 /*! cache: forced eviction - pages evicted that were dirty count */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DIRTY 1079 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DIRTY 1080 /*! 
cache: forced eviction - pages evicted that were dirty time (usecs) */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DIRTY_TIME 1080 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DIRTY_TIME 1081 /*! * cache: forced eviction - pages selected because of too many deleted * items count */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1081 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1082 /*! cache: forced eviction - pages selected count */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1082 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1083 /*! cache: forced eviction - pages selected unable to be evicted count */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1083 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1084 /*! cache: forced eviction - pages selected unable to be evicted time */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL_TIME 1084 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL_TIME 1085 /*! * cache: forced eviction - session returned rollback error while force * evicting due to being oldest */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_ROLLBACK 1085 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_ROLLBACK 1086 /*! cache: hazard pointer blocked page eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1086 +#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1087 /*! cache: hazard pointer check calls */ -#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1087 +#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1088 /*! cache: hazard pointer check entries walked */ -#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1088 +#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1089 /*! cache: hazard pointer maximum array length */ -#define WT_STAT_CONN_CACHE_HAZARD_MAX 1089 +#define WT_STAT_CONN_CACHE_HAZARD_MAX 1090 /*! cache: history store score */ -#define WT_STAT_CONN_CACHE_HS_SCORE 1090 +#define WT_STAT_CONN_CACHE_HS_SCORE 1091 /*! cache: history store table insert calls */ -#define WT_STAT_CONN_CACHE_HS_INSERT 1091 +#define WT_STAT_CONN_CACHE_HS_INSERT 1092 /*! 
cache: history store table insert calls that returned restart */ -#define WT_STAT_CONN_CACHE_HS_INSERT_RESTART 1092 +#define WT_STAT_CONN_CACHE_HS_INSERT_RESTART 1093 /*! cache: history store table max on-disk size */ -#define WT_STAT_CONN_CACHE_HS_ONDISK_MAX 1093 +#define WT_STAT_CONN_CACHE_HS_ONDISK_MAX 1094 /*! cache: history store table on-disk size */ -#define WT_STAT_CONN_CACHE_HS_ONDISK 1094 +#define WT_STAT_CONN_CACHE_HS_ONDISK 1095 /*! * cache: history store table out-of-order resolved updates that lose * their durable timestamp */ -#define WT_STAT_CONN_CACHE_HS_ORDER_LOSE_DURABLE_TIMESTAMP 1095 +#define WT_STAT_CONN_CACHE_HS_ORDER_LOSE_DURABLE_TIMESTAMP 1096 /*! * cache: history store table out-of-order updates that were fixed up by * moving existing records */ -#define WT_STAT_CONN_CACHE_HS_ORDER_FIXUP_MOVE 1096 +#define WT_STAT_CONN_CACHE_HS_ORDER_FIXUP_MOVE 1097 /*! * cache: history store table out-of-order updates that were fixed up * during insertion */ -#define WT_STAT_CONN_CACHE_HS_ORDER_FIXUP_INSERT 1097 +#define WT_STAT_CONN_CACHE_HS_ORDER_FIXUP_INSERT 1098 /*! cache: history store table reads */ -#define WT_STAT_CONN_CACHE_HS_READ 1098 +#define WT_STAT_CONN_CACHE_HS_READ 1099 /*! cache: history store table reads missed */ -#define WT_STAT_CONN_CACHE_HS_READ_MISS 1099 +#define WT_STAT_CONN_CACHE_HS_READ_MISS 1100 /*! cache: history store table reads requiring squashed modifies */ -#define WT_STAT_CONN_CACHE_HS_READ_SQUASH 1100 +#define WT_STAT_CONN_CACHE_HS_READ_SQUASH 1101 /*! * cache: history store table truncation by rollback to stable to remove * an unstable update */ -#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_RTS_UNSTABLE 1101 +#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_RTS_UNSTABLE 1102 /*! * cache: history store table truncation by rollback to stable to remove * an update */ -#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_RTS 1102 +#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_RTS 1103 /*! 
* cache: history store table truncation due to mixed timestamps that * returned restart */ -#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_MIX_TS_RESTART 1103 +#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_MIX_TS_RESTART 1104 /*! cache: history store table truncation to remove an update */ -#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE 1104 +#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE 1105 /*! * cache: history store table truncation to remove range of updates due * to key being removed from the data page during reconciliation */ -#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_ONPAGE_REMOVAL 1105 +#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_ONPAGE_REMOVAL 1106 /*! * cache: history store table truncation to remove range of updates due * to mixed timestamps */ -#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_MIX_TS 1106 +#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_MIX_TS 1107 /*! cache: history store table writes requiring squashed modifies */ -#define WT_STAT_CONN_CACHE_HS_WRITE_SQUASH 1107 +#define WT_STAT_CONN_CACHE_HS_WRITE_SQUASH 1108 /*! cache: in-memory page passed criteria to be split */ -#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1108 +#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1109 /*! cache: in-memory page splits */ -#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1109 +#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1110 /*! cache: internal pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1110 +#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1111 /*! cache: internal pages queued for eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL_PAGES_QUEUED 1111 +#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL_PAGES_QUEUED 1112 /*! cache: internal pages seen by eviction walk */ -#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL_PAGES_SEEN 1112 +#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL_PAGES_SEEN 1113 /*! 
cache: internal pages seen by eviction walk that are already queued */ -#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL_PAGES_ALREADY_QUEUED 1113 +#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL_PAGES_ALREADY_QUEUED 1114 /*! cache: internal pages split during eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1114 +#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1115 /*! cache: leaf pages split during eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1115 +#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1116 /*! cache: maximum bytes configured */ -#define WT_STAT_CONN_CACHE_BYTES_MAX 1116 +#define WT_STAT_CONN_CACHE_BYTES_MAX 1117 /*! cache: maximum page size at eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1117 +#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1118 /*! cache: modified pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1118 +#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1119 /*! cache: modified pages evicted by application threads */ -#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1119 +#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1120 /*! cache: operations timed out waiting for space in cache */ -#define WT_STAT_CONN_CACHE_TIMED_OUT_OPS 1120 +#define WT_STAT_CONN_CACHE_TIMED_OUT_OPS 1121 /*! cache: overflow pages read into cache */ -#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1121 +#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1122 /*! cache: page split during eviction deepened the tree */ -#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1122 +#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1123 /*! cache: page written requiring history store records */ -#define WT_STAT_CONN_CACHE_WRITE_HS 1123 +#define WT_STAT_CONN_CACHE_WRITE_HS 1124 /*! cache: pages currently held in the cache */ -#define WT_STAT_CONN_CACHE_PAGES_INUSE 1124 +#define WT_STAT_CONN_CACHE_PAGES_INUSE 1125 /*! cache: pages evicted by application threads */ -#define WT_STAT_CONN_CACHE_EVICTION_APP 1125 +#define WT_STAT_CONN_CACHE_EVICTION_APP 1126 /*! 
cache: pages queued for eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1126 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1127 /*! cache: pages queued for eviction post lru sorting */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_POST_LRU 1127 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_POST_LRU 1128 /*! cache: pages queued for urgent eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1128 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1129 /*! cache: pages queued for urgent eviction during walk */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1129 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1130 /*! cache: pages read into cache */ -#define WT_STAT_CONN_CACHE_READ 1130 +#define WT_STAT_CONN_CACHE_READ 1131 /*! cache: pages read into cache after truncate */ -#define WT_STAT_CONN_CACHE_READ_DELETED 1131 +#define WT_STAT_CONN_CACHE_READ_DELETED 1132 /*! cache: pages read into cache after truncate in prepare state */ -#define WT_STAT_CONN_CACHE_READ_DELETED_PREPARED 1132 +#define WT_STAT_CONN_CACHE_READ_DELETED_PREPARED 1133 /*! cache: pages requested from the cache */ -#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1133 +#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1134 /*! cache: pages seen by eviction walk */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1134 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1135 /*! cache: pages seen by eviction walk that are already queued */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_ALREADY_QUEUED 1135 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_ALREADY_QUEUED 1136 /*! cache: pages selected for eviction unable to be evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1136 +#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1137 /*! 
* cache: pages selected for eviction unable to be evicted as the parent * page has overflow items */ -#define WT_STAT_CONN_CACHE_EVICTION_FAIL_PARENT_HAS_OVERFLOW_ITEMS 1137 +#define WT_STAT_CONN_CACHE_EVICTION_FAIL_PARENT_HAS_OVERFLOW_ITEMS 1138 /*! * cache: pages selected for eviction unable to be evicted because of * active children on an internal page */ -#define WT_STAT_CONN_CACHE_EVICTION_FAIL_ACTIVE_CHILDREN_ON_AN_INTERNAL_PAGE 1138 +#define WT_STAT_CONN_CACHE_EVICTION_FAIL_ACTIVE_CHILDREN_ON_AN_INTERNAL_PAGE 1139 /*! * cache: pages selected for eviction unable to be evicted because of * failure in reconciliation */ -#define WT_STAT_CONN_CACHE_EVICTION_FAIL_IN_RECONCILIATION 1139 +#define WT_STAT_CONN_CACHE_EVICTION_FAIL_IN_RECONCILIATION 1140 /*! cache: pages walked for eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_WALK 1140 +#define WT_STAT_CONN_CACHE_EVICTION_WALK 1141 /*! cache: pages written from cache */ -#define WT_STAT_CONN_CACHE_WRITE 1141 +#define WT_STAT_CONN_CACHE_WRITE 1142 /*! cache: pages written requiring in-memory restoration */ -#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1142 +#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1143 /*! cache: percentage overhead */ -#define WT_STAT_CONN_CACHE_OVERHEAD 1143 +#define WT_STAT_CONN_CACHE_OVERHEAD 1144 /*! cache: tracked bytes belonging to internal pages in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1144 +#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1145 /*! cache: tracked bytes belonging to leaf pages in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_LEAF 1145 +#define WT_STAT_CONN_CACHE_BYTES_LEAF 1146 /*! cache: tracked dirty bytes in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1146 +#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1147 /*! cache: tracked dirty pages in the cache */ -#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1147 +#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1148 /*! 
cache: unmodified pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1148 +#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1149 /*! capacity: background fsync file handles considered */ -#define WT_STAT_CONN_FSYNC_ALL_FH_TOTAL 1149 +#define WT_STAT_CONN_FSYNC_ALL_FH_TOTAL 1150 /*! capacity: background fsync file handles synced */ -#define WT_STAT_CONN_FSYNC_ALL_FH 1150 +#define WT_STAT_CONN_FSYNC_ALL_FH 1151 /*! capacity: background fsync time (msecs) */ -#define WT_STAT_CONN_FSYNC_ALL_TIME 1151 +#define WT_STAT_CONN_FSYNC_ALL_TIME 1152 /*! capacity: bytes read */ -#define WT_STAT_CONN_CAPACITY_BYTES_READ 1152 +#define WT_STAT_CONN_CAPACITY_BYTES_READ 1153 /*! capacity: bytes written for checkpoint */ -#define WT_STAT_CONN_CAPACITY_BYTES_CKPT 1153 +#define WT_STAT_CONN_CAPACITY_BYTES_CKPT 1154 /*! capacity: bytes written for eviction */ -#define WT_STAT_CONN_CAPACITY_BYTES_EVICT 1154 +#define WT_STAT_CONN_CAPACITY_BYTES_EVICT 1155 /*! capacity: bytes written for log */ -#define WT_STAT_CONN_CAPACITY_BYTES_LOG 1155 +#define WT_STAT_CONN_CAPACITY_BYTES_LOG 1156 /*! capacity: bytes written total */ -#define WT_STAT_CONN_CAPACITY_BYTES_WRITTEN 1156 +#define WT_STAT_CONN_CAPACITY_BYTES_WRITTEN 1157 /*! capacity: threshold to call fsync */ -#define WT_STAT_CONN_CAPACITY_THRESHOLD 1157 +#define WT_STAT_CONN_CAPACITY_THRESHOLD 1158 /*! capacity: time waiting due to total capacity (usecs) */ -#define WT_STAT_CONN_CAPACITY_TIME_TOTAL 1158 +#define WT_STAT_CONN_CAPACITY_TIME_TOTAL 1159 /*! capacity: time waiting during checkpoint (usecs) */ -#define WT_STAT_CONN_CAPACITY_TIME_CKPT 1159 +#define WT_STAT_CONN_CAPACITY_TIME_CKPT 1160 /*! capacity: time waiting during eviction (usecs) */ -#define WT_STAT_CONN_CAPACITY_TIME_EVICT 1160 +#define WT_STAT_CONN_CAPACITY_TIME_EVICT 1161 /*! capacity: time waiting during logging (usecs) */ -#define WT_STAT_CONN_CAPACITY_TIME_LOG 1161 +#define WT_STAT_CONN_CAPACITY_TIME_LOG 1162 /*! 
capacity: time waiting during read (usecs) */ -#define WT_STAT_CONN_CAPACITY_TIME_READ 1162 +#define WT_STAT_CONN_CAPACITY_TIME_READ 1163 /*! checkpoint-cleanup: pages added for eviction */ -#define WT_STAT_CONN_CC_PAGES_EVICT 1163 +#define WT_STAT_CONN_CC_PAGES_EVICT 1164 /*! checkpoint-cleanup: pages removed */ -#define WT_STAT_CONN_CC_PAGES_REMOVED 1164 +#define WT_STAT_CONN_CC_PAGES_REMOVED 1165 /*! checkpoint-cleanup: pages skipped during tree walk */ -#define WT_STAT_CONN_CC_PAGES_WALK_SKIPPED 1165 +#define WT_STAT_CONN_CC_PAGES_WALK_SKIPPED 1166 /*! checkpoint-cleanup: pages visited */ -#define WT_STAT_CONN_CC_PAGES_VISITED 1166 +#define WT_STAT_CONN_CC_PAGES_VISITED 1167 /*! connection: auto adjusting condition resets */ -#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1167 +#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1168 /*! connection: auto adjusting condition wait calls */ -#define WT_STAT_CONN_COND_AUTO_WAIT 1168 +#define WT_STAT_CONN_COND_AUTO_WAIT 1169 /*! * connection: auto adjusting condition wait raced to update timeout and * skipped updating */ -#define WT_STAT_CONN_COND_AUTO_WAIT_SKIPPED 1169 +#define WT_STAT_CONN_COND_AUTO_WAIT_SKIPPED 1170 /*! connection: detected system time went backwards */ -#define WT_STAT_CONN_TIME_TRAVEL 1170 +#define WT_STAT_CONN_TIME_TRAVEL 1171 /*! connection: files currently open */ -#define WT_STAT_CONN_FILE_OPEN 1171 +#define WT_STAT_CONN_FILE_OPEN 1172 /*! connection: hash bucket array size for data handles */ -#define WT_STAT_CONN_BUCKETS_DH 1172 +#define WT_STAT_CONN_BUCKETS_DH 1173 /*! connection: hash bucket array size general */ -#define WT_STAT_CONN_BUCKETS 1173 +#define WT_STAT_CONN_BUCKETS 1174 /*! connection: memory allocations */ -#define WT_STAT_CONN_MEMORY_ALLOCATION 1174 +#define WT_STAT_CONN_MEMORY_ALLOCATION 1175 /*! connection: memory frees */ -#define WT_STAT_CONN_MEMORY_FREE 1175 +#define WT_STAT_CONN_MEMORY_FREE 1176 /*! 
connection: memory re-allocations */ -#define WT_STAT_CONN_MEMORY_GROW 1176 +#define WT_STAT_CONN_MEMORY_GROW 1177 /*! connection: pthread mutex condition wait calls */ -#define WT_STAT_CONN_COND_WAIT 1177 +#define WT_STAT_CONN_COND_WAIT 1178 /*! connection: pthread mutex shared lock read-lock calls */ -#define WT_STAT_CONN_RWLOCK_READ 1178 +#define WT_STAT_CONN_RWLOCK_READ 1179 /*! connection: pthread mutex shared lock write-lock calls */ -#define WT_STAT_CONN_RWLOCK_WRITE 1179 +#define WT_STAT_CONN_RWLOCK_WRITE 1180 /*! connection: total fsync I/Os */ -#define WT_STAT_CONN_FSYNC_IO 1180 +#define WT_STAT_CONN_FSYNC_IO 1181 /*! connection: total read I/Os */ -#define WT_STAT_CONN_READ_IO 1181 +#define WT_STAT_CONN_READ_IO 1182 /*! connection: total write I/Os */ -#define WT_STAT_CONN_WRITE_IO 1182 +#define WT_STAT_CONN_WRITE_IO 1183 /*! cursor: Total number of entries skipped by cursor next calls */ -#define WT_STAT_CONN_CURSOR_NEXT_SKIP_TOTAL 1183 +#define WT_STAT_CONN_CURSOR_NEXT_SKIP_TOTAL 1184 /*! cursor: Total number of entries skipped by cursor prev calls */ -#define WT_STAT_CONN_CURSOR_PREV_SKIP_TOTAL 1184 +#define WT_STAT_CONN_CURSOR_PREV_SKIP_TOTAL 1185 /*! * cursor: Total number of entries skipped to position the history store * cursor */ -#define WT_STAT_CONN_CURSOR_SKIP_HS_CUR_POSITION 1185 +#define WT_STAT_CONN_CURSOR_SKIP_HS_CUR_POSITION 1186 /*! cursor: cached cursor count */ -#define WT_STAT_CONN_CURSOR_CACHED_COUNT 1186 +#define WT_STAT_CONN_CURSOR_CACHED_COUNT 1187 /*! cursor: cursor bulk loaded cursor insert calls */ -#define WT_STAT_CONN_CURSOR_INSERT_BULK 1187 +#define WT_STAT_CONN_CURSOR_INSERT_BULK 1188 /*! cursor: cursor close calls that result in cache */ -#define WT_STAT_CONN_CURSOR_CACHE 1188 +#define WT_STAT_CONN_CURSOR_CACHE 1189 /*! cursor: cursor create calls */ -#define WT_STAT_CONN_CURSOR_CREATE 1189 +#define WT_STAT_CONN_CURSOR_CREATE 1190 /*! 
cursor: cursor insert calls */ -#define WT_STAT_CONN_CURSOR_INSERT 1190 +#define WT_STAT_CONN_CURSOR_INSERT 1191 /*! cursor: cursor insert key and value bytes */ -#define WT_STAT_CONN_CURSOR_INSERT_BYTES 1191 +#define WT_STAT_CONN_CURSOR_INSERT_BYTES 1192 /*! cursor: cursor modify calls */ -#define WT_STAT_CONN_CURSOR_MODIFY 1192 +#define WT_STAT_CONN_CURSOR_MODIFY 1193 /*! cursor: cursor modify key and value bytes affected */ -#define WT_STAT_CONN_CURSOR_MODIFY_BYTES 1193 +#define WT_STAT_CONN_CURSOR_MODIFY_BYTES 1194 /*! cursor: cursor modify value bytes modified */ -#define WT_STAT_CONN_CURSOR_MODIFY_BYTES_TOUCH 1194 +#define WT_STAT_CONN_CURSOR_MODIFY_BYTES_TOUCH 1195 /*! cursor: cursor next calls */ -#define WT_STAT_CONN_CURSOR_NEXT 1195 +#define WT_STAT_CONN_CURSOR_NEXT 1196 /*! * cursor: cursor next calls that skip due to a globally visible history * store tombstone */ -#define WT_STAT_CONN_CURSOR_NEXT_HS_TOMBSTONE 1196 +#define WT_STAT_CONN_CURSOR_NEXT_HS_TOMBSTONE 1197 /*! * cursor: cursor next calls that skip due to a globally visible history * store tombstone in rollback to stable */ -#define WT_STAT_CONN_CURSOR_NEXT_HS_TOMBSTONE_RTS 1197 +#define WT_STAT_CONN_CURSOR_NEXT_HS_TOMBSTONE_RTS 1198 /*! * cursor: cursor next calls that skip greater than or equal to 100 * entries */ -#define WT_STAT_CONN_CURSOR_NEXT_SKIP_GE_100 1198 +#define WT_STAT_CONN_CURSOR_NEXT_SKIP_GE_100 1199 /*! cursor: cursor next calls that skip less than 100 entries */ -#define WT_STAT_CONN_CURSOR_NEXT_SKIP_LT_100 1199 +#define WT_STAT_CONN_CURSOR_NEXT_SKIP_LT_100 1200 /*! cursor: cursor operation restarted */ -#define WT_STAT_CONN_CURSOR_RESTART 1200 +#define WT_STAT_CONN_CURSOR_RESTART 1201 /*! cursor: cursor prev calls */ -#define WT_STAT_CONN_CURSOR_PREV 1201 +#define WT_STAT_CONN_CURSOR_PREV 1202 /*! 
* cursor: cursor prev calls that skip due to a globally visible history * store tombstone */ -#define WT_STAT_CONN_CURSOR_PREV_HS_TOMBSTONE 1202 +#define WT_STAT_CONN_CURSOR_PREV_HS_TOMBSTONE 1203 /*! * cursor: cursor prev calls that skip due to a globally visible history * store tombstone in rollback to stable */ -#define WT_STAT_CONN_CURSOR_PREV_HS_TOMBSTONE_RTS 1203 +#define WT_STAT_CONN_CURSOR_PREV_HS_TOMBSTONE_RTS 1204 /*! * cursor: cursor prev calls that skip greater than or equal to 100 * entries */ -#define WT_STAT_CONN_CURSOR_PREV_SKIP_GE_100 1204 +#define WT_STAT_CONN_CURSOR_PREV_SKIP_GE_100 1205 /*! cursor: cursor prev calls that skip less than 100 entries */ -#define WT_STAT_CONN_CURSOR_PREV_SKIP_LT_100 1205 +#define WT_STAT_CONN_CURSOR_PREV_SKIP_LT_100 1206 /*! cursor: cursor remove calls */ -#define WT_STAT_CONN_CURSOR_REMOVE 1206 +#define WT_STAT_CONN_CURSOR_REMOVE 1207 /*! cursor: cursor remove key bytes removed */ -#define WT_STAT_CONN_CURSOR_REMOVE_BYTES 1207 +#define WT_STAT_CONN_CURSOR_REMOVE_BYTES 1208 /*! cursor: cursor reserve calls */ -#define WT_STAT_CONN_CURSOR_RESERVE 1208 +#define WT_STAT_CONN_CURSOR_RESERVE 1209 /*! cursor: cursor reset calls */ -#define WT_STAT_CONN_CURSOR_RESET 1209 +#define WT_STAT_CONN_CURSOR_RESET 1210 /*! cursor: cursor search calls */ -#define WT_STAT_CONN_CURSOR_SEARCH 1210 +#define WT_STAT_CONN_CURSOR_SEARCH 1211 /*! cursor: cursor search history store calls */ -#define WT_STAT_CONN_CURSOR_SEARCH_HS 1211 +#define WT_STAT_CONN_CURSOR_SEARCH_HS 1212 /*! cursor: cursor search near calls */ -#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1212 +#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1213 /*! cursor: cursor sweep buckets */ -#define WT_STAT_CONN_CURSOR_SWEEP_BUCKETS 1213 +#define WT_STAT_CONN_CURSOR_SWEEP_BUCKETS 1214 /*! cursor: cursor sweep cursors closed */ -#define WT_STAT_CONN_CURSOR_SWEEP_CLOSED 1214 +#define WT_STAT_CONN_CURSOR_SWEEP_CLOSED 1215 /*! 
cursor: cursor sweep cursors examined */ -#define WT_STAT_CONN_CURSOR_SWEEP_EXAMINED 1215 +#define WT_STAT_CONN_CURSOR_SWEEP_EXAMINED 1216 /*! cursor: cursor sweeps */ -#define WT_STAT_CONN_CURSOR_SWEEP 1216 +#define WT_STAT_CONN_CURSOR_SWEEP 1217 /*! cursor: cursor truncate calls */ -#define WT_STAT_CONN_CURSOR_TRUNCATE 1217 +#define WT_STAT_CONN_CURSOR_TRUNCATE 1218 /*! cursor: cursor update calls */ -#define WT_STAT_CONN_CURSOR_UPDATE 1218 +#define WT_STAT_CONN_CURSOR_UPDATE 1219 /*! cursor: cursor update key and value bytes */ -#define WT_STAT_CONN_CURSOR_UPDATE_BYTES 1219 +#define WT_STAT_CONN_CURSOR_UPDATE_BYTES 1220 /*! cursor: cursor update value size change */ -#define WT_STAT_CONN_CURSOR_UPDATE_BYTES_CHANGED 1220 +#define WT_STAT_CONN_CURSOR_UPDATE_BYTES_CHANGED 1221 /*! cursor: cursors reused from cache */ -#define WT_STAT_CONN_CURSOR_REOPEN 1221 +#define WT_STAT_CONN_CURSOR_REOPEN 1222 /*! cursor: open cursor count */ -#define WT_STAT_CONN_CURSOR_OPEN_COUNT 1222 +#define WT_STAT_CONN_CURSOR_OPEN_COUNT 1223 /*! data-handle: connection data handle size */ -#define WT_STAT_CONN_DH_CONN_HANDLE_SIZE 1223 +#define WT_STAT_CONN_DH_CONN_HANDLE_SIZE 1224 /*! data-handle: connection data handles currently active */ -#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1224 +#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1225 /*! data-handle: connection sweep candidate became referenced */ -#define WT_STAT_CONN_DH_SWEEP_REF 1225 +#define WT_STAT_CONN_DH_SWEEP_REF 1226 /*! data-handle: connection sweep dhandles closed */ -#define WT_STAT_CONN_DH_SWEEP_CLOSE 1226 +#define WT_STAT_CONN_DH_SWEEP_CLOSE 1227 /*! data-handle: connection sweep dhandles removed from hash list */ -#define WT_STAT_CONN_DH_SWEEP_REMOVE 1227 +#define WT_STAT_CONN_DH_SWEEP_REMOVE 1228 /*! data-handle: connection sweep time-of-death sets */ -#define WT_STAT_CONN_DH_SWEEP_TOD 1228 +#define WT_STAT_CONN_DH_SWEEP_TOD 1229 /*! 
data-handle: connection sweeps */ -#define WT_STAT_CONN_DH_SWEEPS 1229 +#define WT_STAT_CONN_DH_SWEEPS 1230 /*! data-handle: session dhandles swept */ -#define WT_STAT_CONN_DH_SESSION_HANDLES 1230 +#define WT_STAT_CONN_DH_SESSION_HANDLES 1231 /*! data-handle: session sweep attempts */ -#define WT_STAT_CONN_DH_SESSION_SWEEPS 1231 +#define WT_STAT_CONN_DH_SESSION_SWEEPS 1232 /*! lock: checkpoint lock acquisitions */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1232 +#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1233 /*! lock: checkpoint lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1233 +#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1234 /*! lock: checkpoint lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1234 +#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1235 /*! lock: dhandle lock application thread time waiting (usecs) */ -#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1235 +#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1236 /*! lock: dhandle lock internal thread time waiting (usecs) */ -#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1236 +#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1237 /*! lock: dhandle read lock acquisitions */ -#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1237 +#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1238 /*! lock: dhandle write lock acquisitions */ -#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1238 +#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1239 /*! * lock: durable timestamp queue lock application thread time waiting * (usecs) */ -#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WAIT_APPLICATION 1239 +#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WAIT_APPLICATION 1240 /*! * lock: durable timestamp queue lock internal thread time waiting * (usecs) */ -#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WAIT_INTERNAL 1240 +#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WAIT_INTERNAL 1241 /*! 
lock: durable timestamp queue read lock acquisitions */ -#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_READ_COUNT 1241 +#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_READ_COUNT 1242 /*! lock: durable timestamp queue write lock acquisitions */ -#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WRITE_COUNT 1242 +#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WRITE_COUNT 1243 /*! lock: metadata lock acquisitions */ -#define WT_STAT_CONN_LOCK_METADATA_COUNT 1243 +#define WT_STAT_CONN_LOCK_METADATA_COUNT 1244 /*! lock: metadata lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1244 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1245 /*! lock: metadata lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1245 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1246 /*! * lock: read timestamp queue lock application thread time waiting * (usecs) */ -#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_APPLICATION 1246 +#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_APPLICATION 1247 /*! lock: read timestamp queue lock internal thread time waiting (usecs) */ -#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_INTERNAL 1247 +#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_INTERNAL 1248 /*! lock: read timestamp queue read lock acquisitions */ -#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_READ_COUNT 1248 +#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_READ_COUNT 1249 /*! lock: read timestamp queue write lock acquisitions */ -#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WRITE_COUNT 1249 +#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WRITE_COUNT 1250 /*! lock: schema lock acquisitions */ -#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1250 +#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1251 /*! lock: schema lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1251 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1252 /*! 
lock: schema lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1252 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1253 /*! * lock: table lock application thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1253 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1254 /*! * lock: table lock internal thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1254 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1255 /*! lock: table read lock acquisitions */ -#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1255 +#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1256 /*! lock: table write lock acquisitions */ -#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1256 +#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1257 /*! lock: txn global lock application thread time waiting (usecs) */ -#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_APPLICATION 1257 +#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_APPLICATION 1258 /*! lock: txn global lock internal thread time waiting (usecs) */ -#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_INTERNAL 1258 +#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_INTERNAL 1259 /*! lock: txn global read lock acquisitions */ -#define WT_STAT_CONN_LOCK_TXN_GLOBAL_READ_COUNT 1259 +#define WT_STAT_CONN_LOCK_TXN_GLOBAL_READ_COUNT 1260 /*! lock: txn global write lock acquisitions */ -#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WRITE_COUNT 1260 +#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WRITE_COUNT 1261 /*! log: busy returns attempting to switch slots */ -#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1261 +#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1262 /*! log: force archive time sleeping (usecs) */ -#define WT_STAT_CONN_LOG_FORCE_ARCHIVE_SLEEP 1262 +#define WT_STAT_CONN_LOG_FORCE_ARCHIVE_SLEEP 1263 /*! log: log bytes of payload data */ -#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1263 +#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1264 /*! 
log: log bytes written */ -#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1264 +#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1265 /*! log: log files manually zero-filled */ -#define WT_STAT_CONN_LOG_ZERO_FILLS 1265 +#define WT_STAT_CONN_LOG_ZERO_FILLS 1266 /*! log: log flush operations */ -#define WT_STAT_CONN_LOG_FLUSH 1266 +#define WT_STAT_CONN_LOG_FLUSH 1267 /*! log: log force write operations */ -#define WT_STAT_CONN_LOG_FORCE_WRITE 1267 +#define WT_STAT_CONN_LOG_FORCE_WRITE 1268 /*! log: log force write operations skipped */ -#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1268 +#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1269 /*! log: log records compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1269 +#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1270 /*! log: log records not compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1270 +#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1271 /*! log: log records too small to compress */ -#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1271 +#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1272 /*! log: log release advances write LSN */ -#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1272 +#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1273 /*! log: log scan operations */ -#define WT_STAT_CONN_LOG_SCANS 1273 +#define WT_STAT_CONN_LOG_SCANS 1274 /*! log: log scan records requiring two reads */ -#define WT_STAT_CONN_LOG_SCAN_REREADS 1274 +#define WT_STAT_CONN_LOG_SCAN_REREADS 1275 /*! log: log server thread advances write LSN */ -#define WT_STAT_CONN_LOG_WRITE_LSN 1275 +#define WT_STAT_CONN_LOG_WRITE_LSN 1276 /*! log: log server thread write LSN walk skipped */ -#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1276 +#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1277 /*! log: log sync operations */ -#define WT_STAT_CONN_LOG_SYNC 1277 +#define WT_STAT_CONN_LOG_SYNC 1278 /*! log: log sync time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DURATION 1278 +#define WT_STAT_CONN_LOG_SYNC_DURATION 1279 /*! 
log: log sync_dir operations */ -#define WT_STAT_CONN_LOG_SYNC_DIR 1279 +#define WT_STAT_CONN_LOG_SYNC_DIR 1280 /*! log: log sync_dir time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1280 +#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1281 /*! log: log write operations */ -#define WT_STAT_CONN_LOG_WRITES 1281 +#define WT_STAT_CONN_LOG_WRITES 1282 /*! log: logging bytes consolidated */ -#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1282 +#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1283 /*! log: maximum log file size */ -#define WT_STAT_CONN_LOG_MAX_FILESIZE 1283 +#define WT_STAT_CONN_LOG_MAX_FILESIZE 1284 /*! log: number of pre-allocated log files to create */ -#define WT_STAT_CONN_LOG_PREALLOC_MAX 1284 +#define WT_STAT_CONN_LOG_PREALLOC_MAX 1285 /*! log: pre-allocated log files not ready and missed */ -#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1285 +#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1286 /*! log: pre-allocated log files prepared */ -#define WT_STAT_CONN_LOG_PREALLOC_FILES 1286 +#define WT_STAT_CONN_LOG_PREALLOC_FILES 1287 /*! log: pre-allocated log files used */ -#define WT_STAT_CONN_LOG_PREALLOC_USED 1287 +#define WT_STAT_CONN_LOG_PREALLOC_USED 1288 /*! log: records processed by log scan */ -#define WT_STAT_CONN_LOG_SCAN_RECORDS 1288 +#define WT_STAT_CONN_LOG_SCAN_RECORDS 1289 /*! log: slot close lost race */ -#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1289 +#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1290 /*! log: slot close unbuffered waits */ -#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1290 +#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1291 /*! log: slot closures */ -#define WT_STAT_CONN_LOG_SLOT_CLOSES 1291 +#define WT_STAT_CONN_LOG_SLOT_CLOSES 1292 /*! log: slot join atomic update races */ -#define WT_STAT_CONN_LOG_SLOT_RACES 1292 +#define WT_STAT_CONN_LOG_SLOT_RACES 1293 /*! log: slot join calls atomic updates raced */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1293 +#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1294 /*! 
log: slot join calls did not yield */ -#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1294 +#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1295 /*! log: slot join calls found active slot closed */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1295 +#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1296 /*! log: slot join calls slept */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1296 +#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1297 /*! log: slot join calls yielded */ -#define WT_STAT_CONN_LOG_SLOT_YIELD 1297 +#define WT_STAT_CONN_LOG_SLOT_YIELD 1298 /*! log: slot join found active slot closed */ -#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1298 +#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1299 /*! log: slot joins yield time (usecs) */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1299 +#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1300 /*! log: slot transitions unable to find free slot */ -#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1300 +#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1301 /*! log: slot unbuffered writes */ -#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1301 +#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1302 /*! log: total in-memory size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_MEM 1302 +#define WT_STAT_CONN_LOG_COMPRESS_MEM 1303 /*! log: total log buffer size */ -#define WT_STAT_CONN_LOG_BUFFER_SIZE 1303 +#define WT_STAT_CONN_LOG_BUFFER_SIZE 1304 /*! log: total size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_LEN 1304 +#define WT_STAT_CONN_LOG_COMPRESS_LEN 1305 /*! log: written slots coalesced */ -#define WT_STAT_CONN_LOG_SLOT_COALESCED 1305 +#define WT_STAT_CONN_LOG_SLOT_COALESCED 1306 /*! log: yields waiting for previous log file close */ -#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1306 +#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1307 /*! perf: file system read latency histogram (bucket 1) - 10-49ms */ -#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT50 1307 +#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT50 1308 /*! 
perf: file system read latency histogram (bucket 2) - 50-99ms */ -#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT100 1308 +#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT100 1309 /*! perf: file system read latency histogram (bucket 3) - 100-249ms */ -#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT250 1309 +#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT250 1310 /*! perf: file system read latency histogram (bucket 4) - 250-499ms */ -#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT500 1310 +#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT500 1311 /*! perf: file system read latency histogram (bucket 5) - 500-999ms */ -#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT1000 1311 +#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT1000 1312 /*! perf: file system read latency histogram (bucket 6) - 1000ms+ */ -#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_GT1000 1312 +#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_GT1000 1313 /*! perf: file system write latency histogram (bucket 1) - 10-49ms */ -#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT50 1313 +#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT50 1314 /*! perf: file system write latency histogram (bucket 2) - 50-99ms */ -#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT100 1314 +#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT100 1315 /*! perf: file system write latency histogram (bucket 3) - 100-249ms */ -#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT250 1315 +#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT250 1316 /*! perf: file system write latency histogram (bucket 4) - 250-499ms */ -#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT500 1316 +#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT500 1317 /*! perf: file system write latency histogram (bucket 5) - 500-999ms */ -#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT1000 1317 +#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT1000 1318 /*! 
perf: file system write latency histogram (bucket 6) - 1000ms+ */ -#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_GT1000 1318 +#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_GT1000 1319 /*! perf: operation read latency histogram (bucket 1) - 100-249us */ -#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT250 1319 +#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT250 1320 /*! perf: operation read latency histogram (bucket 2) - 250-499us */ -#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT500 1320 +#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT500 1321 /*! perf: operation read latency histogram (bucket 3) - 500-999us */ -#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT1000 1321 +#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT1000 1322 /*! perf: operation read latency histogram (bucket 4) - 1000-9999us */ -#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT10000 1322 +#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT10000 1323 /*! perf: operation read latency histogram (bucket 5) - 10000us+ */ -#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_GT10000 1323 +#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_GT10000 1324 /*! perf: operation write latency histogram (bucket 1) - 100-249us */ -#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT250 1324 +#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT250 1325 /*! perf: operation write latency histogram (bucket 2) - 250-499us */ -#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT500 1325 +#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT500 1326 /*! perf: operation write latency histogram (bucket 3) - 500-999us */ -#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT1000 1326 +#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT1000 1327 /*! perf: operation write latency histogram (bucket 4) - 1000-9999us */ -#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT10000 1327 +#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT10000 1328 /*! 
perf: operation write latency histogram (bucket 5) - 10000us+ */ -#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_GT10000 1328 +#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_GT10000 1329 /*! reconciliation: approximate byte size of timestamps in pages written */ -#define WT_STAT_CONN_REC_TIME_WINDOW_BYTES_TS 1329 +#define WT_STAT_CONN_REC_TIME_WINDOW_BYTES_TS 1330 /*! * reconciliation: approximate byte size of transaction IDs in pages * written */ -#define WT_STAT_CONN_REC_TIME_WINDOW_BYTES_TXN 1330 +#define WT_STAT_CONN_REC_TIME_WINDOW_BYTES_TXN 1331 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1331 +#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1332 /*! reconciliation: maximum seconds spent in a reconciliation call */ -#define WT_STAT_CONN_REC_MAXIMUM_SECONDS 1332 +#define WT_STAT_CONN_REC_MAXIMUM_SECONDS 1333 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1333 +#define WT_STAT_CONN_REC_PAGES 1334 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1334 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1335 /*! * reconciliation: page reconciliation calls that resulted in values with * prepared transaction metadata */ -#define WT_STAT_CONN_REC_PAGES_WITH_PREPARE 1335 +#define WT_STAT_CONN_REC_PAGES_WITH_PREPARE 1336 /*! * reconciliation: page reconciliation calls that resulted in values with * timestamps */ -#define WT_STAT_CONN_REC_PAGES_WITH_TS 1336 +#define WT_STAT_CONN_REC_PAGES_WITH_TS 1337 /*! * reconciliation: page reconciliation calls that resulted in values with * transaction ids */ -#define WT_STAT_CONN_REC_PAGES_WITH_TXN 1337 +#define WT_STAT_CONN_REC_PAGES_WITH_TXN 1338 /*! reconciliation: pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE 1338 +#define WT_STAT_CONN_REC_PAGE_DELETE 1339 /*! 
* reconciliation: pages written including an aggregated newest start * durable timestamp */ -#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_START_DURABLE_TS 1339 +#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_START_DURABLE_TS 1340 /*! * reconciliation: pages written including an aggregated newest stop * durable timestamp */ -#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_DURABLE_TS 1340 +#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_DURABLE_TS 1341 /*! * reconciliation: pages written including an aggregated newest stop * timestamp */ -#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_TS 1341 +#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_TS 1342 /*! * reconciliation: pages written including an aggregated newest stop * transaction ID */ -#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_TXN 1342 +#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_TXN 1343 /*! * reconciliation: pages written including an aggregated oldest start * timestamp */ -#define WT_STAT_CONN_REC_TIME_AGGR_OLDEST_START_TS 1343 +#define WT_STAT_CONN_REC_TIME_AGGR_OLDEST_START_TS 1344 /*! * reconciliation: pages written including an aggregated oldest start * transaction ID */ -#define WT_STAT_CONN_REC_TIME_AGGR_OLDEST_START_TXN 1344 +#define WT_STAT_CONN_REC_TIME_AGGR_OLDEST_START_TXN 1345 /*! reconciliation: pages written including an aggregated prepare */ -#define WT_STAT_CONN_REC_TIME_AGGR_PREPARED 1345 +#define WT_STAT_CONN_REC_TIME_AGGR_PREPARED 1346 /*! reconciliation: pages written including at least one prepare state */ -#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_PREPARED 1346 +#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_PREPARED 1347 /*! * reconciliation: pages written including at least one start durable * timestamp */ -#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_DURABLE_START_TS 1347 +#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_DURABLE_START_TS 1348 /*! 
reconciliation: pages written including at least one start timestamp */ -#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_START_TS 1348 +#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_START_TS 1349 /*! * reconciliation: pages written including at least one start transaction * ID */ -#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_START_TXN 1349 +#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_START_TXN 1350 /*! * reconciliation: pages written including at least one stop durable * timestamp */ -#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_DURABLE_STOP_TS 1350 +#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_DURABLE_STOP_TS 1351 /*! reconciliation: pages written including at least one stop timestamp */ -#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_STOP_TS 1351 +#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_STOP_TS 1352 /*! * reconciliation: pages written including at least one stop transaction * ID */ -#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_STOP_TXN 1352 +#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_STOP_TXN 1353 /*! reconciliation: records written including a prepare state */ -#define WT_STAT_CONN_REC_TIME_WINDOW_PREPARED 1353 +#define WT_STAT_CONN_REC_TIME_WINDOW_PREPARED 1354 /*! reconciliation: records written including a start durable timestamp */ -#define WT_STAT_CONN_REC_TIME_WINDOW_DURABLE_START_TS 1354 +#define WT_STAT_CONN_REC_TIME_WINDOW_DURABLE_START_TS 1355 /*! reconciliation: records written including a start timestamp */ -#define WT_STAT_CONN_REC_TIME_WINDOW_START_TS 1355 +#define WT_STAT_CONN_REC_TIME_WINDOW_START_TS 1356 /*! reconciliation: records written including a start transaction ID */ -#define WT_STAT_CONN_REC_TIME_WINDOW_START_TXN 1356 +#define WT_STAT_CONN_REC_TIME_WINDOW_START_TXN 1357 /*! reconciliation: records written including a stop durable timestamp */ -#define WT_STAT_CONN_REC_TIME_WINDOW_DURABLE_STOP_TS 1357 +#define WT_STAT_CONN_REC_TIME_WINDOW_DURABLE_STOP_TS 1358 /*! 
reconciliation: records written including a stop timestamp */ -#define WT_STAT_CONN_REC_TIME_WINDOW_STOP_TS 1358 +#define WT_STAT_CONN_REC_TIME_WINDOW_STOP_TS 1359 /*! reconciliation: records written including a stop transaction ID */ -#define WT_STAT_CONN_REC_TIME_WINDOW_STOP_TXN 1359 +#define WT_STAT_CONN_REC_TIME_WINDOW_STOP_TXN 1360 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1360 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1361 /*! reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1361 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1362 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1362 +#define WT_STAT_CONN_SESSION_OPEN 1363 /*! session: session query timestamp calls */ -#define WT_STAT_CONN_SESSION_QUERY_TS 1363 +#define WT_STAT_CONN_SESSION_QUERY_TS 1364 /*! session: table alter failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1364 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1365 /*! session: table alter successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1365 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1366 /*! session: table alter unchanged and skipped */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1366 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1367 /*! session: table compact failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1367 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1368 /*! session: table compact successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1368 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1369 /*! session: table create failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1369 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1370 /*! session: table create successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1370 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1371 /*! 
session: table drop failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1371 +#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1372 /*! session: table drop successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1372 +#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1373 /*! session: table import failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_IMPORT_FAIL 1373 +#define WT_STAT_CONN_SESSION_TABLE_IMPORT_FAIL 1374 /*! session: table import successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_IMPORT_SUCCESS 1374 -/*! session: table rebalance failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1375 -/*! session: table rebalance successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1376 +#define WT_STAT_CONN_SESSION_TABLE_IMPORT_SUCCESS 1375 /*! session: table rename failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1377 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1376 /*! session: table rename successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1378 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1377 /*! session: table salvage failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1379 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1378 /*! session: table salvage successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1380 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1379 /*! session: table truncate failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1381 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1380 /*! session: table truncate successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1382 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1381 /*! session: table verify failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1383 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1382 /*! 
session: table verify successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1384 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1383 /*! thread-state: active filesystem fsync calls */ -#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1385 +#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1384 /*! thread-state: active filesystem read calls */ -#define WT_STAT_CONN_THREAD_READ_ACTIVE 1386 +#define WT_STAT_CONN_THREAD_READ_ACTIVE 1385 /*! thread-state: active filesystem write calls */ -#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1387 +#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1386 /*! thread-yield: application thread time evicting (usecs) */ -#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1388 +#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1387 /*! thread-yield: application thread time waiting for cache (usecs) */ -#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1389 +#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1388 /*! * thread-yield: connection close blocked waiting for transaction state * stabilization */ -#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1390 +#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1389 /*! thread-yield: connection close yielded for lsm manager shutdown */ -#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1391 +#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1390 /*! thread-yield: data handle lock yielded */ -#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1392 +#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1391 /*! * thread-yield: get reference for page index and slot time sleeping * (usecs) */ -#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1393 +#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1392 /*! thread-yield: log server sync yielded for log write */ -#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1394 +#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1393 /*! thread-yield: page access yielded due to prepare state change */ -#define WT_STAT_CONN_PREPARED_TRANSITION_BLOCKED_PAGE 1395 +#define WT_STAT_CONN_PREPARED_TRANSITION_BLOCKED_PAGE 1394 /*! 
thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1396 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1395 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1397 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1396 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1398 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1397 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1399 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1398 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1400 +#define WT_STAT_CONN_PAGE_SLEEP 1399 /*! * thread-yield: page delete rollback time sleeping for state change * (usecs) */ -#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1401 +#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1400 /*! thread-yield: page reconciliation yielded due to child modification */ -#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1402 +#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1401 /*! transaction: Number of prepared updates */ -#define WT_STAT_CONN_TXN_PREPARED_UPDATES_COUNT 1403 +#define WT_STAT_CONN_TXN_PREPARED_UPDATES_COUNT 1402 /*! transaction: durable timestamp queue entries walked */ -#define WT_STAT_CONN_TXN_DURABLE_QUEUE_WALKED 1404 +#define WT_STAT_CONN_TXN_DURABLE_QUEUE_WALKED 1403 /*! transaction: durable timestamp queue insert to empty */ -#define WT_STAT_CONN_TXN_DURABLE_QUEUE_EMPTY 1405 +#define WT_STAT_CONN_TXN_DURABLE_QUEUE_EMPTY 1404 /*! transaction: durable timestamp queue inserts to head */ -#define WT_STAT_CONN_TXN_DURABLE_QUEUE_HEAD 1406 +#define WT_STAT_CONN_TXN_DURABLE_QUEUE_HEAD 1405 /*! transaction: durable timestamp queue inserts total */ -#define WT_STAT_CONN_TXN_DURABLE_QUEUE_INSERTS 1407 +#define WT_STAT_CONN_TXN_DURABLE_QUEUE_INSERTS 1406 /*! 
transaction: durable timestamp queue length */ -#define WT_STAT_CONN_TXN_DURABLE_QUEUE_LEN 1408 +#define WT_STAT_CONN_TXN_DURABLE_QUEUE_LEN 1407 /*! transaction: prepared transactions */ -#define WT_STAT_CONN_TXN_PREPARE 1409 +#define WT_STAT_CONN_TXN_PREPARE 1408 /*! transaction: prepared transactions committed */ -#define WT_STAT_CONN_TXN_PREPARE_COMMIT 1410 +#define WT_STAT_CONN_TXN_PREPARE_COMMIT 1409 /*! transaction: prepared transactions currently active */ -#define WT_STAT_CONN_TXN_PREPARE_ACTIVE 1411 +#define WT_STAT_CONN_TXN_PREPARE_ACTIVE 1410 /*! transaction: prepared transactions rolled back */ -#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK 1412 +#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK 1411 /*! transaction: query timestamp calls */ -#define WT_STAT_CONN_TXN_QUERY_TS 1413 +#define WT_STAT_CONN_TXN_QUERY_TS 1412 /*! transaction: race to read prepared update retry */ -#define WT_STAT_CONN_TXN_READ_RACE_PREPARE_UPDATE 1414 +#define WT_STAT_CONN_TXN_READ_RACE_PREPARE_UPDATE 1413 /*! transaction: read timestamp queue entries walked */ -#define WT_STAT_CONN_TXN_READ_QUEUE_WALKED 1415 +#define WT_STAT_CONN_TXN_READ_QUEUE_WALKED 1414 /*! transaction: read timestamp queue insert to empty */ -#define WT_STAT_CONN_TXN_READ_QUEUE_EMPTY 1416 +#define WT_STAT_CONN_TXN_READ_QUEUE_EMPTY 1415 /*! transaction: read timestamp queue inserts to head */ -#define WT_STAT_CONN_TXN_READ_QUEUE_HEAD 1417 +#define WT_STAT_CONN_TXN_READ_QUEUE_HEAD 1416 /*! transaction: read timestamp queue inserts total */ -#define WT_STAT_CONN_TXN_READ_QUEUE_INSERTS 1418 +#define WT_STAT_CONN_TXN_READ_QUEUE_INSERTS 1417 /*! transaction: read timestamp queue length */ -#define WT_STAT_CONN_TXN_READ_QUEUE_LEN 1419 +#define WT_STAT_CONN_TXN_READ_QUEUE_LEN 1418 /*! transaction: rollback to stable calls */ -#define WT_STAT_CONN_TXN_RTS 1420 +#define WT_STAT_CONN_TXN_RTS 1419 /*! 
* transaction: rollback to stable hs records with stop timestamps older * than newer records */ -#define WT_STAT_CONN_TXN_RTS_HS_STOP_OLDER_THAN_NEWER_START 1421 +#define WT_STAT_CONN_TXN_RTS_HS_STOP_OLDER_THAN_NEWER_START 1420 /*! transaction: rollback to stable keys removed */ -#define WT_STAT_CONN_TXN_RTS_KEYS_REMOVED 1422 +#define WT_STAT_CONN_TXN_RTS_KEYS_REMOVED 1421 /*! transaction: rollback to stable keys restored */ -#define WT_STAT_CONN_TXN_RTS_KEYS_RESTORED 1423 +#define WT_STAT_CONN_TXN_RTS_KEYS_RESTORED 1422 /*! transaction: rollback to stable pages visited */ -#define WT_STAT_CONN_TXN_RTS_PAGES_VISITED 1424 +#define WT_STAT_CONN_TXN_RTS_PAGES_VISITED 1423 /*! transaction: rollback to stable restored tombstones from history store */ -#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_TOMBSTONES 1425 +#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_TOMBSTONES 1424 /*! transaction: rollback to stable sweeping history store keys */ -#define WT_STAT_CONN_TXN_RTS_SWEEP_HS_KEYS 1426 +#define WT_STAT_CONN_TXN_RTS_SWEEP_HS_KEYS 1425 /*! transaction: rollback to stable tree walk skipping pages */ -#define WT_STAT_CONN_TXN_RTS_TREE_WALK_SKIP_PAGES 1427 +#define WT_STAT_CONN_TXN_RTS_TREE_WALK_SKIP_PAGES 1426 /*! transaction: rollback to stable updates aborted */ -#define WT_STAT_CONN_TXN_RTS_UPD_ABORTED 1428 +#define WT_STAT_CONN_TXN_RTS_UPD_ABORTED 1427 /*! transaction: rollback to stable updates removed from history store */ -#define WT_STAT_CONN_TXN_RTS_HS_REMOVED 1429 +#define WT_STAT_CONN_TXN_RTS_HS_REMOVED 1428 /*! transaction: set timestamp calls */ -#define WT_STAT_CONN_TXN_SET_TS 1430 +#define WT_STAT_CONN_TXN_SET_TS 1429 /*! transaction: set timestamp durable calls */ -#define WT_STAT_CONN_TXN_SET_TS_DURABLE 1431 +#define WT_STAT_CONN_TXN_SET_TS_DURABLE 1430 /*! transaction: set timestamp durable updates */ -#define WT_STAT_CONN_TXN_SET_TS_DURABLE_UPD 1432 +#define WT_STAT_CONN_TXN_SET_TS_DURABLE_UPD 1431 /*! 
transaction: set timestamp oldest calls */ -#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1433 +#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1432 /*! transaction: set timestamp oldest updates */ -#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1434 +#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1433 /*! transaction: set timestamp stable calls */ -#define WT_STAT_CONN_TXN_SET_TS_STABLE 1435 +#define WT_STAT_CONN_TXN_SET_TS_STABLE 1434 /*! transaction: set timestamp stable updates */ -#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1436 +#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1435 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1437 +#define WT_STAT_CONN_TXN_BEGIN 1436 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1438 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1437 /*! transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1439 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1438 /*! * transaction: transaction checkpoint history store file duration * (usecs) */ -#define WT_STAT_CONN_TXN_HS_CKPT_DURATION 1440 +#define WT_STAT_CONN_TXN_HS_CKPT_DURATION 1439 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1441 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1440 /*! transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1442 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1441 /*! * transaction: transaction checkpoint most recent duration for gathering * all handles (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION 1443 +#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION 1442 /*! * transaction: transaction checkpoint most recent duration for gathering * applied handles (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_APPLY 1444 +#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_APPLY 1443 /*! 
* transaction: transaction checkpoint most recent duration for gathering * skipped handles (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_SKIP 1445 +#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_SKIP 1444 /*! transaction: transaction checkpoint most recent handles applied */ -#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_APPLIED 1446 +#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_APPLIED 1445 /*! transaction: transaction checkpoint most recent handles skipped */ -#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_SKIPPED 1447 +#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_SKIPPED 1446 /*! transaction: transaction checkpoint most recent handles walked */ -#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_WALKED 1448 +#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_WALKED 1447 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1449 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1448 /*! transaction: transaction checkpoint prepare currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RUNNING 1450 +#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RUNNING 1449 /*! transaction: transaction checkpoint prepare max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MAX 1451 +#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MAX 1450 /*! transaction: transaction checkpoint prepare min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MIN 1452 +#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MIN 1451 /*! transaction: transaction checkpoint prepare most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RECENT 1453 +#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RECENT 1452 /*! transaction: transaction checkpoint prepare total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_TOTAL 1454 +#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_TOTAL 1453 /*! 
transaction: transaction checkpoint scrub dirty target */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1455 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1454 /*! transaction: transaction checkpoint scrub time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1456 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1455 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1457 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1456 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1458 +#define WT_STAT_CONN_TXN_CHECKPOINT 1457 /*! * transaction: transaction checkpoints skipped because database was * clean */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1459 +#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1458 /*! transaction: transaction failures due to history store */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1460 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1459 /*! * transaction: transaction fsync calls for checkpoint after allocating * the transaction ID */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1461 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1460 /*! * transaction: transaction fsync duration for checkpoint after * allocating the transaction ID (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1462 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1461 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1463 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1462 /*! transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1464 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1463 /*! transaction: transaction range of timestamps currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1465 +#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1464 /*! 
transaction: transaction range of timestamps pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1466 +#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1465 /*! * transaction: transaction range of timestamps pinned by the oldest * active read timestamp */ -#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_READER 1467 +#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_READER 1466 /*! * transaction: transaction range of timestamps pinned by the oldest * timestamp */ -#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1468 +#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1467 /*! transaction: transaction read timestamp of the oldest active reader */ -#define WT_STAT_CONN_TXN_TIMESTAMP_OLDEST_ACTIVE_READ 1469 +#define WT_STAT_CONN_TXN_TIMESTAMP_OLDEST_ACTIVE_READ 1468 /*! transaction: transaction sync calls */ -#define WT_STAT_CONN_TXN_SYNC 1470 +#define WT_STAT_CONN_TXN_SYNC 1469 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1471 +#define WT_STAT_CONN_TXN_COMMIT 1470 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1472 +#define WT_STAT_CONN_TXN_ROLLBACK 1471 /*! transaction: update conflicts */ -#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1473 +#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1472 /*! * @} @@ -5983,372 +5965,374 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_DSRC_CACHE_EVICTION_WALKS_GAVE_UP_RATIO 2057 /*! cache: eviction walks reached end of tree */ #define WT_STAT_DSRC_CACHE_EVICTION_WALKS_ENDED 2058 +/*! cache: eviction walks restarted */ +#define WT_STAT_DSRC_CACHE_EVICTION_WALK_RESTART 2059 /*! cache: eviction walks started from root of tree */ -#define WT_STAT_DSRC_CACHE_EVICTION_WALK_FROM_ROOT 2059 +#define WT_STAT_DSRC_CACHE_EVICTION_WALK_FROM_ROOT 2060 /*! cache: eviction walks started from saved location in tree */ -#define WT_STAT_DSRC_CACHE_EVICTION_WALK_SAVED_POS 2060 +#define WT_STAT_DSRC_CACHE_EVICTION_WALK_SAVED_POS 2061 /*! 
cache: hazard pointer blocked page eviction */ -#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2061 +#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2062 /*! cache: history store table reads */ -#define WT_STAT_DSRC_CACHE_HS_READ 2062 +#define WT_STAT_DSRC_CACHE_HS_READ 2063 /*! cache: in-memory page passed criteria to be split */ -#define WT_STAT_DSRC_CACHE_INMEM_SPLITTABLE 2063 +#define WT_STAT_DSRC_CACHE_INMEM_SPLITTABLE 2064 /*! cache: in-memory page splits */ -#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2064 +#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2065 /*! cache: internal pages evicted */ -#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2065 +#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2066 /*! cache: internal pages split during eviction */ -#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_INTERNAL 2066 +#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_INTERNAL 2067 /*! cache: leaf pages split during eviction */ -#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_LEAF 2067 +#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_LEAF 2068 /*! cache: modified pages evicted */ -#define WT_STAT_DSRC_CACHE_EVICTION_DIRTY 2068 +#define WT_STAT_DSRC_CACHE_EVICTION_DIRTY 2069 /*! cache: overflow pages read into cache */ -#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2069 +#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2070 /*! cache: page split during eviction deepened the tree */ -#define WT_STAT_DSRC_CACHE_EVICTION_DEEPEN 2070 +#define WT_STAT_DSRC_CACHE_EVICTION_DEEPEN 2071 /*! cache: page written requiring history store records */ -#define WT_STAT_DSRC_CACHE_WRITE_HS 2071 +#define WT_STAT_DSRC_CACHE_WRITE_HS 2072 /*! cache: pages read into cache */ -#define WT_STAT_DSRC_CACHE_READ 2072 +#define WT_STAT_DSRC_CACHE_READ 2073 /*! cache: pages read into cache after truncate */ -#define WT_STAT_DSRC_CACHE_READ_DELETED 2073 +#define WT_STAT_DSRC_CACHE_READ_DELETED 2074 /*! 
cache: pages read into cache after truncate in prepare state */ -#define WT_STAT_DSRC_CACHE_READ_DELETED_PREPARED 2074 +#define WT_STAT_DSRC_CACHE_READ_DELETED_PREPARED 2075 /*! cache: pages requested from the cache */ -#define WT_STAT_DSRC_CACHE_PAGES_REQUESTED 2075 +#define WT_STAT_DSRC_CACHE_PAGES_REQUESTED 2076 /*! cache: pages seen by eviction walk */ -#define WT_STAT_DSRC_CACHE_EVICTION_PAGES_SEEN 2076 +#define WT_STAT_DSRC_CACHE_EVICTION_PAGES_SEEN 2077 /*! cache: pages written from cache */ -#define WT_STAT_DSRC_CACHE_WRITE 2077 +#define WT_STAT_DSRC_CACHE_WRITE 2078 /*! cache: pages written requiring in-memory restoration */ -#define WT_STAT_DSRC_CACHE_WRITE_RESTORE 2078 +#define WT_STAT_DSRC_CACHE_WRITE_RESTORE 2079 /*! cache: tracked dirty bytes in the cache */ -#define WT_STAT_DSRC_CACHE_BYTES_DIRTY 2079 +#define WT_STAT_DSRC_CACHE_BYTES_DIRTY 2080 /*! cache: unmodified pages evicted */ -#define WT_STAT_DSRC_CACHE_EVICTION_CLEAN 2080 +#define WT_STAT_DSRC_CACHE_EVICTION_CLEAN 2081 /*! * cache_walk: Average difference between current eviction generation * when the page was last considered, only reported if cache_walk or all * statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_GEN_AVG_GAP 2081 +#define WT_STAT_DSRC_CACHE_STATE_GEN_AVG_GAP 2082 /*! * cache_walk: Average on-disk page image size seen, only reported if * cache_walk or all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_AVG_WRITTEN_SIZE 2082 +#define WT_STAT_DSRC_CACHE_STATE_AVG_WRITTEN_SIZE 2083 /*! * cache_walk: Average time in cache for pages that have been visited by * the eviction server, only reported if cache_walk or all statistics are * enabled */ -#define WT_STAT_DSRC_CACHE_STATE_AVG_VISITED_AGE 2083 +#define WT_STAT_DSRC_CACHE_STATE_AVG_VISITED_AGE 2084 /*! 
* cache_walk: Average time in cache for pages that have not been visited * by the eviction server, only reported if cache_walk or all statistics * are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_AVG_UNVISITED_AGE 2084 +#define WT_STAT_DSRC_CACHE_STATE_AVG_UNVISITED_AGE 2085 /*! * cache_walk: Clean pages currently in cache, only reported if * cache_walk or all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_PAGES_CLEAN 2085 +#define WT_STAT_DSRC_CACHE_STATE_PAGES_CLEAN 2086 /*! * cache_walk: Current eviction generation, only reported if cache_walk * or all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_GEN_CURRENT 2086 +#define WT_STAT_DSRC_CACHE_STATE_GEN_CURRENT 2087 /*! * cache_walk: Dirty pages currently in cache, only reported if * cache_walk or all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_PAGES_DIRTY 2087 +#define WT_STAT_DSRC_CACHE_STATE_PAGES_DIRTY 2088 /*! * cache_walk: Entries in the root page, only reported if cache_walk or * all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_ROOT_ENTRIES 2088 +#define WT_STAT_DSRC_CACHE_STATE_ROOT_ENTRIES 2089 /*! * cache_walk: Internal pages currently in cache, only reported if * cache_walk or all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_PAGES_INTERNAL 2089 +#define WT_STAT_DSRC_CACHE_STATE_PAGES_INTERNAL 2090 /*! * cache_walk: Leaf pages currently in cache, only reported if cache_walk * or all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_PAGES_LEAF 2090 +#define WT_STAT_DSRC_CACHE_STATE_PAGES_LEAF 2091 /*! * cache_walk: Maximum difference between current eviction generation * when the page was last considered, only reported if cache_walk or all * statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_GEN_MAX_GAP 2091 +#define WT_STAT_DSRC_CACHE_STATE_GEN_MAX_GAP 2092 /*! 
* cache_walk: Maximum page size seen, only reported if cache_walk or all * statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_MAX_PAGESIZE 2092 +#define WT_STAT_DSRC_CACHE_STATE_MAX_PAGESIZE 2093 /*! * cache_walk: Minimum on-disk page image size seen, only reported if * cache_walk or all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_MIN_WRITTEN_SIZE 2093 +#define WT_STAT_DSRC_CACHE_STATE_MIN_WRITTEN_SIZE 2094 /*! * cache_walk: Number of pages never visited by eviction server, only * reported if cache_walk or all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_UNVISITED_COUNT 2094 +#define WT_STAT_DSRC_CACHE_STATE_UNVISITED_COUNT 2095 /*! * cache_walk: On-disk page image sizes smaller than a single allocation * unit, only reported if cache_walk or all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_SMALLER_ALLOC_SIZE 2095 +#define WT_STAT_DSRC_CACHE_STATE_SMALLER_ALLOC_SIZE 2096 /*! * cache_walk: Pages created in memory and never written, only reported * if cache_walk or all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_MEMORY 2096 +#define WT_STAT_DSRC_CACHE_STATE_MEMORY 2097 /*! * cache_walk: Pages currently queued for eviction, only reported if * cache_walk or all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_QUEUED 2097 +#define WT_STAT_DSRC_CACHE_STATE_QUEUED 2098 /*! * cache_walk: Pages that could not be queued for eviction, only reported * if cache_walk or all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_NOT_QUEUEABLE 2098 +#define WT_STAT_DSRC_CACHE_STATE_NOT_QUEUEABLE 2099 /*! * cache_walk: Refs skipped during cache traversal, only reported if * cache_walk or all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_REFS_SKIPPED 2099 +#define WT_STAT_DSRC_CACHE_STATE_REFS_SKIPPED 2100 /*! 
* cache_walk: Size of the root page, only reported if cache_walk or all * statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_ROOT_SIZE 2100 +#define WT_STAT_DSRC_CACHE_STATE_ROOT_SIZE 2101 /*! * cache_walk: Total number of pages currently in cache, only reported if * cache_walk or all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_PAGES 2101 +#define WT_STAT_DSRC_CACHE_STATE_PAGES 2102 /*! checkpoint-cleanup: pages added for eviction */ -#define WT_STAT_DSRC_CC_PAGES_EVICT 2102 +#define WT_STAT_DSRC_CC_PAGES_EVICT 2103 /*! checkpoint-cleanup: pages removed */ -#define WT_STAT_DSRC_CC_PAGES_REMOVED 2103 +#define WT_STAT_DSRC_CC_PAGES_REMOVED 2104 /*! checkpoint-cleanup: pages skipped during tree walk */ -#define WT_STAT_DSRC_CC_PAGES_WALK_SKIPPED 2104 +#define WT_STAT_DSRC_CC_PAGES_WALK_SKIPPED 2105 /*! checkpoint-cleanup: pages visited */ -#define WT_STAT_DSRC_CC_PAGES_VISITED 2105 +#define WT_STAT_DSRC_CC_PAGES_VISITED 2106 /*! * compression: compressed page maximum internal page size prior to * compression */ -#define WT_STAT_DSRC_COMPRESS_PRECOMP_INTL_MAX_PAGE_SIZE 2106 +#define WT_STAT_DSRC_COMPRESS_PRECOMP_INTL_MAX_PAGE_SIZE 2107 /*! * compression: compressed page maximum leaf page size prior to * compression */ -#define WT_STAT_DSRC_COMPRESS_PRECOMP_LEAF_MAX_PAGE_SIZE 2107 +#define WT_STAT_DSRC_COMPRESS_PRECOMP_LEAF_MAX_PAGE_SIZE 2108 /*! compression: compressed pages read */ -#define WT_STAT_DSRC_COMPRESS_READ 2108 +#define WT_STAT_DSRC_COMPRESS_READ 2109 /*! compression: compressed pages written */ -#define WT_STAT_DSRC_COMPRESS_WRITE 2109 +#define WT_STAT_DSRC_COMPRESS_WRITE 2110 /*! compression: page written failed to compress */ -#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2110 +#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2111 /*! compression: page written was too small to compress */ -#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2111 +#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2112 /*! 
cursor: Total number of entries skipped by cursor next calls */ -#define WT_STAT_DSRC_CURSOR_NEXT_SKIP_TOTAL 2112 +#define WT_STAT_DSRC_CURSOR_NEXT_SKIP_TOTAL 2113 /*! cursor: Total number of entries skipped by cursor prev calls */ -#define WT_STAT_DSRC_CURSOR_PREV_SKIP_TOTAL 2113 +#define WT_STAT_DSRC_CURSOR_PREV_SKIP_TOTAL 2114 /*! * cursor: Total number of entries skipped to position the history store * cursor */ -#define WT_STAT_DSRC_CURSOR_SKIP_HS_CUR_POSITION 2114 +#define WT_STAT_DSRC_CURSOR_SKIP_HS_CUR_POSITION 2115 /*! cursor: bulk loaded cursor insert calls */ -#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2115 +#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2116 /*! cursor: cache cursors reuse count */ -#define WT_STAT_DSRC_CURSOR_REOPEN 2116 +#define WT_STAT_DSRC_CURSOR_REOPEN 2117 /*! cursor: close calls that result in cache */ -#define WT_STAT_DSRC_CURSOR_CACHE 2117 +#define WT_STAT_DSRC_CURSOR_CACHE 2118 /*! cursor: create calls */ -#define WT_STAT_DSRC_CURSOR_CREATE 2118 +#define WT_STAT_DSRC_CURSOR_CREATE 2119 /*! * cursor: cursor next calls that skip greater than or equal to 100 * entries */ -#define WT_STAT_DSRC_CURSOR_NEXT_SKIP_GE_100 2119 +#define WT_STAT_DSRC_CURSOR_NEXT_SKIP_GE_100 2120 /*! cursor: cursor next calls that skip less than 100 entries */ -#define WT_STAT_DSRC_CURSOR_NEXT_SKIP_LT_100 2120 +#define WT_STAT_DSRC_CURSOR_NEXT_SKIP_LT_100 2121 /*! * cursor: cursor prev calls that skip greater than or equal to 100 * entries */ -#define WT_STAT_DSRC_CURSOR_PREV_SKIP_GE_100 2121 +#define WT_STAT_DSRC_CURSOR_PREV_SKIP_GE_100 2122 /*! cursor: cursor prev calls that skip less than 100 entries */ -#define WT_STAT_DSRC_CURSOR_PREV_SKIP_LT_100 2122 +#define WT_STAT_DSRC_CURSOR_PREV_SKIP_LT_100 2123 /*! cursor: insert calls */ -#define WT_STAT_DSRC_CURSOR_INSERT 2123 +#define WT_STAT_DSRC_CURSOR_INSERT 2124 /*! cursor: insert key and value bytes */ -#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2124 +#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2125 /*! 
cursor: modify */ -#define WT_STAT_DSRC_CURSOR_MODIFY 2125 +#define WT_STAT_DSRC_CURSOR_MODIFY 2126 /*! cursor: modify key and value bytes affected */ -#define WT_STAT_DSRC_CURSOR_MODIFY_BYTES 2126 +#define WT_STAT_DSRC_CURSOR_MODIFY_BYTES 2127 /*! cursor: modify value bytes modified */ -#define WT_STAT_DSRC_CURSOR_MODIFY_BYTES_TOUCH 2127 +#define WT_STAT_DSRC_CURSOR_MODIFY_BYTES_TOUCH 2128 /*! cursor: next calls */ -#define WT_STAT_DSRC_CURSOR_NEXT 2128 +#define WT_STAT_DSRC_CURSOR_NEXT 2129 /*! cursor: open cursor count */ -#define WT_STAT_DSRC_CURSOR_OPEN_COUNT 2129 +#define WT_STAT_DSRC_CURSOR_OPEN_COUNT 2130 /*! cursor: operation restarted */ -#define WT_STAT_DSRC_CURSOR_RESTART 2130 +#define WT_STAT_DSRC_CURSOR_RESTART 2131 /*! cursor: prev calls */ -#define WT_STAT_DSRC_CURSOR_PREV 2131 +#define WT_STAT_DSRC_CURSOR_PREV 2132 /*! cursor: remove calls */ -#define WT_STAT_DSRC_CURSOR_REMOVE 2132 +#define WT_STAT_DSRC_CURSOR_REMOVE 2133 /*! cursor: remove key bytes removed */ -#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2133 +#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2134 /*! cursor: reserve calls */ -#define WT_STAT_DSRC_CURSOR_RESERVE 2134 +#define WT_STAT_DSRC_CURSOR_RESERVE 2135 /*! cursor: reset calls */ -#define WT_STAT_DSRC_CURSOR_RESET 2135 +#define WT_STAT_DSRC_CURSOR_RESET 2136 /*! cursor: search calls */ -#define WT_STAT_DSRC_CURSOR_SEARCH 2136 +#define WT_STAT_DSRC_CURSOR_SEARCH 2137 /*! cursor: search history store calls */ -#define WT_STAT_DSRC_CURSOR_SEARCH_HS 2137 +#define WT_STAT_DSRC_CURSOR_SEARCH_HS 2138 /*! cursor: search near calls */ -#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2138 +#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2139 /*! cursor: truncate calls */ -#define WT_STAT_DSRC_CURSOR_TRUNCATE 2139 +#define WT_STAT_DSRC_CURSOR_TRUNCATE 2140 /*! cursor: update calls */ -#define WT_STAT_DSRC_CURSOR_UPDATE 2140 +#define WT_STAT_DSRC_CURSOR_UPDATE 2141 /*! 
cursor: update key and value bytes */ -#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2141 +#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2142 /*! cursor: update value size change */ -#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES_CHANGED 2142 +#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES_CHANGED 2143 /*! reconciliation: approximate byte size of timestamps in pages written */ -#define WT_STAT_DSRC_REC_TIME_WINDOW_BYTES_TS 2143 +#define WT_STAT_DSRC_REC_TIME_WINDOW_BYTES_TS 2144 /*! * reconciliation: approximate byte size of transaction IDs in pages * written */ -#define WT_STAT_DSRC_REC_TIME_WINDOW_BYTES_TXN 2144 +#define WT_STAT_DSRC_REC_TIME_WINDOW_BYTES_TXN 2145 /*! reconciliation: dictionary matches */ -#define WT_STAT_DSRC_REC_DICTIONARY 2145 +#define WT_STAT_DSRC_REC_DICTIONARY 2146 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2146 +#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2147 /*! * reconciliation: internal page key bytes discarded using suffix * compression */ -#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2147 +#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2148 /*! reconciliation: internal page multi-block writes */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2148 +#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2149 /*! reconciliation: internal-page overflow keys */ -#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2149 +#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2150 /*! reconciliation: leaf page key bytes discarded using prefix compression */ -#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2150 +#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2151 /*! reconciliation: leaf page multi-block writes */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2151 +#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2152 /*! reconciliation: leaf-page overflow keys */ -#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2152 +#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2153 /*! 
reconciliation: maximum blocks required for a page */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2153 +#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2154 /*! reconciliation: overflow values written */ -#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2154 +#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2155 /*! reconciliation: page checksum matches */ -#define WT_STAT_DSRC_REC_PAGE_MATCH 2155 +#define WT_STAT_DSRC_REC_PAGE_MATCH 2156 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_DSRC_REC_PAGES 2156 +#define WT_STAT_DSRC_REC_PAGES 2157 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_DSRC_REC_PAGES_EVICTION 2157 +#define WT_STAT_DSRC_REC_PAGES_EVICTION 2158 /*! reconciliation: pages deleted */ -#define WT_STAT_DSRC_REC_PAGE_DELETE 2158 +#define WT_STAT_DSRC_REC_PAGE_DELETE 2159 /*! * reconciliation: pages written including an aggregated newest start * durable timestamp */ -#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_START_DURABLE_TS 2159 +#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_START_DURABLE_TS 2160 /*! * reconciliation: pages written including an aggregated newest stop * durable timestamp */ -#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_STOP_DURABLE_TS 2160 +#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_STOP_DURABLE_TS 2161 /*! * reconciliation: pages written including an aggregated newest stop * timestamp */ -#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_STOP_TS 2161 +#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_STOP_TS 2162 /*! * reconciliation: pages written including an aggregated newest stop * transaction ID */ -#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_STOP_TXN 2162 +#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_STOP_TXN 2163 /*! * reconciliation: pages written including an aggregated oldest start * timestamp */ -#define WT_STAT_DSRC_REC_TIME_AGGR_OLDEST_START_TS 2163 +#define WT_STAT_DSRC_REC_TIME_AGGR_OLDEST_START_TS 2164 /*! 
* reconciliation: pages written including an aggregated oldest start * transaction ID */ -#define WT_STAT_DSRC_REC_TIME_AGGR_OLDEST_START_TXN 2164 +#define WT_STAT_DSRC_REC_TIME_AGGR_OLDEST_START_TXN 2165 /*! reconciliation: pages written including an aggregated prepare */ -#define WT_STAT_DSRC_REC_TIME_AGGR_PREPARED 2165 +#define WT_STAT_DSRC_REC_TIME_AGGR_PREPARED 2166 /*! reconciliation: pages written including at least one prepare */ -#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_PREPARED 2166 +#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_PREPARED 2167 /*! * reconciliation: pages written including at least one start durable * timestamp */ -#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_DURABLE_START_TS 2167 +#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_DURABLE_START_TS 2168 /*! reconciliation: pages written including at least one start timestamp */ -#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_START_TS 2168 +#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_START_TS 2169 /*! * reconciliation: pages written including at least one start transaction * ID */ -#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_START_TXN 2169 +#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_START_TXN 2170 /*! * reconciliation: pages written including at least one stop durable * timestamp */ -#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_DURABLE_STOP_TS 2170 +#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_DURABLE_STOP_TS 2171 /*! reconciliation: pages written including at least one stop timestamp */ -#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_STOP_TS 2171 +#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_STOP_TS 2172 /*! * reconciliation: pages written including at least one stop transaction * ID */ -#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_STOP_TXN 2172 +#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_STOP_TXN 2173 /*! reconciliation: records written including a prepare */ -#define WT_STAT_DSRC_REC_TIME_WINDOW_PREPARED 2173 +#define WT_STAT_DSRC_REC_TIME_WINDOW_PREPARED 2174 /*! 
reconciliation: records written including a start durable timestamp */ -#define WT_STAT_DSRC_REC_TIME_WINDOW_DURABLE_START_TS 2174 +#define WT_STAT_DSRC_REC_TIME_WINDOW_DURABLE_START_TS 2175 /*! reconciliation: records written including a start timestamp */ -#define WT_STAT_DSRC_REC_TIME_WINDOW_START_TS 2175 +#define WT_STAT_DSRC_REC_TIME_WINDOW_START_TS 2176 /*! reconciliation: records written including a start transaction ID */ -#define WT_STAT_DSRC_REC_TIME_WINDOW_START_TXN 2176 +#define WT_STAT_DSRC_REC_TIME_WINDOW_START_TXN 2177 /*! reconciliation: records written including a stop durable timestamp */ -#define WT_STAT_DSRC_REC_TIME_WINDOW_DURABLE_STOP_TS 2177 +#define WT_STAT_DSRC_REC_TIME_WINDOW_DURABLE_STOP_TS 2178 /*! reconciliation: records written including a stop timestamp */ -#define WT_STAT_DSRC_REC_TIME_WINDOW_STOP_TS 2178 +#define WT_STAT_DSRC_REC_TIME_WINDOW_STOP_TS 2179 /*! reconciliation: records written including a stop transaction ID */ -#define WT_STAT_DSRC_REC_TIME_WINDOW_STOP_TXN 2179 +#define WT_STAT_DSRC_REC_TIME_WINDOW_STOP_TXN 2180 /*! session: object compaction */ -#define WT_STAT_DSRC_SESSION_COMPACT 2180 +#define WT_STAT_DSRC_SESSION_COMPACT 2181 /*! transaction: race to read prepared update retry */ -#define WT_STAT_DSRC_TXN_READ_RACE_PREPARE_UPDATE 2181 +#define WT_STAT_DSRC_TXN_READ_RACE_PREPARE_UPDATE 2182 /*! transaction: update conflicts */ -#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2182 +#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2183 /*! 
* @} diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c index 69b807eab5c..46dce80113c 100644 --- a/src/third_party/wiredtiger/src/log/log.c +++ b/src/third_party/wiredtiger/src/log/log.c @@ -232,7 +232,7 @@ __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckpt_lsn) conn = S2C(session); log = conn->log; - log->ckpt_lsn = *ckpt_lsn; + WT_ASSIGN_LSN(&log->ckpt_lsn, ckpt_lsn); if (conn->log_cond != NULL) __wt_cond_signal(session, conn->log_cond); /* @@ -262,9 +262,9 @@ __wt_log_flush_lsn(WT_SESSION_IMPL *session, WT_LSN *lsn, bool start) WT_RET(__wt_log_force_write(session, 1, NULL)); __wt_log_wrlsn(session, NULL); if (start) - *lsn = log->write_start_lsn; + WT_ASSIGN_LSN(lsn, &log->write_start_lsn); else - *lsn = log->write_lsn; + WT_ASSIGN_LSN(lsn, &log->write_lsn); return (0); } @@ -285,14 +285,14 @@ __wt_log_background(WT_SESSION_IMPL *session, WT_LSN *lsn) */ if (__wt_log_cmp(&session->bg_sync_lsn, lsn) > 0) return; - session->bg_sync_lsn = *lsn; + WT_ASSIGN_LSN(&session->bg_sync_lsn, lsn); /* * Advance the logging subsystem background sync LSN if needed. 
*/ __wt_spin_lock(session, &log->log_sync_lock); if (__wt_log_cmp(lsn, &log->bg_sync_lsn) > 0) - log->bg_sync_lsn = *lsn; + WT_ASSIGN_LSN(&log->bg_sync_lsn, lsn); __wt_spin_unlock(session, &log->log_sync_lock); __wt_cond_signal(session, conn->log_file_cond); } @@ -334,7 +334,7 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn) WT_ERR(__wt_fsync(session, log->log_dir_fh, true)); time_stop = __wt_clock(session); fsync_duration_usecs = WT_CLOCKDIFF_US(time_stop, time_start); - log->sync_dir_lsn = *min_lsn; + WT_ASSIGN_LSN(&log->sync_dir_lsn, min_lsn); WT_STAT_CONN_INCR(session, log_sync_dir); WT_STAT_CONN_INCRV(session, log_sync_dir_duration, fsync_duration_usecs); } @@ -354,7 +354,7 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn) WT_ERR(__wt_fsync(session, log_fh, true)); time_stop = __wt_clock(session); fsync_duration_usecs = WT_CLOCKDIFF_US(time_stop, time_start); - log->sync_lsn = *min_lsn; + WT_ASSIGN_LSN(&log->sync_lsn, min_lsn); WT_STAT_CONN_INCR(session, log_sync); WT_STAT_CONN_INCRV(session, log_sync_duration, fsync_duration_usecs); __wt_cond_signal(session, log->log_sync_cond); @@ -816,7 +816,7 @@ __log_file_header(WT_SESSION_IMPL *session, WT_FH *fh, WT_LSN *end_lsn, bool pre */ WT_ERR(__wt_fsync(session, tmp.slot_fh, true)); if (end_lsn != NULL) - *end_lsn = tmp.slot_end_lsn; + WT_ASSIGN_LSN(end_lsn, &tmp.slot_end_lsn); err: __wt_scr_free(session, &buf); @@ -1153,7 +1153,7 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created) if (log->log_fh == NULL) log->log_close_fh = NULL; else { - log->log_close_lsn = log->alloc_lsn; + WT_ASSIGN_LSN(&log->log_close_lsn, &log->alloc_lsn); WT_PUBLISH(log->log_close_fh, log->log_fh); } log->fileid++; @@ -1209,7 +1209,7 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created) if (log->fileid == 1) WT_INIT_LSN(&logrec_lsn); else - logrec_lsn = log->alloc_lsn; + WT_ASSIGN_LSN(&logrec_lsn, &log->alloc_lsn); /* * We need to setup the LSNs. 
Set the end LSN and alloc LSN to the end of the header. */ @@ -1222,7 +1222,7 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created) WT_RET(__wt_log_system_record(session, log_fh, &logrec_lsn)); WT_SET_LSN(&log->alloc_lsn, log->fileid, log->first_record); } - end_lsn = log->alloc_lsn; + WT_ASSIGN_LSN(&end_lsn, &log->alloc_lsn); WT_PUBLISH(log->log_fh, log_fh); /* @@ -1231,11 +1231,11 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created) */ if (conn_open) { WT_RET(__wt_fsync(session, log->log_fh, true)); - log->sync_lsn = end_lsn; - log->write_lsn = end_lsn; - log->write_start_lsn = end_lsn; + WT_ASSIGN_LSN(&log->sync_lsn, &end_lsn); + WT_ASSIGN_LSN(&log->write_lsn, &end_lsn); + WT_ASSIGN_LSN(&log->write_start_lsn, &end_lsn); } - log->dirty_lsn = log->alloc_lsn; + WT_ASSIGN_LSN(&log->dirty_lsn, &log->alloc_lsn); if (created != NULL) *created = create_log; return (0); @@ -1338,7 +1338,7 @@ __wt_log_acquire(WT_SESSION_IMPL *session, uint64_t recsize, WT_LOGSLOT *slot) /* * We need to set the release LSN earlier, before a log file change. */ - slot->slot_release_lsn = log->alloc_lsn; + WT_ASSIGN_LSN(&slot->slot_release_lsn, &log->alloc_lsn); /* * Make sure that the size can fit in the file. Proactively switch if it cannot. This reduces, * but does not eliminate, log files that exceed the maximum file size. 
We want to minimize the @@ -1703,7 +1703,7 @@ again: } } } - log->trunc_lsn = log->alloc_lsn; + WT_ASSIGN_LSN(&log->trunc_lsn, &log->alloc_lsn); FLD_SET(conn->log_flags, WT_CONN_LOG_EXISTED); } @@ -1930,8 +1930,8 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) WT_STAT_CONN_INCR(session, log_release_write_lsn); __log_wait_for_earlier_slot(session, slot); - log->write_start_lsn = slot->slot_start_lsn; - log->write_lsn = slot->slot_end_lsn; + WT_ASSIGN_LSN(&log->write_start_lsn, &slot->slot_start_lsn); + WT_ASSIGN_LSN(&log->write_lsn, &slot->slot_end_lsn); WT_ASSERT(session, slot != log->active_slot); __wt_cond_signal(session, log->log_write_cond); @@ -1973,7 +1973,7 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) * Record the current end of our update after the lock. That is how far our calls can * guarantee. */ - sync_lsn = slot->slot_end_lsn; + WT_ASSIGN_LSN(&sync_lsn, &slot->slot_end_lsn); /* * Check if we have to sync the parent directory. Some combinations of sync flags may result * in the log file not yet stable in its parent directory. Do that now if needed. 
@@ -1987,7 +1987,7 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) WT_ERR(__wt_fsync(session, log->log_dir_fh, true)); time_stop = __wt_clock(session); fsync_duration_usecs = WT_CLOCKDIFF_US(time_stop, time_start); - log->sync_dir_lsn = sync_lsn; + WT_ASSIGN_LSN(&log->sync_dir_lsn, &sync_lsn); WT_STAT_CONN_INCR(session, log_sync_dir); WT_STAT_CONN_INCRV(session, log_sync_dir_duration, fsync_duration_usecs); } @@ -2005,7 +2005,7 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) time_stop = __wt_clock(session); fsync_duration_usecs = WT_CLOCKDIFF_US(time_stop, time_start); WT_STAT_CONN_INCRV(session, log_sync_duration, fsync_duration_usecs); - log->sync_lsn = sync_lsn; + WT_ASSIGN_LSN(&log->sync_lsn, &sync_lsn); __wt_cond_signal(session, log->log_sync_cond); } /* @@ -2089,11 +2089,11 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, lastlog = 0; if (log != NULL) { allocsize = log->allocsize; - end_lsn = log->alloc_lsn; - start_lsn = log->first_lsn; + WT_ASSIGN_LSN(&end_lsn, &log->alloc_lsn); + WT_ASSIGN_LSN(&start_lsn, &log->first_lsn); if (lsnp == NULL) { if (LF_ISSET(WT_LOGSCAN_FROM_CKP)) - start_lsn = log->ckpt_lsn; + WT_ASSIGN_LSN(&start_lsn, &log->ckpt_lsn); else if (!LF_ISSET(WT_LOGSCAN_FIRST)) WT_RET_MSG(session, WT_ERROR, "WT_LOGSCAN_FIRST not set"); } @@ -2152,13 +2152,13 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, * smallest LSN, start from the beginning of the log. 
*/ if (!WT_IS_INIT_LSN(lsnp)) - start_lsn = *lsnp; + WT_ASSIGN_LSN(&start_lsn, lsnp); } WT_ERR(__log_open_verify(session, start_lsn.l.file, &log_fh, &prev_lsn, NULL, &need_salvage)); if (need_salvage) WT_ERR_MSG(session, WT_ERROR, "log file requires salvage"); WT_ERR(__wt_filesize(session, log_fh, &log_size)); - rd_lsn = start_lsn; + WT_ASSIGN_LSN(&rd_lsn, &start_lsn); if (LF_ISSET(WT_LOGSCAN_RECOVER | WT_LOGSCAN_RECOVER_METADATA)) __wt_verbose(session, WT_VERB_RECOVERY_PROGRESS, "Recovering log %" PRIu32 " through %" PRIu32, rd_lsn.l.file, end_lsn.l.file); @@ -2307,7 +2307,7 @@ advance: * remember any error returns, but don't skip to the error handler. */ if (log != NULL) - log->trunc_lsn = rd_lsn; + WT_ASSIGN_LSN(&log->trunc_lsn, &rd_lsn); /* * If the user asked for a specific LSN and it is not a valid LSN, return WT_NOTFOUND. */ @@ -2360,7 +2360,7 @@ advance: * We have a valid log record. If it is not the log file header, invoke the callback. */ WT_STAT_CONN_INCR(session, log_scan_records); - next_lsn = rd_lsn; + WT_ASSIGN_LSN(&next_lsn, &rd_lsn); next_lsn.l.offset += rdup_len; if (rd_lsn.l.offset != 0) { /* @@ -2387,7 +2387,7 @@ advance: if (LF_ISSET(WT_LOGSCAN_ONE)) break; } - rd_lsn = next_lsn; + WT_ASSIGN_LSN(&rd_lsn, &next_lsn); } /* Truncate if we're in recovery. */ @@ -2804,7 +2804,7 @@ __wt_log_flush(WT_SESSION_IMPL *session, uint32_t flags) * We need to flush out the current slot first to get the real end of log LSN in log->alloc_lsn. */ WT_RET(__wt_log_flush_lsn(session, &lsn, false)); - last_lsn = log->alloc_lsn; + WT_ASSIGN_LSN(&last_lsn, &log->alloc_lsn); /* * If the last write caused a switch to a new log file, we should only wait for the last write @@ -2812,7 +2812,7 @@ __wt_log_flush(WT_SESSION_IMPL *session, uint32_t flags) * because the write LSN doesn't switch into the new file until it contains a record. 
*/ if (last_lsn.l.offset == log->first_record) - last_lsn = log->log_close_lsn; + WT_ASSIGN_LSN(&last_lsn, &log->log_close_lsn); /* * Wait until all current outstanding writes have been written to the file system. diff --git a/src/third_party/wiredtiger/src/log/log_slot.c b/src/third_party/wiredtiger/src/log/log_slot.c index ef1a6bea19f..abb6f6c2f41 100644 --- a/src/third_party/wiredtiger/src/log/log_slot.c +++ b/src/third_party/wiredtiger/src/log/log_slot.c @@ -71,7 +71,8 @@ __wt_log_slot_activate(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) * are reset when the slot is freed. See log_slot_free. */ slot->slot_unbuffered = 0; - slot->slot_start_lsn = slot->slot_end_lsn = log->alloc_lsn; + WT_ASSIGN_LSN(&slot->slot_start_lsn, &log->alloc_lsn); + WT_ASSIGN_LSN(&slot->slot_end_lsn, &slot->slot_start_lsn); slot->slot_start_offset = log->alloc_lsn.l.offset; slot->slot_last_offset = log->alloc_lsn.l.offset; slot->slot_fh = log->log_fh; @@ -143,7 +144,7 @@ retry: WT_STAT_CONN_INCR(session, log_slot_closes); if (WT_LOG_SLOT_DONE(new_state)) *releasep = true; - slot->slot_end_lsn = slot->slot_start_lsn; + WT_ASSIGN_LSN(&slot->slot_end_lsn, &slot->slot_start_lsn); /* * A thread setting the unbuffered flag sets the unbuffered size after setting the flag. There could * be a delay between a thread setting the flag, a thread closing the slot, and the original thread @@ -182,7 +183,7 @@ retry: /* * XXX Would like to change so one piece of code advances the LSN. 
*/ - log->alloc_lsn = slot->slot_end_lsn; + WT_ASSIGN_LSN(&log->alloc_lsn, &slot->slot_end_lsn); WT_ASSERT(session, log->alloc_lsn.l.file >= log->write_lsn.l.file); return (0); } @@ -214,7 +215,7 @@ __log_slot_dirty_max_check(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) current->l.offset - last_sync->l.offset > conn->log_dirty_max) { /* Schedule the asynchronous sync */ F_SET(slot, WT_SLOT_SYNC_DIRTY); - log->dirty_lsn = slot->slot_release_lsn; + WT_ASSIGN_LSN(&log->dirty_lsn, &slot->slot_release_lsn); } } @@ -460,7 +461,7 @@ __wt_log_slot_init(WT_SESSION_IMPL *session, bool alloc) * called after a log file switch. The release LSN is usually the same as the slot_start_lsn * except around a log file switch. */ - slot->slot_release_lsn = log->alloc_lsn; + WT_ASSIGN_LSN(&slot->slot_release_lsn, &log->alloc_lsn); __wt_log_slot_activate(session, slot); log->active_slot = slot; log->pool_index = 0; diff --git a/src/third_party/wiredtiger/src/meta/meta_ckpt.c b/src/third_party/wiredtiger/src/meta/meta_ckpt.c index 6626f789da7..4ddf926d124 100644 --- a/src/third_party/wiredtiger/src/meta/meta_ckpt.c +++ b/src/third_party/wiredtiger/src/meta/meta_ckpt.c @@ -495,6 +495,27 @@ int __wt_meta_ckptlist_get( WT_SESSION_IMPL *session, const char *fname, bool update, WT_CKPT **ckptbasep) { + WT_DECL_RET; + char *config; + + config = NULL; + + WT_ERR(__wt_metadata_search(session, fname, &config)); + WT_ERR(__wt_meta_ckptlist_get_with_config(session, update, ckptbasep, config)); + +err: + __wt_free(session, config); + return (ret); +} + +/* + * __wt_meta_ckptlist_get_with_config -- + * Provided a metadata config, load all available checkpoint information for a file. 
+ */ +int +__wt_meta_ckptlist_get_with_config( + WT_SESSION_IMPL *session, bool update, WT_CKPT **ckptbasep, const char *config) +{ WT_CKPT *ckpt, *ckptbase; WT_CONFIG ckptconf; WT_CONFIG_ITEM k, v; @@ -502,18 +523,13 @@ __wt_meta_ckptlist_get( WT_DECL_RET; size_t allocated, slot; uint64_t most_recent; - char *config; *ckptbasep = NULL; ckptbase = NULL; allocated = slot = 0; - config = NULL; conn = S2C(session); - /* Retrieve the metadata information for the file. */ - WT_RET(__wt_metadata_search(session, fname, &config)); - /* Load any existing checkpoints into the array. */ if ((ret = __wt_config_getones(session, config, "checkpoint", &v)) == 0) { __wt_config_subinit(session, &ckptconf, &v); @@ -571,7 +587,6 @@ __wt_meta_ckptlist_get( err: __wt_meta_ckptlist_free(session, &ckptbase); } - __wt_free(session, config); return (ret); } diff --git a/src/third_party/wiredtiger/src/os_posix/os_fs.c b/src/third_party/wiredtiger/src/os_posix/os_fs.c index 2500e3d2865..ede22518c26 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_fs.c +++ b/src/third_party/wiredtiger/src/os_posix/os_fs.c @@ -208,7 +208,9 @@ __posix_fs_remove( #ifdef __linux__ /* Flush the backing directory to guarantee the remove. */ + WT_RET(__wt_log_printf(session, "REMOVE: posix_directory_sync %s", name)); WT_RET(__posix_directory_sync(session, name)); + WT_RET(__wt_log_printf(session, "REMOVE: DONE posix_directory_sync %s", name)); #endif return (0); } @@ -248,7 +250,9 @@ __posix_fs_rename(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const cha * not provide the guarantee or only provide the guarantee with specific mount options. Flush * both of the from/to directories until it's a performance problem. 
*/ + WT_RET(__wt_log_printf(session, "RENAME: posix_directory_sync %s", from)); WT_RET(__posix_directory_sync(session, from)); + WT_RET(__wt_log_printf(session, "RENAME: DONE posix_directory_sync %s", from)); /* * In almost all cases, we're going to be renaming files in the same directory, we can at least @@ -807,8 +811,11 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const cha /* * Durability: some filesystems require a directory sync to be confident the file will appear. */ - if (LF_ISSET(WT_FS_OPEN_DURABLE)) + if (LF_ISSET(WT_FS_OPEN_DURABLE)) { + WT_ERR(__wt_log_printf(session, "OPEN/CREATE: posix_directory_sync %s", name)); WT_ERR(__posix_directory_sync(session, name)); + WT_ERR(__wt_log_printf(session, "OPEN/CREATE: DONE posix_directory_sync %s", name)); + } #endif WT_ERR(__posix_open_file_cloexec(session, pfh->fd, name)); diff --git a/src/third_party/wiredtiger/src/schema/schema_create.c b/src/third_party/wiredtiger/src/schema/schema_create.c index c3d7417ff14..5971429a324 100644 --- a/src/third_party/wiredtiger/src/schema/schema_create.c +++ b/src/third_party/wiredtiger/src/schema/schema_create.c @@ -45,6 +45,58 @@ __wt_direct_io_size_check( } /* + * __check_imported_ts -- + * Check the aggregated timestamps for each checkpoint in a file that we've imported. We're not + * allowed to import files with timestamps ahead of our oldest timestamp since a subsequent + * rollback to stable could result in data loss and historical reads could yield unexpected + * values. Therefore, this function should return non-zero to callers to signify that this is + * the case. 
+ */ +static int +__check_imported_ts(WT_SESSION_IMPL *session, const char *uri, const char *config) +{ + WT_CKPT *ckptbase, *ckpt; + WT_DECL_RET; + WT_TXN_GLOBAL *txn_global; + + ckptbase = NULL; + txn_global = &S2C(session)->txn_global; + + WT_ERR_NOTFOUND_OK(__wt_meta_ckptlist_get_with_config(session, false, &ckptbase, config), true); + if (ret == WT_NOTFOUND) + WT_ERR_MSG(session, EINVAL, + "%s: import could not find any checkpoint information in supplied metadata", uri); + + /* Now iterate over each checkpoint and compare the aggregate timestamps with our oldest. */ + WT_CKPT_FOREACH (ckptbase, ckpt) { + if (ckpt->ta.newest_start_durable_ts > txn_global->oldest_timestamp) + WT_ERR_MSG(session, EINVAL, + "%s: import found aggregated newest start durable timestamp newer than the current " + "oldest timestamp, newest_start_durable_ts=%" PRIu64 ", oldest_ts=%" PRIu64, + uri, ckpt->ta.newest_start_durable_ts, txn_global->oldest_timestamp); + + /* + * No need to check "newest stop" here as "newest stop durable" serves that purpose. When a + * file has at least one record without a stop timestamp, "newest stop" will be set to max + * whereas "newest stop durable" refers to the newest non-max timestamp which is more useful + * to us in terms of comparing with oldest. + */ + if (ckpt->ta.newest_stop_durable_ts > txn_global->oldest_timestamp) { + WT_ASSERT(session, ckpt->ta.newest_stop_durable_ts != WT_TS_MAX); + WT_ERR_MSG(session, EINVAL, + "%s: import found aggregated newest stop durable timestamp newer than the current " + "oldest timestamp, newest_stop_durable_ts=%" PRIu64 ", oldest_ts=%" PRIu64, + uri, ckpt->ta.newest_stop_durable_ts, txn_global->oldest_timestamp); + } + } + +err: + if (ckptbase != NULL) + __wt_meta_ckptlist_free(session, &ckptbase); + return (ret); +} + +/* * __create_file -- * Create a new 'file:' object. 
*/ @@ -104,6 +156,11 @@ __create_file(WT_SESSION_IMPL *session, const char *uri, bool exclusive, const c * reconstruct the configuration metadata from the file. */ if (import) { + /* First verify that the data to import exists on disk. */ + WT_IGNORE_RET(__wt_fs_exist(session, filename, &exists)); + if (!exists) + WT_ERR_MSG(session, ENOENT, "%s", uri); + import_repair = __wt_config_getones(session, config, "import.repair", &cval) == 0 && cval.val != 0; if (!import_repair) { @@ -131,19 +188,20 @@ __create_file(WT_SESSION_IMPL *session, const char *uri, bool exclusive, const c uri); } } - } - - if (import) { - WT_IGNORE_RET(__wt_fs_exist(session, filename, &exists)); - if (!exists) - WT_ERR_MSG(session, ENOENT, "%s: attempted to import file that does not exist", uri); } else { /* Create the file. */ WT_ERR(__wt_block_manager_create(session, filename, allocsize)); - } - if (WT_META_TRACKING(session)) - WT_ERR(__wt_meta_track_fileop(session, NULL, uri)); + /* + * Track the creation of this file. + * + * If something down the line fails, we're going to need to roll this back. Specifically do + * NOT track the op in the import case since we do not want to wipe a data file just because + * we fail to import it. + */ + if (WT_META_TRACKING(session)) + WT_ERR(__wt_meta_track_fileop(session, NULL, uri)); + } /* * If creating an ordinary file, append the file ID and current version numbers to the passed-in @@ -163,6 +221,13 @@ __create_file(WT_SESSION_IMPL *session, const char *uri, bool exclusive, const c } else { /* TO-DO: WT-6691 */ } + + /* + * Ensure that the timestamps in the imported data file are not in the future relative to + * our oldest timestamp. 
+ */ + if (import) + WT_ERR(__check_imported_ts(session, filename, fileconf)); } /* @@ -591,14 +656,15 @@ static int __create_table(WT_SESSION_IMPL *session, const char *uri, bool exclusive, const char *config) { WT_CONFIG conf; - WT_CONFIG_ITEM cgkey, cgval, cval; + WT_CONFIG_ITEM cgkey, cgval, ckey, cval; WT_DECL_RET; WT_TABLE *table; size_t cgsize; - int ncolgroups; + int ncolgroups, nkeys; char *tableconf, *cgname; const char *cfg[4] = {WT_CONFIG_BASE(session, table_meta), config, NULL, NULL}; const char *tablename; + bool import, import_repair; cgname = NULL; table = NULL; @@ -608,14 +674,39 @@ __create_table(WT_SESSION_IMPL *session, const char *uri, bool exclusive, const tablename = uri; WT_PREFIX_SKIP_REQUIRED(session, tablename, "table:"); + import = __wt_config_getones(session, config, "import.enabled", &cval) == 0 && cval.val != 0; /* Check if the table already exists. */ if ((ret = __wt_metadata_search(session, uri, &tableconf)) != WT_NOTFOUND) { - if (exclusive) + /* + * Regardless of the 'exclusive' flag, we should raise an error if we try to import an + * existing URI rather than just silently returning. + */ + if (exclusive || import) WT_TRET(EEXIST); goto err; } + if (import) { + import_repair = + __wt_config_getones(session, config, "import.repair", &cval) == 0 && cval.val != 0; + /* + * If this is an import but not a repair, check that the exported table metadata is provided + * in the config. 
+ */ + if (!import_repair) { + __wt_config_init(session, &conf, config); + for (nkeys = 0; (ret = __wt_config_next(&conf, &ckey, &cval)) == 0; nkeys++) + ; + if (nkeys == 1) + WT_ERR_MSG(session, EINVAL, + "%s: import requires that the table configuration is specified or the " + "'repair' option is provided", + uri); + WT_ERR_NOTFOUND_OK(ret, false); + } + } + WT_ERR(__wt_config_gets(session, cfg, "colgroups", &cval)); __wt_config_subinit(session, &conf, &cval); for (ncolgroups = 0; (ret = __wt_config_next(&conf, &cgkey, &cgval)) == 0; ncolgroups++) diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c index daee3be92a8..c86111af6e1 100644 --- a/src/third_party/wiredtiger/src/session/session_api.c +++ b/src/third_party/wiredtiger/src/session/session_api.c @@ -861,60 +861,6 @@ err: } /* - * __session_rebalance -- - * WT_SESSION->rebalance method. - */ -static int -__session_rebalance(WT_SESSION *wt_session, const char *uri, const char *config) -{ - WT_DECL_RET; - WT_SESSION_IMPL *session; - - session = (WT_SESSION_IMPL *)wt_session; - - SESSION_API_CALL(session, rebalance, config, cfg); - - /* In-memory ignores rebalance operations. */ - if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) - goto err; - - /* Block out checkpoints to avoid spurious EBUSY errors. */ - WT_WITH_CHECKPOINT_LOCK(session, - WT_WITH_SCHEMA_LOCK(session, - ret = __wt_schema_worker( - session, uri, __wt_bt_rebalance, NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_REBALANCE))); - -err: - if (ret != 0) - WT_STAT_CONN_INCR(session, session_table_rebalance_fail); - else - WT_STAT_CONN_INCR(session, session_table_rebalance_success); - API_END_RET_NOTFOUND_MAP(session, ret); -} - -/* - * __session_rebalance_readonly -- - * WT_SESSION->rebalance method; readonly version. 
- */ -static int -__session_rebalance_readonly(WT_SESSION *wt_session, const char *uri, const char *config) -{ - WT_DECL_RET; - WT_SESSION_IMPL *session; - - WT_UNUSED(uri); - WT_UNUSED(config); - - session = (WT_SESSION_IMPL *)wt_session; - SESSION_API_CALL_NOCONF(session, rebalance); - - WT_STAT_CONN_INCR(session, session_table_rebalance_fail); - ret = __wt_session_notsup(session); -err: - API_END_RET(session, ret); -} - -/* * __session_rename -- * WT_SESSION->rename method. */ @@ -2034,19 +1980,18 @@ __open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, const stds = {NULL, NULL, __session_close, __session_reconfigure, __wt_session_strerror, __session_open_cursor, __session_alter, __session_create, __session_import, __wt_session_compact, __session_drop, __session_join, __session_log_flush, - __session_log_printf, __session_rebalance, __session_rename, __session_reset, - __session_salvage, __session_truncate, __session_upgrade, __session_verify, - __session_begin_transaction, __session_commit_transaction, __session_prepare_transaction, - __session_rollback_transaction, __session_timestamp_transaction, __session_query_timestamp, - __session_checkpoint, __session_transaction_pinned_range, __session_transaction_sync, - __wt_session_breakpoint}, + __session_log_printf, __session_rename, __session_reset, __session_salvage, + __session_truncate, __session_upgrade, __session_verify, __session_begin_transaction, + __session_commit_transaction, __session_prepare_transaction, __session_rollback_transaction, + __session_timestamp_transaction, __session_query_timestamp, __session_checkpoint, + __session_transaction_pinned_range, __session_transaction_sync, __wt_session_breakpoint}, stds_readonly = {NULL, NULL, __session_close, __session_reconfigure, __wt_session_strerror, __session_open_cursor, __session_alter_readonly, __session_create_readonly, __session_import_readonly, __wt_session_compact_readonly, __session_drop_readonly, __session_join, 
__session_log_flush_readonly, __session_log_printf_readonly, - __session_rebalance_readonly, __session_rename_readonly, __session_reset, - __session_salvage_readonly, __session_truncate_readonly, __session_upgrade_readonly, - __session_verify, __session_begin_transaction, __session_commit_transaction, + __session_rename_readonly, __session_reset, __session_salvage_readonly, + __session_truncate_readonly, __session_upgrade_readonly, __session_verify, + __session_begin_transaction, __session_commit_transaction, __session_prepare_transaction_readonly, __session_rollback_transaction, __session_timestamp_transaction, __session_query_timestamp, __session_checkpoint_readonly, __session_transaction_pinned_range, __session_transaction_sync_readonly, diff --git a/src/third_party/wiredtiger/src/session/session_dhandle.c b/src/third_party/wiredtiger/src/session/session_dhandle.c index 69656bb02a4..b702190f6f0 100644 --- a/src/third_party/wiredtiger/src/session/session_dhandle.c +++ b/src/third_party/wiredtiger/src/session/session_dhandle.c @@ -244,12 +244,11 @@ __wt_session_release_dhandle(WT_SESSION_IMPL *session) } /* - * Close the handle if we are finishing a bulk load or rebalance or if the handle is set to - * discard on release. Bulk loads and rebalanced trees are special because they may have huge - * root pages in memory, and we need to push those pages out of the cache. The only way to do - * that is to close the handle. + * Close the handle if we are finishing a bulk load or if the handle is set to discard on + * release. Bulk loads are special because they may have huge root pages in memory, and we need + * to push those pages out of the cache. The only way to do that is to close the handle. 
*/ - if (btree != NULL && F_ISSET(btree, WT_BTREE_BULK | WT_BTREE_REBALANCE)) { + if (btree != NULL && F_ISSET(btree, WT_BTREE_BULK)) { WT_ASSERT( session, F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) && !F_ISSET(dhandle, WT_DHANDLE_DISCARD)); /* diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c index bda39f03b3c..f416a2d7f8c 100644 --- a/src/third_party/wiredtiger/src/support/stat.c +++ b/src/third_party/wiredtiger/src/support/stat.c @@ -62,6 +62,7 @@ static const char *const __stats_dsrc_desc[] = { "cache: eviction walks gave up because they saw too many pages and found no candidates", "cache: eviction walks gave up because they saw too many pages and found too few candidates", "cache: eviction walks reached end of tree", + "cache: eviction walks restarted", "cache: eviction walks started from root of tree", "cache: eviction walks started from saved location in tree", "cache: hazard pointer blocked page eviction", @@ -287,6 +288,7 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats) stats->cache_eviction_walks_gave_up_no_targets = 0; stats->cache_eviction_walks_gave_up_ratio = 0; stats->cache_eviction_walks_ended = 0; + stats->cache_eviction_walk_restart = 0; stats->cache_eviction_walk_from_root = 0; stats->cache_eviction_walk_saved_pos = 0; stats->cache_eviction_hazard = 0; @@ -496,6 +498,7 @@ __wt_stat_dsrc_aggregate_single(WT_DSRC_STATS *from, WT_DSRC_STATS *to) to->cache_eviction_walks_gave_up_no_targets += from->cache_eviction_walks_gave_up_no_targets; to->cache_eviction_walks_gave_up_ratio += from->cache_eviction_walks_gave_up_ratio; to->cache_eviction_walks_ended += from->cache_eviction_walks_ended; + to->cache_eviction_walk_restart += from->cache_eviction_walk_restart; to->cache_eviction_walk_from_root += from->cache_eviction_walk_from_root; to->cache_eviction_walk_saved_pos += from->cache_eviction_walk_saved_pos; to->cache_eviction_hazard += from->cache_eviction_hazard; @@ -701,6 +704,7 @@ 
__wt_stat_dsrc_aggregate(WT_DSRC_STATS **from, WT_DSRC_STATS *to) to->cache_eviction_walks_gave_up_ratio += WT_STAT_READ(from, cache_eviction_walks_gave_up_ratio); to->cache_eviction_walks_ended += WT_STAT_READ(from, cache_eviction_walks_ended); + to->cache_eviction_walk_restart += WT_STAT_READ(from, cache_eviction_walk_restart); to->cache_eviction_walk_from_root += WT_STAT_READ(from, cache_eviction_walk_from_root); to->cache_eviction_walk_saved_pos += WT_STAT_READ(from, cache_eviction_walk_saved_pos); to->cache_eviction_hazard += WT_STAT_READ(from, cache_eviction_hazard); @@ -900,6 +904,7 @@ static const char *const __stats_connection_desc[] = { "cache: eviction walks gave up because they saw too many pages and found no candidates", "cache: eviction walks gave up because they saw too many pages and found too few candidates", "cache: eviction walks reached end of tree", + "cache: eviction walks restarted", "cache: eviction walks started from root of tree", "cache: eviction walks started from saved location in tree", "cache: eviction worker thread active", @@ -1220,8 +1225,6 @@ static const char *const __stats_connection_desc[] = { "session: table drop successful calls", "session: table import failed calls", "session: table import successful calls", - "session: table rebalance failed calls", - "session: table rebalance successful calls", "session: table rename failed calls", "session: table rename successful calls", "session: table salvage failed calls", @@ -1424,6 +1427,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->cache_eviction_walks_gave_up_no_targets = 0; stats->cache_eviction_walks_gave_up_ratio = 0; stats->cache_eviction_walks_ended = 0; + stats->cache_eviction_walk_restart = 0; stats->cache_eviction_walk_from_root = 0; stats->cache_eviction_walk_saved_pos = 0; /* not clearing cache_eviction_active_workers */ @@ -1735,8 +1739,6 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) /* not clearing 
session_table_drop_success */ /* not clearing session_table_import_fail */ /* not clearing session_table_import_success */ - /* not clearing session_table_rebalance_fail */ - /* not clearing session_table_rebalance_success */ /* not clearing session_table_rename_fail */ /* not clearing session_table_rename_success */ /* not clearing session_table_salvage_fail */ @@ -1919,6 +1921,7 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS * to->cache_eviction_walks_gave_up_ratio += WT_STAT_READ(from, cache_eviction_walks_gave_up_ratio); to->cache_eviction_walks_ended += WT_STAT_READ(from, cache_eviction_walks_ended); + to->cache_eviction_walk_restart += WT_STAT_READ(from, cache_eviction_walk_restart); to->cache_eviction_walk_from_root += WT_STAT_READ(from, cache_eviction_walk_from_root); to->cache_eviction_walk_saved_pos += WT_STAT_READ(from, cache_eviction_walk_saved_pos); to->cache_eviction_active_workers += WT_STAT_READ(from, cache_eviction_active_workers); @@ -2254,8 +2257,6 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS * to->session_table_drop_success += WT_STAT_READ(from, session_table_drop_success); to->session_table_import_fail += WT_STAT_READ(from, session_table_import_fail); to->session_table_import_success += WT_STAT_READ(from, session_table_import_success); - to->session_table_rebalance_fail += WT_STAT_READ(from, session_table_rebalance_fail); - to->session_table_rebalance_success += WT_STAT_READ(from, session_table_rebalance_success); to->session_table_rename_fail += WT_STAT_READ(from, session_table_rename_fail); to->session_table_rename_success += WT_STAT_READ(from, session_table_rename_success); to->session_table_salvage_fail += WT_STAT_READ(from, session_table_salvage_fail); diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index 8d8d3431219..7984b98f785 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ 
b/src/third_party/wiredtiger/src/txn/txn.c @@ -1155,7 +1155,7 @@ __txn_commit_timestamps_assert(WT_SESSION_IMPL *session) continue; } - /* Search for prepared updates, so that they will be restored, if moved to lookaside. */ + /* Search for prepared updates. */ if (F_ISSET(txn, WT_TXN_PREPARE)) WT_ERR(__txn_search_prepared_op(session, op, &cursor, &upd)); else diff --git a/src/third_party/wiredtiger/src/txn/txn_recover.c b/src/third_party/wiredtiger/src/txn/txn_recover.c index fd230dab529..10a48c25553 100644 --- a/src/third_party/wiredtiger/src/txn/txn_recover.c +++ b/src/third_party/wiredtiger/src/txn/txn_recover.c @@ -330,7 +330,7 @@ __txn_log_recover(WT_SESSION_IMPL *session, WT_ITEM *logrec, WT_LSN *lsnp, WT_LS * stop at that LSN. */ if (r->metadata_only) - r->max_rec_lsn = *next_lsnp; + WT_ASSIGN_LSN(&r->max_rec_lsn, next_lsnp); else if (__wt_log_cmp(lsnp, &r->max_rec_lsn) >= 0) return (0); @@ -491,7 +491,7 @@ __recovery_setup_file(WT_RECOVERY *r, const char *uri, const char *config) else WT_RET_MSG( r->session, EINVAL, "Failed to parse checkpoint LSN '%.*s'", (int)cval.len, cval.str); - r->files[fileid].ckpt_lsn = lsn; + WT_ASSIGN_LSN(&r->files[fileid].ckpt_lsn, &lsn); __wt_verbose(r->session, WT_VERB_RECOVERY, "Recovering %s with id %" PRIu32 " @ (%" PRIu32 ", %" PRIu32 ")", uri, fileid, lsn.l.file, @@ -499,7 +499,7 @@ __recovery_setup_file(WT_RECOVERY *r, const char *uri, const char *config) if ((!WT_IS_MAX_LSN(&lsn) && !WT_IS_INIT_LSN(&lsn)) && (WT_IS_MAX_LSN(&r->max_ckpt_lsn) || __wt_log_cmp(&lsn, &r->max_ckpt_lsn) > 0)) - r->max_ckpt_lsn = lsn; + WT_ASSIGN_LSN(&r->max_ckpt_lsn, &lsn); /* Update the base write gen based on this file's configuration. */ return (__wt_metadata_update_base_write_gen(r->session, config)); @@ -727,7 +727,7 @@ __wt_txn_recover(WT_SESSION_IMPL *session, const char *cfg[]) * Start at the last checkpoint LSN referenced in the metadata. 
If we see the end of a * checkpoint while scanning, we will change the full scan to start from there. */ - r.ckpt_lsn = metafile->ckpt_lsn; + WT_ASSIGN_LSN(&r.ckpt_lsn, &metafile->ckpt_lsn); ret = __wt_log_scan( session, &metafile->ckpt_lsn, WT_LOGSCAN_RECOVER_METADATA, __txn_log_recover, &r); } @@ -795,7 +795,7 @@ __wt_txn_recover(WT_SESSION_IMPL *session, const char *cfg[]) /* * Create the history store as we might need it while applying log records in recovery. */ - WT_ERR(__wt_hs_create(session, cfg)); + WT_ERR(__wt_hs_open(session, cfg)); } /* diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c index 479bedc84d2..56904666a8a 100644 --- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c +++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c @@ -1298,9 +1298,12 @@ int __wt_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[], bool no_ckpt) { WT_DECL_RET; + WT_TXN_GLOBAL *txn_global; WT_UNUSED(cfg); + txn_global = &S2C(session)->txn_global; + /* * Don't use the connection's default session: we are working on data handles and (a) don't want * to cache all of them forever, plus (b) can't guarantee that no other method will be called @@ -1319,6 +1322,10 @@ __wt_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[], bool no_ckp F_CLR(session, WT_SESSION_ROLLBACK_TO_STABLE); WT_RET(ret); + /* Rollback the global durable timestamp to the stable timestamp. 
*/ + txn_global->has_durable_timestamp = txn_global->has_stable_timestamp; + txn_global->durable_timestamp = txn_global->stable_timestamp; + /* * If the configuration is not in-memory, forcibly log a checkpoint after rollback to stable to * ensure that both in-memory and on-disk versions are the same unless caller requested for no diff --git a/src/third_party/wiredtiger/src/utilities/util.h b/src/third_party/wiredtiger/src/utilities/util.h index fc3e3b93128..ac5bae200c7 100644 --- a/src/third_party/wiredtiger/src/utilities/util.h +++ b/src/third_party/wiredtiger/src/utilities/util.h @@ -45,7 +45,6 @@ int util_loadtext(WT_SESSION *, int, char *[]); int util_printlog(WT_SESSION *, int, char *[]); int util_read(WT_SESSION *, int, char *[]); int util_read_line(WT_SESSION *, ULINE *, bool, bool *); -int util_rebalance(WT_SESSION *, int, char *[]); int util_rename(WT_SESSION *, int, char *[]); int util_salvage(WT_SESSION *, int, char *[]); int util_stat(WT_SESSION *, int, char *[]); diff --git a/src/third_party/wiredtiger/src/utilities/util_main.c b/src/third_party/wiredtiger/src/utilities/util_main.c index 3d83839962f..880d4048ad4 100644 --- a/src/third_party/wiredtiger/src/utilities/util_main.c +++ b/src/third_party/wiredtiger/src/utilities/util_main.c @@ -45,10 +45,10 @@ usage(void) */ "list", "list database objects", "load", "load an object", "loadtext", "load an object from a text file", "printlog", "display the database log", "read", - "read values from an object", "rebalance", "rebalance an object", "rename", - "rename an object", "salvage", "salvage a file", "stat", "display statistics for an object", - "truncate", "truncate an object, removing all content", "upgrade", "upgrade an object", - "verify", "verify an object", "write", "write values to an object", NULL, NULL}; + "read values from an object", "rename", "rename an object", "salvage", "salvage a file", + "stat", "display statistics for an object", "truncate", + "truncate an object, removing all content", 
"upgrade", "upgrade an object", "verify", + "verify an object", "write", "write values to an object", NULL, NULL}; fprintf(stderr, "WiredTiger Data Engine (version %d.%d)\n", WIREDTIGER_VERSION_MAJOR, WIREDTIGER_VERSION_MINOR); @@ -222,8 +222,6 @@ main(int argc, char *argv[]) case 'r': if (strcmp(command, "read") == 0) func = util_read; - else if (strcmp(command, "rebalance") == 0) - func = util_rebalance; else if (strcmp(command, "rename") == 0) func = util_rename; break; diff --git a/src/third_party/wiredtiger/src/utilities/util_rebalance.c b/src/third_party/wiredtiger/src/utilities/util_rebalance.c deleted file mode 100644 index 82d719b2aa9..00000000000 --- a/src/third_party/wiredtiger/src/utilities/util_rebalance.c +++ /dev/null @@ -1,53 +0,0 @@ -/*- - * Copyright (c) 2014-2020 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "util.h" - -static int -usage(void) -{ - util_usage("rebalance uri", NULL, NULL); - return (1); -} - -int -util_rebalance(WT_SESSION *session, int argc, char *argv[]) -{ - WT_DECL_RET; - int ch; - char *uri; - - uri = NULL; - while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF) - switch (ch) { - case '?': - default: - return (usage()); - } - argc -= __wt_optind; - argv += __wt_optind; - - /* The remaining argument is the table name. */ - if (argc != 1) - return (usage()); - if ((uri = util_uri(session, *argv, "table")) == NULL) - return (1); - - if ((ret = session->rebalance(session, uri, NULL)) != 0) - (void)util_err(session, ret, "session.rebalance: %s", uri); - else { - /* - * Verbose configures a progress counter, move to the next line. 
- */ - if (verbose) - printf("\n"); - } - - free(uri); - return (ret); -} diff --git a/src/third_party/wiredtiger/test/checkpoint/smoke.sh b/src/third_party/wiredtiger/test/checkpoint/smoke.sh index b633782c8a4..83f1b1f6cef 100755 --- a/src/third_party/wiredtiger/test/checkpoint/smoke.sh +++ b/src/third_party/wiredtiger/test/checkpoint/smoke.sh @@ -28,6 +28,12 @@ $TEST_WRAPPER ./t -T 6 -t r echo "checkpoint: 6 row-store tables, named checkpoint" $TEST_WRAPPER ./t -c 'TeSt' -T 6 -t r +echo "checkpoint: 6 row-store tables with prepare" +$TEST_WRAPPER ./t -T 6 -t r -p + +echo "checkpoint: 6 row-store tables, named checkpoint with prepare" +$TEST_WRAPPER ./t -c 'TeSt' -T 6 -t r -p + # Temporarily disabled #echo "checkpoint: row-store tables, stress history store. Sweep and timestamps" #$TEST_WRAPPER ./t -t r -W 3 -r 2 -D -s -x -n 100000 -k 100000 -C cache_size=100MB diff --git a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c index 5785494487a..334bbdf45fd 100644 --- a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c +++ b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c @@ -65,7 +65,7 @@ main(int argc, char *argv[]) g.sweep_stress = g.use_timestamps = false; runs = 1; - while ((ch = __wt_getopt(progname, argc, argv, "C:c:Dh:k:l:n:r:sT:t:W:x")) != EOF) + while ((ch = __wt_getopt(progname, argc, argv, "C:c:Dh:k:l:n:pr:sT:t:W:x")) != EOF) switch (ch) { case 'c': g.checkpoint_name = __wt_optarg; @@ -91,6 +91,9 @@ main(int argc, char *argv[]) case 'n': /* operations */ g.nops = (u_int)atoi(__wt_optarg); break; + case 'p': /* prepare */ + g.prepare = true; + break; case 'r': /* runs */ runs = atoi(__wt_optarg); break; @@ -343,6 +346,7 @@ usage(void) "\t-k set number of keys to load\n" "\t-l specify a log file\n" "\t-n set number of operations each thread does\n" + "\t-p use prepare\n" "\t-r set number of runs (0 for continuous)\n" "\t-T specify a table configuration\n" "\t-t 
set a file type ( col | mix | row | lsm )\n" diff --git a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h index 92dd47dda33..fd6daa03403 100644 --- a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h +++ b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h @@ -55,7 +55,7 @@ typedef struct { char *home; /* Home directory */ const char *checkpoint_name; /* Checkpoint name */ WT_CONNECTION *conn; /* WiredTiger connection */ - bool debug_mode; /* Lookaside stress test */ + bool debug_mode; /* History store stress test */ u_int nkeys; /* Keys to load */ u_int nops; /* Operations per thread */ FILE *logfp; /* Message log file. */ @@ -68,6 +68,7 @@ typedef struct { u_int ts_oldest; /* Current oldest timestamp */ u_int ts_stable; /* Current stable timestamp */ bool use_timestamps; /* Use txn timestamps */ + bool prepare; /* Use prepare transactions */ COOKIE *cookies; /* Per-thread info */ WT_RWLOCK clock_lock; /* Clock synchronization */ wt_thread_t checkpoint_thread; /* Checkpoint thread */ diff --git a/src/third_party/wiredtiger/test/checkpoint/workers.c b/src/third_party/wiredtiger/test/checkpoint/workers.c index 55cd5dba91d..98a3f541535 100644 --- a/src/third_party/wiredtiger/test/checkpoint/workers.c +++ b/src/third_party/wiredtiger/test/checkpoint/workers.c @@ -263,11 +263,23 @@ real_worker(void) goto err; } else if (ret == 0) { next_rnd = __wt_random(&rnd); - if (next_rnd % 7 != 0) { + if (next_rnd % 7 == 0) { if (g.use_timestamps) { if (__wt_try_readlock((WT_SESSION_IMPL *)session, &g.clock_lock) == 0) { - testutil_check( - __wt_snprintf(buf, sizeof(buf), "commit_timestamp=%x", g.ts_stable + 1)); + next_rnd = __wt_random(&rnd); + if (g.prepare && next_rnd % 2 == 0) { + testutil_check(__wt_snprintf( + buf, sizeof(buf), "prepare_timestamp=%x", g.ts_stable + 1)); + if ((ret = session->prepare_transaction(session, buf)) != 0) { + 
(void)log_print_err("real_worker:prepare_transaction", ret, 1); + goto err; + } + testutil_check(__wt_snprintf(buf, sizeof(buf), + "durable_timestamp=%x,commit_timestamp=%x", g.ts_stable + 3, + g.ts_stable + 1)); + } else + testutil_check(__wt_snprintf( + buf, sizeof(buf), "commit_timestamp=%x", g.ts_stable + 1)); __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock); if ((ret = session->commit_transaction(session, buf)) != 0) { (void)log_print_err("real_worker:commit_transaction", ret, 1); diff --git a/src/third_party/wiredtiger/test/csuite/import/smoke.sh b/src/third_party/wiredtiger/test/csuite/import/smoke.sh index db73e28eb99..6bff4c7f656 100755 --- a/src/third_party/wiredtiger/test/csuite/import/smoke.sh +++ b/src/third_party/wiredtiger/test/csuite/import/smoke.sh @@ -40,7 +40,6 @@ format() checkpoints=1 \ data_source=file \ ops=0 \ - rebalance=0 \ salvage=0 \ threads=4 \ timer=2 \ diff --git a/src/third_party/wiredtiger/test/csuite/random_directio/main.c b/src/third_party/wiredtiger/test/csuite/random_directio/main.c index 5e21751781d..2212befaadb 100644 --- a/src/third_party/wiredtiger/test/csuite/random_directio/main.c +++ b/src/third_party/wiredtiger/test/csuite/random_directio/main.c @@ -765,6 +765,24 @@ check_schema(WT_SESSION *session, uint64_t lastid, uint32_t threadid, uint32_t f } } +static void +kill_child(pid_t pid) +{ + int status; + + /* + * The child is stopped, it won't process an abort until it is continued. First signal the + * abort, then signal continue so that the child process will process the abort and dump core. + */ + printf("Send abort to child process ID %d\n", (int)pid); + if (kill(pid, SIGABRT) != 0) + testutil_die(errno, "kill"); + if (kill(pid, SIGCONT) != 0) + testutil_die(errno, "kill"); + if (waitpid(pid, &status, 0) == -1) + testutil_die(errno, "waitpid"); +} + /* * check_db -- * Make a copy of the database and verify its contents. 
@@ -779,10 +797,10 @@ check_db(uint32_t nth, uint32_t datasize, pid_t pid, bool directio, uint32_t fla uint64_t gotid, id; uint64_t *lastid; uint32_t gotth, kvsize, th, threadmap; - int status; char checkdir[4096], dbgdir[4096], savedir[4096]; char *gotkey, *gotvalue, *keybuf, *p; char **large_arr; + bool fatal; keybuf = dcalloc(datasize, 1); lastid = dcalloc(nth, sizeof(uint64_t)); @@ -803,29 +821,32 @@ check_db(uint32_t nth, uint32_t datasize, pid_t pid, bool directio, uint32_t fla printf( "Copy database home directory using direct I/O to run recovery,\n" "along with a saved 'pre-recovery' copy.\n"); - copy_directory(home, checkdir, directio); - /* Copy the original home directory explicitly without direct I/O. */ - copy_directory(home, dbgdir, false); - copy_directory(checkdir, savedir, false); + /* + * Copy the original home directory explicitly without direct I/O. Copy this first because + * copying with directio may abort and we want to see what the original copy saw. + */ + fatal = copy_directory(home, dbgdir, false); + if (fatal) { + printf("FATAL: Copying from %s to %s, directio %d\n", home, dbgdir, false); + kill_child(pid); + } + fatal = copy_directory(home, checkdir, directio); + if (fatal) { + printf("FATAL: Copying from %s to %s, directio %d\n", home, checkdir, directio); + kill_child(pid); + } + fatal = copy_directory(checkdir, savedir, false); + if (fatal) { + printf("FATAL: Copying from %s to %s, directio %d\n", checkdir, savedir, false); + kill_child(pid); + } printf("Open database, run recovery and verify content\n"); ret = wiredtiger_open(checkdir, NULL, ENV_CONFIG_REC, &conn); /* If this fails, abort the child process before we die so we can see what it was doing. */ if (ret != 0) { - if (pid != 0) { - /* - * The child is stopped, it won't process an abort until it is continued. First signal - * the abort, then signal continue so that the child process will process the abort and - * dump core. 
- */ - printf("Send abort to child process ID %d\n", (int)pid); - if (kill(pid, SIGABRT) != 0) - testutil_die(errno, "kill"); - if (kill(pid, SIGCONT) != 0) - testutil_die(errno, "kill"); - if (waitpid(pid, &status, 0) == -1) - testutil_die(errno, "waitpid"); - } + if (pid != 0) + kill_child(pid); testutil_check(ret); } testutil_check(conn->open_session(conn, NULL, NULL, &session)); diff --git a/src/third_party/wiredtiger/test/csuite/random_directio/util.c b/src/third_party/wiredtiger/test/csuite/random_directio/util.c index 2d4a4aa845b..df8b65ce248 100644 --- a/src/third_party/wiredtiger/test/csuite/random_directio/util.c +++ b/src/third_party/wiredtiger/test/csuite/random_directio/util.c @@ -43,7 +43,7 @@ * copy_directory -- * Copy a directory, using direct IO if indicated. */ -void +bool copy_directory(const char *fromdir, const char *todir, bool directio) { struct dirent *dp; @@ -52,9 +52,10 @@ copy_directory(const char *fromdir, const char *todir, bool directio) size_t blksize, bufsize, readbytes, n, remaining; ssize_t ioret; uintptr_t bufptr; - int openflags, rfd, wfd; + int enoent, openflags, rfd, wfd; u_char *buf, *orig_buf; char fromfile[4096], tofile[4096]; + bool fatal; #ifdef O_DIRECT openflags = directio ? O_DIRECT : 0; @@ -65,6 +66,8 @@ copy_directory(const char *fromdir, const char *todir, bool directio) orig_buf = dcalloc(COPY_BUF_SIZE, sizeof(u_char)); buf = NULL; blksize = bufsize = 0; + enoent = 0; + fatal = false; dirp = opendir(todir); if (dirp != NULL) { @@ -101,8 +104,19 @@ copy_directory(const char *fromdir, const char *todir, bool directio) * delivered in between those calls so the file may no longer exist but reading the * directory will still return its entry. Handle that case and skip the file if it happens. */ - if (rfd < 0 && errno == ENOENT) + if (rfd < 0 && errno == ENOENT) { + ++enoent; + /* + * At most there can be one thread in the middle of drop due to the schema lock. 
So if + * we find more than one missing file, we have a fatal and unexpected situation. We want + * to know all the files in this. So note them here and fail later. + */ + printf("COPY_DIR: direct:%d ENOENT %d: Source file %s not found.\n", directio, enoent, + dp->d_name); + if (enoent > 1) + fatal = true; continue; + } testutil_assertfmt(rfd >= 0, "Open of source %s failed with %d\n", fromfile, errno); wfd = open(tofile, O_WRONLY | O_CREAT, 0666); testutil_assertfmt(wfd >= 0, "Open of dest %s failed with %d\n", tofile, errno); @@ -152,4 +166,5 @@ copy_directory(const char *fromdir, const char *todir, bool directio) } testutil_check(closedir(dirp)); free(orig_buf); + return (fatal); } diff --git a/src/third_party/wiredtiger/test/csuite/random_directio/util.h b/src/third_party/wiredtiger/test/csuite/random_directio/util.h index c0d6cc6db8a..a849150d52c 100644 --- a/src/third_party/wiredtiger/test/csuite/random_directio/util.h +++ b/src/third_party/wiredtiger/test/csuite/random_directio/util.h @@ -30,5 +30,4 @@ * util.h * Utility functions for test that simulates system crashes. */ - -extern void copy_directory(const char *, const char *, bool); +extern bool copy_directory(const char *, const char *, bool); diff --git a/src/third_party/wiredtiger/test/csuite/schema_abort/main.c b/src/third_party/wiredtiger/test/csuite/schema_abort/main.c index 4d60b3c39fb..fbcc89ca05a 100644 --- a/src/third_party/wiredtiger/test/csuite/schema_abort/main.c +++ b/src/third_party/wiredtiger/test/csuite/schema_abort/main.c @@ -122,7 +122,6 @@ typedef struct { #define CREATE_UNQ "create_unique" #define CURSOR "cursor" #define DROP "drop" -#define REBALANCE "rebalance" #define UPGRADE "upgrade" #define VERIFY "verify" @@ -388,25 +387,6 @@ test_drop(THREAD_DATA *td, int force) } /* - * test_rebalance -- - * Rebalance a tree. 
- */ -static void -test_rebalance(THREAD_DATA *td) -{ - WT_DECL_RET; - WT_SESSION *session; - - testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session)); - - if ((ret = session->rebalance(session, uri, NULL)) != 0) - if (ret != ENOENT && ret != EBUSY) - testutil_die(ret, "session.rebalance"); - - testutil_check(session->close(session, NULL)); -} - -/* * test_upgrade -- * Upgrade a tree. */ @@ -680,14 +660,10 @@ thread_run(void *arg) test_drop(td, __wt_random(&rnd) & 1); break; case 6: - WT_PUBLISH(th_ts[td->info].op, REBALANCE); - test_rebalance(td); - break; - case 7: WT_PUBLISH(th_ts[td->info].op, UPGRADE); test_upgrade(td); break; - case 8: + case 7: WT_PUBLISH(th_ts[td->info].op, VERIFY); test_verify(td); break; diff --git a/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c b/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c index 014b734e9b1..e99206309b3 100644 --- a/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c +++ b/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c @@ -63,6 +63,7 @@ static char home[1024]; /* Program working dir */ #define MAX_VAL 1024 #define MIN_TH 5 #define MIN_TIME 10 +#define PREPARE_DURABLE_AHEAD_COMMIT 10 #define PREPARE_FREQ 5 #define PREPARE_PCT 10 #define PREPARE_YIELD (PREPARE_FREQ * 10) @@ -257,7 +258,7 @@ thread_run(void *arg) uint64_t i, active_ts; char cbuf[MAX_VAL], lbuf[MAX_VAL], obuf[MAX_VAL]; char kname[64], tscfg[64], uri[128]; - bool use_prep; + bool durable_ahead_commit, use_prep; __wt_random_init(&rnd); memset(cbuf, 0, sizeof(cbuf)); @@ -285,6 +286,7 @@ thread_run(void *arg) * transactions. */ use_prep = (use_ts && td->info % PREPARE_PCT == 0) ? 
true : false; + durable_ahead_commit = false; /* * For the prepared case we have two sessions so that the oplog session can have its own @@ -394,8 +396,17 @@ thread_run(void *arg) testutil_check(prepared_session->prepare_transaction(prepared_session, tscfg)); if (i % PREPARE_YIELD == 0) __wt_yield(); + /* + * Make half of the prepared transactions' durable timestamp larger than their + * commit timestamp. + */ + durable_ahead_commit = i % PREPARE_DURABLE_AHEAD_COMMIT == 0; testutil_check(__wt_snprintf(tscfg, sizeof(tscfg), - "commit_timestamp=%" PRIx64 ",durable_timestamp=%" PRIx64, active_ts, active_ts)); + "commit_timestamp=%" PRIx64 ",durable_timestamp=%" PRIx64, active_ts, + durable_ahead_commit ? active_ts + 4 : active_ts)); + /* Ensure the global timestamp is not behind the all durable timestamp. */ + if (durable_ahead_commit) + __wt_atomic_addv64(&global_ts, 3); } else testutil_check( __wt_snprintf(tscfg, sizeof(tscfg), "commit_timestamp=%" PRIx64, active_ts)); @@ -413,10 +424,9 @@ thread_run(void *arg) cur_local->set_value(cur_local, &data); testutil_check(cur_local->insert(cur_local)); - /* - * Save the timestamp and key separately for checking later. - */ - if (fprintf(fp, "%" PRIu64 " %" PRIu64 "\n", active_ts, i) < 0) + /* Save the timestamps and key separately for checking later. */ + if (fprintf(fp, "%" PRIu64 " %" PRIu64 " %" PRIu64 "\n", active_ts, + durable_ahead_commit ? active_ts + 4 : active_ts, i) < 0) testutil_die(EIO, "fprintf"); if (0) { @@ -590,7 +600,7 @@ main(int argc, char *argv[]) WT_SESSION *session; pid_t pid; uint64_t absent_coll, absent_local, absent_oplog, absent_shadow, count, key, last_key; - uint64_t stable_fp, stable_val; + uint64_t commit_fp, durable_fp, stable_val; uint32_t i, nth, timeout; int ch, status, ret; const char *working_dir; @@ -782,17 +792,17 @@ main(int argc, char *argv[]) * have been recovered. 
*/ for (last_key = INVALID_KEY;; ++count, last_key = key) { - ret = fscanf(fp, "%" SCNu64 "%" SCNu64 "\n", &stable_fp, &key); + ret = fscanf(fp, "%" SCNu64 "%" SCNu64 "%" SCNu64 "\n", &commit_fp, &durable_fp, &key); if (last_key == INVALID_KEY) { c_rep[i].first_key = key; l_rep[i].first_key = key; o_rep[i].first_key = key; } - if (ret != EOF && ret != 2) { + if (ret != EOF && ret != 3) { /* * If we find a partial line, consider it like an EOF. */ - if (ret == 1 || ret == 0) + if (ret == 2 || ret == 1 || ret == 0) break; testutil_die(errno, "fscanf"); } @@ -823,13 +833,13 @@ main(int argc, char *argv[]) testutil_die(ret, "shadow search success"); /* - * If we don't find a record, the stable timestamp written to our file better be + * If we don't find a record, the durable timestamp written to our file better be * larger than the saved one. */ - if (!inmem && stable_fp != 0 && stable_fp <= stable_val) { - printf("%s: COLLECTION no record with key %" PRIu64 " record ts %" PRIu64 - " <= stable ts %" PRIu64 "\n", - fname, key, stable_fp, stable_val); + if (!inmem && durable_fp != 0 && durable_fp <= stable_val) { + printf("%s: COLLECTION no record with key %" PRIu64 + " record durable ts %" PRIu64 " <= stable ts %" PRIu64 "\n", + fname, key, durable_fp, stable_val); absent_coll++; } if (c_rep[i].first_miss == INVALID_KEY) @@ -849,14 +859,14 @@ main(int argc, char *argv[]) */ c_rep[i].exist_key = key; fatal = true; - } else if (!inmem && stable_fp != 0 && stable_fp > stable_val) { + } else if (!inmem && commit_fp != 0 && commit_fp > stable_val) { /* - * If we found a record, the stable timestamp written to our file better be no + * If we found a record, the commit timestamp written to our file better be no * larger than the checkpoint one. 
*/ - printf("%s: COLLECTION record with key %" PRIu64 " record ts %" PRIu64 + printf("%s: COLLECTION record with key %" PRIu64 " commit record ts %" PRIu64 " > stable ts %" PRIu64 "\n", - fname, key, stable_fp, stable_val); + fname, key, commit_fp, stable_val); fatal = true; } else if ((ret = cur_shadow->search(cur_shadow)) != 0) /* Collection and shadow both have the data. */ diff --git a/src/third_party/wiredtiger/test/csuite/wt2719_reconfig/main.c b/src/third_party/wiredtiger/test/csuite/wt2719_reconfig/main.c index d10869263c0..0ca9d329cbb 100644 --- a/src/third_party/wiredtiger/test/csuite/wt2719_reconfig/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt2719_reconfig/main.c @@ -91,10 +91,10 @@ static const char *const list[] = {",cache_overhead=13", ",cache_overhead=27", " ",verbose=(\"compact\")", ",verbose=(\"evict\")", ",verbose=(\"evictserver\")", ",verbose=(\"fileops\")", ",verbose=(\"handleops\")", ",verbose=(\"log\")", ",verbose=(\"lsm\")", ",verbose=(\"lsm_manager\")", ",verbose=(\"metadata\")", ",verbose=(\"mutex\")", - ",verbose=(\"overflow\")", ",verbose=(\"read\")", ",verbose=(\"rebalance\")", - ",verbose=(\"reconcile\")", ",verbose=(\"recovery\")", ",verbose=(\"salvage\")", - ",verbose=(\"shared_cache\")", ",verbose=(\"split\")", ",verbose=(\"transaction\")", - ",verbose=(\"verify\")", ",verbose=(\"version\")", ",verbose=(\"write\")", ",verbose=()"}; + ",verbose=(\"overflow\")", ",verbose=(\"read\")", ",verbose=(\"reconcile\")", + ",verbose=(\"recovery\")", ",verbose=(\"salvage\")", ",verbose=(\"shared_cache\")", + ",verbose=(\"split\")", ",verbose=(\"transaction\")", ",verbose=(\"verify\")", + ",verbose=(\"version\")", ",verbose=(\"write\")", ",verbose=()"}; static int handle_message(WT_EVENT_HANDLER *handler, WT_SESSION *session, const char *message) diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml index b62cbabff56..0e01d500bc8 100755 --- 
a/src/third_party/wiredtiger/test/evergreen.yml +++ b/src/third_party/wiredtiger/test/evergreen.yml @@ -285,8 +285,10 @@ functions: set -o errexit set -o verbose - # Dump stderr/stdout contents generated by the C libraries onto console for Python tests - find "WT_TEST" -name "std*.txt" ! -empty -printf "\nContents from '%p':\n\n" -exec cat {} \; + if [ -d "WT_TEST" ]; then + # Dump stderr/stdout contents generated by the C libraries onto console for Python tests + find "WT_TEST" -name "std*.txt" ! -empty -exec sh -c "echo 'Contents from {}:'; cat {}" \; + fi "checkpoint test": command: shell.exec @@ -1911,10 +1913,9 @@ tasks: - func: "format test" vars: extra_args: checkpoints=1 leak_memory=0 mmap=1 file_type=row compression=snappy logging=1 logging_compression=snappy logging_prealloc=1 - # FIXME-WT-6410: temporarily disable rebalance test - # - func: "format test" - # vars: - # extra_args: checkpoints=1 leak_memory=0 mmap=1 file_type=row alter=1 backups=1 compaction=1 data_extend=1 prepare=1 rebalance=1 salvage=1 statistics=1 statistics_server=1 verify=1 + - func: "format test" + vars: + extra_args: checkpoints=1 leak_memory=0 mmap=1 file_type=row alter=1 backups=1 compaction=1 data_extend=1 prepare=1 salvage=1 statistics=1 statistics_server=1 verify=1 - func: "format test" vars: extra_args: checkpoints=1 leak_memory=0 mmap=1 file_type=row firstfit=1 internal_key_truncation=1 diff --git a/src/third_party/wiredtiger/test/evergreen/compatibility_test_for_releases.sh b/src/third_party/wiredtiger/test/evergreen/compatibility_test_for_releases.sh index bf5f9fed505..8dde932716d 100755 --- a/src/third_party/wiredtiger/test/evergreen/compatibility_test_for_releases.sh +++ b/src/third_party/wiredtiger/test/evergreen/compatibility_test_for_releases.sh @@ -75,7 +75,6 @@ run_format() args+="leak_memory=1 " # Faster runs args+="logging=1 " # Test log compatibility args+="logging_compression=snappy " # We only built with snappy, force the choice - args+="rebalance=0 " # Faster 
runs args+="rows=1000000 " args+="salvage=0 " # Faster runs args+="timer=4 " diff --git a/src/third_party/wiredtiger/test/fops/file.c b/src/third_party/wiredtiger/test/fops/file.c index b6073f333ab..214f25c0c2e 100644 --- a/src/third_party/wiredtiger/test/fops/file.c +++ b/src/third_party/wiredtiger/test/fops/file.c @@ -231,21 +231,6 @@ obj_checkpoint(void) } void -obj_rebalance(void) -{ - WT_SESSION *session; - int ret; - - testutil_check(conn->open_session(conn, NULL, NULL, &session)); - - if ((ret = session->rebalance(session, uri, NULL)) != 0) - if (ret != ENOENT && ret != EBUSY) - testutil_die(ret, "session.rebalance"); - - testutil_check(session->close(session, NULL)); -} - -void obj_upgrade(void) { WT_SESSION *session; diff --git a/src/third_party/wiredtiger/test/fops/fops.c b/src/third_party/wiredtiger/test/fops/fops.c index ca68c765b05..9f94e9383e6 100644 --- a/src/third_party/wiredtiger/test/fops/fops.c +++ b/src/third_party/wiredtiger/test/fops/fops.c @@ -39,7 +39,6 @@ typedef struct { int create_unique; /* session.create of new file */ int cursor; /* session.open_cursor */ int drop; /* session.drop */ - int rebalance; /* session.rebalance */ int upgrade; /* session.upgrade */ int verify; /* session.verify */ } STATS; @@ -99,7 +98,7 @@ fop(void *arg) __wt_random_init(&rnd); for (i = 0; i < nops; ++i, __wt_yield()) - switch (__wt_random(&rnd) % 10) { + switch (__wt_random(&rnd) % 9) { case 0: ++s->bulk; obj_bulk(); @@ -125,18 +124,14 @@ fop(void *arg) obj_upgrade(); break; case 6: - ++s->rebalance; - obj_rebalance(); - break; - case 7: ++s->verify; obj_verify(); break; - case 8: + case 7: ++s->bulk_unique; obj_bulk_unique(__wt_random(&rnd) & 1); break; - case 9: + case 8: ++s->create_unique; obj_create_unique(__wt_random(&rnd) & 1); break; @@ -162,7 +157,7 @@ print_stats(u_int nthreads) "\t" "bulk %3d, checkpoint %3d, create %3d, cursor %3d,\n" "\t" - "drop %3d, rebalance %3d, upgrade %3d, verify %3d\n", + "drop %3d, upgrade %3d, verify %3d\n", id, 
s->bulk + s->bulk_unique, s->ckpt, s->create + s->create_unique, s->cursor, s->drop, - s->rebalance, s->upgrade, s->verify); + s->upgrade, s->verify); } diff --git a/src/third_party/wiredtiger/test/fops/thread.h b/src/third_party/wiredtiger/test/fops/thread.h index 16518ee20c4..5f8ac9c1125 100644 --- a/src/third_party/wiredtiger/test/fops/thread.h +++ b/src/third_party/wiredtiger/test/fops/thread.h @@ -48,6 +48,5 @@ void obj_create(void); void obj_create_unique(int); void obj_cursor(void); void obj_drop(int); -void obj_rebalance(void); void obj_upgrade(void); void obj_verify(void); diff --git a/src/third_party/wiredtiger/test/format/Makefile.am b/src/third_party/wiredtiger/test/format/Makefile.am index 8d0c3971494..aa49dc6d732 100644 --- a/src/third_party/wiredtiger/test/format/Makefile.am +++ b/src/third_party/wiredtiger/test/format/Makefile.am @@ -5,7 +5,7 @@ AM_CPPFLAGS +=-I$(top_srcdir)/test/utility noinst_PROGRAMS = t t_SOURCES =\ alter.c backup.c bulk.c checkpoint.c compact.c config.c config_compat.c hs.c kv.c ops.c \ - random.c rebalance.c salvage.c snap.c t.c trace.c util.c wts.c + random.c salvage.c snap.c t.c trace.c util.c wts.c t_LDADD = $(top_builddir)/test/utility/libtest_util.la t_LDADD +=$(top_builddir)/libwiredtiger.la diff --git a/src/third_party/wiredtiger/test/format/config.c b/src/third_party/wiredtiger/test/format/config.c index d19f5cd447d..5cb994e5c81 100644 --- a/src/third_party/wiredtiger/test/format/config.c +++ b/src/third_party/wiredtiger/test/format/config.c @@ -623,11 +623,6 @@ config_directio(void) * format just hung, and the 15-minute timeout isn't effective. We could play games to handle * child process termination, but it's not worth the effort. 
*/ - if (g.c_rebalance) { - if (config_is_perm("ops.rebalance")) - testutil_die(EINVAL, "direct I/O is incompatible with rebalance configurations"); - config_single("ops.rebalance=off", false); - } if (g.c_salvage) { if (config_is_perm("ops.salvage")) testutil_die(EINVAL, "direct I/O is incompatible with salvage configurations"); @@ -707,8 +702,6 @@ config_in_memory(void) return; if (config_is_perm("ops.hs_cursor")) return; - if (config_is_perm("ops.rebalance")) - return; if (config_is_perm("ops.salvage")) return; if (config_is_perm("ops.verify")) @@ -740,8 +733,6 @@ config_in_memory_reset(void) config_single("ops.hs_cursor=off", false); if (!config_is_perm("logging")) config_single("logging=off", false); - if (!config_is_perm("ops.rebalance")) - config_single("ops.rebalance=off", false); if (!config_is_perm("ops.salvage")) config_single("ops.salvage=off", false); if (!config_is_perm("ops.verify")) @@ -927,12 +918,6 @@ config_transaction(void) testutil_die(EINVAL, "timestamps require transaction frequency set to 100"); } - /* FIXME-WT-6410: temporarily disable rebalance with timestamps. */ - if (g.c_txn_timestamps && g.c_rebalance) { - if (config_is_perm("ops.rebalance")) - testutil_die(EINVAL, "rebalance cannot run with timestamps"); - config_single("ops.rebalance=off", false); - } /* FIXME-WT-6431: temporarily disable salvage with timestamps. 
*/ if (g.c_txn_timestamps && g.c_salvage) { if (config_is_perm("ops.salvage")) diff --git a/src/third_party/wiredtiger/test/format/config.h b/src/third_party/wiredtiger/test/format/config.h index f1aa17e85fd..605ae4655fb 100644 --- a/src/third_party/wiredtiger/test/format/config.h +++ b/src/third_party/wiredtiger/test/format/config.h @@ -251,9 +251,6 @@ static CONFIG c[] = { NULL}, /* 100% */ - {"ops.rebalance", "configure rebalance", C_BOOL, 100, 1, 0, &g.c_rebalance, NULL}, - - /* 100% */ {"ops.salvage", "configure salvage", C_BOOL, 100, 1, 0, &g.c_salvage, NULL}, /* 100% */ diff --git a/src/third_party/wiredtiger/test/format/config_compat.c b/src/third_party/wiredtiger/test/format/config_compat.c index 93616108769..69011fddfea 100644 --- a/src/third_party/wiredtiger/test/format/config_compat.c +++ b/src/third_party/wiredtiger/test/format/config_compat.c @@ -149,8 +149,6 @@ static const char *list[] = { "ops.random_cursor", "read_pct=", "ops.pct.read", - "rebalance=", - "ops.rebalance", "repeat_data_pct=", "btree.repeat_data_pct", "reverse=", diff --git a/src/third_party/wiredtiger/test/format/config_compat.sed b/src/third_party/wiredtiger/test/format/config_compat.sed index 73d867a0d55..5b138afb355 100644 --- a/src/third_party/wiredtiger/test/format/config_compat.sed +++ b/src/third_party/wiredtiger/test/format/config_compat.sed @@ -57,7 +57,6 @@ s/^ops.pct.read=/read_pct=/ s/^ops.pct.write=/write_pct=/ s/^ops.prepare=/prepare=/ s/^ops.random_cursor=/random_cursor=/ -s/^ops.rebalance=/rebalance=/ s/^ops.salvage=/salvage=/ s/^ops.truncate=/truncate=/ s/^ops.verify=/verify=/ diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h index c6045c291b3..05645d4c37a 100644 --- a/src/third_party/wiredtiger/test/format/format.h +++ b/src/third_party/wiredtiger/test/format/format.h @@ -202,7 +202,6 @@ typedef struct { uint32_t c_quiet; uint32_t c_random_cursor; uint32_t c_read_pct; - uint32_t c_rebalance; uint32_t 
c_repeat_data_pct; uint32_t c_reverse; uint32_t c_rows; @@ -435,7 +434,6 @@ void wts_dump(const char *, bool); void wts_load(void); void wts_open(const char *, WT_CONNECTION **, WT_SESSION **, bool); void wts_read_scan(void); -void wts_rebalance(void); void wts_reopen(void); void wts_salvage(void); void wts_stats(void); diff --git a/src/third_party/wiredtiger/test/format/rebalance.c b/src/third_party/wiredtiger/test/format/rebalance.c deleted file mode 100644 index c17540d1a18..00000000000 --- a/src/third_party/wiredtiger/test/format/rebalance.c +++ /dev/null @@ -1,68 +0,0 @@ -/*- - * Public Domain 2014-2020 MongoDB, Inc. - * Public Domain 2008-2014 WiredTiger, Inc. - * - * This is free and unencumbered software released into the public domain. - * - * Anyone is free to copy, modify, publish, use, compile, sell, or - * distribute this software, either in source code form or as a compiled - * binary, for any purpose, commercial or non-commercial, and by any - * means. - * - * In jurisdictions that recognize copyright laws, the author or authors - * of this software dedicate any and all copyright interest in the - * software to the public domain. We make this dedication for the benefit - * of the public at large and to the detriment of our heirs and - * successors. We intend this dedication to be an overt act of - * relinquishment in perpetuity of all present and future rights to this - * software under copyright law. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include "format.h" - -#define REBALANCE_COPY_CMD "../../wt -h %s dump -f %s/REBALANCE.%s %s" -#define REBALANCE_CMP_CMD "cmp %s/REBALANCE.orig %s/REBALANCE.new > /dev/null" - -void -wts_rebalance(void) -{ - WT_CONNECTION *conn; - WT_SESSION *session; - size_t len; - char *cmd; - - if (g.c_rebalance == 0) - return; - - track("rebalance", 0ULL, NULL); - - /* Dump the current object */ - len = strlen(g.home) * 2 + strlen(g.uri) + strlen(REBALANCE_COPY_CMD) + 100; - cmd = dmalloc(len); - testutil_check(__wt_snprintf(cmd, len, REBALANCE_COPY_CMD, g.home, g.home, "orig", g.uri)); - testutil_checkfmt(system(cmd), "command failed: %s", cmd); - - /* Open the database, rebalance and verify the object, then close the database. */ - wts_open(g.home, &conn, &session, true); - testutil_check(session->rebalance(session, g.uri, NULL)); - testutil_check(session->verify(session, g.uri, "strict")); - wts_close(&conn, &session); - - /* Dump the rebalanced object. */ - testutil_check(__wt_snprintf(cmd, len, REBALANCE_COPY_CMD, g.home, g.home, "new", g.uri)); - testutil_checkfmt(system(cmd), "command failed: %s", cmd); - - /* Compare the old/new dumps of the object. 
*/ - testutil_check(__wt_snprintf(cmd, len, REBALANCE_CMP_CMD, g.home, g.home)); - testutil_checkfmt(system(cmd), "command failed: %s", cmd); - - free(cmd); -} diff --git a/src/third_party/wiredtiger/test/format/smoke.sh b/src/third_party/wiredtiger/test/format/smoke.sh index 82162874d9e..256789d1a1f 100755 --- a/src/third_party/wiredtiger/test/format/smoke.sh +++ b/src/third_party/wiredtiger/test/format/smoke.sh @@ -17,6 +17,3 @@ args="$args runs.threads=4 " # $TEST_WRAPPER ./t $args runs.type=var $TEST_WRAPPER ./t $args runs.type=row -# Force a rebalance to occur with statistics logging to test the utility -# FIXME-WT-6410: temporarily disable running rebalance with timestamps -# $TEST_WRAPPER ./t $args runs.type=row statistics.server=1 ops.rebalance=1 diff --git a/src/third_party/wiredtiger/test/format/t.c b/src/third_party/wiredtiger/test/format/t.c index 8a447b806c3..b1ae8c4ca02 100644 --- a/src/third_party/wiredtiger/test/format/t.c +++ b/src/third_party/wiredtiger/test/format/t.c @@ -248,13 +248,12 @@ main(int argc, char *argv[]) /* * Calculate how long each operations loop should run. Take any timer value and convert it to - * seconds, then allocate 15 seconds to do initialization, verification, rebalance and/or - * salvage tasks after the operations loop finishes. This is not intended to be exact in any - * way, just enough to get us into an acceptable range of run times. The reason for this is - * because we want to consume the legitimate run-time, but we also need to do the end-of-run - * checking in all cases, even if we run out of time, otherwise it won't get done. So, in - * summary pick a reasonable time and then don't check for timer expiration once the main - * operations loop completes. + * seconds, then allocate 15 seconds to do initialization, verification and/or salvage tasks + * after the operations loop finishes. This is not intended to be exact in any way, just enough + * to get us into an acceptable range of run times. 
The reason for this is because we want to + * consume the legitimate run-time, but we also need to do the end-of-run checking in all cases, + * even if we run out of time, otherwise it won't get done. So, in summary pick a reasonable + * time and then don't check for timer expiration once the main operations loop completes. */ ops_seconds = g.c_timer == 0 ? 0 : ((g.c_timer * 60) - 15) / FORMAT_OPERATION_REPS; @@ -304,11 +303,6 @@ main(int argc, char *argv[]) wts_close(&g.wts_conn, &g.wts_session); /* - * Rebalance testing. - */ - TIMED_MAJOR_OP(wts_rebalance()); - - /* * Salvage testing. */ TIMED_MAJOR_OP(wts_salvage()); diff --git a/src/third_party/wiredtiger/test/format/wts.c b/src/third_party/wiredtiger/test/format/wts.c index c55721afc25..a3295d16f88 100644 --- a/src/third_party/wiredtiger/test/format/wts.c +++ b/src/third_party/wiredtiger/test/format/wts.c @@ -467,8 +467,8 @@ wts_close(WT_CONNECTION **connp, WT_SESSION **sessionp) /* * If running in-memory, there's only a single, shared WT_CONNECTION handle. Format currently * doesn't perform the operations coded to close and then re-open the database on in-memory - * databases (for example, salvage or rebalance), so the close gets all references, it doesn't - * have to avoid closing the real handle. + * databases (for example, salvage), so the close gets all references, it doesn't have to avoid + * closing the real handle. */ if (conn == g.wts_conn_inmemory) g.wts_conn_inmemory = NULL; diff --git a/src/third_party/wiredtiger/test/suite/test_bug019.py b/src/third_party/wiredtiger/test/suite/test_bug019.py index a5af1f33d0d..fc7ca13608c 100755 --- a/src/third_party/wiredtiger/test/suite/test_bug019.py +++ b/src/third_party/wiredtiger/test/suite/test_bug019.py @@ -43,6 +43,12 @@ class test_bug019(wttest.WiredTigerTestCase): # Modify rows so we write log records. We're writing a lot more than a # single log file, so we know the underlying library will churn through # log files. 
+ def get_prealloc_used(self): + stat_cursor = self.session.open_cursor('statistics:', None, None) + prealloc = stat_cursor[stat.conn.log_prealloc_used][2] + stat_cursor.close() + return prealloc + def get_prealloc_stat(self): stat_cursor = self.session.open_cursor('statistics:', None, None) prealloc = stat_cursor[stat.conn.log_prealloc_max][2] @@ -72,11 +78,11 @@ class test_bug019(wttest.WiredTigerTestCase): # assert a file is created within 90 seconds. def prepfiles(self): for i in range(1,90): - f = fnmatch.filter(os.listdir('.'), "*Prep*") - if f: - return f - time.sleep(1.0) - self.assertFalse(not f) + f = fnmatch.filter(os.listdir('.'), "*Prep*") + if f: + return + time.sleep(1.0) + self.fail('No pre-allocated files created after 90 seconds') # There was a bug where pre-allocated log files accumulated on # Windows systems due to an issue with the directory list code. @@ -97,23 +103,22 @@ class test_bug019(wttest.WiredTigerTestCase): # Loop, making sure pre-allocation is working and the range is moving. self.pr("Check pre-allocation range is moving") - older = self.prepfiles() + # Wait for pre-allocation to start. + self.prepfiles() + used = self.get_prealloc_used() for i in range(1, 10): self.populate(self.entries) - newer = self.prepfiles() + newused = self.get_prealloc_used() + self.pr("Iteration " + str(i)) + self.pr("previous used " + str(used) + " now " + str(newused)) - # Files can be returned in any order when reading a directory, older - # pre-allocated files can persist longer than newer files when newer - # files are returned first. Confirm files are being consumed. - if set(older) < set(newer): + # Make sure we're consuming pre-allocated files. 
+ if used >= newused: self.pr("FAILURE on Iteration " + str(i)) - self.pr("FAILURE: Older") - self.pr(str(older)) - self.pr("FAILURE: Newer") - self.pr(str(newer)) - self.assertFalse(set(older) < set(newer)) + self.pr("FAILURE: previous used " + str(used) + " now " + str(newused)) + self.assertTrue(used < newused) + used = newused - older = newer self.session.checkpoint() # Wait for a long time for pre-allocate to drop in an idle system diff --git a/src/third_party/wiredtiger/test/suite/test_gc01.py b/src/third_party/wiredtiger/test/suite/test_gc01.py index 591d142d3ba..717c6f434bb 100755 --- a/src/third_party/wiredtiger/test/suite/test_gc01.py +++ b/src/third_party/wiredtiger/test/suite/test_gc01.py @@ -79,7 +79,7 @@ class test_gc_base(wttest.WiredTigerTestCase): self.assertGreaterEqual(c[stat.conn.cc_pages_removed][2], 0) c.close() -# Test that checkpoint cleans the obsolete lookaside pages. +# Test that checkpoint cleans the obsolete history store pages. class test_gc01(test_gc_base): # Force a small cache. conn_config = ('cache_size=50MB,eviction_updates_trigger=95,eviction_updates_target=80,' diff --git a/src/third_party/wiredtiger/test/suite/test_import01.py b/src/third_party/wiredtiger/test/suite/test_import01.py index f6c67a82c3b..7b7878650e8 100644 --- a/src/third_party/wiredtiger/test/suite/test_import01.py +++ b/src/third_party/wiredtiger/test/suite/test_import01.py @@ -27,34 +27,53 @@ # OTHER DEALINGS IN THE SOFTWARE. # # test_import01.py -# Import a file into a running database. +# Import a file into a running database for the following scenarios: +# - The source database and destination database are different. +# - The source database and destination database are the same. -import os, re, shutil +import os, random, re, shutil, string import wiredtiger, wttest -def timestamp_str(t): - return '%x' % t +# Shared base class used by import tests. 
+class test_import_base(wttest.WiredTigerTestCase): -class test_import01(wttest.WiredTigerTestCase): - conn_config = 'cache_size=50MB,log=(enabled),statistics=(all)' - session_config = 'isolation=snapshot' + # Insert or update a key/value at the supplied timestamp. + def update(self, uri, key, value, ts): + cursor = self.session.open_cursor(uri) + self.session.begin_transaction() + if type(value) in [list, tuple]: + cursor.set_key(key) + cursor.set_value(*value) + cursor.insert() + else: + cursor[key] = value + self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(ts)) + cursor.close() - def update(self, uri, key, value, commit_ts): + def delete(self, uri, key, ts): cursor = self.session.open_cursor(uri) self.session.begin_transaction() - cursor[key] = value - self.session.commit_transaction('commit_timestamp=' + timestamp_str(commit_ts)) + cursor.set_key(key) + self.assertEqual(0, cursor.remove()) + self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(ts)) cursor.close() - def check(self, uri, key, value, read_ts): + # Verify the specified key/value is visible at the supplied timestamp. + def check_record(self, uri, key, value): cursor = self.session.open_cursor(uri) - self.session.begin_transaction('read_timestamp=' + timestamp_str(read_ts)) cursor.set_key(key) self.assertEqual(0, cursor.search()) self.assertEqual(value, cursor.get_value()) - self.session.rollback_transaction() cursor.close() + # Verify a range of records/timestamps. + def check(self, uri, keys, values): + for i in range(len(keys)): + if type(values[i]) in [tuple]: + self.check_record(uri, keys[i], list(values[i])) + else: + self.check_record(uri, keys[i], values[i]) + # We know the ID can be different between configs, so just remove it from comparison. # Everything else should be the same. 
def config_compare(self, aconf, bconf): @@ -66,60 +85,68 @@ class test_import01(wttest.WiredTigerTestCase): re.findall('\w+=\(.*?\)+', b)) self.assertTrue(a.sort() == b.sort()) - # Helper for populating a database to simulate importing files into an existing database. - def populate(self): - # Create file:test_import01_[1-100]. - for fileno in range(1, 100): - uri = 'file:test_import01_{}'.format(fileno) + # Populate a database with N tables, each having M rows. + def populate(self, ntables, nrows): + for table in range(0, ntables): + uri = 'table:test_import_{}'.format( + ''.join(random.choice(string.ascii_letters) for i in range(10))) self.session.create(uri, 'key_format=i,value_format=S') cursor = self.session.open_cursor(uri) - # Insert keys [1-100] with value 'foo'. - for key in range(1, 100): - cursor[key] = 'foo' + for key in range(0, nrows): + cursor[key] = 'value_{}_{}'.format(table, key) cursor.close() - def copy_file(self, file_name, old_dir, new_dir): - old_path = os.path.join(old_dir, file_name) - if os.path.isfile(old_path) and "WiredTiger.lock" not in file_name and \ + # Copy a file from a source directory to a destination directory. + def copy_file(self, file_name, src_dir, dest_dir): + src_path = os.path.join(src_dir, file_name) + if os.path.isfile(src_path) and "WiredTiger.lock" not in file_name and \ "Tmplog" not in file_name and "Preplog" not in file_name: - shutil.copy(old_path, new_dir) + shutil.copy(src_path, dest_dir) - def test_file_import(self): - original_db_file = 'original_db_file' - uri = 'file:' + original_db_file + # Convert a WiredTiger timestamp to a string. 
+ def timestamp_str(self, t): + return '%x' % t - create_config = 'allocation_size=512,key_format=u,log=(enabled=true),value_format=u' - self.session.create(uri, create_config) +# test_import01 +class test_import01(test_import_base): - key1 = b'1' - key2 = b'2' - key3 = b'3' - key4 = b'4' - value1 = b'\x01\x02aaa\x03\x04' - value2 = b'\x01\x02bbb\x03\x04' - value3 = b'\x01\x02ccc\x03\x04' - value4 = b'\x01\x02ddd\x03\x04' + conn_config = 'cache_size=50MB,log=(enabled),statistics=(all)' + session_config = 'isolation=snapshot' - # Add some data. - self.update(uri, key1, value1, 10) - self.update(uri, key2, value2, 20) + original_db_file = 'original_db_file' + uri = 'file:' + original_db_file - # Perform a checkpoint. - self.session.checkpoint() + nrows = 100 + ntables = 10 + keys = [b'1', b'2', b'3', b'4', b'5', b'6'] + values = [b'\x01\x02aaa\x03\x04', b'\x01\x02bbb\x03\x04', b'\x01\x02ccc\x03\x04', + b'\x01\x02ddd\x03\x04', b'\x01\x02eee\x03\x04', b'\x01\x02fff\x03\x04'] + ts = [10*k for k in range(1, len(keys)+1)] + create_config = 'allocation_size=512,key_format=u,log=(enabled=true),value_format=u' - # Add more data. - self.update(uri, key3, value3, 30) - self.update(uri, key4, value4, 40) + def test_file_import(self): + self.session.create(self.uri, self.create_config) - # Perform a checkpoint. + # Add data and perform a checkpoint. + min_idx = 0 + max_idx = len(self.keys) // 3 + for i in range(min_idx, max_idx): + self.update(self.uri, self.keys[i], self.values[i], self.ts[i]) + self.session.checkpoint() + + # Add more data and checkpoint again. + min_idx = max_idx + max_idx = 2*len(self.keys) // 3 + for i in range(min_idx, max_idx): + self.update(self.uri, self.keys[i], self.values[i], self.ts[i]) self.session.checkpoint() # Export the metadata for the table. 
c = self.session.open_cursor('metadata:', None, None) - original_db_file_config = c[uri] + original_db_file_config = c[self.uri] c.close() - self.printVerbose(3, '\nFILE CONFIG\n' + original_db_file_config) + self.printVerbose(3, '\nFile configuration:\n' + original_db_file_config) # Close the connection. self.close_conn() @@ -132,107 +159,94 @@ class test_import01(wttest.WiredTigerTestCase): self.session = self.setUpSessionOpen(self.conn) # Make a bunch of files and fill them with data. - self.populate() + self.populate(self.ntables, self.nrows) + self.session.checkpoint() + + # Bring forward the oldest to be past or equal to the timestamps we'll be importing. + self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(self.ts[max_idx])) # Copy over the datafiles for the object we want to import. - self.copy_file(original_db_file, '.', newdir) + self.copy_file(self.original_db_file, '.', newdir) # Contruct the config string. import_config = 'import=(enabled,repair=false,file_metadata=(' + \ original_db_file_config + '))' # Import the file. - self.session.create(uri, import_config) + self.session.create(self.uri, import_config) # Verify object. - self.session.verify(uri) + self.session.verify(self.uri) # Check that the previously inserted values survived the import. - self.check(uri, key1, value1, 10) - self.check(uri, key2, value2, 20) - self.check(uri, key3, value3, 30) - self.check(uri, key4, value4, 40) + self.check(self.uri, self.keys[:max_idx], self.values[:max_idx]) # Compare configuration metadata. c = self.session.open_cursor('metadata:', None, None) - current_db_file_config = c[uri] + current_db_file_config = c[self.uri] c.close() self.config_compare(original_db_file_config, current_db_file_config) - key5 = b'5' - key6 = b'6' - value5 = b'\x01\x02eee\x03\x04' - value6 = b'\x01\x02fff\x03\x04' - - # Add some data and check that the file operates as usual after importing. 
- self.update(uri, key5, value5, 50) - self.update(uri, key6, value6, 60) - - self.check(uri, key5, value5, 50) - self.check(uri, key6, value6, 60) + # Add some data and check that the table operates as usual after importing. + min_idx = max_idx + max_idx = len(self.keys) + for i in range(min_idx, max_idx): + self.update(self.uri, self.keys[i], self.values[i], self.ts[i]) + self.check(self.uri, self.keys, self.values) # Perform a checkpoint. self.session.checkpoint() def test_file_import_dropped_file(self): - original_db_file = 'original_db_file' - uri = 'file:' + original_db_file - - create_config = 'allocation_size=512,key_format=u,log=(enabled=true),value_format=u' - self.session.create(uri, create_config) - - key1 = b'1' - key2 = b'2' - value1 = b'\x01\x02aaa\x03\x04' - value2 = b'\x01\x02bbb\x03\x04' + self.session.create(self.uri, self.create_config) - # Add some data. - self.update(uri, key1, value1, 10) - self.update(uri, key2, value2, 20) - - # Perform a checkpoint. + # Add data and perform a checkpoint. + for i in range(0, len(self.keys)): + self.update(self.uri, self.keys[i], self.values[i], self.ts[i]) self.session.checkpoint() # Export the metadata for the table. c = self.session.open_cursor('metadata:', None, None) - original_db_file_config = c[uri] + original_db_file_config = c[self.uri] c.close() - self.printVerbose(3, '\nFILE CONFIG\n' + original_db_file_config) + self.printVerbose(3, '\nFile configuration:\n' + original_db_file_config) # Make a bunch of files and fill them with data. - self.populate() + self.populate(self.ntables, self.nrows) + + # Bring forward the oldest to be past or equal to the timestamps we'll be importing. + self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(self.ts[-1])) # Make a copy of the data file that we're about to drop. 
backup_dir = 'BACKUP' shutil.rmtree(backup_dir, ignore_errors=True) os.mkdir(backup_dir) - self.copy_file(original_db_file, '.', backup_dir) + self.copy_file(self.original_db_file, '.', backup_dir) # Drop the table. # We'll be importing it back into our database shortly. - self.session.drop(uri) + self.session.drop(self.uri) # Now copy it back to our database directory. - self.copy_file(original_db_file, backup_dir, '.') + self.copy_file(self.original_db_file, backup_dir, '.') # Contruct the config string. import_config = 'import=(enabled,repair=false,file_metadata=(' + \ original_db_file_config + '))' # Import the file. - self.session.create(uri, import_config) + self.session.create(self.uri, import_config) # Verify object. - self.session.verify(uri) + self.session.verify(self.uri) # Check that the previously inserted values survived the import. - self.check(uri, key1, value1, 10) - self.check(uri, key2, value2, 20) + self.check(self.uri, self.keys, self.values) # Compare configuration metadata. 
c = self.session.open_cursor('metadata:', None, None) - current_db_file_config = c[uri] + current_db_file_config = c[self.uri] c.close() self.config_compare(original_db_file_config, current_db_file_config) diff --git a/src/third_party/wiredtiger/test/suite/test_import02.py b/src/third_party/wiredtiger/test/suite/test_import02.py index 6fda30d8899..9eba57e4d42 100644 --- a/src/third_party/wiredtiger/test/suite/test_import02.py +++ b/src/third_party/wiredtiger/test/suite/test_import02.py @@ -31,58 +31,31 @@ import os, shutil import wiredtiger, wttest +from test_import01 import test_import_base -def timestamp_str(t): - return '%x' % t - -class test_import02(wttest.WiredTigerTestCase): +class test_import02(test_import_base): conn_config = 'cache_size=50MB,log=(enabled),statistics=(all)' session_config = 'isolation=snapshot' - def update(self, uri, key, value, commit_ts): - cursor = self.session.open_cursor(uri) - self.session.begin_transaction() - cursor[key] = value - self.session.commit_transaction('commit_timestamp=' + timestamp_str(commit_ts)) - cursor.close() + original_db_file = 'original_db_file' + uri = 'file:' + original_db_file - # Helper for populating a database to simulate importing files into an existing database. - def populate(self): - # Create file:test_import02_[1-100]. - for fileno in range(1, 100): - uri = 'file:test_import02_{}'.format(fileno) - self.session.create(uri, 'key_format=i,value_format=S') - cursor = self.session.open_cursor(uri) - # Insert keys [1-100] with value 'foo'. 
- for key in range(1, 100): - cursor[key] = 'foo' - cursor.close() - - def copy_file(self, file_name, old_dir, new_dir): - old_path = os.path.join(old_dir, file_name) - if os.path.isfile(old_path) and "WiredTiger.lock" not in file_name and \ - "Tmplog" not in file_name and "Preplog" not in file_name: - shutil.copy(old_path, new_dir) + nrows = 100 + ntables = 10 + keys = [b'1', b'2', b'3', b'4', b'5', b'6'] + values = [b'\x01\x02aaa\x03\x04', b'\x01\x02bbb\x03\x04', b'\x01\x02ccc\x03\x04', + b'\x01\x02ddd\x03\x04', b'\x01\x02eee\x03\x04', b'\x01\x02fff\x03\x04'] + ts = [10*k for k in range(1, len(keys)+1)] + create_config = 'allocation_size=512,key_format=u,log=(enabled=true),value_format=u' # The cases where 'file_metadata' is empty or the config option itself is missing entirely are # almost identical. Let's capture this in a helper and call them from each test. def no_metadata_helper(self, import_config): - original_db_file = 'original_db_file' - uri = 'file:' + original_db_file - - create_config = 'allocation_size=512,key_format=u,log=(enabled=true),value_format=u' - self.session.create(uri, create_config) - - key1 = b'1' - key2 = b'2' - value1 = b'\x01\x02aaa\x03\x04' - value2 = b'\x01\x02bbb\x03\x04' - - # Add some data. - self.update(uri, key1, value1, 10) - self.update(uri, key2, value2, 20) + self.session.create(self.uri, self.create_config) - # Perform a checkpoint. + # Add data and perform a checkpoint. + for i in range(0, len(self.keys)): + self.update(self.uri, self.keys[i], self.values[i], self.ts[i]) self.session.checkpoint() # Close the connection. @@ -95,15 +68,18 @@ class test_import02(wttest.WiredTigerTestCase): self.conn = self.setUpConnectionOpen(newdir) self.session = self.setUpSessionOpen(self.conn) + # Bring forward the oldest to be past or equal to the timestamps we'll be importing. + self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(self.ts[-1])) + # Copy over the datafiles for the object we want to import. 
- self.copy_file(original_db_file, '.', newdir) + self.copy_file(self.original_db_file, '.', newdir) # Import the file. # Since we need "file_metadata" without the "repair" option, we should expect an error here. with self.expectedStderrPattern( 'file:original_db_file: import requires that \'file_metadata\' is specified'): self.assertRaisesException(wiredtiger.WiredTigerError, - lambda: self.session.create(uri, import_config)) + lambda: self.session.create(self.uri, import_config)) def test_file_import_empty_metadata(self): self.no_metadata_helper('import=(enabled,repair=false,file_metadata="")') @@ -112,34 +88,25 @@ class test_import02(wttest.WiredTigerTestCase): self.no_metadata_helper('import=(enabled,repair=false)') def test_file_import_existing_uri(self): - original_db_file = 'original_db_file' - uri = 'file:' + original_db_file - - create_config = 'allocation_size=512,key_format=u,log=(enabled=true),value_format=u' - self.session.create(uri, create_config) - - key1 = b'1' - key2 = b'2' - - value1 = b'\x01\x02aaa\x03\x04' - value2 = b'\x01\x02bbb\x03\x04' + self.session.create(self.uri, self.create_config) - # Add some data. - self.update(uri, key1, value1, 10) - self.update(uri, key2, value2, 20) - - # Perform a checkpoint. + # Add data and perform a checkpoint. + for i in range(0, len(self.keys)): + self.update(self.uri, self.keys[i], self.values[i], self.ts[i]) self.session.checkpoint() # Export the metadata for the table. c = self.session.open_cursor('metadata:', None, None) - original_db_file_config = c[uri] + original_db_file_config = c[self.uri] c.close() - self.printVerbose(3, '\nFILE CONFIG\n' + original_db_file_config) + self.printVerbose(3, '\nFile configuration:\n' + original_db_file_config) # Make a bunch of files and fill them with data. - self.populate() + self.populate(self.ntables, self.nrows) + + # Bring forward the oldest to be past or equal to the timestamps we'll be importing. 
+ self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(self.ts[-1])) # Contruct the config string. import_config = 'import=(enabled,repair=false,file_metadata=(' + \ @@ -148,24 +115,23 @@ class test_import02(wttest.WiredTigerTestCase): # Try to import the file even though it already exists in our database. # We should get an error back. self.assertRaisesException(wiredtiger.WiredTigerError, - lambda: self.session.create(uri, import_config)) + lambda: self.session.create(self.uri, import_config)) def test_import_file_missing_file(self): - original_db_file = 'original_db_file' - uri = 'file:' + original_db_file - # Make a bunch of files and fill them with data. - self.populate() - + self.populate(self.ntables, self.nrows) self.session.checkpoint() # Export the metadata for one of the files we made. # We just need an example of what a file configuration would typically look like. - c = self.session.open_cursor('metadata:', None, None) - example_db_file_config = c['file:test_import02_1'] - c.close() + cursor = self.session.open_cursor('metadata:', None, None) + for k, v in cursor: + if k.startswith('table:'): + example_db_file_config = cursor[k] + break + cursor.close() - self.printVerbose(3, '\nFILE CONFIG\n' + example_db_file_config) + self.printVerbose(3, '\nFile configuration:\n' + example_db_file_config) # Contruct the config string. import_config = 'import=(enabled,repair=false,file_metadata=(' + \ @@ -173,7 +139,5 @@ class test_import02(wttest.WiredTigerTestCase): # Try to import a file that doesn't exist on disk. # We should get an error back. 
- with self.expectedStderrPattern( - 'file:original_db_file: attempted to import file that does not exist'): - self.assertRaisesException(wiredtiger.WiredTigerError, - lambda: self.session.create(uri, import_config)) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.create(self.uri, import_config), '/No such file or directory/') diff --git a/src/third_party/wiredtiger/test/suite/test_import03.py b/src/third_party/wiredtiger/test/suite/test_import03.py new file mode 100644 index 00000000000..749615274cd --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_import03.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2020 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+# +# test_import03.py +# Import a table into a running database. + +import os, random, shutil +from wtscenario import make_scenarios +from test_import01 import test_import_base + +class test_import03(test_import_base): + conn_config = 'cache_size=50MB,log=(enabled),statistics=(all)' + session_config = 'isolation=snapshot' + + ntables = 10 + nrows = 100 + scenarios = make_scenarios([ + ('simple_table', dict( + is_simple = True, + keys = [k for k in range(1, nrows+1)], + values = random.sample(range(1000000), k=nrows), + config = 'key_format=r,value_format=i')), + ('table_with_named_columns', dict( + is_simple = False, + keys = [k for k in range(1, 7)], + values = [('Australia', 'Canberra', 1),('Japan', 'Tokyo', 2),('Italy', 'Rome', 3), + ('China', 'Beijing', 4),('Germany', 'Berlin', 5),('South Korea', 'Seoul', 6)], + config = 'columns=(id,country,capital,population),key_format=r,value_format=SSi')), + ]) + + # Test something table specific like a projection. + def check_projections(self, uri, keys, values): + for i in range(0, len(keys)): + self.check_record(uri + '(country,capital)', + keys[i], [values[i][0], values[i][1]]) + self.check_record(uri + '(country,population)', + keys[i], [values[i][0], values[i][2]]) + self.check_record(uri + '(capital,population)', + keys[i], [values[i][1], values[i][2]]) + + def test_table_import(self): + # Add some data and checkpoint. + self.populate(self.ntables, self.nrows) + self.session.checkpoint() + + original_db_table = 'original_db_table' + uri = 'table:' + original_db_table + create_config = 'allocation_size=512,log=(enabled=true),' + self.config + self.session.create(uri, create_config) + + keys = self.keys + values = self.values + ts = [10*k for k in range(1, len(keys)+1)] + + # Add data and perform a checkpoint. + min_idx = 0 + max_idx = len(keys) // 3 + for i in range(min_idx, max_idx): + self.update(uri, keys[i], values[i], ts[i]) + self.session.checkpoint() + + # Add more data and checkpoint again. 
+ min_idx = max_idx + max_idx = 2*len(keys) // 3 + for i in range(min_idx, max_idx): + self.update(uri, keys[i], values[i], ts[i]) + self.session.checkpoint() + + # Export the metadata for the table. + original_db_file_uri = 'file:' + original_db_table + '.wt' + c = self.session.open_cursor('metadata:', None, None) + original_db_table_config = c[uri] + original_db_file_config = c[original_db_file_uri] + c.close() + + self.printVerbose(3, '\nFile configuration:\n' + original_db_file_config) + self.printVerbose(3, '\nTable configuration:\n' + original_db_table_config) + + # Contruct the config string. + import_config = '{},import=(enabled,repair=false,file_metadata=({}))'.format( + original_db_table_config, original_db_file_config) + + # Close the connection. + self.close_conn() + + # Create a new database and connect to it. + newdir = 'IMPORT_DB' + shutil.rmtree(newdir, ignore_errors=True) + os.mkdir(newdir) + self.conn = self.setUpConnectionOpen(newdir) + self.session = self.setUpSessionOpen(self.conn) + + # Make a bunch of files and fill them with data. + self.populate(self.ntables, self.nrows) + self.session.checkpoint() + + # Bring forward the oldest to be past or equal to the timestamps we'll be importing. + self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(ts[max_idx])) + + # Copy over the datafiles for the object we want to import. + self.copy_file(original_db_table + '.wt', '.', newdir) + + # Import the table. + self.session.create(uri, import_config) + + # Verify object. + self.session.verify(uri) + + # Check that the previously inserted values survived the import. + self.check(uri, keys[:max_idx], values[:max_idx]) + + # Check against projections when the table is not simple. + if not self.is_simple: + self.check_projections(uri, keys[:max_idx], values[:max_idx]) + + # Compare configuration metadata. 
+ c = self.session.open_cursor('metadata:', None, None) + current_db_table_config = c[uri] + c.close() + self.config_compare(original_db_table_config, current_db_table_config) + + # Add some data and check that the table operates as usual after importing. + min_idx = max_idx + max_idx = len(keys) + for i in range(min_idx, max_idx): + self.update(uri, keys[i], values[i], ts[i]) + self.check(uri, keys, values) + if not self.is_simple: + self.check_projections(uri, keys, values) + + # Perform a checkpoint. + self.session.checkpoint() + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_import04.py b/src/third_party/wiredtiger/test/suite/test_import04.py new file mode 100644 index 00000000000..14202754c36 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_import04.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2020 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# test_import04.py +# Test success and failure scenarios for importing a table into a running database. +# 1. Attempt to import a table into a destination database where a table object of +# that name already exists. +# Expected outcome: FAILURE +# 2. Drop a table from a database without removing the data files, then attempt to +# import that table into the same database. +# Expected outcome: SUCCESS +# 3. Attempt to import a table into a destination database where the required data +# files do not exist in the destination database directory. +# Expected outcome: FAILURE +# 4. Attempt to import a table into a destination database without specifying the +# exported table configuration. +# Expected outcome: FAILURE +# 5. Attempt to import a table into a destination database without specifying the +# exported file configuration. +# Expected outcome: FAILURE +# 6. Attempt to import a table into a destination database with the exported +# configuration strings supplied, the required data files are present and the +# table object does not already exist in the destination database. 
+# Expected outcome: SUCCESS + +import os, random, shutil +import wiredtiger, wttest +from wtscenario import make_scenarios +from test_import01 import test_import_base + +class test_import04(test_import_base): + conn_config = 'cache_size=50MB,log=(enabled),statistics=(all)' + session_config = 'isolation=snapshot' + + ntables = 10 + nrows = 100 + scenarios = make_scenarios([ + ('simple_table', dict( + is_simple = True, + keys=[k for k in range(1, nrows+1)], + values=random.sample(range(1000000), k=nrows), + config='key_format=r,value_format=i')), + ('table_with_named_columns', dict( + is_simple = False, + keys=[k for k in range(1, 7)], + values=[('Australia', 'Canberra', 1),('Japan', 'Tokyo', 2),('Italy', 'Rome', 3), + ('China', 'Beijing', 4),('Germany', 'Berlin', 5),('South Korea', 'Seoul', 6)], + config='columns=(id,country,capital,population),key_format=r,value_format=SSi')), + ]) + + # Test table projections. + def check_projections(self, uri, keys, values): + for i in range(0, len(keys)): + self.check_record(uri + '(country,capital)', + keys[i], [values[i][0], values[i][1]]) + self.check_record(uri + '(country,population)', + keys[i], [values[i][0], values[i][2]]) + self.check_record(uri + '(capital,population)', + keys[i], [values[i][1], values[i][2]]) + + def test_table_import(self): + # Add some data and checkpoint. + self.populate(self.ntables, self.nrows) + self.session.checkpoint() + + # Create the target table for import tests. + original_db_table = 'original_db_table' + uri = 'table:' + original_db_table + create_config = 'allocation_size=512,log=(enabled=true),' + self.config + self.session.create(uri, create_config) + + keys = self.keys + values = self.values + ts = [10*k for k in range(1, len(keys)+1)] + + # Add data and perform a checkpoint. + min_idx = 0 + max_idx = len(keys) // 3 + for i in range(min_idx, max_idx): + self.update(uri, keys[i], values[i], ts[i]) + self.session.checkpoint() + + # Add more data and checkpoint again. 
+ min_idx = max_idx + max_idx = 2*len(keys) // 3 + for i in range(min_idx, max_idx): + self.update(uri, keys[i], values[i], ts[i]) + self.session.checkpoint() + + # Check the inserted values are in the table. + self.check(uri, keys[:max_idx], values[:max_idx]) + + # Check against projections when the table is not simple. + if not self.is_simple: + self.check_projections(uri, keys[:max_idx], values[:max_idx]) + + # Export the metadata for the table. + original_db_file_uri = 'file:' + original_db_table + '.wt' + c = self.session.open_cursor('metadata:', None, None) + original_db_table_config = c[uri] + original_db_file_config = c[original_db_file_uri] + c.close() + + # Close the connection. + self.close_conn() + + # Construct the config string from the exported metadata. + import_config = '{},import=(enabled,file_metadata=({}))'.format( + original_db_table_config, original_db_file_config) + + # Reopen the connection, add some data and attempt to import the table. We expect + # this to fail. + self.conn = self.setUpConnectionOpen('.') + self.session = self.setUpSessionOpen(self.conn) + self.populate(self.ntables, self.nrows) + self.session.checkpoint() + + # Bring forward the oldest to be past or equal to the timestamps we'll be importing. + self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(ts[max_idx])) + + self.assertRaisesException(wiredtiger.WiredTigerError, + lambda: self.session.create(uri, import_config)) + + # Drop the table without removing the data files then attempt to import. We expect + # this operation to succeed. + self.session.drop(uri, 'remove_files=false') + # Verify the table is dropped. + self.assertRaisesException(wiredtiger.WiredTigerError, + lambda: self.session.open_cursor(uri, None, None)) + self.session.create(uri, import_config) + + self.close_conn() + + # Create a new database, connect and populate. 
+ newdir = 'IMPORT_DB' + shutil.rmtree(newdir, ignore_errors=True) + os.mkdir(newdir) + self.conn = self.setUpConnectionOpen(newdir) + self.session = self.setUpSessionOpen(self.conn) + self.populate(self.ntables, self.nrows) + self.session.checkpoint() + + # Bring forward the oldest to be past or equal to the timestamps we'll be importing. + self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(ts[max_idx])) + + # Attempt to import the table before copying the file. We expect this to fail. + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.create(uri, import_config), '/No such file or directory/') + + # Copy over the datafiles for the object we want to import. + self.copy_file(original_db_table + '.wt', '.', newdir) + + # Construct the config string incorrectly by omitting the table config. + no_table_config = 'import=(enabled,file_metadata=({}))'.format(original_db_file_config) + + # Attempt to import the table. We expect this to fail. + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.create(uri, no_table_config), '/Invalid argument/') + + # Construct the config string incorrectly by omitting the file_metadata and attempt to + # import the table. We expect this to fail. + no_file_config = '{},import=(enabled)'.format(original_db_table_config) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.create(uri, no_file_config), '/Invalid argument/') + + # Import the table. + self.session.create(uri, import_config) + + # Verify object. + self.session.verify(uri) + + # Check that the previously inserted values survived the import. + self.check(uri, keys[:max_idx], values[:max_idx]) + if not self.is_simple: + self.check_projections(uri, keys[:max_idx], values[:max_idx]) + + # Compare configuration metadata. 
+ c = self.session.open_cursor('metadata:', None, None) + current_db_table_config = c[uri] + c.close() + self.config_compare(original_db_table_config, current_db_table_config) + + # Add some data and check that the table operates as usual after importing. + min_idx = max_idx + max_idx = len(keys) + for i in range(min_idx, max_idx): + self.update(uri, keys[i], values[i], ts[i]) + self.check(uri, keys, values) + if not self.is_simple: + self.check_projections(uri, keys, values) + + # Perform a checkpoint. + self.session.checkpoint() + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_import05.py b/src/third_party/wiredtiger/test/suite/test_import05.py new file mode 100644 index 00000000000..5282018aaf1 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_import05.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2020 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# test_import05.py +# Error conditions when trying to import files with timestamps past oldest. + +import os, shutil +import wiredtiger +from wtscenario import make_scenarios +from test_import01 import test_import_base + +class test_import05(test_import_base): + conn_config = 'cache_size=50MB,log=(enabled),statistics=(all)' + session_config = 'isolation=snapshot' + + ntables = 10 + nrows = 100 + keys = [b'1', b'2', b'3', b'4', b'5', b'6'] + values = [b'\x01\x02aaa\x03\x04', b'\x01\x02bbb\x03\x04', b'\x01\x02ccc\x03\x04', + b'\x01\x02ddd\x03\x04', b'\x01\x02eee\x03\x04', b'\x01\x02fff\x03\x04'] + ts = [10*k for k in range(1, len(keys)+1)] + scenarios = make_scenarios([ + ('insert', dict(op_type='insert')), + ('delete', dict(op_type='delete')), + ]) + + def test_file_import_ts_past_oldest(self): + original_db_file = 'original_db_file' + uri = 'file:' + original_db_file + create_config = 'allocation_size=512,key_format=u,log=(enabled=true),value_format=u' + self.session.create(uri, create_config) + + # Add data and perform a checkpoint. + # We're inserting everything EXCEPT the last record. + for i in range(0, len(self.keys) - 1): + self.update(uri, self.keys[i], self.values[i], self.ts[i]) + + self.session.checkpoint() + + # Place the last insert/delete. + # We also want to check that a stop timestamp later than oldest will prevent imports. In the + # delete case, we should use the last timestamp in our data set and use it delete the first + # key we inserted. + if self.op_type == 'insert': + self.update(uri, self.keys[-1], self.values[-1], self.ts[-1]) + else: + self.assertEqual(self.op_type, 'delete') + self.delete(uri, self.keys[0], self.ts[-1]) + + # Perform a checkpoint. 
+ self.session.checkpoint() + + # Export the metadata for the table. + c = self.session.open_cursor('metadata:', None, None) + original_db_file_config = c[uri] + c.close() + + self.printVerbose(3, '\nFile configuration:\n' + original_db_file_config) + + # Close the connection. + self.close_conn() + + # Create a new database and connect to it. + newdir = 'IMPORT_DB' + shutil.rmtree(newdir, ignore_errors=True) + os.mkdir(newdir) + self.conn = self.setUpConnectionOpen(newdir) + self.session = self.setUpSessionOpen(self.conn) + + # Copy over the datafiles for the object we want to import. + self.copy_file(original_db_file, '.', newdir) + + # Construct the config string. + import_config = 'import=(enabled,repair=false,file_metadata=(' + \ + original_db_file_config + '))' + + # Create error pattern. Depending on the situation, we substitute a different timestamp into + # the error message to check against. + error_pattern = \ + 'import found aggregated {} timestamp newer than the current oldest timestamp' + + # Now begin trying to import the file. + # + # Since we haven't set oldest (and it defaults to 0), we're expecting an error here as the + # table has timestamps past 0. + # + # Start timestamps get checked first so that's the error msg we expect. + error_msg = error_pattern.format('newest start durable') + + with self.expectedStderrPattern(error_msg): + self.assertRaisesException(wiredtiger.WiredTigerError, + lambda: self.session.create(uri, import_config)) + + # Place the oldest timestamp just BEFORE the last insert/delete we made. + # + # The table we're importing had an operation past this point so we're still expecting an + # error. + self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(self.ts[-1] - 1)) + + # If our latest operation was an insert, we're expecting it to complain about the aggregated + # start timestamp whereas if we did a delete, we should expect it to complain about stop. 
+ error_msg = error_pattern.format( + 'newest start durable' if self.op_type == 'insert' else 'newest stop durable') + + with self.expectedStderrPattern(error_msg): + self.assertRaisesException(wiredtiger.WiredTigerError, + lambda: self.session.create(uri, import_config)) + + # Now place oldest equal to the last insert/delete we made. This should succeed since all + # of our aggregated timestamps are now equal to or behind oldest. + self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(self.ts[-1])) + self.session.create(uri, import_config) diff --git a/src/third_party/wiredtiger/test/suite/test_prepare02.py b/src/third_party/wiredtiger/test/suite/test_prepare02.py index c10223940fe..99405e61296 100644 --- a/src/third_party/wiredtiger/test/suite/test_prepare02.py +++ b/src/third_party/wiredtiger/test/suite/test_prepare02.py @@ -70,8 +70,6 @@ class test_prepare02(wttest.WiredTigerTestCase, suite_subprocess): self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.log_flush("sync=on"), msg) self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: self.session.rebalance("table:mytable", None), msg) - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.rename("table:mytable", "table:mynewtable", None), msg) self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda:self.session.reset(), msg) diff --git a/src/third_party/wiredtiger/test/suite/test_prepare12.py b/src/third_party/wiredtiger/test/suite/test_prepare12.py new file mode 100644 index 00000000000..37c564ae723 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_prepare12.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2020 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. 
+# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import wiredtiger, wttest +def timestamp_str(t): + return '%x' % t + +# test_prepare12.py +# Test update restore of a page with prepared update. 
+class test_prepare12(wttest.WiredTigerTestCase): + conn_config = 'cache_size=2MB,statistics=(all)' + session_config = 'isolation=snapshot' + + def test_prepare_update_restore(self): + uri = "table:test_prepare12" + self.session.create(uri, 'key_format=i,value_format=S') + + # Prepare a transaction + cursor = self.session.open_cursor(uri, None) + self.session.begin_transaction() + cursor[0] = 'a' + self.session.prepare_transaction('prepare_timestamp=' + timestamp_str(1)) + + # Insert an uncommitted key + session2 = self.conn.open_session(None) + cursor2 = session2.open_cursor(uri, None) + session2.begin_transaction() + cursor2[1] = 'b' + + # Insert a bunch of other content to fill the database to trigger eviction. + session3 = self.conn.open_session(None) + cursor3 = session3.open_cursor(uri, None) + for i in range(2, 100): + session3.begin_transaction() + cursor3[i] = 'a' * 500 + session3.commit_transaction() + + # Commit the prepared update + self.session.commit_transaction('commit_timestamp=' + timestamp_str(1) + ',durable_timestamp=' + timestamp_str(2)) + + # Read the prepared update + self.session.begin_transaction('read_timestamp=' + timestamp_str(2)) + self.assertEqual(cursor[0], 'a') diff --git a/src/third_party/wiredtiger/test/suite/test_readonly03.py b/src/third_party/wiredtiger/test/suite/test_readonly03.py index 6f0f0a489eb..18216425cbd 100644 --- a/src/third_party/wiredtiger/test/suite/test_readonly03.py +++ b/src/third_party/wiredtiger/test/suite/test_readonly03.py @@ -44,7 +44,7 @@ class test_readonly03(wttest.WiredTigerTestCase, suite_subprocess): conn_params_rd = 'readonly=true,operation_tracking=(enabled=false),' session_ops = [ 'alter', 'create', 'compact', 'drop', 'log_flush', - 'log_printf', 'rebalance', 'rename', 'salvage', 'truncate', 'upgrade', ] + 'log_printf', 'rename', 'salvage', 'truncate', 'upgrade', ] cursor_ops = [ 'insert', 'remove', 'update', ] def setUpConnectionOpen(self, dir): @@ -105,9 +105,6 @@ class 
test_readonly03(wttest.WiredTigerTestCase, suite_subprocess): elif op == 'log_printf': self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.log_printf("test"), msg) - elif op == 'rebalance': - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: self.session.rebalance(self.uri, None), msg) elif op == 'rename': self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.rename(self.uri, self.uri2, None), msg) diff --git a/src/third_party/wiredtiger/test/suite/test_rebalance.py b/src/third_party/wiredtiger/test/suite/test_rebalance.py deleted file mode 100644 index 6c9d74d87b9..00000000000 --- a/src/third_party/wiredtiger/test/suite/test_rebalance.py +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env python -# -# Public Domain 2014-2020 MongoDB, Inc. -# Public Domain 2008-2014 WiredTiger, Inc. -# -# This is free and unencumbered software released into the public domain. -# -# Anyone is free to copy, modify, publish, use, compile, sell, or -# distribute this software, either in source code form or as a compiled -# binary, for any purpose, commercial or non-commercial, and by any -# means. -# -# In jurisdictions that recognize copyright laws, the author or authors -# of this software dedicate any and all copyright interest in the -# software to the public domain. We make this dedication for the benefit -# of the public at large and to the detriment of our heirs and -# successors. We intend this dedication to be an overt act of -# relinquishment in perpetuity of all present and future rights to this -# software under copyright law. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
-# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. - -import os, time -import wiredtiger, wttest -from wtdataset import SimpleDataSet, ComplexDataSet -from wtscenario import make_scenarios - -# test_rebalance.py -# session level rebalance operation -class test_rebalance(wttest.WiredTigerTestCase): - name = 'test_rebalance' - - # Use small pages so we generate some internal layout - # Setup LSM so multiple chunks are present - config = 'allocation_size=512,internal_page_max=512' + \ - ',leaf_page_max=1k,lsm=(chunk_size=512k,merge_min=10)' - - scenarios = make_scenarios([ - ('file', dict(uri='file:')), - ('table', dict(uri='table:')), - ('lsm', dict(uri='lsm:')) - ]) - - # Populate an object, then rebalance it. - def rebalance(self, dataset, with_cursor): - uri = self.uri + self.name - ds = dataset(self, uri, 10000, config=self.config) - ds.populate() - - # Force to disk, we don't rebalance in-memory objects. - self.reopen_conn() - - # Open cursors should cause failure. - if with_cursor: - cursor = self.session.open_cursor(uri, None, None) - self.assertRaises(wiredtiger.WiredTigerError, - lambda: self.session.rebalance(uri, None)) - cursor.close() - - self.session.rebalance(uri, None) - self.session.drop(uri) - - # Test rebalance of an object. - def test_rebalance(self): - # Simple file or table object. - self.rebalance(SimpleDataSet, False) - self.rebalance(SimpleDataSet, True) - - # A complex, multi-file table object. 
- if self.uri == "table:": - self.rebalance(ComplexDataSet, False) - self.rebalance(ComplexDataSet, True) - -if __name__ == '__main__': - wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_txn24.py b/src/third_party/wiredtiger/test/suite/test_txn24.py deleted file mode 100644 index 0e239ec77c6..00000000000 --- a/src/third_party/wiredtiger/test/suite/test_txn24.py +++ /dev/null @@ -1,128 +0,0 @@ -#!/usr/bin/env python -# -# Public Domain 2014-2020 MongoDB, Inc. -# Public Domain 2008-2014 WiredTiger, Inc. -# -# This is free and unencumbered software released into the public domain. -# -# Anyone is free to copy, modify, publish, use, compile, sell, or -# distribute this software, either in source code form or as a compiled -# binary, for any purpose, commercial or non-commercial, and by any -# means. -# -# In jurisdictions that recognize copyright laws, the author or authors -# of this software dedicate any and all copyright interest in the -# software to the public domain. We make this dedication for the benefit -# of the public at large and to the detriment of our heirs and -# successors. We intend this dedication to be an overt act of -# relinquishment in perpetuity of all present and future rights to this -# software under copyright law. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -# test_txn24.py -# Transactions and eviction: Test if using snapshot isolation for eviction threads helps with -# cache stuck issue. 
-# - -import wiredtiger, wttest -import time - -class test_txn24(wttest.WiredTigerTestCase): - - session_config = 'isolation=snapshot' - - def conn_config(self): - # We want to either eliminate or keep the application thread role in eviction to minimum. - # This will ensure that the dedicated eviction threads are doing the heavy lifting. - return 'cache_size=100MB,eviction_target=80,eviction_dirty_target=5,eviction_trigger=100,\ - eviction_updates_target=5,eviction_dirty_trigger=99,eviction_updates_trigger=100,\ - eviction=(threads_max=4)' - - def test_snapshot_isolation_and_eviction(self): - - # Create and populate a table. - uri = "table:test_txn24" - table_params = 'key_format=i,value_format=S' - default_val = 'ABCD' * 60 - new_val = 'YYYY' * 60 - final_val = 'ZZZZ' * 60 - n_rows = 480000 - - self.session.create(uri, table_params) - cursor = self.session.open_cursor(uri, None) - for i in range(0, n_rows): - cursor[i] = default_val - cursor.close() - - # Perform a checkpoint. There should be no dirty content in the cache after this. - self.session.checkpoint() - - # Start a transaction, make an update and keep it running. - cursor = self.session.open_cursor(uri, None) - self.session.begin_transaction('isolation=snapshot') - cursor[0] = new_val - - # Start few sessions and transactions, make updates and try committing them. 
- session2 = self.setUpSessionOpen(self.conn) - cursor2 = session2.open_cursor(uri) - start_row = int(n_rows/4) - for i in range(0, 120): - session2.begin_transaction('isolation=snapshot') - for j in range(0,1000): - cursor2[start_row] = new_val - start_row += 1 - session2.commit_transaction() - - session3 = self.setUpSessionOpen(self.conn) - cursor3 = session3.open_cursor(uri) - start_row = int(n_rows/2) - for i in range(0, 120): - session3.begin_transaction('isolation=snapshot') - for j in range(0,1000): - cursor3[start_row] = new_val - start_row += 1 - session3.commit_transaction() - - # At this point in time, we have made roughly 90% cache dirty. If we are not using - # snaphsots for eviction threads, the cache state will remain like this forever and we may - # never reach this part of code. We might get a rollback error by now or WT will panic with - # cache stuck error. - # - # Even if we dont get an error by now and if we try to insert new data at this point in - # time, dirty cache usage will exceed 100% if eviction threads are not using snapshot - # isolation. In that case, we will eventually get a rollback error for sure. - - session4 = self.setUpSessionOpen(self.conn) - cursor4 = session4.open_cursor(uri) - start_row = 1 - for i in range(0, 120): - session4.begin_transaction('isolation=snapshot') - for j in range(0,1000): - cursor4[start_row] = new_val - start_row += 1 - session4.commit_transaction() - - # If we have done all operations error free so far, eviction threads have been successful. - - self.session.commit_transaction() - cursor.close() - self.session.close() - - cursor2.close() - session2.close() - - cursor3.close() - session3.close() - - cursor4.close() - session4.close() - -if __name__ == '__main__': - wttest.run() |