summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Luke Chen <luke.chen@mongodb.com> 2021-03-04 14:45:58 +1100
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-03-04 04:15:21 +0000
commit ff9995ed5cf2d72b67bb5520c9f71a9acfa27457 (patch)
tree a54879ecd835fdbfdb0da3bcc9191bd607ad7d48
parent 2f11ef616efad0986a76325c624cdcc7ef65bc43 (diff)
download mongo-ff9995ed5cf2d72b67bb5520c9f71a9acfa27457.tar.gz
Import wiredtiger: a5fd80d29c69f12c01f412fb6d8d7930cecc8758 from branch mongodb-5.0
ref: 563ccc601f..a5fd80d29c for: 4.9.0 WT-7164 Merge "HS cursor restructure" feature branch into develop
-rw-r--r-- src/third_party/wiredtiger/dist/s_string.ok 1
-rwxr-xr-x src/third_party/wiredtiger/dist/s_void 4
-rw-r--r-- src/third_party/wiredtiger/dist/stat_data.py 4
-rw-r--r-- src/third_party/wiredtiger/import.data 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_debug.c 122
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_delete.c 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_vrfy.c 35
-rw-r--r-- src/third_party/wiredtiger/src/cursor/cur_hs.c 588
-rw-r--r-- src/third_party/wiredtiger/src/evict/evict_lru.c 20
-rw-r--r-- src/third_party/wiredtiger/src/evict/evict_page.c 4
-rw-r--r-- src/third_party/wiredtiger/src/history/hs_conn.c 12
-rw-r--r-- src/third_party/wiredtiger/src/history/hs_cursor.c 234
-rw-r--r-- src/third_party/wiredtiger/src/history/hs_rec.c 548
-rw-r--r-- src/third_party/wiredtiger/src/history/hs_verify.c 62
-rw-r--r-- src/third_party/wiredtiger/src/include/api.h 90
-rw-r--r-- src/third_party/wiredtiger/src/include/cursor.h 2
-rw-r--r-- src/third_party/wiredtiger/src/include/cursor_inline.h 26
-rw-r--r-- src/third_party/wiredtiger/src/include/extern.h 36
-rw-r--r-- src/third_party/wiredtiger/src/include/session.h 2
-rw-r--r-- src/third_party/wiredtiger/src/include/stat.h 2
-rw-r--r-- src/third_party/wiredtiger/src/include/txn_inline.h 4
-rw-r--r-- src/third_party/wiredtiger/src/include/wiredtiger.in 700
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_row.c 20
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_write.c 3
-rw-r--r-- src/third_party/wiredtiger/src/session/session_api.c 3
-rw-r--r-- src/third_party/wiredtiger/src/support/stat.c 14
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn.c 144
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c 144
-rw-r--r-- src/third_party/wiredtiger/test/format/t.c 2
-rwxr-xr-x src/third_party/wiredtiger/test/suite/test_cursor13.py 2
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_hs05.py 2
-rwxr-xr-x src/third_party/wiredtiger/test/suite/test_rollback_to_stable11.py 2
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_util21.py 85
33 files changed, 1292 insertions, 1629 deletions
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index ebf39d669e3..2fb3a6e3f4d 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -565,6 +565,7 @@ calloc
cas
catfmt
cb
+cbt
ccc
ccr
cd
diff --git a/src/third_party/wiredtiger/dist/s_void b/src/third_party/wiredtiger/dist/s_void
index 0e9890acf78..70a938da4b8 100755
--- a/src/third_party/wiredtiger/dist/s_void
+++ b/src/third_party/wiredtiger/dist/s_void
@@ -135,7 +135,9 @@ func_ok()
-e '/int zlib_terminate$/d' \
-e '/int zstd_error$/d' \
-e '/int zstd_pre_size$/d' \
- -e '/int zstd_terminate$/d'
+ -e '/int zstd_terminate$/d' \
+ -e '/int __wt_curhs_search_near_after$/d' \
+ -e '/int __wt_curhs_search_near_before$/d'
}
for f in `find bench ext src test -name '*.c' -o -name '*_inline.h'`; do
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py
index e6eab22645d..7a25fa521a4 100644
--- a/src/third_party/wiredtiger/dist/stat_data.py
+++ b/src/third_party/wiredtiger/dist/stat_data.py
@@ -303,9 +303,7 @@ connection_stats = [
CursorStat('cursor_modify_bytes', 'cursor modify key and value bytes affected', 'size'),
CursorStat('cursor_modify_bytes_touch', 'cursor modify value bytes modified', 'size'),
CursorStat('cursor_next', 'cursor next calls'),
- CursorStat('cursor_next_hs_tombstone_rts', 'cursor next calls that skip due to a globally visible history store tombstone in rollback to stable'),
CursorStat('cursor_prev', 'cursor prev calls'),
- CursorStat('cursor_prev_hs_tombstone_rts', 'cursor prev calls that skip due to a globally visible history store tombstone in rollback to stable'),
CursorStat('cursor_remove', 'cursor remove calls'),
CursorStat('cursor_remove_bytes', 'cursor remove key bytes removed', 'size'),
CursorStat('cursor_reopen', 'cursors reused from cache'),
@@ -874,7 +872,7 @@ conn_dsrc_stats = [
TxnStat('txn_rts_hs_removed', 'rollback to stable updates removed from history store'),
TxnStat('txn_rts_hs_restore_updates', 'rollback to stable restored updates from history store'),
TxnStat('txn_rts_hs_restore_tombstones', 'rollback to stable restored tombstones from history store'),
- TxnStat('txn_rts_hs_stop_older_than_newer_start', 'rollback to stable hs records with stop timestamps older than newer records'),
+ TxnStat('txn_rts_hs_stop_older_than_newer_start', 'rollback to stable history store records with stop timestamps older than newer records'),
TxnStat('txn_rts_inconsistent_ckpt', 'rollback to stable inconsistent checkpoint'),
TxnStat('txn_rts_keys_removed', 'rollback to stable keys removed'),
TxnStat('txn_rts_keys_restored', 'rollback to stable keys restored'),
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index b4883db4d35..ecd052a77d2 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-5.0",
- "commit": "563ccc601f5689a16a3f41743398329b8a3aedf7"
+ "commit": "a5fd80d29c69f12c01f412fb6d8d7930cecc8758"
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c
index 00e29bbbee5..c9acfff3628 100644
--- a/src/third_party/wiredtiger/src/btree/bt_debug.c
+++ b/src/third_party/wiredtiger/src/btree/bt_debug.c
@@ -40,18 +40,18 @@ struct __wt_dbg {
static const /* Output separator */
char *const sep = "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=\n";
-static int __debug_col_skip(WT_DBG *, WT_INSERT_HEAD *, const char *, bool);
+static int __debug_col_skip(WT_DBG *, WT_INSERT_HEAD *, const char *, bool, WT_CURSOR *);
static int __debug_config(WT_SESSION_IMPL *, WT_DBG *, const char *);
static int __debug_modify(WT_DBG *, const uint8_t *);
static int __debug_page(WT_DBG *, WT_REF *, uint32_t);
static int __debug_page_col_fix(WT_DBG *, WT_REF *);
static int __debug_page_col_int(WT_DBG *, WT_PAGE *, uint32_t);
-static int __debug_page_col_var(WT_DBG *, WT_REF *);
+static int __debug_page_col_var(WT_DBG *, WT_REF *, WT_CURSOR *);
static int __debug_page_metadata(WT_DBG *, WT_REF *);
static int __debug_page_row_int(WT_DBG *, WT_PAGE *, uint32_t);
-static int __debug_page_row_leaf(WT_DBG *, WT_PAGE *);
+static int __debug_page_row_leaf(WT_DBG *, WT_PAGE *, WT_CURSOR *);
static int __debug_ref(WT_DBG *, WT_REF *);
-static int __debug_row_skip(WT_DBG *, WT_INSERT_HEAD *);
+static int __debug_row_skip(WT_DBG *, WT_INSERT_HEAD *, WT_CURSOR *);
static int __debug_tree(WT_SESSION_IMPL *, WT_REF *, const char *, uint32_t);
static int __debug_update(WT_DBG *, WT_UPDATE *, bool);
static int __debug_wrapup(WT_DBG *);
@@ -285,9 +285,6 @@ __debug_wrapup(WT_DBG *ds)
session = ds->session;
msg = ds->msg;
- if (session->hs_cursor != NULL)
- WT_TRET(__wt_hs_cursor_close(session));
-
__wt_scr_free(session, &ds->key);
__wt_scr_free(session, &ds->hs_key);
__wt_scr_free(session, &ds->hs_value);
@@ -421,7 +418,7 @@ __debug_hs_cursor(WT_DBG *ds, WT_CURSOR *hs_cursor)
uint32_t hs_btree_id;
char time_string[WT_TIME_STRING_SIZE];
- cbt = (WT_CURSOR_BTREE *)hs_cursor;
+ cbt = __wt_curhs_get_cbt(hs_cursor);
session = ds->session;
WT_TIME_WINDOW_INIT(&tw);
@@ -463,16 +460,12 @@ __debug_hs_cursor(WT_DBG *ds, WT_CURSOR *hs_cursor)
* Dump any HS records associated with the key.
*/
static int
-__debug_hs_key(WT_DBG *ds)
+__debug_hs_key(WT_DBG *ds, WT_CURSOR *hs_cursor)
{
WT_BTREE *btree;
- WT_CURSOR *hs_cursor;
WT_DECL_RET;
WT_SESSION_IMPL *session;
- wt_timestamp_t older_start_ts;
- uint64_t hs_counter;
uint32_t hs_btree_id;
- int cmp, exact;
session = ds->session;
btree = S2BT(session);
@@ -482,26 +475,12 @@ __debug_hs_key(WT_DBG *ds)
* Open a history store cursor positioned at the end of the data store key (the newest record)
* and iterate backwards until we reach a different key or btree.
*/
- hs_cursor = session->hs_cursor;
- hs_cursor->set_key(hs_cursor, hs_btree_id, ds->key, WT_TS_MAX, WT_TXN_MAX);
- ret = hs_cursor->search_near(hs_cursor, &exact);
-
- /* If we jumped to the next key, go back to the previous key. */
- if (ret == 0 && exact > 0)
- ret = hs_cursor->prev(hs_cursor);
-
- for (; ret == 0; ret = hs_cursor->prev(hs_cursor)) {
- WT_RET(hs_cursor->get_key(hs_cursor, &hs_btree_id, ds->t1, &older_start_ts, &hs_counter));
-
- if (hs_btree_id != btree->id)
- break;
-
- WT_RET(__wt_compare(session, NULL, ds->key, ds->t1, &cmp));
- if (cmp != 0)
- break;
+ hs_cursor->set_key(hs_cursor, 4, hs_btree_id, ds->key, WT_TS_MAX, WT_TXN_MAX);
+ ret = __wt_curhs_search_near_before(session, hs_cursor);
+ for (; ret == 0; ret = hs_cursor->prev(hs_cursor))
WT_RET(__debug_hs_cursor(ds, hs_cursor));
- }
+
return (ret == WT_NOTFOUND ? 0 : ret);
}
@@ -970,19 +949,19 @@ __wt_debug_cursor_page(void *cursor_arg, const char *ofile)
* Dump the history store tree given a user cursor.
*/
int
-__wt_debug_cursor_tree_hs(void *cursor_arg, const char *ofile)
+__wt_debug_cursor_tree_hs(void *session_arg, const char *ofile)
WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- WT_CURSOR_BTREE *cbt;
+ WT_BTREE *hs_btree;
+ WT_CURSOR *hs_cursor;
WT_DECL_RET;
WT_SESSION_IMPL *session;
- session = CUR2S(cursor_arg);
-
- WT_RET(__wt_hs_cursor_open(session));
- cbt = (WT_CURSOR_BTREE *)session->hs_cursor;
- WT_WITH_BTREE(session, CUR2BT(cbt), ret = __wt_debug_tree_all(session, NULL, NULL, ofile));
- WT_TRET(__wt_hs_cursor_close(session));
+ session = (WT_SESSION_IMPL *)session_arg;
+ WT_RET(__wt_curhs_open(session, NULL, &hs_cursor));
+ hs_btree = __wt_curhs_get_btree(hs_cursor);
+ WT_WITH_BTREE(session, hs_btree, ret = __wt_debug_tree_all(session, NULL, NULL, ofile));
+ WT_TRET(hs_cursor->close(hs_cursor));
return (ret);
}
@@ -1017,9 +996,11 @@ __debug_tree(WT_SESSION_IMPL *session, WT_REF *ref, const char *ofile, uint32_t
static int
__debug_page(WT_DBG *ds, WT_REF *ref, uint32_t flags)
{
+ WT_CURSOR *hs_cursor;
WT_DECL_RET;
WT_SESSION_IMPL *session;
+ hs_cursor = NULL;
session = ds->session;
WT_RET(__wt_scr_alloc(session, 100, &ds->key));
@@ -1028,43 +1009,47 @@ __debug_page(WT_DBG *ds, WT_REF *ref, uint32_t flags)
* doesn't work, we may be running in-memory.
*/
if (!WT_IS_HS(session->dhandle)) {
- if (session->hs_cursor != NULL || __wt_hs_cursor_open(session) == 0) {
- WT_RET(__wt_scr_alloc(session, 0, &ds->hs_key));
- WT_RET(__wt_scr_alloc(session, 0, &ds->hs_value));
- }
+ WT_ERR(__wt_curhs_open(session, NULL, &hs_cursor));
+ F_SET(hs_cursor, WT_CURSTD_HS_READ_COMMITTED);
+ WT_ERR(__wt_scr_alloc(session, 0, &ds->hs_key));
+ WT_ERR(__wt_scr_alloc(session, 0, &ds->hs_value));
+ F_SET(hs_cursor, WT_CURSTD_HS_READ_COMMITTED);
}
/* Dump the page metadata. */
WT_WITH_PAGE_INDEX(session, ret = __debug_page_metadata(ds, ref));
- WT_RET(ret);
+ WT_ERR(ret);
/* Dump the page. */
switch (ref->page->type) {
case WT_PAGE_COL_FIX:
if (LF_ISSET(WT_DEBUG_TREE_LEAF))
- WT_RET(__debug_page_col_fix(ds, ref));
+ WT_ERR(__debug_page_col_fix(ds, ref));
break;
case WT_PAGE_COL_INT:
WT_WITH_PAGE_INDEX(session, ret = __debug_page_col_int(ds, ref->page, flags));
- WT_RET(ret);
+ WT_ERR(ret);
break;
case WT_PAGE_COL_VAR:
if (LF_ISSET(WT_DEBUG_TREE_LEAF))
- WT_RET(__debug_page_col_var(ds, ref));
+ WT_ERR(__debug_page_col_var(ds, ref, hs_cursor));
break;
case WT_PAGE_ROW_INT:
WT_WITH_PAGE_INDEX(session, ret = __debug_page_row_int(ds, ref->page, flags));
- WT_RET(ret);
+ WT_ERR(ret);
break;
case WT_PAGE_ROW_LEAF:
if (LF_ISSET(WT_DEBUG_TREE_LEAF))
- WT_RET(__debug_page_row_leaf(ds, ref->page));
+ WT_ERR(__debug_page_row_leaf(ds, ref->page, hs_cursor));
break;
default:
- return (__wt_illegal_value(session, ref->page->type));
+ WT_ERR(__wt_illegal_value(session, ref->page->type));
}
- return (0);
+err:
+ if (hs_cursor != NULL)
+ WT_TRET(hs_cursor->close(hs_cursor));
+ return (ret);
}
/*
@@ -1209,11 +1194,11 @@ __debug_page_col_fix(WT_DBG *ds, WT_REF *ref)
if (WT_COL_UPDATE_SINGLE(page) != NULL) {
WT_RET(ds->f(ds, "%s", sep));
- WT_RET(__debug_col_skip(ds, WT_COL_UPDATE_SINGLE(page), "update", true));
+ WT_RET(__debug_col_skip(ds, WT_COL_UPDATE_SINGLE(page), "update", true, NULL));
}
if (WT_COL_APPEND(page) != NULL) {
WT_RET(ds->f(ds, "%s", sep));
- WT_RET(__debug_col_skip(ds, WT_COL_APPEND(page), "append", true));
+ WT_RET(__debug_col_skip(ds, WT_COL_APPEND(page), "append", true, NULL));
}
return (0);
}
@@ -1254,7 +1239,7 @@ __debug_page_col_int(WT_DBG *ds, WT_PAGE *page, uint32_t flags)
* Dump an in-memory WT_PAGE_COL_VAR page.
*/
static int
-__debug_page_col_var(WT_DBG *ds, WT_REF *ref)
+__debug_page_col_var(WT_DBG *ds, WT_REF *ref, WT_CURSOR *hs_cursor)
{
WT_CELL *cell;
WT_CELL_UNPACK_KV *unpack, _unpack;
@@ -1283,17 +1268,17 @@ __debug_page_col_var(WT_DBG *ds, WT_REF *ref)
p = ds->key->mem;
WT_RET(__wt_vpack_uint(&p, 0, recno));
ds->key->size = WT_PTRDIFF(p, ds->key->mem);
- WT_RET(__debug_hs_key(ds));
+ WT_RET(__debug_hs_key(ds, hs_cursor));
}
if ((update = WT_COL_UPDATE(page, cip)) != NULL)
- WT_RET(__debug_col_skip(ds, update, "update", false));
+ WT_RET(__debug_col_skip(ds, update, "update", false, hs_cursor));
recno += rle;
}
if (WT_COL_APPEND(page) != NULL) {
WT_RET(ds->f(ds, "%s", sep));
- WT_RET(__debug_col_skip(ds, WT_COL_APPEND(page), "append", false));
+ WT_RET(__debug_col_skip(ds, WT_COL_APPEND(page), "append", false, hs_cursor));
}
return (0);
@@ -1337,7 +1322,7 @@ __debug_page_row_int(WT_DBG *ds, WT_PAGE *page, uint32_t flags)
* Dump an in-memory WT_PAGE_ROW_LEAF page.
*/
static int
-__debug_page_row_leaf(WT_DBG *ds, WT_PAGE *page)
+__debug_page_row_leaf(WT_DBG *ds, WT_PAGE *page, WT_CURSOR *hs_cursor)
{
WT_CELL_UNPACK_KV *unpack, _unpack;
WT_INSERT_HEAD *insert;
@@ -1353,7 +1338,7 @@ __debug_page_row_leaf(WT_DBG *ds, WT_PAGE *page)
* Dump any K/V pairs inserted into the page before the first from-disk key on the page.
*/
if ((insert = WT_ROW_INSERT_SMALLEST(page)) != NULL)
- WT_RET(__debug_row_skip(ds, insert));
+ WT_RET(__debug_row_skip(ds, insert, hs_cursor));
/* Dump the page's K/V pairs. */
WT_ROW_FOREACH (page, rip, i) {
@@ -1366,11 +1351,11 @@ __debug_page_row_leaf(WT_DBG *ds, WT_PAGE *page)
if ((upd = WT_ROW_UPDATE(page, rip)) != NULL)
WT_RET(__debug_update(ds, upd, false));
- if (!WT_IS_HS(session->dhandle) && session->hs_cursor != NULL)
- WT_RET(__debug_hs_key(ds));
+ if (!WT_IS_HS(session->dhandle) && hs_cursor != NULL)
+ WT_RET(__debug_hs_key(ds, hs_cursor));
if ((insert = WT_ROW_INSERT(page, rip)) != NULL)
- WT_RET(__debug_row_skip(ds, insert));
+ WT_RET(__debug_row_skip(ds, insert, hs_cursor));
}
return (0);
}
@@ -1380,7 +1365,8 @@ __debug_page_row_leaf(WT_DBG *ds, WT_PAGE *page)
* Dump a column-store skiplist.
*/
static int
-__debug_col_skip(WT_DBG *ds, WT_INSERT_HEAD *head, const char *tag, bool hexbyte)
+__debug_col_skip(
+ WT_DBG *ds, WT_INSERT_HEAD *head, const char *tag, bool hexbyte, WT_CURSOR *hs_cursor)
{
WT_INSERT *ins;
WT_SESSION_IMPL *session;
@@ -1392,11 +1378,11 @@ __debug_col_skip(WT_DBG *ds, WT_INSERT_HEAD *head, const char *tag, bool hexbyte
WT_RET(ds->f(ds, "\t%s %" PRIu64 "\n", tag, WT_INSERT_RECNO(ins)));
WT_RET(__debug_update(ds, ins->upd, hexbyte));
- if (!WT_IS_HS(session->dhandle) && session->hs_cursor != NULL) {
+ if (!WT_IS_HS(session->dhandle) && hs_cursor != NULL) {
p = ds->key->mem;
WT_RET(__wt_vpack_uint(&p, 0, WT_INSERT_RECNO(ins)));
ds->key->size = WT_PTRDIFF(p, ds->key->mem);
- WT_RET(__debug_hs_key(ds));
+ WT_RET(__debug_hs_key(ds, hs_cursor));
}
}
return (0);
@@ -1407,7 +1393,7 @@ __debug_col_skip(WT_DBG *ds, WT_INSERT_HEAD *head, const char *tag, bool hexbyte
* Dump an insert list.
*/
static int
-__debug_row_skip(WT_DBG *ds, WT_INSERT_HEAD *head)
+__debug_row_skip(WT_DBG *ds, WT_INSERT_HEAD *head, WT_CURSOR *hs_cursor)
{
WT_INSERT *ins;
WT_SESSION_IMPL *session;
@@ -1418,9 +1404,9 @@ __debug_row_skip(WT_DBG *ds, WT_INSERT_HEAD *head)
WT_RET(__debug_item_key(ds, "insert", WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins)));
WT_RET(__debug_update(ds, ins->upd, false));
- if (!WT_IS_HS(session->dhandle) && session->hs_cursor != NULL) {
+ if (!WT_IS_HS(session->dhandle) && hs_cursor != NULL) {
WT_RET(__wt_buf_set(session, ds->key, WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins)));
- WT_RET(__debug_hs_key(ds));
+ WT_RET(__debug_hs_key(ds, hs_cursor));
}
}
return (0);
diff --git a/src/third_party/wiredtiger/src/btree/bt_delete.c b/src/third_party/wiredtiger/src/btree/bt_delete.c
index 3faf1c84aa3..a4d82c3d904 100644
--- a/src/third_party/wiredtiger/src/btree/bt_delete.c
+++ b/src/third_party/wiredtiger/src/btree/bt_delete.c
@@ -71,7 +71,7 @@ __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
return (0);
}
- WT_RET(__wt_hs_cursor_cache(session));
+ WT_RET(__wt_curhs_cache(session));
(void)__wt_atomic_addv32(&S2BT(session)->evict_busy, 1);
ret = __wt_evict(session, ref, previous_state, 0);
(void)__wt_atomic_subv32(&S2BT(session)->evict_busy, 1);
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
index 0bf0fb8672a..cf2aca0fc87 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
@@ -278,9 +278,7 @@ __wt_verify(WT_SESSION_IMPL *session, const char *cfg[])
*/
if (ret == 0 && (ckpt + 1)->name == NULL && !skip_hs) {
/* Open a history store cursor. */
- WT_ERR(__wt_hs_cursor_open(session));
WT_TRET(__wt_hs_verify_one(session));
- WT_TRET(__wt_hs_cursor_close(session));
/*
* We cannot error out here. If we got an error verifying the history store, we need
* to follow through with reacquiring the exclusive call below. We'll error out
@@ -778,11 +776,12 @@ __verify_key_hs(
wt_timestamp_t older_start_ts, older_stop_ts;
uint64_t hs_counter;
uint32_t hs_btree_id;
- int cmp, exact;
char ts_string[2][WT_TS_INT_STRING_SIZE];
btree = S2BT(session);
hs_btree_id = btree->id;
+ WT_RET(__wt_curhs_open(session, NULL, &hs_cursor));
+ F_SET(hs_cursor, WT_CURSTD_HS_READ_COMMITTED);
/*
* Set the data store timestamp and transactions to initiate timestamp range verification. Since
@@ -795,36 +794,23 @@ __verify_key_hs(
* Open a history store cursor positioned at the end of the data store key (the newest record)
* and iterate backwards until we reach a different key or btree.
*/
- hs_cursor = session->hs_cursor;
- hs_cursor->set_key(hs_cursor, hs_btree_id, tmp1, WT_TS_MAX, WT_TXN_MAX);
- ret = hs_cursor->search_near(hs_cursor, &exact);
-
- /* If we jumped to the next key, go back to the previous key. */
- if (ret == 0 && exact > 0)
- ret = hs_cursor->prev(hs_cursor);
+ hs_cursor->set_key(hs_cursor, 4, hs_btree_id, tmp1, WT_TS_MAX, UINT64_MAX);
+ ret = __wt_curhs_search_near_before(session, hs_cursor);
for (; ret == 0; ret = hs_cursor->prev(hs_cursor)) {
- WT_RET(hs_cursor->get_key(hs_cursor, &hs_btree_id, vs->tmp2, &older_start_ts, &hs_counter));
-
- if (hs_btree_id != btree->id)
- break;
-
- WT_RET(__wt_compare(session, NULL, tmp1, vs->tmp2, &cmp));
- if (cmp != 0)
- break;
-
+ WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, vs->tmp2, &older_start_ts, &hs_counter));
/* Verify the newer record's start is later than the older record's stop. */
if (newer_start_ts < older_stop_ts) {
- WT_RET_MSG(session, WT_ERROR,
+ WT_ERR_MSG(session, WT_ERROR,
"key %s has a overlap of timestamp ranges between history store stop timestamp %s "
"being newer than a more recent timestamp range having start timestamp %s",
__wt_buf_set_printable(session, tmp1->data, tmp1->size, vs->tmp2),
- __verify_timestamp_to_pretty_string(older_stop_ts, ts_string[0]),
- __verify_timestamp_to_pretty_string(newer_start_ts, ts_string[1]));
+ __wt_timestamp_to_string(older_stop_ts, ts_string[0]),
+ __wt_timestamp_to_string(newer_start_ts, ts_string[1]));
}
if (vs->stable_timestamp != WT_TS_NONE)
- WT_RET(
+ WT_ERR(
__verify_ts_stable_cmp(session, tmp1, NULL, 0, older_start_ts, older_stop_ts, vs));
/*
@@ -833,7 +819,8 @@ __verify_key_hs(
*/
newer_start_ts = older_start_ts;
}
-
+err:
+ WT_TRET(hs_cursor->close(hs_cursor));
return (ret == WT_NOTFOUND ? 0 : ret);
#else
WT_UNUSED(session);
diff --git a/src/third_party/wiredtiger/src/cursor/cur_hs.c b/src/third_party/wiredtiger/src/cursor/cur_hs.c
index f5574b3c1ce..d4cda2065c6 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_hs.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_hs.c
@@ -8,15 +8,19 @@
#include "wt_internal.h"
+static int __curhs_file_cursor_next(WT_SESSION_IMPL *, WT_CURSOR *);
+static int __curhs_file_cursor_open(WT_SESSION_IMPL *, WT_CURSOR **);
+static int __curhs_file_cursor_prev(WT_SESSION_IMPL *, WT_CURSOR *);
+static int __curhs_file_cursor_search_near(WT_SESSION_IMPL *, WT_CURSOR *, int *);
static int __curhs_prev_visible(WT_SESSION_IMPL *, WT_CURSOR_HS *);
static int __curhs_next_visible(WT_SESSION_IMPL *, WT_CURSOR_HS *);
-
+static int __curhs_search_near_helper(WT_SESSION_IMPL *, WT_CURSOR *, bool);
/*
- * __hs_cursor_open_int --
+ * __curhs_file_cursor_open --
* Open a new history store table cursor, internal function.
*/
static int
-__hs_cursor_open_int(WT_SESSION_IMPL *session, WT_CURSOR **cursorp)
+__curhs_file_cursor_open(WT_SESSION_IMPL *session, WT_CURSOR **cursorp)
{
WT_CURSOR *cursor;
WT_DECL_RET;
@@ -34,12 +38,12 @@ __hs_cursor_open_int(WT_SESSION_IMPL *session, WT_CURSOR **cursorp)
}
/*
- * __wt_hs_cursor_cache --
+ * __wt_curhs_cache --
* Cache a new history store table cursor. Open and then close a history store cursor without
* saving it in the session.
*/
int
-__wt_hs_cursor_cache(WT_SESSION_IMPL *session)
+__wt_curhs_cache(WT_SESSION_IMPL *session)
{
WT_CONNECTION_IMPL *conn;
WT_CURSOR *cursor;
@@ -70,45 +74,17 @@ __wt_hs_cursor_cache(WT_SESSION_IMPL *session)
(session->dhandle != NULL && WT_IS_METADATA(S2BT(session)->dhandle)) ||
session == conn->default_session)
return (0);
- WT_RET(__hs_cursor_open_int(session, &cursor));
+ WT_RET(__curhs_file_cursor_open(session, &cursor));
WT_RET(cursor->close(cursor));
return (0);
}
/*
- * __wt_hs_cursor_open --
- * Open a new history store table cursor wrapper function.
- */
-int
-__wt_hs_cursor_open(WT_SESSION_IMPL *session)
-{
- /* Not allowed to open a cursor if you already have one */
- WT_ASSERT(session, session->hs_cursor == NULL);
-
- return (__hs_cursor_open_int(session, &session->hs_cursor));
-}
-
-/*
- * __wt_hs_cursor_close --
- * Discard a history store cursor.
- */
-int
-__wt_hs_cursor_close(WT_SESSION_IMPL *session)
-{
- /* Should only be called when session has an open history store cursor */
- WT_ASSERT(session, session->hs_cursor != NULL);
-
- WT_RET(session->hs_cursor->close(session->hs_cursor));
- session->hs_cursor = NULL;
- return (0);
-}
-
-/*
- * __wt_hs_cursor_next --
+ * __curhs_file_cursor_next --
* Execute a next operation on a history store cursor with the appropriate isolation level.
*/
-int
-__wt_hs_cursor_next(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
+static int
+__curhs_file_cursor_next(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
{
WT_DECL_RET;
@@ -117,11 +93,11 @@ __wt_hs_cursor_next(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
}
/*
- * __wt_hs_cursor_prev --
+ * __curhs_file_cursor_prev --
* Execute a prev operation on a history store cursor with the appropriate isolation level.
*/
-int
-__wt_hs_cursor_prev(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
+static int
+__curhs_file_cursor_prev(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
{
WT_DECL_RET;
@@ -130,12 +106,12 @@ __wt_hs_cursor_prev(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
}
/*
- * __wt_hs_cursor_search_near --
+ * __curhs_file_cursor_search_near --
* Execute a search near operation on a history store cursor with the appropriate isolation
* level.
*/
-int
-__wt_hs_cursor_search_near(WT_SESSION_IMPL *session, WT_CURSOR *cursor, int *exactp)
+static int
+__curhs_file_cursor_search_near(WT_SESSION_IMPL *session, WT_CURSOR *cursor, int *exactp)
{
WT_DECL_RET;
@@ -145,8 +121,34 @@ __wt_hs_cursor_search_near(WT_SESSION_IMPL *session, WT_CURSOR *cursor, int *exa
}
/*
+ * __curhs_set_key_ptr --
+ * Copy the key buffer pointer from file cursor to the history store cursor.
+ */
+static inline void
+__curhs_set_key_ptr(WT_CURSOR *hs_cursor, WT_CURSOR *file_cursor)
+{
+ hs_cursor->key.data = file_cursor->key.data;
+ hs_cursor->key.size = file_cursor->key.size;
+ WT_ASSERT(CUR2S(file_cursor), F_ISSET(file_cursor, WT_CURSTD_KEY_SET));
+ F_SET(hs_cursor, F_MASK(file_cursor, WT_CURSTD_KEY_SET));
+}
+
+/*
+ * __curhs_set_value_ptr --
+ * Copy the value buffer pointer from file cursor to the history store cursor.
+ */
+static inline void
+__curhs_set_value_ptr(WT_CURSOR *hs_cursor, WT_CURSOR *file_cursor)
+{
+ hs_cursor->value.data = file_cursor->value.data;
+ hs_cursor->value.size = file_cursor->value.size;
+ WT_ASSERT(CUR2S(file_cursor), F_ISSET(file_cursor, WT_CURSTD_VALUE_SET));
+ F_SET(hs_cursor, F_MASK(file_cursor, WT_CURSTD_VALUE_SET));
+}
+
+/*
* __curhs_next --
- * WT_CURSOR->next method for the hs cursor type.
+ * WT_CURSOR->next method for the history store cursor type.
*/
static int
__curhs_next(WT_CURSOR *cursor)
@@ -160,7 +162,7 @@ __curhs_next(WT_CURSOR *cursor)
file_cursor = hs_cursor->file_cursor;
CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, next, CUR2BT(file_cursor));
- WT_ERR(__wt_hs_cursor_next(session, file_cursor));
+ WT_ERR(__curhs_file_cursor_next(session, file_cursor));
/*
* We need to check if the history store record is visible to the current session. If not, the
* __curhs_next_visible() will also keep iterating forward through the records until it finds a
@@ -168,6 +170,9 @@ __curhs_next(WT_CURSOR *cursor)
*/
WT_ERR(__curhs_next_visible(session, hs_cursor));
+ __curhs_set_key_ptr(cursor, file_cursor);
+ __curhs_set_value_ptr(cursor, file_cursor);
+
if (0) {
err:
WT_TRET(cursor->reset(cursor));
@@ -177,7 +182,7 @@ err:
/*
* __curhs_prev --
- * WT_CURSOR->prev method for the hs cursor type.
+ * WT_CURSOR->prev method for the history store cursor type.
*/
static int
__curhs_prev(WT_CURSOR *cursor)
@@ -191,7 +196,7 @@ __curhs_prev(WT_CURSOR *cursor)
file_cursor = hs_cursor->file_cursor;
CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, prev, CUR2BT(file_cursor));
- WT_ERR(__wt_hs_cursor_prev(session, file_cursor));
+ WT_ERR(__curhs_file_cursor_prev(session, file_cursor));
/*
* We need to check if the history store record is visible to the current session. If not, the
* __curhs_prev_visible() will also keep iterating backwards through the records until it finds
@@ -199,6 +204,9 @@ __curhs_prev(WT_CURSOR *cursor)
*/
WT_ERR(__curhs_prev_visible(session, hs_cursor));
+ __curhs_set_key_ptr(cursor, file_cursor);
+ __curhs_set_value_ptr(cursor, file_cursor);
+
if (0) {
err:
WT_TRET(cursor->reset(cursor));
@@ -208,7 +216,7 @@ err:
/*
* __curhs_close --
- * WT_CURSOR->close method for the hs cursor type.
+ * WT_CURSOR->close method for the history store cursor type.
*/
static int
__curhs_close(WT_CURSOR *cursor)
@@ -216,7 +224,6 @@ __curhs_close(WT_CURSOR *cursor)
WT_CURSOR *file_cursor;
WT_CURSOR_HS *hs_cursor;
WT_DECL_RET;
- WT_ITEM *datastore_key;
WT_SESSION_IMPL *session;
hs_cursor = (WT_CURSOR_HS *)cursor;
@@ -224,11 +231,11 @@ __curhs_close(WT_CURSOR *cursor)
CURSOR_API_CALL_PREPARE_ALLOWED(
cursor, session, close, file_cursor == NULL ? NULL : CUR2BT(file_cursor));
err:
+ __wt_scr_free(session, &hs_cursor->datastore_key);
if (file_cursor != NULL)
WT_TRET(file_cursor->close(file_cursor));
- datastore_key = &hs_cursor->datastore_key;
- __wt_scr_free(session, &datastore_key);
__wt_cursor_close(cursor);
+ --session->hs_cursor_counter;
API_END_RET(session, ret);
}
@@ -252,9 +259,15 @@ __curhs_reset(WT_CURSOR *cursor)
ret = file_cursor->reset(file_cursor);
WT_TIME_WINDOW_INIT(&hs_cursor->time_window);
hs_cursor->btree_id = 0;
- hs_cursor->datastore_key.data = NULL;
- hs_cursor->datastore_key.size = 0;
+ hs_cursor->datastore_key->data = NULL;
+ hs_cursor->datastore_key->size = 0;
hs_cursor->flags = 0;
+ cursor->key.data = NULL;
+ cursor->key.size = 0;
+ cursor->value.data = NULL;
+ cursor->value.size = 0;
+ F_CLR(cursor, WT_CURSTD_KEY_SET);
+ F_CLR(cursor, WT_CURSTD_VALUE_SET);
err:
API_END_RET(session, ret);
@@ -262,7 +275,7 @@ err:
/*
* __curhs_set_key --
- * WT_CURSOR->set_key method for the hs cursor type.
+ * WT_CURSOR->set_key method for the history store cursor type.
*/
static void
__curhs_set_key(WT_CURSOR *cursor, ...)
@@ -282,6 +295,7 @@ __curhs_set_key(WT_CURSOR *cursor, ...)
start_ts = WT_TS_NONE;
counter = 0;
+ hs_cursor->flags = 0;
va_start(ap, cursor);
arg_count = va_arg(ap, uint32_t);
@@ -292,11 +306,11 @@ __curhs_set_key(WT_CURSOR *cursor, ...)
if (arg_count > 1) {
datastore_key = va_arg(ap, WT_ITEM *);
WT_IGNORE_RET(__wt_buf_set(
- session, &hs_cursor->datastore_key, datastore_key->data, datastore_key->size));
+ session, hs_cursor->datastore_key, datastore_key->data, datastore_key->size));
F_SET(hs_cursor, WT_HS_CUR_KEY_SET);
} else {
- hs_cursor->datastore_key.data = NULL;
- hs_cursor->datastore_key.size = 0;
+ hs_cursor->datastore_key->data = NULL;
+ hs_cursor->datastore_key->size = 0;
F_CLR(hs_cursor, WT_HS_CUR_KEY_SET);
}
@@ -315,7 +329,9 @@ __curhs_set_key(WT_CURSOR *cursor, ...)
va_end(ap);
file_cursor->set_key(
- file_cursor, hs_cursor->btree_id, &hs_cursor->datastore_key, start_ts, counter);
+ file_cursor, hs_cursor->btree_id, hs_cursor->datastore_key, start_ts, counter);
+
+ __curhs_set_key_ptr(cursor, file_cursor);
}
/*
@@ -342,8 +358,8 @@ __curhs_prev_visible(WT_SESSION_IMPL *session, WT_CURSOR_HS *hs_cursor)
WT_ERR(__wt_scr_alloc(session, 0, &datastore_key));
- for (; ret == 0; ret = __wt_hs_cursor_prev(session, file_cursor)) {
- WT_ERR(file_cursor->get_key(file_cursor, &btree_id, &datastore_key, &start_ts, &counter));
+ for (; ret == 0; ret = __curhs_file_cursor_prev(session, file_cursor)) {
+ WT_ERR(file_cursor->get_key(file_cursor, &btree_id, datastore_key, &start_ts, &counter));
/* Stop before crossing over to the next btree. */
if (F_ISSET(hs_cursor, WT_HS_CUR_BTREE_ID_SET) && btree_id != hs_cursor->btree_id) {
@@ -356,7 +372,7 @@ __curhs_prev_visible(WT_SESSION_IMPL *session, WT_CURSOR_HS *hs_cursor)
* have crossed over the desired key and not found the record we are looking for.
*/
if (F_ISSET(hs_cursor, WT_HS_CUR_KEY_SET)) {
- WT_ERR(__wt_compare(session, NULL, datastore_key, &hs_cursor->datastore_key, &cmp));
+ WT_ERR(__wt_compare(session, NULL, datastore_key, hs_cursor->datastore_key, &cmp));
if (cmp != 0) {
ret = WT_NOTFOUND;
goto err;
@@ -379,6 +395,12 @@ __curhs_prev_visible(WT_SESSION_IMPL *session, WT_CURSOR_HS *hs_cursor)
if (F_ISSET(std_cursor, WT_CURSTD_HS_READ_COMMITTED))
break;
+ /*
+ * If we are using a history store cursor and haven't set the WT_CURSTD_HS_READ_COMMITTED
+ * flag then we must have a snapshot, assert that we do.
+ */
+ WT_ASSERT(session, F_ISSET(session->txn, WT_TXN_HAS_SNAPSHOT));
+
if (__wt_txn_tw_stop_visible(session, &cbt->upd_value->tw)) {
/*
* If the stop time point of a record is visible to us, we won't be able to see anything
@@ -425,8 +447,8 @@ __curhs_next_visible(WT_SESSION_IMPL *session, WT_CURSOR_HS *hs_cursor)
WT_ERR(__wt_scr_alloc(session, 0, &datastore_key));
- for (; ret == 0; ret = __wt_hs_cursor_next(session, file_cursor)) {
- WT_ERR(file_cursor->get_key(file_cursor, &btree_id, &datastore_key, &start_ts, &counter));
+ for (; ret == 0; ret = __curhs_file_cursor_next(session, file_cursor)) {
+ WT_ERR(file_cursor->get_key(file_cursor, &btree_id, datastore_key, &start_ts, &counter));
/* Stop before crossing over to the next btree. */
if (F_ISSET(hs_cursor, WT_HS_CUR_BTREE_ID_SET) && btree_id != hs_cursor->btree_id) {
@@ -439,7 +461,7 @@ __curhs_next_visible(WT_SESSION_IMPL *session, WT_CURSOR_HS *hs_cursor)
* have crossed over the desired key and not found the record we are looking for.
*/
if (F_ISSET(hs_cursor, WT_HS_CUR_KEY_SET)) {
- WT_ERR(__wt_compare(session, NULL, datastore_key, &hs_cursor->datastore_key, &cmp));
+ WT_ERR(__wt_compare(session, NULL, datastore_key, hs_cursor->datastore_key, &cmp));
if (cmp != 0) {
ret = WT_NOTFOUND;
goto err;
@@ -463,6 +485,12 @@ __curhs_next_visible(WT_SESSION_IMPL *session, WT_CURSOR_HS *hs_cursor)
break;
/*
+ * If we are using a history store cursor and haven't set the WT_CURSTD_HS_READ_COMMITTED
+ * flag then we must have a snapshot, assert that we do.
+ */
+ WT_ASSERT(session, F_ISSET(session->txn, WT_TXN_HAS_SNAPSHOT));
+
+ /*
* If the stop time point of a record is visible to us, check the next one.
*/
if (__wt_txn_tw_stop_visible(session, &cbt->upd_value->tw))
@@ -479,170 +507,267 @@ err:
}
/*
+ * __wt_curhs_search_near_before --
+ * Set the cursor position at the requested position or before it. This is a thin wrapper
+ * that calls __curhs_search_near_helper with the "before" argument set to true and returns
+ * the helper's return value unchanged. The helper reads the search key from cursor->key, so
+ * the key must already be set on the cursor before calling this function.
+ */
+int
+__wt_curhs_search_near_before(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
+{
+ return (__curhs_search_near_helper(session, cursor, true));
+}
+
+/*
+ * __wt_curhs_search_near_after --
+ * Set the cursor position at the requested position or after it. This is a thin wrapper
+ * that calls __curhs_search_near_helper with the "before" argument set to false and returns
+ * the helper's return value unchanged. The helper reads the search key from cursor->key, so
+ * the key must already be set on the cursor before calling this function.
+ */
+int
+__wt_curhs_search_near_after(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
+{
+ return (__curhs_search_near_helper(session, cursor, false));
+}
+
+/*
+ * __curhs_search_near_helper --
+ * Helper function to set the cursor position based on search criteria.
+ *
+ * The cursor's current key is copied into a scratch buffer before the cursor's search_near
+ * method is called, so the original search key stays available for comparison while the
+ * cursor moves. If "before" is true and search_near reports a positive comparison, the
+ * cursor is stepped backwards with prev until its key compares less than or equal to the
+ * saved search key. If "before" is false and search_near reports a negative comparison, the
+ * cursor is stepped forwards with next until its key compares greater than or equal to the
+ * saved search key. Each step increments the cursor_skip_hs_cur_position statistic.
+ *
+ * Returns 0 on success. A non-zero return from search_near, prev, next or the key comparison
+ * is returned to the caller as is; in particular, if prev or next runs out of records before
+ * the condition is met, that return value is what the caller sees. The scratch buffer is
+ * released on every path that gets past its allocation.
+ */
+static int
+__curhs_search_near_helper(WT_SESSION_IMPL *session, WT_CURSOR *cursor, bool before)
+{
+ WT_DECL_ITEM(srch_key);
+ WT_DECL_RET;
+ int cmp;
+
+ WT_RET(__wt_scr_alloc(session, 0, &srch_key));
+ WT_ERR(__wt_buf_set(session, srch_key, cursor->key.data, cursor->key.size));
+ WT_ERR(cursor->search_near(cursor, &cmp));
+ if (before) {
+ /*
+ * If we want to land on a key that is smaller or equal to the specified key, keep walking
+ * backwards as there may be content inserted concurrently. A single step back is not
+ * assumed to be enough, hence the loop that compares against the saved search key after
+ * every step.
+ */
+ if (cmp > 0) {
+ while ((ret = cursor->prev(cursor)) == 0) {
+ WT_STAT_CONN_INCR(session, cursor_skip_hs_cur_position);
+ WT_STAT_DATA_INCR(session, cursor_skip_hs_cur_position);
+ WT_ERR(__wt_compare(session, NULL, &cursor->key, srch_key, &cmp));
+ /*
+ * Exit if we have found a key that is smaller than or equal to the specified key.
+ */
+ if (cmp <= 0)
+ break;
+ }
+ }
+ } else {
+ /*
+ * If we want to land on a key that is larger or equal to the specified key, keep walking
+ * forwards as there may be content inserted concurrently. A single step forward is not
+ * assumed to be enough, hence the loop that compares against the saved search key after
+ * every step.
+ */
+ if (cmp < 0) {
+ while ((ret = cursor->next(cursor)) == 0) {
+ WT_STAT_CONN_INCR(session, cursor_skip_hs_cur_position);
+ WT_STAT_DATA_INCR(session, cursor_skip_hs_cur_position);
+ WT_ERR(__wt_compare(session, NULL, &cursor->key, srch_key, &cmp));
+ /* Exit if we have found a key that is larger than or equal to the specified key. */
+ if (cmp >= 0)
+ break;
+ }
+ }
+ }
+
+err:
+ __wt_scr_free(session, &srch_key);
+ return (ret);
+}
+
+/*
* __curhs_search_near --
- * WT_CURSOR->search_near method for the hs cursor type.
+ * WT_CURSOR->search_near method for the history store cursor type.
*/
static int
__curhs_search_near(WT_CURSOR *cursor, int *exactp)
{
WT_CURSOR *file_cursor;
WT_CURSOR_HS *hs_cursor;
+ WT_DECL_ITEM(datastore_key);
WT_DECL_ITEM(srch_key);
WT_DECL_RET;
WT_SESSION_IMPL *session;
- int cmp;
- int exact;
+ wt_timestamp_t start_ts;
+ uint64_t counter;
+ uint32_t btree_id;
+ int exact, cmp;
hs_cursor = (WT_CURSOR_HS *)cursor;
file_cursor = hs_cursor->file_cursor;
*exactp = 0;
- cmp = 0;
CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, search_near, CUR2BT(file_cursor));
+ WT_ERR(__wt_scr_alloc(session, 0, &datastore_key));
WT_ERR(__wt_scr_alloc(session, 0, &srch_key));
/* At least we have the btree id set. */
WT_ASSERT(session, F_ISSET(hs_cursor, WT_HS_CUR_BTREE_ID_SET));
WT_ERR(__wt_buf_set(session, srch_key, file_cursor->key.data, file_cursor->key.size));
/* Reset cursor if we get WT_NOTFOUND. */
- WT_ERR(__wt_hs_cursor_search_near(session, file_cursor, &exact));
+ WT_ERR(__curhs_file_cursor_search_near(session, file_cursor, &exact));
- /*
- * There are some key fields missing so we are searching a range of keys. Place the cursor at
- * the start of the range.
- */
- if (!F_ISSET(hs_cursor, WT_HS_CUR_COUNTER_SET)) {
+ if (exact >= 0) {
/*
- * If we raced with a history store insert, we may be two or more records away from our
- * target. Keep iterating forwards until we are on or past our target key.
- *
- * We can't use the cursor positioning helper that we use for regular reads since that will
- * place us at the end of a particular key/timestamp range whereas we want to be placed at
- * the beginning.
+ * We placed the file cursor on or after the search key. Try first to walk forwards to see if
+ * we can find a visible record. If nothing is visible, try to walk backwards.
*/
- if (exact < 0) {
- while ((ret = __wt_hs_cursor_next(session, file_cursor)) == 0) {
- WT_ERR(__wt_compare(session, NULL, &file_cursor->key, srch_key, &cmp));
- if (cmp >= 0)
- break;
+ WT_ERR_NOTFOUND_OK(__curhs_next_visible(session, hs_cursor), true);
+ if (ret == WT_NOTFOUND) {
+ /*
+ * When walking backwards, first ensure we walk back to the specified btree or key space
+ * as we may have crossed the boundary. Do that in a loop as there may be content
+ * inserted concurrently.
+ */
+ while ((ret = __curhs_file_cursor_prev(session, file_cursor)) == 0) {
+ WT_ERR(
+ file_cursor->get_key(file_cursor, &btree_id, datastore_key, &start_ts, &counter));
+
+ /* We are back in the specified btree range. */
+ if (btree_id == hs_cursor->btree_id && F_ISSET(hs_cursor, WT_HS_CUR_KEY_SET)) {
+ WT_ERR(
+ __wt_compare(session, NULL, datastore_key, hs_cursor->datastore_key, &cmp));
+
+ /* We are back in the specified key range. */
+ if (cmp == 0)
+ break;
+
+ /*
+ * We are now smaller than the key range, which indicates nothing is visible to
+ * us in the specified key range.
+ */
+ if (cmp < 0) {
+ ret = WT_NOTFOUND;
+ goto err;
+ }
+ }
+
+ /*
+ * We are now smaller than the btree range, which indicates nothing is visible to us
+ * in the specified btree range.
+ */
+ if (btree_id < hs_cursor->btree_id) {
+ ret = WT_NOTFOUND;
+ goto err;
+ }
}
+ WT_ERR(ret);
+ /*
+ * Keep looking for the first visible update in the specified range when walking
+ * backwards.
+ */
+ WT_ERR(__curhs_prev_visible(session, hs_cursor));
/*
- * No entries greater than or equal to the key we searched for. Reset cursor if we get
- * WT_NOTFOUND.
+ * We can't find anything visible when first walking forwards so we must have found an
+ * update that is smaller than the specified key.
*/
+ *exactp = -1;
+ } else {
WT_ERR(ret);
-
- *exactp = cmp;
- } else
- *exactp = 1;
-
- WT_ERR(__curhs_next_visible(session, hs_cursor));
- }
- /* Search the closest match that is smaller or equal to the search key. */
- else {
+ /*
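+ * We found an update when walking forwards. If initially we landed on the same key as the
+ * specified key, exact will be 0 and we should return that. If it is not visible, we
+ * must have found a key that is larger than the specified key.
+ *
+ * NOTE(review): when exact is 0 but the record we landed on is not visible, the forward
+ * walk appears to leave us on a larger key while exact is still 0, which would not satisfy
+ * the diagnostic check on cmp and *exactp further down. Confirm whether the cursor key
+ * needs to be compared against the search key again before setting *exactp.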
+ */
+ *exactp = exact;
+ }
+ } else {
/*
- * Because of the special visibility rules for the history store, a new key can appear in
- * between our search and the set of updates that we're interested in. Keep trying until we
- * find it.
- *
- * There may be no history store entries for the given btree id and record key if they have
- * been removed by rollback to stable.
- *
- * Note that we need to compare the raw key off the cursor to determine where we are in the
- * history store as opposed to comparing the embedded data store key since the ordering is
- * not guaranteed to be the same.
+ * We placed the file cursor before the search key. Try first to walk backwards to see if we
+ * can find a visible record. If nothing is visible, try to walk forwards.
*/
- if (exact > 0) {
+ WT_ERR_NOTFOUND_OK(__curhs_prev_visible(session, hs_cursor), true);
+ if (ret == WT_NOTFOUND) {
/*
- * It's possible that we may race with a history store insert for another key. So we may
- * be more than one record away the end of our target key/timestamp range. Keep
- * iterating backwards until we land on our key.
+ * When walking forwards, first ensure we walk back to the specified btree or key space
+ * as we may have crossed the boundary. Do that in a loop as there may be content
+ * inserted concurrently.
*/
- while ((ret = __wt_hs_cursor_prev(session, file_cursor)) == 0) {
- WT_STAT_CONN_DATA_INCR(session, cursor_skip_hs_cur_position);
-
- WT_ERR(__wt_compare(session, NULL, &file_cursor->key, srch_key, &cmp));
- if (cmp <= 0)
- break;
+ while ((ret = __curhs_file_cursor_next(session, file_cursor)) == 0) {
+ WT_ERR(
+ file_cursor->get_key(file_cursor, &btree_id, datastore_key, &start_ts, &counter));
+
+ /* We are back in the specified btree range. */
+ if (btree_id == hs_cursor->btree_id && F_ISSET(hs_cursor, WT_HS_CUR_KEY_SET)) {
+ WT_ERR(
+ __wt_compare(session, NULL, datastore_key, hs_cursor->datastore_key, &cmp));
+
+ /* We are back in the specified key range. */
+ if (cmp == 0)
+ break;
+
+ /*
+ * We are now larger than the key range, which indicates nothing is visible to
+ * us in the specified key range.
+ */
+ if (cmp > 0) {
+ ret = WT_NOTFOUND;
+ goto err;
+ }
+ }
+
+ /*
+ * We are now larger than the btree range, which indicates nothing is visible to us
+ * in the specified btree range.
+ */
+ if (btree_id > hs_cursor->btree_id) {
+ ret = WT_NOTFOUND;
+ goto err;
+ }
}
+ WT_ERR(ret);
+ /*
+ * Keep looking for the first visible update in the specified range when walking
+ * forwards.
+ */
+ WT_ERR(__curhs_next_visible(session, hs_cursor));
+ /*
+ * We can't find anything visible when first walking backwards so we must have found an
+ * update that is larger than the specified key.
+ */
+ *exactp = 1;
+ } else {
+ WT_ERR(ret);
+ *exactp = exact;
+ }
+ }
- *exactp = cmp;
- } else
- *exactp = -1;
#ifdef HAVE_DIAGNOSTIC
- if (ret == 0) {
- WT_ERR(__wt_compare(session, NULL, &file_cursor->key, srch_key, &cmp));
- WT_ASSERT(session, cmp <= 0);
- }
+ WT_ERR(__wt_compare(session, NULL, &file_cursor->key, srch_key, &cmp));
+ WT_ASSERT(
+ session, (cmp == 0 && *exactp == 0) || (cmp < 0 && *exactp < 0) || (cmp > 0 && *exactp > 0));
#endif
- WT_ERR(__curhs_prev_visible(session, hs_cursor));
- }
+ __curhs_set_key_ptr(cursor, file_cursor);
+ __curhs_set_value_ptr(cursor, file_cursor);
if (0) {
err:
WT_TRET(cursor->reset(cursor));
}
+ __wt_scr_free(session, &datastore_key);
__wt_scr_free(session, &srch_key);
API_END_RET(session, ret);
}
/*
- * __curhs_get_key --
- * WT_CURSOR->get_key method for the hs cursor type.
- */
-static int
-__curhs_get_key(WT_CURSOR *cursor, ...)
-{
- WT_CURSOR *file_cursor;
- WT_CURSOR_HS *hs_cursor;
- WT_DECL_RET;
- va_list ap;
-
- hs_cursor = (WT_CURSOR_HS *)cursor;
- file_cursor = hs_cursor->file_cursor;
-
- va_start(ap, cursor);
- ret = file_cursor->get_key(file_cursor, va_arg(ap, uint32_t *), va_arg(ap, WT_ITEM **),
- va_arg(ap, wt_timestamp_t *), va_arg(ap, uint64_t *));
- va_end(ap);
-
- return (ret);
-}
-
-/*
- * __curhs_get_value --
- * WT_CURSOR->get_value method for the hs cursor type.
- */
-static int
-__curhs_get_value(WT_CURSOR *cursor, ...)
-{
- WT_CURSOR *file_cursor;
- WT_CURSOR_HS *hs_cursor;
- WT_DECL_RET;
- va_list ap;
-
- hs_cursor = (WT_CURSOR_HS *)cursor;
- file_cursor = hs_cursor->file_cursor;
-
- va_start(ap, cursor);
- ret = file_cursor->get_value(file_cursor, va_arg(ap, wt_timestamp_t *),
- va_arg(ap, wt_timestamp_t *), va_arg(ap, uint64_t *), va_arg(ap, WT_ITEM **));
- va_end(ap);
-
- return (ret);
-}
-
-/*
* __curhs_set_value --
- * WT_CURSOR->set_value method for the hs cursor type.
+ * WT_CURSOR->set_value method for the history store cursor type.
*/
static void
__curhs_set_value(WT_CURSOR *cursor, ...)
{
WT_CURSOR *file_cursor;
WT_CURSOR_HS *hs_cursor;
+ WT_ITEM *hs_val;
+ wt_timestamp_t start_ts;
+ wt_timestamp_t stop_ts;
+ uint64_t type;
va_list ap;
hs_cursor = (WT_CURSOR_HS *)cursor;
@@ -650,14 +775,20 @@ __curhs_set_value(WT_CURSOR *cursor, ...)
va_start(ap, cursor);
hs_cursor->time_window = *va_arg(ap, WT_TIME_WINDOW *);
- file_cursor->set_value(file_cursor, va_arg(ap, wt_timestamp_t), va_arg(ap, wt_timestamp_t),
- va_arg(ap, uint64_t), va_arg(ap, WT_ITEM *));
+ stop_ts = va_arg(ap, wt_timestamp_t);
+ start_ts = va_arg(ap, wt_timestamp_t);
+ type = va_arg(ap, uint64_t);
+ hs_val = va_arg(ap, WT_ITEM *);
+
+ file_cursor->set_value(file_cursor, stop_ts, start_ts, type, hs_val);
va_end(ap);
+
+ __curhs_set_value_ptr(cursor, file_cursor);
}
/*
* __curhs_insert --
- * WT_CURSOR->insert method for the hs cursor type.
+ * WT_CURSOR->insert method for the history store cursor type.
*/
static int
__curhs_insert(WT_CURSOR *cursor)
@@ -676,6 +807,12 @@ __curhs_insert(WT_CURSOR *cursor)
CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, insert, CUR2BT(file_cursor));
+ /*
+ * Disable bulk loads into history store. This would normally occur when updating a record with
+ * a cursor however the history store doesn't use cursor update, so we do it here.
+ */
+ __wt_cursor_disable_bulk(session);
+
/* Allocate a tombstone only when there is a valid stop time point. */
if (WT_TIME_WINDOW_HAS_STOP(&hs_cursor->time_window)) {
/*
@@ -701,7 +838,6 @@ __curhs_insert(WT_CURSOR *cursor)
if (hs_tombstone != NULL) {
hs_tombstone->next = hs_upd;
hs_upd = hs_tombstone;
- hs_tombstone = NULL;
}
retry:
@@ -725,7 +861,7 @@ err:
/*
* __curhs_remove --
- * WT_CURSOR->remove method for the hs cursor type.
+ * WT_CURSOR->remove method for the history store cursor type.
*/
static int
__curhs_remove(WT_CURSOR *cursor)
@@ -734,9 +870,14 @@ __curhs_remove(WT_CURSOR *cursor)
WT_CURSOR_BTREE *cbt;
WT_CURSOR_HS *hs_cursor;
WT_DECL_RET;
+ WT_ITEM hs_key;
WT_SESSION_IMPL *session;
WT_UPDATE *hs_tombstone;
+ wt_timestamp_t hs_start_ts;
+ uint64_t hs_counter;
+ uint32_t hs_btree_id;
+ WT_CLEAR(hs_key);
hs_cursor = (WT_CURSOR_HS *)cursor;
file_cursor = hs_cursor->file_cursor;
cbt = (WT_CURSOR_BTREE *)file_cursor;
@@ -745,7 +886,9 @@ __curhs_remove(WT_CURSOR *cursor)
CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, insert, CUR2BT(file_cursor));
/* Remove must be called with cursor positioned. */
- WT_ASSERT(session, F_ISSET(file_cursor, WT_CURSTD_KEY_INT));
+ WT_ASSERT(session, F_ISSET(cursor, WT_CURSTD_KEY_INT));
+
+ WT_ERR(cursor->get_key(cursor, &hs_btree_id, &hs_key, &hs_start_ts, &hs_counter));
/*
* Since we're using internal functions to modify the row structure, we need to manually set the
@@ -765,6 +908,7 @@ __curhs_remove(WT_CURSOR *cursor)
/* Invalidate the previous value but we will hold on to the position of the key. */
F_CLR(file_cursor, WT_CURSTD_VALUE_SET);
+ F_CLR(cursor, WT_CURSTD_VALUE_SET);
if (0) {
err:
@@ -777,7 +921,7 @@ err:
/*
* __curhs_update --
- * WT_CURSOR->update method for the hs cursor type.
+ * WT_CURSOR->update method for the history store cursor type.
*/
static int
__curhs_update(WT_CURSOR *cursor)
@@ -785,15 +929,11 @@ __curhs_update(WT_CURSOR *cursor)
WT_CURSOR *file_cursor;
WT_CURSOR_BTREE *cbt;
WT_CURSOR_HS *hs_cursor;
- WT_DECL_ITEM(hs_value);
WT_DECL_RET;
WT_SESSION_IMPL *session;
WT_UPDATE *hs_tombstone, *hs_upd;
bool retry;
- uint64_t hs_upd_type;
- wt_timestamp_t hs_durable_ts, hs_stop_durable_ts;
-
hs_cursor = (WT_CURSOR_HS *)cursor;
file_cursor = hs_cursor->file_cursor;
cbt = (WT_CURSOR_BTREE *)file_cursor;
@@ -814,34 +954,12 @@ __curhs_update(WT_CURSOR *cursor)
WT_ASSERT(session, !WT_TIME_WINDOW_IS_EMPTY(&hs_cursor->time_window));
WT_ASSERT(session, WT_TIME_WINDOW_HAS_STOP(&hs_cursor->time_window));
- /*
- * Ideally we want to check if we are positioned on the newest value for user key. However, we
- * can't check if the timestamp was set to WT_TS_MAX when we searched for the key. We can can a
- * next() on cursor to confirm there is no newer value but that would disturb our cursor. A more
- * expensive method would be to search again and verify.
- */
-
/* The tombstone to represent the stop time window. */
WT_ERR(__wt_upd_alloc_tombstone(session, &hs_tombstone, NULL));
hs_tombstone->start_ts = hs_cursor->time_window.stop_ts;
hs_tombstone->durable_ts = hs_cursor->time_window.durable_stop_ts;
hs_tombstone->txnid = hs_cursor->time_window.stop_txn;
- /* Modify the existing value with a new stop timestamp. */
-
- /* Allocate a buffer for the history store value. */
- WT_ERR(__wt_scr_alloc(session, 0, &hs_value));
-
- /* Retrieve the existing update value and stop timestamp. */
- WT_ERR(file_cursor->get_value(
- file_cursor, &hs_stop_durable_ts, &hs_durable_ts, &hs_upd_type, hs_value));
- WT_ASSERT(session, hs_stop_durable_ts == WT_TS_MAX);
- WT_ASSERT(session, (uint8_t)hs_upd_type == WT_UPDATE_STANDARD);
-
- /* Use set_value method to pack the new value. */
- file_cursor->set_value(
- file_cursor, hs_cursor->time_window.stop_ts, hs_durable_ts, hs_upd_type, hs_value);
-
WT_ERR(__wt_upd_alloc(session, &file_cursor->value, WT_UPDATE_STANDARD, &hs_upd, NULL));
hs_upd->start_ts = hs_cursor->time_window.start_ts;
hs_upd->durable_ts = hs_cursor->time_window.durable_start_ts;
@@ -850,6 +968,11 @@ __curhs_update(WT_CURSOR *cursor)
/* Connect the tombstone to the update. */
hs_tombstone->next = hs_upd;
+ /*
+ * Since we're using internal functions to modify the row structure, we need to manually set the
+ * comparison to an exact match.
+ */
+ cbt->compare = 0;
/* Make the updates and if we fail, search and try again. */
while ((ret = __wt_hs_modify(cbt, hs_tombstone)) == WT_RESTART) {
WT_WITH_PAGE_INDEX(session, ret = __wt_hs_row_search(cbt, &file_cursor->key, false));
@@ -863,11 +986,13 @@ __curhs_update(WT_CURSOR *cursor)
WT_TRET(ret);
}
+ __curhs_set_key_ptr(cursor, file_cursor);
+ __curhs_set_value_ptr(cursor, file_cursor);
+
if (0) {
err:
__wt_free(session, hs_tombstone);
__wt_free(session, hs_upd);
- __wt_scr_free(session, &hs_value);
WT_TRET(cursor->reset(cursor));
}
API_END_RET(session, ret);
@@ -880,53 +1005,54 @@ err:
int
__wt_curhs_open(WT_SESSION_IMPL *session, WT_CURSOR *owner, WT_CURSOR **cursorp)
{
- WT_CURSOR_STATIC_INIT(iface, __curhs_get_key, /* get-key */
- __curhs_get_value, /* get-value */
- __curhs_set_key, /* set-key */
- __curhs_set_value, /* set-value */
- __wt_cursor_compare_notsup, /* compare */
- __wt_cursor_equals_notsup, /* equals */
- __curhs_next, /* next */
- __curhs_prev, /* prev */
- __curhs_reset, /* reset */
- __wt_cursor_notsup, /* search */
- __curhs_search_near, /* search-near */
- __curhs_insert, /* insert */
- __wt_cursor_modify_value_format_notsup, /* modify */
- __curhs_update, /* update */
- __curhs_remove, /* remove */
- __wt_cursor_notsup, /* reserve */
- __wt_cursor_reconfigure_notsup, /* reconfigure */
- __wt_cursor_notsup, /* cache */
- __wt_cursor_reopen_notsup, /* reopen */
- __curhs_close); /* close */
+ WT_CURSOR_STATIC_INIT(iface, __wt_cursor_get_key, /* get-key */
+ __wt_cursor_get_value, /* get-value */
+ __curhs_set_key, /* set-key */
+ __curhs_set_value, /* set-value */
+ __wt_cursor_compare_notsup, /* compare */
+ __wt_cursor_equals_notsup, /* equals */
+ __curhs_next, /* next */
+ __curhs_prev, /* prev */
+ __curhs_reset, /* reset */
+ __wt_cursor_notsup, /* search */
+ __curhs_search_near, /* search-near */
+ __curhs_insert, /* insert */
+ __wt_cursor_modify_value_format_notsup, /* modify */
+ __curhs_update, /* update */
+ __curhs_remove, /* remove */
+ __wt_cursor_notsup, /* reserve */
+ __wt_cursor_reconfigure_notsup, /* reconfigure */
+ __wt_cursor_notsup, /* cache */
+ __wt_cursor_reopen_notsup, /* reopen */
+ __curhs_close); /* close */
WT_CURSOR *cursor;
WT_CURSOR_HS *hs_cursor;
WT_DECL_RET;
- WT_ITEM *datastore_key;
+ *cursorp = NULL;
WT_RET(__wt_calloc_one(session, &hs_cursor));
+ ++session->hs_cursor_counter;
cursor = (WT_CURSOR *)hs_cursor;
*cursor = iface;
cursor->session = (WT_SESSION *)session;
cursor->key_format = WT_HS_KEY_FORMAT;
cursor->value_format = WT_HS_VALUE_FORMAT;
+ WT_ERR(__wt_strdup(session, WT_HS_URI, &cursor->uri));
/* Open the file cursor for operations on the regular history store .*/
- WT_ERR(__hs_cursor_open_int(session, &hs_cursor->file_cursor));
+ WT_ERR(__curhs_file_cursor_open(session, &hs_cursor->file_cursor));
WT_ERR(__wt_cursor_init(cursor, WT_HS_URI, owner, NULL, cursorp));
WT_TIME_WINDOW_INIT(&hs_cursor->time_window);
hs_cursor->btree_id = 0;
- datastore_key = &hs_cursor->datastore_key;
- WT_ERR(__wt_scr_alloc(session, 0, &datastore_key));
+ WT_ERR(__wt_scr_alloc(session, 0, &hs_cursor->datastore_key));
hs_cursor->flags = 0;
WT_TIME_WINDOW_INIT(&hs_cursor->time_window);
if (0) {
err:
- WT_TRET(__curhs_close(cursor));
+ WT_TRET(cursor->close(cursor));
*cursorp = NULL;
}
return (ret);
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index 17e559cb52b..2269a925d3d 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -285,7 +285,7 @@ __wt_evict_thread_run(WT_SESSION_IMPL *session, WT_THREAD *thread)
* busy and then opens a different file (in this case, the HS file), it can deadlock with a
* thread waiting for the first file to drain from the eviction queue. See WT-5946 for details.
*/
- WT_RET(__wt_hs_cursor_cache(session));
+ WT_RET(__wt_curhs_cache(session));
if (conn->evict_server_running && __wt_spin_trylock(session, &cache->evict_pass_lock) == 0) {
/*
* Cannot use WT_WITH_PASS_LOCK because this is a try lock. Fix when that is supported. We
@@ -2330,7 +2330,6 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, bool readonly, d
{
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
- WT_CURSOR *hs_cursor_saved;
WT_DECL_RET;
WT_TRACK_OP_DECL;
WT_TXN_GLOBAL *txn_global;
@@ -2349,21 +2348,12 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, bool readonly, d
txn_shared = WT_SESSION_TXN_SHARED(session);
/*
- * If we have a history store cursor, save it. This ensures that if eviction needs to access the
- * history store, it will get its own cursor, avoiding potential problems if it were to
- * reposition or reset a history store cursor that we're in the middle of using for something
- * else.
- */
- hs_cursor_saved = session->hs_cursor;
- session->hs_cursor = NULL;
-
- /*
* Before we enter the eviction generation, make sure this session has a cached history store
* cursor, otherwise we can deadlock with a session wanting exclusive access to a handle: that
* session will have a handle list write lock and will be waiting on eviction to drain, we'll be
* inside eviction waiting on a handle list read lock to open a history store cursor.
*/
- WT_ERR(__wt_hs_cursor_cache(session));
+ WT_ERR(__wt_curhs_cache(session));
/*
* It is not safe to proceed if the eviction server threads aren't setup yet.
@@ -2464,12 +2454,6 @@ err:
done:
WT_TRACK_OP_END(session);
- /* If the caller was using a history store cursor they should have closed it by now. */
- WT_ASSERT(session, session->hs_cursor == NULL);
-
- /* Restore the caller's history store cursor. */
- session->hs_cursor = hs_cursor_saved;
-
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c
index 8919f3d6544..5d6954cb594 100644
--- a/src/third_party/wiredtiger/src/evict/evict_page.c
+++ b/src/third_party/wiredtiger/src/evict/evict_page.c
@@ -76,7 +76,7 @@ __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
evict_flags = LF_ISSET(WT_READ_NO_SPLIT) ? WT_EVICT_CALL_NO_SPLIT : 0;
FLD_SET(evict_flags, WT_EVICT_CALL_URGENT);
- WT_RET(__wt_hs_cursor_cache(session));
+ WT_RET(__wt_curhs_cache(session));
(void)__wt_atomic_addv32(&btree->evict_busy, 1);
ret = __wt_evict(session, ref, previous_state, evict_flags);
(void)__wt_atomic_subv32(&btree->evict_busy, 1);
@@ -131,7 +131,7 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint8_t previous_state, uint32
/*
* Track history store pages being force evicted while holding a history store cursor open.
*/
- if (session->hs_cursor != NULL && WT_IS_HS(session->dhandle)) {
+ if (session->hs_cursor_counter > 0 && WT_IS_HS(session->dhandle)) {
force_evict_hs = true;
WT_STAT_CONN_INCR(session, cache_eviction_force_hs);
}
diff --git a/src/third_party/wiredtiger/src/history/hs_conn.c b/src/third_party/wiredtiger/src/history/hs_conn.c
index 161aeec0030..6163d0042c7 100644
--- a/src/third_party/wiredtiger/src/history/hs_conn.c
+++ b/src/third_party/wiredtiger/src/history/hs_conn.c
@@ -55,22 +55,20 @@ __hs_cleanup_las(WT_SESSION_IMPL *session)
/*
* __wt_hs_get_btree --
- * Get the history store btree. Open a history store cursor if needed to get the btree.
+ * Get the history store btree by opening a history store cursor.
*/
int
__wt_hs_get_btree(WT_SESSION_IMPL *session, WT_BTREE **hs_btreep)
{
+ WT_CURSOR *hs_cursor;
WT_DECL_RET;
*hs_btreep = NULL;
- WT_RET(__wt_hs_cursor_open(session));
-
- *hs_btreep = CUR2BT(session->hs_cursor);
+ WT_RET(__wt_curhs_open(session, NULL, &hs_cursor));
+ *hs_btreep = __wt_curhs_get_btree(hs_cursor);
WT_ASSERT(session, *hs_btreep != NULL);
-
- WT_TRET(__wt_hs_cursor_close(session));
-
+ WT_TRET(hs_cursor->close(hs_cursor));
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/history/hs_cursor.c b/src/third_party/wiredtiger/src/history/hs_cursor.c
index 432fe116a72..31da7b2cc9b 100644
--- a/src/third_party/wiredtiger/src/history/hs_cursor.c
+++ b/src/third_party/wiredtiger/src/history/hs_cursor.c
@@ -87,117 +87,39 @@ __wt_hs_modify(WT_CURSOR_BTREE *hs_cbt, WT_UPDATE *hs_upd)
}
/*
- * __hs_cursor_position_int --
- * Internal function to position a history store cursor at the end of a set of updates for a
- * given btree id, record key and timestamp.
+ * __wt_hs_upd_time_window --
+ * Get the underlying time window of the update the history store cursor is positioned at.
*/
-static int
-__hs_cursor_position_int(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id,
- const WT_ITEM *key, wt_timestamp_t timestamp, WT_ITEM *user_srch_key)
+void
+__wt_hs_upd_time_window(WT_CURSOR *hs_cursor, WT_TIME_WINDOW **twp)
{
- WT_DECL_ITEM(srch_key);
- WT_DECL_RET;
- int cmp, exact;
-
- /* The session should be pointing at the history store btree. */
- WT_ASSERT(session, WT_IS_HS((S2BT(session))->dhandle));
-
- if (user_srch_key == NULL)
- WT_RET(__wt_scr_alloc(session, 0, &srch_key));
- else
- srch_key = user_srch_key;
-
- /*
- * Because of the special visibility rules for the history store, a new key can appear in
- * between our search and the set of updates that we're interested in. Keep trying until we find
- * it.
- *
- * There may be no history store entries for the given btree id and record key if they have been
- * removed by WT_CONNECTION::rollback_to_stable.
- *
- * Note that we need to compare the raw key off the cursor to determine where we are in the
- * history store as opposed to comparing the embedded data store key since the ordering is not
- * guaranteed to be the same.
- */
- cursor->set_key(cursor, btree_id, key, timestamp, UINT64_MAX);
- /* Copy the raw key before searching as a basis for comparison. */
- WT_ERR(__wt_buf_set(session, srch_key, cursor->key.data, cursor->key.size));
- WT_ERR(cursor->search_near(cursor, &exact));
- if (exact > 0) {
- /*
- * It's possible that we may race with a history store insert for another key. So we may be
- * more than one record away the end of our target key/timestamp range. Keep iterating
- * backwards until we land on our key.
- */
- while ((ret = cursor->prev(cursor)) == 0) {
- WT_STAT_CONN_DATA_INCR(session, cursor_skip_hs_cur_position);
+ WT_CURSOR_BTREE *hs_cbt;
- WT_ERR(__wt_compare(session, NULL, &cursor->key, srch_key, &cmp));
- if (cmp <= 0)
- break;
- }
- }
-#ifdef HAVE_DIAGNOSTIC
- if (ret == 0) {
- WT_ERR(__wt_compare(session, NULL, &cursor->key, srch_key, &cmp));
- WT_ASSERT(session, cmp <= 0);
- }
-#endif
-err:
- if (user_srch_key == NULL)
- __wt_scr_free(session, &srch_key);
- return (ret);
+ hs_cbt = __wt_curhs_get_cbt(hs_cursor);
+ *twp = &hs_cbt->upd_value->tw;
}
/*
- * __wt_hs_cursor_position --
- * Position a history store cursor at the end of a set of updates for a given btree id, record
- * key and timestamp. There may be no history store entries for the given btree id and record
- * key if they have been removed by WT_CONNECTION::rollback_to_stable. There is an optional
- * argument to store the key that we used to position the cursor which can be used to assess
- * where the cursor is relative to it. The function executes with isolation level set as
- * WT_ISO_READ_UNCOMMITTED.
+ * __wt_hs_find_upd --
+ * Scan the history store for a record the btree cursor wants to position on. Create an update
+ * for the record and return to the caller.
*/
int
-__wt_hs_cursor_position(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id,
- const WT_ITEM *key, wt_timestamp_t timestamp, WT_ITEM *user_srch_key)
-{
- WT_DECL_RET;
-
- WT_WITH_BTREE(session, CUR2BT(cursor),
- WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED,
- ret = __hs_cursor_position_int(session, cursor, btree_id, key, timestamp, user_srch_key)));
- return (ret);
-}
-
-/*
- * __hs_find_upd_int --
- * Internal helper to scan the history store for a record the btree cursor wants to position on.
- * Create an update for the record and return to the caller. The caller may choose to optionally
- * allow prepared updates to be returned regardless of whether prepare is being ignored
- * globally. Otherwise, a prepare conflict will be returned upon reading a prepared update.
- */
-static int
-__hs_find_upd_int(WT_SESSION_IMPL *session, uint32_t btree_id, WT_ITEM *key,
- const char *value_format, uint64_t recno, WT_UPDATE_VALUE *upd_value, bool allow_prepare,
- WT_ITEM *base_value_buf)
+__wt_hs_find_upd(WT_SESSION_IMPL *session, uint32_t btree_id, WT_ITEM *key,
+ const char *value_format, uint64_t recno, WT_UPDATE_VALUE *upd_value, WT_ITEM *base_value_buf)
{
WT_CURSOR *hs_cursor;
- WT_CURSOR_BTREE *hs_cbt;
WT_DECL_ITEM(hs_value);
WT_DECL_ITEM(orig_hs_value_buf);
WT_DECL_RET;
WT_ITEM hs_key, recno_key;
WT_MODIFY_VECTOR modifies;
- WT_TXN *txn;
WT_TXN_SHARED *txn_shared;
WT_UPDATE *mod_upd;
- wt_timestamp_t durable_timestamp, durable_timestamp_tmp, hs_start_ts, hs_start_ts_tmp;
+ wt_timestamp_t durable_timestamp, durable_timestamp_tmp;
wt_timestamp_t hs_stop_durable_ts, hs_stop_durable_ts_tmp, read_timestamp;
- uint64_t hs_counter, hs_counter_tmp, upd_type_full;
- uint32_t hs_btree_id;
+ uint64_t upd_type_full;
uint8_t *p, recno_key_buf[WT_INTPACK64_MAXSIZE], upd_type;
- int cmp;
bool upd_found;
hs_cursor = NULL;
@@ -205,15 +127,11 @@ __hs_find_upd_int(WT_SESSION_IMPL *session, uint32_t btree_id, WT_ITEM *key,
orig_hs_value_buf = NULL;
WT_CLEAR(hs_key);
__wt_modify_vector_init(session, &modifies);
- txn = session->txn;
txn_shared = WT_SESSION_TXN_SHARED(session);
upd_found = false;
WT_STAT_CONN_DATA_INCR(session, cursor_search_hs);
- hs_cursor = session->hs_cursor;
- hs_cbt = (WT_CURSOR_BTREE *)hs_cursor;
-
/* Row-store key is as passed to us, create the column-store key as needed. */
WT_ASSERT(
session, (key == NULL && recno != WT_RECNO_OOB) || (key != NULL && recno == WT_RECNO_OOB));
@@ -226,70 +144,29 @@ __hs_find_upd_int(WT_SESSION_IMPL *session, uint32_t btree_id, WT_ITEM *key,
key->size = WT_PTRDIFF(p, recno_key_buf);
}
- /* Allocate buffer for the history store value. */
- WT_ERR(__wt_scr_alloc(session, 0, &hs_value));
+ WT_ERR(__wt_curhs_open(session, NULL, &hs_cursor));
/*
* After positioning our cursor, we're stepping backwards to find the correct update. Since the
* timestamp is part of the key, our cursor needs to go from the newest record (further in the
* history store) to the oldest (earlier in the history store) for a given key.
- */
- read_timestamp = allow_prepare ? txn->prepare_timestamp : txn_shared->read_timestamp;
-
- /*
+ *
* A reader without a timestamp should read the largest timestamp in the range, however cursor
* search near if given a 0 timestamp will place at the top of the range and hide the records
* below it. As such we need to adjust a 0 timestamp to the timestamp max value.
*/
- if (read_timestamp == WT_TS_NONE)
- read_timestamp = WT_TS_MAX;
+ read_timestamp =
+ txn_shared->read_timestamp == WT_TS_NONE ? WT_TS_MAX : txn_shared->read_timestamp;
- WT_ERR_NOTFOUND_OK(
- __wt_hs_cursor_position(session, hs_cursor, btree_id, key, read_timestamp, NULL), true);
+ hs_cursor->set_key(hs_cursor, 4, btree_id, key, read_timestamp, UINT64_MAX);
+ WT_ERR_NOTFOUND_OK(__wt_curhs_search_near_before(session, hs_cursor), true);
if (ret == WT_NOTFOUND) {
ret = 0;
goto done;
}
- for (;; ret = __wt_hs_cursor_prev(session, hs_cursor)) {
- WT_ERR_NOTFOUND_OK(ret, true);
- /* If we hit the end of the table, let's get out of here. */
- if (ret == WT_NOTFOUND) {
- ret = 0;
- goto done;
- }
- WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_start_ts, &hs_counter));
-
- /* Stop before crossing over to the next btree */
- if (hs_btree_id != btree_id)
- goto done;
-
- /*
- * Keys are sorted in an order, skip the ones before the desired key, and bail out if we
- * have crossed over the desired key and not found the record we are looking for.
- */
- WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp));
- if (cmp != 0)
- goto done;
-
- /*
- * If the stop time pair on the tombstone in the history store is already globally visible
- * we can skip it.
- */
- if (__wt_txn_tw_stop_visible_all(session, &hs_cbt->upd_value->tw)) {
- WT_STAT_CONN_DATA_INCR(session, cursor_prev_hs_tombstone);
- continue;
- }
- /*
- * If the stop time point of a record is visible to us, we won't be able to see anything for
- * this entire key. Just jump straight to the end.
- */
- if (__wt_txn_tw_stop_visible(session, &hs_cbt->upd_value->tw))
- goto done;
- /* If the start time point is visible to us, let's return that record. */
- if (__wt_txn_tw_start_visible(session, &hs_cbt->upd_value->tw))
- break;
- }
+ /* Allocate buffer for the history store value. */
+ WT_ERR(__wt_scr_alloc(session, 0, &hs_value));
WT_ERR(hs_cursor->get_value(
hs_cursor, &hs_stop_durable_ts, &durable_timestamp, &upd_type_full, hs_value));
upd_type = (uint8_t)upd_type_full;
@@ -320,6 +197,8 @@ __hs_find_upd_int(WT_SESSION_IMPL *session, uint32_t btree_id, WT_ITEM *key,
* visibility checks when reading in order to construct the modify chain, so we can create
* the value we expect.
*/
+ F_SET(hs_cursor, WT_CURSTD_HS_READ_COMMITTED);
+
while (upd_type == WT_UPDATE_MODIFY) {
WT_ERR(__wt_upd_alloc(session, hs_value, upd_type, &mod_upd, NULL));
WT_ERR(__wt_modify_vector_push(&modifies, mod_upd));
@@ -330,7 +209,7 @@ __hs_find_upd_int(WT_SESSION_IMPL *session, uint32_t btree_id, WT_ITEM *key,
* update here we fall back to the datastore version. If its timestamp doesn't match our
* timestamp then we return not found.
*/
- WT_ERR_NOTFOUND_OK(__wt_hs_cursor_next(session, hs_cursor), true);
+ WT_ERR_NOTFOUND_OK(hs_cursor->next(hs_cursor), true);
if (ret == WT_NOTFOUND) {
/*
* Fallback to the provided value as the base value.
@@ -344,47 +223,6 @@ __hs_find_upd_int(WT_SESSION_IMPL *session, uint32_t btree_id, WT_ITEM *key,
upd_type = WT_UPDATE_STANDARD;
break;
}
- hs_start_ts_tmp = WT_TS_NONE;
- /*
- * Make sure we use the temporary variants of these variables. We need to retain the
- * timestamps of the original modify we saw.
- *
- * We keep looking back into history store until we find a base update to apply the
- * reverse deltas on top of.
- */
- WT_ERR(hs_cursor->get_key(
- hs_cursor, &hs_btree_id, &hs_key, &hs_start_ts_tmp, &hs_counter_tmp));
-
- if (hs_btree_id != btree_id) {
- /* Fallback to the provided value as the base value. */
- orig_hs_value_buf = hs_value;
- hs_value = base_value_buf;
- upd_type = WT_UPDATE_STANDARD;
- break;
- }
-
- WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp));
-
- if (cmp != 0) {
- /* Fallback to the provided value as the base value. */
- orig_hs_value_buf = hs_value;
- hs_value = base_value_buf;
- upd_type = WT_UPDATE_STANDARD;
- break;
- }
-
- /*
- * If the stop time pair on the tombstone in the history store is already globally
- * visible fall back to the base value. This is possible in scenarios where the latest
- * updates are aborted by RTS according to stable timestamp.
- */
- if (__wt_txn_tw_stop_visible_all(session, &hs_cbt->upd_value->tw)) {
- /* Fallback to the provided value as the base value. */
- orig_hs_value_buf = hs_value;
- hs_value = base_value_buf;
- upd_type = WT_UPDATE_STANDARD;
- break;
- }
WT_ERR(hs_cursor->get_value(hs_cursor, &hs_stop_durable_ts_tmp, &durable_timestamp_tmp,
&upd_type_full, hs_value));
@@ -440,26 +278,8 @@ err:
WT_ASSERT(session, ret != WT_NOTFOUND);
- return (ret);
-}
-
-/*
- * __wt_hs_find_upd --
- * Scan the history store for a record.
- */
-int
-__wt_hs_find_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_format, uint64_t recno,
- WT_UPDATE_VALUE *upd_value, bool allow_prepare, WT_ITEM *base_value_buf)
-{
- WT_BTREE *btree;
- WT_DECL_RET;
-
- btree = S2BT(session);
+ if (hs_cursor != NULL)
+ WT_TRET(hs_cursor->close(hs_cursor));
- WT_RET(__wt_hs_cursor_open(session));
- WT_WITH_BTREE(session, CUR2BT(session->hs_cursor),
- (ret = __hs_find_upd_int(
- session, btree->id, key, value_format, recno, upd_value, allow_prepare, base_value_buf)));
- WT_TRET(__wt_hs_cursor_close(session));
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/history/hs_rec.c b/src/third_party/wiredtiger/src/history/hs_rec.c
index 318804eb7e1..6f523d49089 100644
--- a/src/third_party/wiredtiger/src/history/hs_rec.c
+++ b/src/third_party/wiredtiger/src/history/hs_rec.c
@@ -11,8 +11,7 @@
static int __hs_delete_key_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
uint32_t btree_id, const WT_ITEM *key, bool reinsert);
static int __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
- WT_BTREE *btree, const WT_ITEM *key, wt_timestamp_t ts, uint64_t *hs_counter,
- const WT_ITEM *srch_key);
+ WT_BTREE *btree, const WT_ITEM *key, wt_timestamp_t ts, uint64_t *hs_counter);
/*
* __hs_verbose_cache_stats --
@@ -61,100 +60,17 @@ __hs_verbose_cache_stats(WT_SESSION_IMPL *session, WT_BTREE *btree)
}
/*
- * __hs_insert_record_with_btree_int --
- * Internal helper for inserting history store records. If this call is successful, the cursor
- * parameter will be positioned on the newly inserted record. Otherwise, it will be reset.
- */
-static int
-__hs_insert_record_with_btree_int(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint64_t btree_id,
- const WT_ITEM *key, const uint8_t type, const WT_ITEM *hs_value, WT_TIME_WINDOW *tw,
- uint64_t counter)
-{
- WT_CURSOR_BTREE *cbt;
- WT_DECL_RET;
- WT_UPDATE *hs_upd, *upd_local;
-
- cbt = (WT_CURSOR_BTREE *)cursor;
- hs_upd = upd_local = NULL;
-
- /* The session should be pointing at the history store btree. */
- WT_ASSERT(session, WT_IS_HS((S2BT(session))->dhandle));
-
- /*
- * Use WT_CURSOR.set_key and WT_CURSOR.set_value to create key and value items, then use them to
- * create an update chain for a direct insertion onto the history store page.
- */
- cursor->set_key(cursor, btree_id, key, tw->start_ts, counter);
- cursor->set_value(cursor, tw->durable_stop_ts, tw->durable_start_ts, (uint64_t)type, hs_value);
-
- /* Allocate a tombstone only when there is a valid stop time point. */
- if (WT_TIME_WINDOW_HAS_STOP(tw)) {
- /*
- * Insert a delete record to represent stop time point for the actual record to be inserted.
- * Set the stop time point as the commit time point of the history store delete record.
- */
- WT_ERR(__wt_upd_alloc_tombstone(session, &hs_upd, NULL));
- hs_upd->start_ts = tw->stop_ts;
- hs_upd->durable_ts = tw->durable_stop_ts;
- hs_upd->txnid = tw->stop_txn;
- }
-
- /*
- * Append to the delete record, the actual record to be inserted into the history store. Set the
- * current update start time point as the commit time point to the history store record.
- */
- WT_ERR(__wt_upd_alloc(session, &cursor->value, WT_UPDATE_STANDARD, &upd_local, NULL));
- upd_local->start_ts = tw->start_ts;
- upd_local->durable_ts = tw->durable_start_ts;
- upd_local->txnid = tw->start_txn;
-
- /* Insert the standard update as next update if there is a tombstone. */
- if (hs_upd != NULL)
- hs_upd->next = upd_local;
- else
- hs_upd = upd_local;
-
- /* Search the page and insert the updates. */
- WT_WITH_PAGE_INDEX(session, ret = __wt_hs_row_search(cbt, &cursor->key, true));
- WT_ERR(ret);
- WT_ERR(__wt_hs_modify(cbt, hs_upd));
-
- /*
- * Since the two updates (tombstone and the standard) will reconcile into a single entry, we are
- * incrementing the history store insert statistic by one.
- */
- WT_STAT_CONN_DATA_INCR(session, cache_hs_insert);
-
-err:
- if (ret != 0) {
- __wt_free_update_list(session, &hs_upd);
-
- /*
- * We did a row search, release the cursor so that the page doesn't continue being held.
- *
- * If we were successful, do NOT reset the cursor. We may want to make use of its position
- * later to remove timestamped entries.
- */
- cursor->reset(cursor);
- }
-
- return (ret);
-}
-
-/*
- * __hs_insert_record_with_btree --
+ * __hs_insert_record --
* A helper function to insert the record into the history store including stop time point.
- * Should be called with session's btree switched to the history store.
*/
static int
-__hs_insert_record_with_btree(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree,
- const WT_ITEM *key, const uint8_t type, const WT_ITEM *hs_value, WT_TIME_WINDOW *tw)
+__hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree, const WT_ITEM *key,
+ const uint8_t type, const WT_ITEM *hs_value, WT_TIME_WINDOW *tw)
{
#ifdef HAVE_DIAGNOSTIC
WT_CURSOR_BTREE *hs_cbt;
#endif
WT_DECL_ITEM(hs_key);
- WT_DECL_ITEM(srch_key);
#ifdef HAVE_DIAGNOSTIC
WT_DECL_ITEM(existing_val);
#endif
@@ -164,37 +80,24 @@ __hs_insert_record_with_btree(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BT
wt_timestamp_t durable_timestamp_diag;
wt_timestamp_t hs_stop_durable_ts_diag;
uint64_t upd_type_full_diag;
+ int cmp;
#endif
uint64_t counter, hs_counter;
uint32_t hs_btree_id;
- int cmp;
counter = 0;
/* Allocate buffers for the history store and search key. */
WT_ERR(__wt_scr_alloc(session, 0, &hs_key));
- WT_ERR(__wt_scr_alloc(session, 0, &srch_key));
#ifdef HAVE_DIAGNOSTIC
/* Allocate buffer for the existing history store value for the same key. */
WT_ERR(__wt_scr_alloc(session, 0, &existing_val));
- hs_cbt = (WT_CURSOR_BTREE *)cursor;
+ hs_cbt = __wt_curhs_get_cbt(cursor);
#endif
- /*
- * The session should be pointing at the history store btree since this is the one that we'll be
- * inserting into. The btree parameter that we're passing in should is the btree that the
- * history store content is associated with (this is where the btree id part of the history
- * store key comes from).
- */
- WT_ASSERT(session, WT_IS_HS((S2BT(session))->dhandle));
- WT_ASSERT(session, !WT_IS_HS(btree->dhandle));
-
- /*
- * Disable bulk loads into history store. This would normally occur when updating a record with
- * a cursor however the history store doesn't use cursor update, so we do it here.
- */
- __wt_cursor_disable_bulk(session);
+ /* Sanity check that the btree is not a history store btree. */
+ WT_ASSERT(session, !WT_IS_HS(btree));
/*
* Only deltas or full updates should be written to the history store. More specifically, we
@@ -207,43 +110,33 @@ __hs_insert_record_with_btree(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BT
* timestamp. Otherwise the newly inserting history store record may fall behind the existing
* one can lead to wrong order.
*/
- WT_ERR_NOTFOUND_OK(
- __wt_hs_cursor_position(session, cursor, btree->id, key, tw->start_ts, srch_key), true);
+ cursor->set_key(cursor, 4, btree->id, key, tw->start_ts, UINT64_MAX);
+ WT_ERR_NOTFOUND_OK(__wt_curhs_search_near_before(session, cursor), true);
+
if (ret == 0) {
WT_ERR(cursor->get_key(cursor, &hs_btree_id, hs_key, &hs_start_ts, &hs_counter));
- /*
- * Check the whether the existing record is also from the same timestamp.
- *
- * Verify simple checks first to confirm whether the retrieved update same or not before
- * performing the expensive key comparison.
- */
- if (hs_btree_id == btree->id && tw->start_ts == hs_start_ts) {
- WT_ERR(__wt_compare(session, NULL, hs_key, key, &cmp));
#ifdef HAVE_DIAGNOSTIC
- if (cmp == 0) {
- WT_ERR(cursor->get_value(cursor, &hs_stop_durable_ts_diag, &durable_timestamp_diag,
- &upd_type_full_diag, existing_val));
- WT_ERR(__wt_compare(session, NULL, existing_val, hs_value, &cmp));
- /*
- * Check if the existing HS value is same as the new value we are about to insert.
- * We can skip this check if the existing value has a globally visible stop time,
- * i.e., the value has been deleted from the HS.
- */
- if (cmp == 0)
- WT_ASSERT(session,
- (WT_TIME_WINDOW_HAS_STOP(&hs_cbt->upd_value->tw) &&
- __wt_txn_tw_stop_visible_all(session, &hs_cbt->upd_value->tw)) ||
- tw->start_txn == WT_TXN_NONE ||
- tw->start_txn != hs_cbt->upd_value->tw.start_txn ||
- tw->start_ts != hs_cbt->upd_value->tw.start_ts);
- counter = hs_counter + 1;
- }
-#else
+ if (tw->start_ts == hs_start_ts) {
+ WT_ERR(cursor->get_value(cursor, &hs_stop_durable_ts_diag, &durable_timestamp_diag,
+ &upd_type_full_diag, existing_val));
+ WT_ERR(__wt_compare(session, NULL, existing_val, hs_value, &cmp));
+ /*
+ * We shouldn't be inserting the same value again for the key unless coming from a
+ * different transaction. If the updates are from the same transaction, the start
+ * timestamp for each update should be different.
+ */
if (cmp == 0)
- counter = hs_counter + 1;
-#endif
+ WT_ASSERT(session,
+ tw->start_txn == WT_TXN_NONE ||
+ tw->start_txn != hs_cbt->upd_value->tw.start_txn ||
+ tw->start_ts != hs_cbt->upd_value->tw.start_ts);
+ counter = hs_counter + 1;
}
+#else
+ if (tw->start_ts == hs_start_ts)
+ counter = hs_counter + 1;
+#endif
}
/*
@@ -251,10 +144,20 @@ __hs_insert_record_with_btree(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BT
* updates, we should remove them and reinsert them at the current timestamp.
*/
if (tw->start_ts != WT_TS_NONE) {
- WT_ERR_NOTFOUND_OK(__wt_hs_cursor_next(session, cursor), true);
+ /*
+ * If there were no keys equal to or less than our target key, we would have received
+ * WT_NOTFOUND. In that case we need to search again with a higher timestamp as the cursor
+ * would not be positioned correctly.
+ */
+ if (ret == 0)
+ WT_ERR_NOTFOUND_OK(cursor->next(cursor), true);
+ else {
+ cursor->set_key(cursor, 3, btree->id, key, tw->start_ts + 1);
+ WT_ERR_NOTFOUND_OK(__wt_curhs_search_near_after(session, cursor), true);
+ }
if (ret == 0)
WT_ERR(__hs_fixup_out_of_order_from_pos(
- session, cursor, btree, key, tw->start_ts, &counter, srch_key));
+ session, cursor, btree, key, tw->start_ts, &counter));
}
#ifdef HAVE_DIAGNOSTIC
@@ -270,36 +173,20 @@ __hs_insert_record_with_btree(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BT
}
}
#endif
- /* The tree structure can change while we try to insert the mod list, retry if that happens. */
- while ((ret = __hs_insert_record_with_btree_int(
- session, cursor, btree->id, key, type, hs_value, tw, counter)) == WT_RESTART)
- WT_STAT_CONN_DATA_INCR(session, cache_hs_insert_restart);
+
+ /* Insert the new record now. */
+ cursor->set_key(cursor, 4, btree->id, key, tw->start_ts, counter);
+ cursor->set_value(
+ cursor, tw, tw->durable_stop_ts, tw->durable_start_ts, (uint64_t)type, hs_value);
+ WT_ERR(cursor->insert(cursor));
+ WT_STAT_CONN_INCR(session, cache_hs_insert);
+ WT_STAT_DATA_INCR(session, cache_hs_insert);
+
err:
#ifdef HAVE_DIAGNOSTIC
__wt_scr_free(session, &existing_val);
#endif
__wt_scr_free(session, &hs_key);
- __wt_scr_free(session, &srch_key);
- /* We did a row search, release the cursor so that the page doesn't continue being held. */
- cursor->reset(cursor);
-
- return (ret);
-}
-
-/*
- * __hs_insert_record --
- * Temporarily switches to history store btree and calls the helper routine to insert records.
- */
-static int
-__hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree, const WT_ITEM *key,
- const uint8_t type, const WT_ITEM *hs_value, WT_TIME_WINDOW *tw)
-{
- WT_CURSOR_BTREE *cbt;
- WT_DECL_RET;
-
- cbt = (WT_CURSOR_BTREE *)cursor;
- WT_WITH_BTREE(session, CUR2BT(cbt),
- ret = __hs_insert_record_with_btree(session, cursor, btree, key, type, hs_value, tw));
return (ret);
}
@@ -346,8 +233,8 @@ __hs_next_upd_full_value(WT_SESSION_IMPL *session, WT_MODIFY_VECTOR *modifies,
int
__wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
{
- WT_BTREE *btree;
- WT_CURSOR *cursor;
+ WT_BTREE *btree, *hs_btree;
+ WT_CURSOR *hs_cursor;
WT_DECL_ITEM(full_value);
WT_DECL_ITEM(key);
WT_DECL_ITEM(modify_value);
@@ -372,10 +259,13 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
bool enable_reverse_modify, hs_inserted, squashed, ts_updates_in_hs;
btree = S2BT(session);
- cursor = session->hs_cursor;
prev_upd = NULL;
insert_cnt = 0;
WT_TIME_WINDOW_INIT(&tw);
+
+ WT_RET(__wt_curhs_open(session, NULL, &hs_cursor));
+ F_SET(hs_cursor, WT_CURSTD_HS_READ_COMMITTED);
+
__wt_modify_vector_init(session, &modifies);
if (!btree->hs_entries)
@@ -560,13 +450,15 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
if (oldest_upd->type == WT_UPDATE_TOMBSTONE && oldest_upd == first_non_ts_upd &&
!F_ISSET(first_non_ts_upd, WT_UPDATE_CLEARED_HS)) {
/* We can only delete history store entries that have timestamps. */
- WT_ERR(__wt_hs_delete_key_from_ts(session, btree->id, key, 1, true));
- WT_STAT_CONN_DATA_INCR(session, cache_hs_key_truncate_non_ts);
+ WT_ERR(__wt_hs_delete_key_from_ts(session, hs_cursor, btree->id, key, 1, true));
+ WT_STAT_CONN_INCR(session, cache_hs_key_truncate_non_ts);
+ WT_STAT_DATA_INCR(session, cache_hs_key_truncate_non_ts);
F_SET(first_non_ts_upd, WT_UPDATE_CLEARED_HS);
} else if (first_non_ts_upd != NULL && !F_ISSET(first_non_ts_upd, WT_UPDATE_CLEARED_HS) &&
(list->ins == NULL || ts_updates_in_hs)) {
- WT_ERR(__wt_hs_delete_key_from_ts(session, btree->id, key, 1, true));
- WT_STAT_CONN_DATA_INCR(session, cache_hs_key_truncate_non_ts);
+ WT_ERR(__wt_hs_delete_key_from_ts(session, hs_cursor, btree->id, key, 1, true));
+ WT_STAT_CONN_INCR(session, cache_hs_key_truncate_non_ts);
+ WT_STAT_DATA_INCR(session, cache_hs_key_truncate_non_ts);
F_SET(first_non_ts_upd, WT_UPDATE_CLEARED_HS);
}
@@ -704,13 +596,13 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
enable_reverse_modify &&
__wt_calc_modify(session, prev_full_value, full_value, prev_full_value->size / 10,
entries, &nentries) == 0) {
- WT_ERR(__wt_modify_pack(cursor, entries, nentries, &modify_value));
+ WT_ERR(__wt_modify_pack(hs_cursor, entries, nentries, &modify_value));
WT_ERR(__hs_insert_record(
- session, cursor, btree, key, WT_UPDATE_MODIFY, modify_value, &tw));
+ session, hs_cursor, btree, key, WT_UPDATE_MODIFY, modify_value, &tw));
__wt_scr_free(session, &modify_value);
} else
WT_ERR(__hs_insert_record(
- session, cursor, btree, key, WT_UPDATE_STANDARD, full_value, &tw));
+ session, hs_cursor, btree, key, WT_UPDATE_STANDARD, full_value, &tw));
/* Flag the update as now in the history store. */
F_SET(upd, WT_UPDATE_HS);
@@ -730,7 +622,8 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
WT_ERR(__wt_block_manager_named_size(session, WT_HS_FILE, &hs_size));
WT_STAT_CONN_SET(session, cache_hs_ondisk, hs_size);
- max_hs_size = CUR2BT(cursor)->file_max;
+ hs_btree = __wt_curhs_get_btree(hs_cursor);
+ max_hs_size = hs_btree->file_max;
if (max_hs_size != 0 && (uint64_t)hs_size > max_hs_size)
WT_ERR_PANIC(session, WT_PANIC,
"WiredTigerHS: file size of %" PRIu64 " exceeds maximum size %" PRIu64, (uint64_t)hs_size,
@@ -747,95 +640,39 @@ err:
__wt_modify_vector_free(&modifies);
__wt_scr_free(session, &full_value);
__wt_scr_free(session, &prev_full_value);
+
+ WT_TRET(hs_cursor->close(hs_cursor));
return (ret);
}
/*
- * __hs_delete_key_from_ts_int --
- * Internal helper for deleting history store content of a given key from a timestamp.
+ * __wt_hs_delete_key_from_ts --
+ * Delete history store content of a given key from a timestamp.
*/
-static int
-__hs_delete_key_from_ts_int(
- WT_SESSION_IMPL *session, uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t ts, bool reinsert)
+int
+__wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint32_t btree_id,
+ const WT_ITEM *key, wt_timestamp_t ts, bool reinsert)
{
- WT_CURSOR *hs_cursor;
- WT_DECL_ITEM(srch_key);
WT_DECL_RET;
- WT_ITEM hs_key;
- wt_timestamp_t hs_start_ts;
- uint64_t hs_counter;
- uint32_t hs_btree_id;
- int cmp, exact;
-
- /* The session should be pointing at the history store btree. */
- WT_ASSERT(session, WT_IS_HS((S2BT(session))->dhandle));
+ bool hs_read_committed;
- hs_cursor = session->hs_cursor;
- WT_RET(__wt_scr_alloc(session, 0, &srch_key));
+ hs_read_committed = F_ISSET(hs_cursor, WT_CURSTD_HS_READ_COMMITTED);
+ if (!hs_read_committed)
+ F_SET(hs_cursor, WT_CURSTD_HS_READ_COMMITTED);
- hs_cursor->set_key(hs_cursor, btree_id, key, ts, 0);
- WT_ERR(__wt_buf_set(session, srch_key, hs_cursor->key.data, hs_cursor->key.size));
- WT_ERR_NOTFOUND_OK(__wt_hs_cursor_search_near(session, hs_cursor, &exact), true);
+ hs_cursor->set_key(hs_cursor, 3, btree_id, key, ts);
+ WT_ERR_NOTFOUND_OK(__wt_curhs_search_near_after(session, hs_cursor), true);
/* Empty history store is fine. */
- if (ret == WT_NOTFOUND)
+ if (ret == WT_NOTFOUND) {
+ ret = 0;
goto done;
- /*
- * If we raced with a history store insert, we may be two or more records away from our target.
- * Keep iterating forwards until we are on or past our target key.
- *
- * We can't use the cursor positioning helper that we use for regular reads since that will
- * place us at the end of a particular key/timestamp range whereas we want to be placed at the
- * beginning.
- */
- if (exact < 0) {
- while ((ret = __wt_hs_cursor_next(session, hs_cursor)) == 0) {
- WT_ERR(__wt_compare(session, NULL, &hs_cursor->key, srch_key, &cmp));
- if (cmp >= 0)
- break;
- }
- /* No entries greater than or equal to the key we searched for. */
- WT_ERR_NOTFOUND_OK(ret, true);
- if (ret == WT_NOTFOUND)
- goto done;
}
- /* Bailing out here also means we have no history store records for our key. */
- WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_start_ts, &hs_counter));
- if (hs_btree_id != btree_id)
- goto done;
- WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp));
- if (cmp != 0)
- goto done;
- WT_ASSERT(session, ts == WT_TS_NONE || hs_start_ts != WT_TS_NONE);
WT_ERR(__hs_delete_key_from_pos(session, hs_cursor, btree_id, key, reinsert));
done:
- ret = 0;
err:
- __wt_scr_free(session, &srch_key);
- return (ret);
-}
-
-/*
- * __wt_hs_delete_key_from_ts --
- * Delete history store content of a given key from a timestamp.
- */
-int
-__wt_hs_delete_key_from_ts(
- WT_SESSION_IMPL *session, uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t ts, bool reinsert)
-{
- WT_DECL_RET;
-
- /* If the operation can't open new handles, it should have figured that out before here. */
- WT_ASSERT(session, !F_ISSET(session, WT_SESSION_NO_DATA_HANDLES));
-
- /* The tree structure can change while we try to insert the mod list, retry if that happens. */
- do {
- WT_WITH_BTREE(session, CUR2BT(session->hs_cursor),
- (ret = __hs_delete_key_from_ts_int(session, btree_id, key, ts, reinsert)));
- if (ret == WT_RESTART)
- WT_STAT_CONN_DATA_INCR(session, cache_hs_insert_restart);
- } while (ret == WT_RESTART);
-
+ if (!hs_read_committed)
+ F_CLR(hs_cursor, WT_CURSTD_HS_READ_COMMITTED);
return (ret);
}
@@ -847,31 +684,29 @@ __wt_hs_delete_key_from_ts(
*/
static int
__hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, WT_BTREE *btree,
- const WT_ITEM *key, wt_timestamp_t ts, uint64_t *counter, const WT_ITEM *srch_key)
+ const WT_ITEM *key, wt_timestamp_t ts, uint64_t *counter)
{
- WT_CURSOR *insert_cursor;
+ WT_CURSOR *hs_insert_cursor;
WT_CURSOR_BTREE *hs_cbt;
WT_DECL_RET;
WT_ITEM hs_key, hs_value;
- WT_TIME_WINDOW tw;
- WT_UPDATE *tombstone;
- wt_timestamp_t hs_ts, hs_start_durable_ts, hs_stop_durable_ts;
+ WT_TIME_WINDOW tw, hs_insert_tw;
+ wt_timestamp_t hs_ts;
uint64_t hs_counter, hs_upd_type;
uint32_t hs_btree_id;
+#ifdef HAVE_DIAGNOSTIC
int cmp;
+#endif
char ts_string[5][WT_TS_INT_STRING_SIZE];
- const char *open_cursor_cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL};
- insert_cursor = NULL;
- hs_cbt = (WT_CURSOR_BTREE *)hs_cursor;
+ hs_insert_cursor = NULL;
+ hs_cbt = __wt_curhs_get_cbt(hs_cursor);
WT_CLEAR(hs_key);
WT_CLEAR(hs_value);
- WT_TIME_WINDOW_INIT(&tw);
- tombstone = NULL;
-
- /* The session should be pointing at the history store btree. */
- WT_ASSERT(session, WT_IS_HS((S2BT(session))->dhandle));
+#ifndef HAVE_DIAGNOSTIC
+ WT_UNUSED(key);
+#endif
/*
* Position ourselves at the beginning of the key range that we may have to fixup. Prior to
* getting here, we've positioned our cursor at the end of a key/timestamp range and then done a
@@ -881,15 +716,15 @@ __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
* to keep doing "next" until we've got a key greater than the one we attempted to position
* ourselves with.
*/
- for (; ret == 0; ret = __wt_hs_cursor_next(session, hs_cursor)) {
- /*
- * Prior to getting here, we've done a "search near" on our key for the timestamp we're
- * inserting and then a "next". In the regular case, our cursor will be positioned on the
- * next key and we'll break out of the first iteration in one of the conditions below.
- */
+ for (; ret == 0; ret = hs_cursor->next(hs_cursor)) {
+ /* We shouldn't have crossed the btree and user key search space. */
WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_ts, &hs_counter));
- WT_ERR(__wt_compare(session, NULL, &hs_cursor->key, srch_key, &cmp));
- if (cmp > 0)
+ WT_ASSERT(session, hs_btree_id == btree->id);
+#ifdef HAVE_DIAGNOSTIC
+ WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp));
+ WT_ASSERT(session, cmp == 0);
+#endif
+ if (hs_ts > ts)
break;
}
if (ret == WT_NOTFOUND)
@@ -916,27 +751,14 @@ __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
* 2 foo 3 2 ccc
* 2 foo 3 3 ddd
*/
- for (; ret == 0; ret = __wt_hs_cursor_next(session, hs_cursor)) {
- /*
- * Prior to getting here, we've done a "search near" on our key for the timestamp we're
- * inserting and then a "next". In the regular case, our cursor will be positioned on the
- * next key and we'll break out of the first iteration in one of the conditions below.
- */
+ for (; ret == 0; ret = hs_cursor->next(hs_cursor)) {
+ /* We shouldn't have crossed the btree and user key search space. */
WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_ts, &hs_counter));
- if (hs_btree_id != btree->id)
- break;
-
+ WT_ASSERT(session, hs_btree_id == btree->id);
+#ifdef HAVE_DIAGNOSTIC
WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp));
- if (cmp != 0)
- break;
- /*
- * If the stop time pair on the tombstone in the history store is already globally visible
- * we can skip it.
- */
- if (__wt_txn_tw_stop_visible_all(session, &hs_cbt->upd_value->tw)) {
- WT_STAT_CONN_DATA_INCR(session, cursor_next_hs_tombstone);
- continue;
- }
+ WT_ASSERT(session, cmp == 0);
+#endif
/*
* If we got here, we've got out-of-order updates in the history store.
*
@@ -950,11 +772,8 @@ __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
* Don't incur the overhead of opening this new cursor unless we need it. In the regular
* case, we'll never get here.
*/
- if (insert_cursor == NULL) {
- WT_WITHOUT_DHANDLE(session,
- ret = __wt_open_cursor(session, WT_HS_URI, NULL, open_cursor_cfg, &insert_cursor));
- WT_ERR(ret);
- }
+ if (hs_insert_cursor == NULL)
+ WT_ERR(__wt_curhs_open(session, NULL, &hs_insert_cursor));
/*
* If these history store records are resolved prepared updates, their durable timestamps
@@ -973,47 +792,38 @@ __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
__wt_timestamp_to_string(hs_cbt->upd_value->tw.durable_stop_ts, ts_string[3]),
__wt_timestamp_to_string(ts, ts_string[4]));
- tw.start_ts = tw.durable_start_ts = ts;
- tw.start_txn = hs_cbt->upd_value->tw.start_txn;
+ hs_insert_tw.start_ts = hs_insert_tw.durable_start_ts = ts;
+ hs_insert_tw.start_txn = hs_cbt->upd_value->tw.start_txn;
/*
* We're going to be inserting something immediately after with the same timestamp. Either
* another moved update OR the update itself that triggered the correction. In either case,
* we should preserve the stop transaction id.
*/
- tw.stop_ts = tw.durable_stop_ts = ts;
- tw.stop_txn = hs_cbt->upd_value->tw.stop_txn;
+ hs_insert_tw.stop_ts = hs_insert_tw.durable_stop_ts = ts;
+ hs_insert_tw.stop_txn = hs_cbt->upd_value->tw.stop_txn;
/* Extract the underlying value for reinsertion. */
WT_ERR(hs_cursor->get_value(
- hs_cursor, &hs_stop_durable_ts, &hs_start_durable_ts, &hs_upd_type, &hs_value));
+ hs_cursor, &tw.durable_stop_ts, &tw.durable_start_ts, &hs_upd_type, &hs_value));
- /* Reinsert entry with earlier timestamp. */
- while ((ret = __hs_insert_record_with_btree_int(session, insert_cursor, btree->id, key,
- (uint8_t)hs_upd_type, &hs_value, &tw, *counter)) == WT_RESTART)
- ;
- WT_ERR(ret);
+ /* Insert the value back with different timestamps. */
+ hs_insert_cursor->set_key(hs_insert_cursor, 4, btree->id, &hs_key, ts, *counter);
+ hs_insert_cursor->set_value(hs_insert_cursor, &hs_insert_tw, hs_insert_tw.durable_stop_ts,
+ hs_insert_tw.durable_start_ts, (uint64_t)hs_upd_type, &hs_value);
+ WT_ERR(hs_insert_cursor->insert(hs_insert_cursor));
++(*counter);
- /* Delete entry with higher timestamp. */
- hs_cbt->compare = 0;
- WT_ERR(__wt_upd_alloc_tombstone(session, &tombstone, NULL));
- tombstone->txnid = WT_TXN_NONE;
- tombstone->start_ts = tombstone->durable_ts = WT_TS_NONE;
- while ((ret = __wt_hs_modify(hs_cbt, tombstone)) == WT_RESTART) {
- WT_WITH_PAGE_INDEX(session, ret = __wt_hs_row_search(hs_cbt, &hs_cursor->key, false));
- WT_ERR(ret);
- }
- WT_ERR(ret);
- tombstone = NULL;
- WT_STAT_CONN_DATA_INCR(session, cache_hs_order_fixup_move);
+ /* Delete the entry with higher timestamp. */
+ WT_ERR(hs_cursor->remove(hs_cursor));
+ WT_STAT_CONN_INCR(session, cache_hs_order_fixup_move);
+ WT_STAT_DATA_INCR(session, cache_hs_order_fixup_move);
}
if (ret == WT_NOTFOUND)
ret = 0;
err:
- __wt_free(session, tombstone);
- if (insert_cursor != NULL)
- insert_cursor->close(insert_cursor);
+ if (hs_insert_cursor != NULL)
+ hs_insert_cursor->close(hs_insert_cursor);
return (ret);
}
@@ -1027,26 +837,21 @@ static int
__hs_delete_key_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint32_t btree_id,
const WT_ITEM *key, bool reinsert)
{
- WT_CURSOR *insert_cursor;
+ WT_CURSOR *hs_insert_cursor;
WT_CURSOR_BTREE *hs_cbt;
WT_DECL_RET;
WT_ITEM hs_key, hs_value;
- WT_TIME_WINDOW tw;
- WT_UPDATE *upd;
+ WT_TIME_WINDOW hs_insert_tw;
wt_timestamp_t durable_timestamp, hs_start_ts, hs_stop_durable_ts;
uint64_t hs_counter, hs_insert_counter, hs_upd_type;
uint32_t hs_btree_id;
- int cmp;
- const char *open_cursor_cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL};
- hs_cbt = (WT_CURSOR_BTREE *)hs_cursor;
+ hs_cbt = __wt_curhs_get_cbt(hs_cursor);
hs_insert_counter = 0;
WT_CLEAR(hs_key);
WT_CLEAR(hs_value);
- WT_TIME_WINDOW_INIT(&tw);
- upd = NULL;
- insert_cursor = NULL;
+ hs_insert_cursor = NULL;
if (reinsert) {
/*
* Determine the starting value of our counter, i.e. highest counter value of the timestamp
@@ -1056,90 +861,60 @@ __hs_delete_key_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint32_
* The cursor will also be positioned at the start of the range that we wish to start
* inserting.
*/
- WT_WITHOUT_DHANDLE(session,
- ret = __wt_open_cursor(session, WT_HS_URI, NULL, open_cursor_cfg, &insert_cursor));
+ WT_WITHOUT_DHANDLE(session, ret = __wt_curhs_open(session, NULL, &hs_insert_cursor));
WT_ERR(ret);
- F_SET(insert_cursor, WT_CURSTD_IGNORE_TOMBSTONE);
- WT_ERR_NOTFOUND_OK(
- __wt_hs_cursor_position(session, insert_cursor, btree_id, key, WT_TS_NONE, NULL), true);
+ F_SET(hs_insert_cursor, WT_CURSTD_HS_READ_COMMITTED);
+ hs_insert_cursor->set_key(hs_insert_cursor, 4, btree_id, key, WT_TS_NONE, UINT64_MAX);
+ WT_ERR_NOTFOUND_OK(__wt_curhs_search_near_before(session, hs_insert_cursor), true);
if (ret == WT_NOTFOUND) {
hs_insert_counter = 0;
ret = 0;
} else {
- WT_ERR(insert_cursor->get_key(
- insert_cursor, &hs_btree_id, &hs_key, &hs_start_ts, &hs_insert_counter));
+ WT_ERR(hs_insert_cursor->get_key(
+ hs_insert_cursor, &hs_btree_id, &hs_key, &hs_start_ts, &hs_insert_counter));
+ WT_ASSERT(session, hs_start_ts == WT_TS_NONE);
/*
- * Increment the hs counter that we'll be using to insert with to avoid overwriting the
- * record we just found.
+ * Increment the history store counter that we'll be using to insert with to avoid
+ * overwriting the record we just found.
*/
hs_insert_counter++;
}
}
/* Begin iterating over the range of entries we expect to replace. */
- for (; ret == 0; ret = __wt_hs_cursor_next(session, hs_cursor)) {
+ for (; ret == 0; ret = hs_cursor->next(hs_cursor)) {
WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_start_ts, &hs_counter));
- /*
- * If the btree id or key isn't ours, that means that we've hit the end of the key range and
- * that there is no more history store content for this key.
- */
- if (hs_btree_id != btree_id)
- break;
- WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp));
- if (cmp != 0)
- break;
-
- /*
- * If the stop time pair on the tombstone in the history store is already globally visible
- * we can skip it.
- */
- if (__wt_txn_tw_stop_visible_all(session, &hs_cbt->upd_value->tw)) {
- WT_STAT_CONN_DATA_INCR(session, cursor_next_hs_tombstone);
- continue;
- }
-
- /*
- * Once we reinsert the entry below, we're not allowed to fail otherwise we'll be leaving
- * our history store an invalid state. Anything that can potentially fail, such as heap
- * allocation of the tombstone that we'll be using to remove the old value, should be
- * performed before reinsertion.
- */
- WT_ERR(__wt_upd_alloc_tombstone(session, &upd, NULL));
if (reinsert) {
WT_ERR(hs_cursor->get_value(
hs_cursor, &hs_stop_durable_ts, &durable_timestamp, &hs_upd_type, &hs_value));
- tw.start_ts = tw.durable_start_ts = WT_TS_NONE;
- tw.start_txn = hs_cbt->upd_value->tw.start_txn;
+ /* Reinsert entry with zero timestamp. */
+ hs_insert_tw.start_ts = hs_insert_tw.durable_start_ts = WT_TS_NONE;
+ hs_insert_tw.start_txn = hs_cbt->upd_value->tw.start_txn;
+
+ hs_insert_tw.stop_ts = hs_insert_tw.durable_stop_ts = WT_TS_NONE;
+ hs_insert_tw.stop_txn = hs_cbt->upd_value->tw.stop_txn;
- tw.stop_ts = tw.durable_stop_ts = WT_TS_NONE;
- tw.stop_txn = hs_cbt->upd_value->tw.stop_txn;
+ hs_insert_cursor->set_key(
+ hs_insert_cursor, 4, btree_id, key, WT_TS_NONE, hs_insert_counter);
+ hs_insert_cursor->set_value(hs_insert_cursor, &hs_insert_tw, WT_TS_NONE, WT_TS_NONE,
+ (uint64_t)hs_upd_type, &hs_value);
+ WT_ERR(hs_insert_cursor->insert(hs_insert_cursor));
+ WT_STAT_CONN_INCR(session, cache_hs_insert);
+ WT_STAT_DATA_INCR(session, cache_hs_insert);
- /* Reinsert entry with zero timestamp. */
- while (
- (ret = __hs_insert_record_with_btree_int(session, insert_cursor, btree_id, &hs_key,
- (uint8_t)hs_upd_type, &hs_value, &tw, hs_insert_counter)) == WT_RESTART)
- ;
hs_insert_counter++;
- WT_ERR(ret);
}
+
/*
- * Since we're using internal functions to modify the row structure, we need to manually set
- * the comparison to an exact match.
- */
- hs_cbt->compare = 0;
- /*
- * Append a globally visible tombstone to the update list. This will effectively make the
- * value invisible and the key itself will eventually get removed during reconciliation.
+ * Remove the key using the history store cursor interface.
*
* If anything fails after this point and we're reinserting we need to panic as it will
* leave our history store in an unexpected state with duplicate entries.
*/
- upd->txnid = WT_TXN_NONE;
- upd->start_ts = upd->durable_ts = WT_TS_NONE;
- if ((ret = __wt_hs_modify(hs_cbt, upd)) != 0) {
+ if ((ret = hs_cursor->remove(hs_cursor)) != 0) {
if (reinsert)
WT_ERR_PANIC(session, WT_PANIC,
"Failed to insert tombstone, history store now "
@@ -1147,14 +922,13 @@ __hs_delete_key_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint32_
else
WT_ERR(ret);
}
- upd = NULL;
- WT_STAT_CONN_DATA_INCR(session, cache_hs_key_truncate);
+ WT_STAT_CONN_INCR(session, cache_hs_key_truncate);
+ WT_STAT_DATA_INCR(session, cache_hs_key_truncate);
}
if (ret == WT_NOTFOUND)
ret = 0;
err:
- __wt_free(session, upd);
- if (insert_cursor != NULL)
- insert_cursor->close(insert_cursor);
+ if (hs_insert_cursor != NULL)
+ hs_insert_cursor->close(hs_insert_cursor);
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/history/hs_verify.c b/src/third_party/wiredtiger/src/history/hs_verify.c
index 2fc49daa643..3b73a587922 100644
--- a/src/third_party/wiredtiger/src/history/hs_verify.c
+++ b/src/third_party/wiredtiger/src/history/hs_verify.c
@@ -15,10 +15,9 @@
* store.
*/
static int
-__hs_verify_id(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *ds_cbt, uint32_t this_btree_id)
+__hs_verify_id(
+ WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, WT_CURSOR_BTREE *ds_cbt, uint32_t this_btree_id)
{
- WT_CURSOR *hs_cursor;
- WT_CURSOR_BTREE *hs_cbt;
WT_DECL_ITEM(prev_key);
WT_DECL_RET;
WT_ITEM key;
@@ -27,12 +26,14 @@ __hs_verify_id(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *ds_cbt, uint32_t this_
uint32_t btree_id;
int cmp;
- hs_cursor = session->hs_cursor;
- hs_cbt = (WT_CURSOR_BTREE *)hs_cursor;
WT_CLEAR(key);
WT_ERR(__wt_scr_alloc(session, 0, &prev_key));
+#ifndef HAVE_DIAGNOSTIC
+ WT_UNUSED(this_btree_id);
+#endif
+
/*
* If using standard cursors, we need to skip the non-globally visible tombstones in the data
* table to verify the corresponding entries in the history store are too present in the data
@@ -46,27 +47,18 @@ __hs_verify_id(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *ds_cbt, uint32_t this_
* verify. When we return after moving to a new key the caller is responsible for keeping the
* cursor there or deciding they're done.
*/
- for (; ret == 0; ret = __wt_hs_cursor_next(session, hs_cursor)) {
- WT_ERR(hs_cursor->get_key(hs_cursor, &btree_id, &key, &hs_start_ts, &hs_counter));
-
+ for (; ret == 0; ret = hs_cursor->next(hs_cursor)) {
/*
* If the btree id does not match the previous one, we're done. It is up to the caller to set
* up for the next tree and call us, if they choose. For a full history store walk, the
* caller sends in WT_BTREE_ID_INVALID and this function will set and use the first btree id
* it finds and will return once it walks off that tree, leaving the cursor set to the first
* key of that new tree.
+ *
+ * We should never cross into a different btree id; assert if we do.
*/
- if (btree_id != this_btree_id)
- break;
-
- /*
- * If the stop time pair on the tombstone in the history store is already globally visible
- * we can skip it.
- */
- if (__wt_txn_tw_stop_visible_all(session, &hs_cbt->upd_value->tw)) {
- WT_STAT_CONN_INCR(session, cursor_next_hs_tombstone);
- continue;
- }
+ WT_ERR(hs_cursor->get_key(hs_cursor, &btree_id, &key, &hs_start_ts, &hs_counter));
+ WT_ASSERT(session, btree_id == this_btree_id);
/*
* If we have already checked against this key, keep going to the next key. We only need to
@@ -114,22 +106,14 @@ __wt_hs_verify_one(WT_SESSION_IMPL *session)
WT_CURSOR *hs_cursor;
WT_CURSOR_BTREE ds_cbt;
WT_DECL_RET;
- WT_ITEM hs_key;
uint32_t btree_id;
- int exact;
- hs_cursor = session->hs_cursor;
+ WT_RET(__wt_curhs_open(session, NULL, &hs_cursor));
+ F_SET(hs_cursor, WT_CURSTD_HS_READ_COMMITTED);
btree_id = S2BT(session)->id;
- /*
- * We are required to position the history store cursor. Set it to the first record of our btree
- * in the history store.
- */
- memset(&hs_key, 0, sizeof(hs_key));
- hs_cursor->set_key(hs_cursor, btree_id, &hs_key, 0, 0);
- ret = __wt_hs_cursor_search_near(session, hs_cursor, &exact);
- if (ret == 0 && exact < 0)
- ret = __wt_hs_cursor_next(session, hs_cursor);
+ hs_cursor->set_key(hs_cursor, 1, btree_id);
+ WT_ERR(__wt_curhs_search_near_after(session, hs_cursor));
/*
* If we positioned the cursor there is something to verify.
@@ -141,9 +125,12 @@ __wt_hs_verify_one(WT_SESSION_IMPL *session)
if (ret == 0) {
__wt_btcur_init(session, &ds_cbt);
__wt_btcur_open(&ds_cbt);
- ret = __hs_verify_id(session, &ds_cbt, btree_id);
+ ret = __hs_verify_id(session, hs_cursor, &ds_cbt, btree_id);
WT_TRET(__wt_btcur_close(&ds_cbt, false));
}
+
+err:
+ WT_TRET(hs_cursor->close(hs_cursor));
return (ret == WT_NOTFOUND ? 0 : ret);
}
@@ -173,10 +160,10 @@ __wt_hs_verify(WT_SESSION_IMPL *session)
btree_id = WT_BTREE_ID_INVALID;
uri_data = NULL;
+ WT_RET(__wt_curhs_open(session, NULL, &hs_cursor));
+ F_SET(hs_cursor, WT_CURSTD_HS_READ_COMMITTED);
WT_ERR(__wt_scr_alloc(session, 0, &buf));
- WT_ERR(__wt_hs_cursor_open(session));
- hs_cursor = session->hs_cursor;
- WT_ERR_NOTFOUND_OK(__wt_hs_cursor_next(session, hs_cursor), true);
+ WT_ERR_NOTFOUND_OK(hs_cursor->next(hs_cursor), true);
stop = ret == WT_NOTFOUND ? true : false;
ret = 0;
@@ -198,17 +185,16 @@ __wt_hs_verify(WT_SESSION_IMPL *session)
}
WT_ERR(__wt_open_cursor(session, uri_data, NULL, NULL, &ds_cursor));
F_SET(ds_cursor, WT_CURSOR_RAW_OK);
- ret = __hs_verify_id(session, (WT_CURSOR_BTREE *)ds_cursor, btree_id);
+ ret = __hs_verify_id(session, hs_cursor, (WT_CURSOR_BTREE *)ds_cursor, btree_id);
if (ret == WT_NOTFOUND)
stop = true;
WT_TRET(ds_cursor->close(ds_cursor));
WT_ERR_NOTFOUND_OK(ret, false);
}
err:
- WT_TRET(__wt_hs_cursor_close(session));
-
__wt_scr_free(session, &buf);
WT_ASSERT(session, key.mem == NULL && key.memsize == 0);
__wt_free(session, uri_data);
+ WT_TRET(hs_cursor->close(hs_cursor));
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/include/api.h b/src/third_party/wiredtiger/src/include/api.h
index a894b79eeef..2a2bd5aca2f 100644
--- a/src/third_party/wiredtiger/src/include/api.h
+++ b/src/third_party/wiredtiger/src/include/api.h
@@ -36,32 +36,32 @@
WT_DATA_HANDLE *__olddh = (s)->dhandle; \
const char *__oldname; \
/* If this isn't an API reentry, the name should be NULL and the counter should be 0. */ \
- WT_ASSERT(session, (s)->name != NULL || s->api_call_counter == 0); \
+ WT_ASSERT(session, (s)->name != NULL || (s)->api_call_counter == 0); \
__oldname = (s)->name; \
- ++s->api_call_counter; \
+ ++(s)->api_call_counter; \
(s)->dhandle = (dh); \
(s)->name = (s)->lastop = #h "." #n
#define API_SESSION_POP(s) \
(s)->dhandle = __olddh; \
(s)->name = __oldname; \
- --s->api_call_counter
+ --(s)->api_call_counter
/* Standard entry points to the API: declares/initializes local variables. */
-#define API_SESSION_INIT(s, h, n, dh) \
- WT_TRACK_OP_DECL; \
- API_SESSION_PUSH(s, h, n, dh); \
- /* \
- * No code before this line, otherwise error handling won't be \
- * correct. \
- */ \
- WT_ERR(WT_SESSION_CHECK_PANIC(s)); \
- WT_SINGLE_THREAD_CHECK_START(s); \
- WT_TRACK_OP_INIT(s); \
- if (s->api_call_counter == 1 && !F_ISSET(s, WT_SESSION_INTERNAL)) \
- __wt_op_timer_start(s); \
- /* Reset wait time if this isn't an API reentry. */ \
- if (s->api_call_counter == 1) \
- (s)->cache_wait_us = 0; \
+#define API_SESSION_INIT(s, h, n, dh) \
+ WT_TRACK_OP_DECL; \
+ API_SESSION_PUSH(s, h, n, dh); \
+ /* \
+ * No code before this line, otherwise error handling won't be \
+ * correct. \
+ */ \
+ WT_ERR(WT_SESSION_CHECK_PANIC(s)); \
+ WT_SINGLE_THREAD_CHECK_START(s); \
+ WT_TRACK_OP_INIT(s); \
+ if ((s)->api_call_counter == 1 && !F_ISSET(s, WT_SESSION_INTERNAL)) \
+ __wt_op_timer_start(s); \
+ /* Reset wait time if this isn't an API reentry. */ \
+ if ((s)->api_call_counter == 1) \
+ (s)->cache_wait_us = 0; \
__wt_verbose((s), WT_VERB_API, "%s", "CALL: " #h ":" #n)
#define API_CALL_NOCONF(s, h, n, dh) \
@@ -75,21 +75,26 @@
if ((config) != NULL) \
WT_ERR(__wt_config_check((s), WT_CONFIG_REF(session, h##_##n), (config), 0))
-#define API_END(s, ret) \
- if ((s) != NULL) { \
- WT_TRACK_OP_END(s); \
- WT_SINGLE_THREAD_CHECK_STOP(s); \
- if ((ret) != 0) \
- __wt_txn_err_set(s, ret); \
- if (s->api_call_counter == 1 && !F_ISSET(session, WT_SESSION_INTERNAL)) \
- __wt_op_timer_stop(s); \
- /* \
- * No code after this line, otherwise error handling \
- * won't be correct. \
- */ \
- API_SESSION_POP(s); \
- } \
- } \
+#define API_END(s, ret) \
+ if ((s) != NULL) { \
+ WT_TRACK_OP_END(s); \
+ WT_SINGLE_THREAD_CHECK_STOP(s); \
+ if ((ret) != 0) \
+ __wt_txn_err_set(s, ret); \
+ if ((s)->api_call_counter == 1 && !F_ISSET(session, WT_SESSION_INTERNAL)) \
+ __wt_op_timer_stop(s); \
+ /* \
+ * We should not leave any history store cursor open when returning from an API call. \
+ * However, we cannot do a stricter check until WT-7247 is resolved. \
+ */ \
+ WT_ASSERT(s, (s)->api_call_counter > 1 || (s)->hs_cursor_counter <= 2); \
+ /* \
+ * No code after this line, otherwise error handling \
+ * won't be correct. \
+ */ \
+ API_SESSION_POP(s); \
+ } \
+ } \
while (0)
/* An API call wrapped in a transaction if necessary. */
@@ -188,13 +193,15 @@
SESSION_API_PREPARE_CHECK(s, WT_SESSION, n); \
API_CALL_NOCONF(s, WT_SESSION, n, NULL)
-#define SESSION_API_PREPARE_CHECK(s, h, n) \
- do { \
- int __prepare_ret; \
- API_SESSION_PUSH(s, WT_SESSION, n, NULL); \
- __prepare_ret = __wt_txn_context_prepare_check(s); \
- API_SESSION_POP(s); \
- WT_RET(__prepare_ret); \
+#define SESSION_API_PREPARE_CHECK(s, h, n) \
+ do { \
+ if ((s)->api_call_counter == 0) { \
+ int __prepare_ret; \
+ API_SESSION_PUSH(s, WT_SESSION, n, NULL); \
+ __prepare_ret = __wt_txn_context_prepare_check(s); \
+ API_SESSION_POP(s); \
+ WT_RET(__prepare_ret); \
+ } \
} while (0)
#define SESSION_API_CALL(s, n, config, cfg) \
@@ -209,8 +216,7 @@
#define CURSOR_API_CALL(cur, s, n, bt) \
(s) = (WT_SESSION_IMPL *)(cur)->session; \
- if ((s)->hs_cursor == NULL) \
- SESSION_API_PREPARE_CHECK(s, WT_CURSOR, n); \
+ SESSION_API_PREPARE_CHECK(s, WT_CURSOR, n); \
API_CALL_NOCONF(s, WT_CURSOR, n, ((bt) == NULL) ? NULL : ((WT_BTREE *)(bt))->dhandle); \
if (F_ISSET(cur, WT_CURSTD_CACHED)) \
WT_ERR(__wt_cursor_cached(cur))
diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h
index d489f7aa638..82023a1573e 100644
--- a/src/third_party/wiredtiger/src/include/cursor.h
+++ b/src/third_party/wiredtiger/src/include/cursor.h
@@ -288,7 +288,7 @@ struct __wt_cursor_hs {
WT_CURSOR *file_cursor; /* Queries of regular history store data */
WT_TIME_WINDOW time_window;
uint32_t btree_id;
- WT_ITEM datastore_key;
+ WT_ITEM *datastore_key;
/* AUTOMATIC FLAG VALUE GENERATION START */
#define WT_HS_CUR_BTREE_ID_SET 0x1u
diff --git a/src/third_party/wiredtiger/src/include/cursor_inline.h b/src/third_party/wiredtiger/src/include/cursor_inline.h
index ef359942853..4c5889b6b9e 100644
--- a/src/third_party/wiredtiger/src/include/cursor_inline.h
+++ b/src/third_party/wiredtiger/src/include/cursor_inline.h
@@ -7,6 +7,32 @@
*/
/*
+ * __wt_curhs_get_btree --
+ * Convert a history store cursor to the underlying btree.
+ */
+static inline WT_BTREE *
+__wt_curhs_get_btree(WT_CURSOR *cursor)
+{
+ WT_CURSOR_HS *hs_cursor;
+ hs_cursor = (WT_CURSOR_HS *)cursor;
+
+ return (CUR2BT(hs_cursor->file_cursor));
+}
+
+/*
+ * __wt_curhs_get_cbt --
+ * Convert a history store cursor to the underlying btree cursor.
+ */
+static inline WT_CURSOR_BTREE *
+__wt_curhs_get_cbt(WT_CURSOR *cursor)
+{
+ WT_CURSOR_HS *hs_cursor;
+ hs_cursor = (WT_CURSOR_HS *)cursor;
+
+ return ((WT_CURSOR_BTREE *)hs_cursor->file_cursor);
+}
+
+/*
* __cursor_set_recno --
* The cursor value in the interface has to track the value in the underlying cursor, update
* them in parallel.
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index dfefe57ba26..7fac6f5cbd4 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -495,8 +495,14 @@ extern int __wt_curfile_next_random(WT_CURSOR *cursor)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner,
const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curhs_cache(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_curhs_open(WT_SESSION_IMPL *session, WT_CURSOR *owner, WT_CURSOR **cursorp)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curhs_search_near_after(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curhs_search_near_before(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_curindex_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner,
const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT_INDEX *idx,
@@ -596,7 +602,7 @@ extern int __wt_debug_addr_print(WT_SESSION_IMPL *session, const uint8_t *addr,
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_debug_cursor_page(void *cursor_arg, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE(
(visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_debug_cursor_tree_hs(void *cursor_arg, const char *ofile)
+extern int __wt_debug_cursor_tree_hs(void *session_arg, const char *ofile)
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_debug_disk(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, const char *ofile)
@@ -750,26 +756,11 @@ extern int __wt_hex_to_raw(WT_SESSION_IMPL *session, const char *from, WT_ITEM *
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_hs_config(WT_SESSION_IMPL *session, const char **cfg)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_hs_cursor_cache(WT_SESSION_IMPL *session)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_hs_cursor_close(WT_SESSION_IMPL *session)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_hs_cursor_next(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_hs_cursor_open(WT_SESSION_IMPL *session)
+extern int __wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
+ uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t ts, bool reinsert)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_hs_cursor_position(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id,
- const WT_ITEM *key, wt_timestamp_t timestamp, WT_ITEM *user_srch_key)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_hs_cursor_prev(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_hs_cursor_search_near(WT_SESSION_IMPL *session, WT_CURSOR *cursor, int *exactp)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_hs_delete_key_from_ts(
- WT_SESSION_IMPL *session, uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t ts, bool reinsert)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_hs_find_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_format,
- uint64_t recno, WT_UPDATE_VALUE *upd_value, bool allow_prepare, WT_ITEM *base_value_buf)
+extern int __wt_hs_find_upd(WT_SESSION_IMPL *session, uint32_t btree_id, WT_ITEM *key,
+ const char *value_format, uint64_t recno, WT_UPDATE_VALUE *upd_value, WT_ITEM *base_value_buf)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_hs_get_btree(WT_SESSION_IMPL *session, WT_BTREE **hs_btreep)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -1716,6 +1707,7 @@ extern void __wt_gen_next(WT_SESSION_IMPL *session, int which, uint64_t *genp);
extern void __wt_gen_next_drain(WT_SESSION_IMPL *session, int which);
extern void __wt_hazard_close(WT_SESSION_IMPL *session);
extern void __wt_hs_close(WT_SESSION_IMPL *session);
+extern void __wt_hs_upd_time_window(WT_CURSOR *hs_cursor, WT_TIME_WINDOW **twp);
extern void __wt_huffman_close(WT_SESSION_IMPL *session, void *huffman_arg);
extern void __wt_json_close(WT_SESSION_IMPL *session, WT_CURSOR *cursor);
extern void __wt_log_background(WT_SESSION_IMPL *session, WT_LSN *lsn);
@@ -1820,8 +1812,12 @@ extern void __wt_verbose_worker(WT_SESSION_IMPL *session, const char *fmt, ...)
WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 2, 3))) WT_GCC_FUNC_DECL_ATTRIBUTE((cold));
extern void __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l);
extern void __wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l);
+static inline WT_BTREE *__wt_curhs_get_btree(WT_CURSOR *cursor)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline WT_CELL *__wt_cell_leaf_value_parse(WT_PAGE *page, WT_CELL *cell)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline WT_CURSOR_BTREE *__wt_curhs_get_cbt(WT_CURSOR *cursor)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline WT_IKEY *__wt_ref_key_instantiated(WT_REF *ref)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline WT_VISIBLE_TYPE __wt_txn_upd_visible_type(WT_SESSION_IMPL *session, WT_UPDATE *upd)
diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h
index f07a9bba91f..9d783cede10 100644
--- a/src/third_party/wiredtiger/src/include/session.h
+++ b/src/third_party/wiredtiger/src/include/session.h
@@ -92,7 +92,7 @@ struct __wt_session_impl {
WT_COMPACT_STATE *compact; /* Compaction information */
enum { WT_COMPACT_NONE = 0, WT_COMPACT_RUNNING, WT_COMPACT_SUCCESS } compact_state;
- WT_CURSOR *hs_cursor; /* History store table cursor */
+ u_int hs_cursor_counter; /* Number of open history store cursors */
WT_CURSOR *meta_cursor; /* Metadata file */
void *meta_track; /* Metadata operation tracking */
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index f7f6cb8232e..9131489d0fc 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -451,10 +451,8 @@ struct __wt_connection_stats {
int64_t cursor_modify_bytes;
int64_t cursor_modify_bytes_touch;
int64_t cursor_next;
- int64_t cursor_next_hs_tombstone_rts;
int64_t cursor_restart;
int64_t cursor_prev;
- int64_t cursor_prev_hs_tombstone_rts;
int64_t cursor_remove;
int64_t cursor_remove_bytes;
int64_t cursor_reserve;
diff --git a/src/third_party/wiredtiger/src/include/txn_inline.h b/src/third_party/wiredtiger/src/include/txn_inline.h
index b2365b1b2ac..6c89b2024bf 100644
--- a/src/third_party/wiredtiger/src/include/txn_inline.h
+++ b/src/third_party/wiredtiger/src/include/txn_inline.h
@@ -1044,8 +1044,8 @@ retry:
/* If there's no visible update in the update chain or ondisk, check the history store file. */
if (F_ISSET(S2C(session), WT_CONN_HS_OPEN) && !F_ISSET(session->dhandle, WT_DHANDLE_HS)) {
__wt_timing_stress(session, WT_TIMING_STRESS_HS_SEARCH);
- WT_RET(__wt_hs_find_upd(session, key, cbt->iface.value_format, recno, cbt->upd_value, false,
- &cbt->upd_value->buf));
+ WT_RET(__wt_hs_find_upd(session, S2BT(session)->id, key, cbt->iface.value_format, recno,
+ cbt->upd_value, &cbt->upd_value->buf));
}
/*
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index d0584f49dc1..7878645d75e 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -5338,842 +5338,832 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_CURSOR_MODIFY_BYTES_TOUCH 1136
/*! cursor: cursor next calls */
#define WT_STAT_CONN_CURSOR_NEXT 1137
-/*!
- * cursor: cursor next calls that skip due to a globally visible history
- * store tombstone in rollback to stable
- */
-#define WT_STAT_CONN_CURSOR_NEXT_HS_TOMBSTONE_RTS 1138
/*! cursor: cursor operation restarted */
-#define WT_STAT_CONN_CURSOR_RESTART 1139
+#define WT_STAT_CONN_CURSOR_RESTART 1138
/*! cursor: cursor prev calls */
-#define WT_STAT_CONN_CURSOR_PREV 1140
-/*!
- * cursor: cursor prev calls that skip due to a globally visible history
- * store tombstone in rollback to stable
- */
-#define WT_STAT_CONN_CURSOR_PREV_HS_TOMBSTONE_RTS 1141
+#define WT_STAT_CONN_CURSOR_PREV 1139
/*! cursor: cursor remove calls */
-#define WT_STAT_CONN_CURSOR_REMOVE 1142
+#define WT_STAT_CONN_CURSOR_REMOVE 1140
/*! cursor: cursor remove key bytes removed */
-#define WT_STAT_CONN_CURSOR_REMOVE_BYTES 1143
+#define WT_STAT_CONN_CURSOR_REMOVE_BYTES 1141
/*! cursor: cursor reserve calls */
-#define WT_STAT_CONN_CURSOR_RESERVE 1144
+#define WT_STAT_CONN_CURSOR_RESERVE 1142
/*! cursor: cursor reset calls */
-#define WT_STAT_CONN_CURSOR_RESET 1145
+#define WT_STAT_CONN_CURSOR_RESET 1143
/*! cursor: cursor search calls */
-#define WT_STAT_CONN_CURSOR_SEARCH 1146
+#define WT_STAT_CONN_CURSOR_SEARCH 1144
/*! cursor: cursor search history store calls */
-#define WT_STAT_CONN_CURSOR_SEARCH_HS 1147
+#define WT_STAT_CONN_CURSOR_SEARCH_HS 1145
/*! cursor: cursor search near calls */
-#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1148
+#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1146
/*! cursor: cursor sweep buckets */
-#define WT_STAT_CONN_CURSOR_SWEEP_BUCKETS 1149
+#define WT_STAT_CONN_CURSOR_SWEEP_BUCKETS 1147
/*! cursor: cursor sweep cursors closed */
-#define WT_STAT_CONN_CURSOR_SWEEP_CLOSED 1150
+#define WT_STAT_CONN_CURSOR_SWEEP_CLOSED 1148
/*! cursor: cursor sweep cursors examined */
-#define WT_STAT_CONN_CURSOR_SWEEP_EXAMINED 1151
+#define WT_STAT_CONN_CURSOR_SWEEP_EXAMINED 1149
/*! cursor: cursor sweeps */
-#define WT_STAT_CONN_CURSOR_SWEEP 1152
+#define WT_STAT_CONN_CURSOR_SWEEP 1150
/*! cursor: cursor truncate calls */
-#define WT_STAT_CONN_CURSOR_TRUNCATE 1153
+#define WT_STAT_CONN_CURSOR_TRUNCATE 1151
/*! cursor: cursor update calls */
-#define WT_STAT_CONN_CURSOR_UPDATE 1154
+#define WT_STAT_CONN_CURSOR_UPDATE 1152
/*! cursor: cursor update key and value bytes */
-#define WT_STAT_CONN_CURSOR_UPDATE_BYTES 1155
+#define WT_STAT_CONN_CURSOR_UPDATE_BYTES 1153
/*! cursor: cursor update value size change */
-#define WT_STAT_CONN_CURSOR_UPDATE_BYTES_CHANGED 1156
+#define WT_STAT_CONN_CURSOR_UPDATE_BYTES_CHANGED 1154
/*! cursor: cursors reused from cache */
-#define WT_STAT_CONN_CURSOR_REOPEN 1157
+#define WT_STAT_CONN_CURSOR_REOPEN 1155
/*! data-handle: connection data handle size */
-#define WT_STAT_CONN_DH_CONN_HANDLE_SIZE 1158
+#define WT_STAT_CONN_DH_CONN_HANDLE_SIZE 1156
/*! data-handle: connection data handles currently active */
-#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1159
+#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1157
/*! data-handle: connection sweep candidate became referenced */
-#define WT_STAT_CONN_DH_SWEEP_REF 1160
+#define WT_STAT_CONN_DH_SWEEP_REF 1158
/*! data-handle: connection sweep dhandles closed */
-#define WT_STAT_CONN_DH_SWEEP_CLOSE 1161
+#define WT_STAT_CONN_DH_SWEEP_CLOSE 1159
/*! data-handle: connection sweep dhandles removed from hash list */
-#define WT_STAT_CONN_DH_SWEEP_REMOVE 1162
+#define WT_STAT_CONN_DH_SWEEP_REMOVE 1160
/*! data-handle: connection sweep time-of-death sets */
-#define WT_STAT_CONN_DH_SWEEP_TOD 1163
+#define WT_STAT_CONN_DH_SWEEP_TOD 1161
/*! data-handle: connection sweeps */
-#define WT_STAT_CONN_DH_SWEEPS 1164
+#define WT_STAT_CONN_DH_SWEEPS 1162
/*!
* data-handle: connection sweeps skipped due to checkpoint gathering
* handles
*/
-#define WT_STAT_CONN_DH_SWEEP_SKIP_CKPT 1165
+#define WT_STAT_CONN_DH_SWEEP_SKIP_CKPT 1163
/*! data-handle: session dhandles swept */
-#define WT_STAT_CONN_DH_SESSION_HANDLES 1166
+#define WT_STAT_CONN_DH_SESSION_HANDLES 1164
/*! data-handle: session sweep attempts */
-#define WT_STAT_CONN_DH_SESSION_SWEEPS 1167
+#define WT_STAT_CONN_DH_SESSION_SWEEPS 1165
/*! lock: checkpoint lock acquisitions */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1168
+#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1166
/*! lock: checkpoint lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1169
+#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1167
/*! lock: checkpoint lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1170
+#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1168
/*! lock: dhandle lock application thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1171
+#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1169
/*! lock: dhandle lock internal thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1172
+#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1170
/*! lock: dhandle read lock acquisitions */
-#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1173
+#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1171
/*! lock: dhandle write lock acquisitions */
-#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1174
+#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1172
/*!
* lock: durable timestamp queue lock application thread time waiting
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WAIT_APPLICATION 1175
+#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WAIT_APPLICATION 1173
/*!
* lock: durable timestamp queue lock internal thread time waiting
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WAIT_INTERNAL 1176
+#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WAIT_INTERNAL 1174
/*! lock: durable timestamp queue read lock acquisitions */
-#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_READ_COUNT 1177
+#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_READ_COUNT 1175
/*! lock: durable timestamp queue write lock acquisitions */
-#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WRITE_COUNT 1178
+#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WRITE_COUNT 1176
/*! lock: metadata lock acquisitions */
-#define WT_STAT_CONN_LOCK_METADATA_COUNT 1179
+#define WT_STAT_CONN_LOCK_METADATA_COUNT 1177
/*! lock: metadata lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1180
+#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1178
/*! lock: metadata lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1181
+#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1179
/*!
* lock: read timestamp queue lock application thread time waiting
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_APPLICATION 1182
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_APPLICATION 1180
/*! lock: read timestamp queue lock internal thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_INTERNAL 1183
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_INTERNAL 1181
/*! lock: read timestamp queue read lock acquisitions */
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_READ_COUNT 1184
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_READ_COUNT 1182
/*! lock: read timestamp queue write lock acquisitions */
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WRITE_COUNT 1185
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WRITE_COUNT 1183
/*! lock: schema lock acquisitions */
-#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1186
+#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1184
/*! lock: schema lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1187
+#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1185
/*! lock: schema lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1188
+#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1186
/*!
* lock: table lock application thread time waiting for the table lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1189
+#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1187
/*!
* lock: table lock internal thread time waiting for the table lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1190
+#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1188
/*! lock: table read lock acquisitions */
-#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1191
+#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1189
/*! lock: table write lock acquisitions */
-#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1192
+#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1190
/*! lock: txn global lock application thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_APPLICATION 1193
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_APPLICATION 1191
/*! lock: txn global lock internal thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_INTERNAL 1194
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_INTERNAL 1192
/*! lock: txn global read lock acquisitions */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_READ_COUNT 1195
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_READ_COUNT 1193
/*! lock: txn global write lock acquisitions */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WRITE_COUNT 1196
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WRITE_COUNT 1194
/*! log: busy returns attempting to switch slots */
-#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1197
+#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1195
/*! log: force archive time sleeping (usecs) */
-#define WT_STAT_CONN_LOG_FORCE_ARCHIVE_SLEEP 1198
+#define WT_STAT_CONN_LOG_FORCE_ARCHIVE_SLEEP 1196
/*! log: log bytes of payload data */
-#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1199
+#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1197
/*! log: log bytes written */
-#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1200
+#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1198
/*! log: log files manually zero-filled */
-#define WT_STAT_CONN_LOG_ZERO_FILLS 1201
+#define WT_STAT_CONN_LOG_ZERO_FILLS 1199
/*! log: log flush operations */
-#define WT_STAT_CONN_LOG_FLUSH 1202
+#define WT_STAT_CONN_LOG_FLUSH 1200
/*! log: log force write operations */
-#define WT_STAT_CONN_LOG_FORCE_WRITE 1203
+#define WT_STAT_CONN_LOG_FORCE_WRITE 1201
/*! log: log force write operations skipped */
-#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1204
+#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1202
/*! log: log records compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1205
+#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1203
/*! log: log records not compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1206
+#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1204
/*! log: log records too small to compress */
-#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1207
+#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1205
/*! log: log release advances write LSN */
-#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1208
+#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1206
/*! log: log scan operations */
-#define WT_STAT_CONN_LOG_SCANS 1209
+#define WT_STAT_CONN_LOG_SCANS 1207
/*! log: log scan records requiring two reads */
-#define WT_STAT_CONN_LOG_SCAN_REREADS 1210
+#define WT_STAT_CONN_LOG_SCAN_REREADS 1208
/*! log: log server thread advances write LSN */
-#define WT_STAT_CONN_LOG_WRITE_LSN 1211
+#define WT_STAT_CONN_LOG_WRITE_LSN 1209
/*! log: log server thread write LSN walk skipped */
-#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1212
+#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1210
/*! log: log sync operations */
-#define WT_STAT_CONN_LOG_SYNC 1213
+#define WT_STAT_CONN_LOG_SYNC 1211
/*! log: log sync time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DURATION 1214
+#define WT_STAT_CONN_LOG_SYNC_DURATION 1212
/*! log: log sync_dir operations */
-#define WT_STAT_CONN_LOG_SYNC_DIR 1215
+#define WT_STAT_CONN_LOG_SYNC_DIR 1213
/*! log: log sync_dir time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1216
+#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1214
/*! log: log write operations */
-#define WT_STAT_CONN_LOG_WRITES 1217
+#define WT_STAT_CONN_LOG_WRITES 1215
/*! log: logging bytes consolidated */
-#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1218
+#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1216
/*! log: maximum log file size */
-#define WT_STAT_CONN_LOG_MAX_FILESIZE 1219
+#define WT_STAT_CONN_LOG_MAX_FILESIZE 1217
/*! log: number of pre-allocated log files to create */
-#define WT_STAT_CONN_LOG_PREALLOC_MAX 1220
+#define WT_STAT_CONN_LOG_PREALLOC_MAX 1218
/*! log: pre-allocated log files not ready and missed */
-#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1221
+#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1219
/*! log: pre-allocated log files prepared */
-#define WT_STAT_CONN_LOG_PREALLOC_FILES 1222
+#define WT_STAT_CONN_LOG_PREALLOC_FILES 1220
/*! log: pre-allocated log files used */
-#define WT_STAT_CONN_LOG_PREALLOC_USED 1223
+#define WT_STAT_CONN_LOG_PREALLOC_USED 1221
/*! log: records processed by log scan */
-#define WT_STAT_CONN_LOG_SCAN_RECORDS 1224
+#define WT_STAT_CONN_LOG_SCAN_RECORDS 1222
/*! log: slot close lost race */
-#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1225
+#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1223
/*! log: slot close unbuffered waits */
-#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1226
+#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1224
/*! log: slot closures */
-#define WT_STAT_CONN_LOG_SLOT_CLOSES 1227
+#define WT_STAT_CONN_LOG_SLOT_CLOSES 1225
/*! log: slot join atomic update races */
-#define WT_STAT_CONN_LOG_SLOT_RACES 1228
+#define WT_STAT_CONN_LOG_SLOT_RACES 1226
/*! log: slot join calls atomic updates raced */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1229
+#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1227
/*! log: slot join calls did not yield */
-#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1230
+#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1228
/*! log: slot join calls found active slot closed */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1231
+#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1229
/*! log: slot join calls slept */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1232
+#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1230
/*! log: slot join calls yielded */
-#define WT_STAT_CONN_LOG_SLOT_YIELD 1233
+#define WT_STAT_CONN_LOG_SLOT_YIELD 1231
/*! log: slot join found active slot closed */
-#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1234
+#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1232
/*! log: slot joins yield time (usecs) */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1235
+#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1233
/*! log: slot transitions unable to find free slot */
-#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1236
+#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1234
/*! log: slot unbuffered writes */
-#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1237
+#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1235
/*! log: total in-memory size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_MEM 1238
+#define WT_STAT_CONN_LOG_COMPRESS_MEM 1236
/*! log: total log buffer size */
-#define WT_STAT_CONN_LOG_BUFFER_SIZE 1239
+#define WT_STAT_CONN_LOG_BUFFER_SIZE 1237
/*! log: total size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_LEN 1240
+#define WT_STAT_CONN_LOG_COMPRESS_LEN 1238
/*! log: written slots coalesced */
-#define WT_STAT_CONN_LOG_SLOT_COALESCED 1241
+#define WT_STAT_CONN_LOG_SLOT_COALESCED 1239
/*! log: yields waiting for previous log file close */
-#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1242
+#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1240
/*! perf: file system read latency histogram (bucket 1) - 10-49ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT50 1243
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT50 1241
/*! perf: file system read latency histogram (bucket 2) - 50-99ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT100 1244
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT100 1242
/*! perf: file system read latency histogram (bucket 3) - 100-249ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT250 1245
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT250 1243
/*! perf: file system read latency histogram (bucket 4) - 250-499ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT500 1246
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT500 1244
/*! perf: file system read latency histogram (bucket 5) - 500-999ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT1000 1247
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT1000 1245
/*! perf: file system read latency histogram (bucket 6) - 1000ms+ */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_GT1000 1248
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_GT1000 1246
/*! perf: file system write latency histogram (bucket 1) - 10-49ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT50 1249
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT50 1247
/*! perf: file system write latency histogram (bucket 2) - 50-99ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT100 1250
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT100 1248
/*! perf: file system write latency histogram (bucket 3) - 100-249ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT250 1251
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT250 1249
/*! perf: file system write latency histogram (bucket 4) - 250-499ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT500 1252
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT500 1250
/*! perf: file system write latency histogram (bucket 5) - 500-999ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT1000 1253
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT1000 1251
/*! perf: file system write latency histogram (bucket 6) - 1000ms+ */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_GT1000 1254
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_GT1000 1252
/*! perf: operation read latency histogram (bucket 1) - 100-249us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT250 1255
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT250 1253
/*! perf: operation read latency histogram (bucket 2) - 250-499us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT500 1256
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT500 1254
/*! perf: operation read latency histogram (bucket 3) - 500-999us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT1000 1257
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT1000 1255
/*! perf: operation read latency histogram (bucket 4) - 1000-9999us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT10000 1258
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT10000 1256
/*! perf: operation read latency histogram (bucket 5) - 10000us+ */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_GT10000 1259
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_GT10000 1257
/*! perf: operation write latency histogram (bucket 1) - 100-249us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT250 1260
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT250 1258
/*! perf: operation write latency histogram (bucket 2) - 250-499us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT500 1261
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT500 1259
/*! perf: operation write latency histogram (bucket 3) - 500-999us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT1000 1262
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT1000 1260
/*! perf: operation write latency histogram (bucket 4) - 1000-9999us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT10000 1263
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT10000 1261
/*! perf: operation write latency histogram (bucket 5) - 10000us+ */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_GT10000 1264
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_GT10000 1262
/*! reconciliation: internal-page overflow keys */
-#define WT_STAT_CONN_REC_OVERFLOW_KEY_INTERNAL 1265
+#define WT_STAT_CONN_REC_OVERFLOW_KEY_INTERNAL 1263
/*! reconciliation: leaf-page overflow keys */
-#define WT_STAT_CONN_REC_OVERFLOW_KEY_LEAF 1266
+#define WT_STAT_CONN_REC_OVERFLOW_KEY_LEAF 1264
/*! reconciliation: maximum seconds spent in a reconciliation call */
-#define WT_STAT_CONN_REC_MAXIMUM_SECONDS 1267
+#define WT_STAT_CONN_REC_MAXIMUM_SECONDS 1265
/*!
* reconciliation: page reconciliation calls that resulted in values with
* prepared transaction metadata
*/
-#define WT_STAT_CONN_REC_PAGES_WITH_PREPARE 1268
+#define WT_STAT_CONN_REC_PAGES_WITH_PREPARE 1266
/*!
* reconciliation: page reconciliation calls that resulted in values with
* timestamps
*/
-#define WT_STAT_CONN_REC_PAGES_WITH_TS 1269
+#define WT_STAT_CONN_REC_PAGES_WITH_TS 1267
/*!
* reconciliation: page reconciliation calls that resulted in values with
* transaction ids
*/
-#define WT_STAT_CONN_REC_PAGES_WITH_TXN 1270
+#define WT_STAT_CONN_REC_PAGES_WITH_TXN 1268
/*! reconciliation: pages written including at least one prepare state */
-#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_PREPARED 1271
+#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_PREPARED 1269
/*! reconciliation: pages written including at least one start timestamp */
-#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_START_TS 1272
+#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_START_TS 1270
/*! reconciliation: records written including a prepare state */
-#define WT_STAT_CONN_REC_TIME_WINDOW_PREPARED 1273
+#define WT_STAT_CONN_REC_TIME_WINDOW_PREPARED 1271
/*! reconciliation: split bytes currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1274
+#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1272
/*! reconciliation: split objects currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1275
+#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1273
/*! session: open session count */
-#define WT_STAT_CONN_SESSION_OPEN 1276
+#define WT_STAT_CONN_SESSION_OPEN 1274
/*! session: session query timestamp calls */
-#define WT_STAT_CONN_SESSION_QUERY_TS 1277
+#define WT_STAT_CONN_SESSION_QUERY_TS 1275
/*! session: table alter failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1278
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1276
/*! session: table alter successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1279
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1277
/*! session: table alter unchanged and skipped */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1280
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1278
/*! session: table compact failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1281
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1279
/*! session: table compact successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1282
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1280
/*! session: table create failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1283
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1281
/*! session: table create successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1284
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1282
/*! session: table drop failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1285
+#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1283
/*! session: table drop successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1286
+#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1284
/*! session: table rename failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1287
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1285
/*! session: table rename successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1288
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1286
/*! session: table salvage failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1289
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1287
/*! session: table salvage successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1290
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1288
/*! session: table truncate failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1291
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1289
/*! session: table truncate successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1292
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1290
/*! session: table verify failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1293
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1291
/*! session: table verify successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1294
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1292
/*! thread-state: active filesystem fsync calls */
-#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1295
+#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1293
/*! thread-state: active filesystem read calls */
-#define WT_STAT_CONN_THREAD_READ_ACTIVE 1296
+#define WT_STAT_CONN_THREAD_READ_ACTIVE 1294
/*! thread-state: active filesystem write calls */
-#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1297
+#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1295
/*! thread-yield: application thread time evicting (usecs) */
-#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1298
+#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1296
/*! thread-yield: application thread time waiting for cache (usecs) */
-#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1299
+#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1297
/*!
* thread-yield: connection close blocked waiting for transaction state
* stabilization
*/
-#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1300
+#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1298
/*! thread-yield: connection close yielded for lsm manager shutdown */
-#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1301
+#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1299
/*! thread-yield: data handle lock yielded */
-#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1302
+#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1300
/*!
* thread-yield: get reference for page index and slot time sleeping
* (usecs)
*/
-#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1303
+#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1301
/*! thread-yield: log server sync yielded for log write */
-#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1304
+#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1302
/*! thread-yield: page access yielded due to prepare state change */
-#define WT_STAT_CONN_PREPARED_TRANSITION_BLOCKED_PAGE 1305
+#define WT_STAT_CONN_PREPARED_TRANSITION_BLOCKED_PAGE 1303
/*! thread-yield: page acquire busy blocked */
-#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1306
+#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1304
/*! thread-yield: page acquire eviction blocked */
-#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1307
+#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1305
/*! thread-yield: page acquire locked blocked */
-#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1308
+#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1306
/*! thread-yield: page acquire read blocked */
-#define WT_STAT_CONN_PAGE_READ_BLOCKED 1309
+#define WT_STAT_CONN_PAGE_READ_BLOCKED 1307
/*! thread-yield: page acquire time sleeping (usecs) */
-#define WT_STAT_CONN_PAGE_SLEEP 1310
+#define WT_STAT_CONN_PAGE_SLEEP 1308
/*!
* thread-yield: page delete rollback time sleeping for state change
* (usecs)
*/
-#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1311
+#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1309
/*! thread-yield: page reconciliation yielded due to child modification */
-#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1312
+#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1310
/*! transaction: Number of prepared updates */
-#define WT_STAT_CONN_TXN_PREPARED_UPDATES_COUNT 1313
+#define WT_STAT_CONN_TXN_PREPARED_UPDATES_COUNT 1311
/*! transaction: durable timestamp queue entries walked */
-#define WT_STAT_CONN_TXN_DURABLE_QUEUE_WALKED 1314
+#define WT_STAT_CONN_TXN_DURABLE_QUEUE_WALKED 1312
/*! transaction: durable timestamp queue insert to empty */
-#define WT_STAT_CONN_TXN_DURABLE_QUEUE_EMPTY 1315
+#define WT_STAT_CONN_TXN_DURABLE_QUEUE_EMPTY 1313
/*! transaction: durable timestamp queue inserts to head */
-#define WT_STAT_CONN_TXN_DURABLE_QUEUE_HEAD 1316
+#define WT_STAT_CONN_TXN_DURABLE_QUEUE_HEAD 1314
/*! transaction: durable timestamp queue inserts total */
-#define WT_STAT_CONN_TXN_DURABLE_QUEUE_INSERTS 1317
+#define WT_STAT_CONN_TXN_DURABLE_QUEUE_INSERTS 1315
/*! transaction: durable timestamp queue length */
-#define WT_STAT_CONN_TXN_DURABLE_QUEUE_LEN 1318
+#define WT_STAT_CONN_TXN_DURABLE_QUEUE_LEN 1316
/*! transaction: prepared transactions */
-#define WT_STAT_CONN_TXN_PREPARE 1319
+#define WT_STAT_CONN_TXN_PREPARE 1317
/*! transaction: prepared transactions committed */
-#define WT_STAT_CONN_TXN_PREPARE_COMMIT 1320
+#define WT_STAT_CONN_TXN_PREPARE_COMMIT 1318
/*! transaction: prepared transactions currently active */
-#define WT_STAT_CONN_TXN_PREPARE_ACTIVE 1321
+#define WT_STAT_CONN_TXN_PREPARE_ACTIVE 1319
/*! transaction: prepared transactions rolled back */
-#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK 1322
+#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK 1320
/*! transaction: query timestamp calls */
-#define WT_STAT_CONN_TXN_QUERY_TS 1323
+#define WT_STAT_CONN_TXN_QUERY_TS 1321
/*! transaction: read timestamp queue entries walked */
-#define WT_STAT_CONN_TXN_READ_QUEUE_WALKED 1324
+#define WT_STAT_CONN_TXN_READ_QUEUE_WALKED 1322
/*! transaction: read timestamp queue insert to empty */
-#define WT_STAT_CONN_TXN_READ_QUEUE_EMPTY 1325
+#define WT_STAT_CONN_TXN_READ_QUEUE_EMPTY 1323
/*! transaction: read timestamp queue inserts to head */
-#define WT_STAT_CONN_TXN_READ_QUEUE_HEAD 1326
+#define WT_STAT_CONN_TXN_READ_QUEUE_HEAD 1324
/*! transaction: read timestamp queue inserts total */
-#define WT_STAT_CONN_TXN_READ_QUEUE_INSERTS 1327
+#define WT_STAT_CONN_TXN_READ_QUEUE_INSERTS 1325
/*! transaction: read timestamp queue length */
-#define WT_STAT_CONN_TXN_READ_QUEUE_LEN 1328
+#define WT_STAT_CONN_TXN_READ_QUEUE_LEN 1326
/*! transaction: rollback to stable calls */
-#define WT_STAT_CONN_TXN_RTS 1329
+#define WT_STAT_CONN_TXN_RTS 1327
/*! transaction: rollback to stable pages visited */
-#define WT_STAT_CONN_TXN_RTS_PAGES_VISITED 1330
+#define WT_STAT_CONN_TXN_RTS_PAGES_VISITED 1328
/*! transaction: rollback to stable tree walk skipping pages */
-#define WT_STAT_CONN_TXN_RTS_TREE_WALK_SKIP_PAGES 1331
+#define WT_STAT_CONN_TXN_RTS_TREE_WALK_SKIP_PAGES 1329
/*! transaction: rollback to stable updates aborted */
-#define WT_STAT_CONN_TXN_RTS_UPD_ABORTED 1332
+#define WT_STAT_CONN_TXN_RTS_UPD_ABORTED 1330
/*! transaction: set timestamp calls */
-#define WT_STAT_CONN_TXN_SET_TS 1333
+#define WT_STAT_CONN_TXN_SET_TS 1331
/*! transaction: set timestamp durable calls */
-#define WT_STAT_CONN_TXN_SET_TS_DURABLE 1334
+#define WT_STAT_CONN_TXN_SET_TS_DURABLE 1332
/*! transaction: set timestamp durable updates */
-#define WT_STAT_CONN_TXN_SET_TS_DURABLE_UPD 1335
+#define WT_STAT_CONN_TXN_SET_TS_DURABLE_UPD 1333
/*! transaction: set timestamp oldest calls */
-#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1336
+#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1334
/*! transaction: set timestamp oldest updates */
-#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1337
+#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1335
/*! transaction: set timestamp stable calls */
-#define WT_STAT_CONN_TXN_SET_TS_STABLE 1338
+#define WT_STAT_CONN_TXN_SET_TS_STABLE 1336
/*! transaction: set timestamp stable updates */
-#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1339
+#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1337
/*! transaction: transaction begins */
-#define WT_STAT_CONN_TXN_BEGIN 1340
+#define WT_STAT_CONN_TXN_BEGIN 1338
/*! transaction: transaction checkpoint currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1341
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1339
/*! transaction: transaction checkpoint generation */
-#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1342
+#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1340
/*!
* transaction: transaction checkpoint history store file duration
* (usecs)
*/
-#define WT_STAT_CONN_TXN_HS_CKPT_DURATION 1343
+#define WT_STAT_CONN_TXN_HS_CKPT_DURATION 1341
/*! transaction: transaction checkpoint max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1344
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1342
/*! transaction: transaction checkpoint min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1345
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1343
/*!
* transaction: transaction checkpoint most recent duration for gathering
* all handles (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION 1346
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION 1344
/*!
* transaction: transaction checkpoint most recent duration for gathering
* applied handles (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_APPLY 1347
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_APPLY 1345
/*!
* transaction: transaction checkpoint most recent duration for gathering
* skipped handles (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_SKIP 1348
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_SKIP 1346
/*! transaction: transaction checkpoint most recent handles applied */
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_APPLIED 1349
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_APPLIED 1347
/*! transaction: transaction checkpoint most recent handles skipped */
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_SKIPPED 1350
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_SKIPPED 1348
/*! transaction: transaction checkpoint most recent handles walked */
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_WALKED 1351
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_WALKED 1349
/*! transaction: transaction checkpoint most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1352
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1350
/*! transaction: transaction checkpoint prepare currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RUNNING 1353
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RUNNING 1351
/*! transaction: transaction checkpoint prepare max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MAX 1354
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MAX 1352
/*! transaction: transaction checkpoint prepare min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MIN 1355
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MIN 1353
/*! transaction: transaction checkpoint prepare most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RECENT 1356
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RECENT 1354
/*! transaction: transaction checkpoint prepare total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_TOTAL 1357
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_TOTAL 1355
/*! transaction: transaction checkpoint scrub dirty target */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1358
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1356
/*! transaction: transaction checkpoint scrub time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1359
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1357
/*! transaction: transaction checkpoint total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1360
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1358
/*! transaction: transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1361
+#define WT_STAT_CONN_TXN_CHECKPOINT 1359
/*!
* transaction: transaction checkpoints skipped because database was
* clean
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1362
+#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1360
/*! transaction: transaction failures due to history store */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1363
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1361
/*!
* transaction: transaction fsync calls for checkpoint after allocating
* the transaction ID
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1364
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1362
/*!
* transaction: transaction fsync duration for checkpoint after
* allocating the transaction ID (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1365
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1363
/*! transaction: transaction range of IDs currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_RANGE 1366
+#define WT_STAT_CONN_TXN_PINNED_RANGE 1364
/*! transaction: transaction range of IDs currently pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1367
+#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1365
/*! transaction: transaction range of timestamps currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1368
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1366
/*! transaction: transaction range of timestamps pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1369
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1367
/*!
* transaction: transaction range of timestamps pinned by the oldest
* active read timestamp
*/
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_READER 1370
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_READER 1368
/*!
* transaction: transaction range of timestamps pinned by the oldest
* timestamp
*/
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1371
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1369
/*! transaction: transaction read timestamp of the oldest active reader */
-#define WT_STAT_CONN_TXN_TIMESTAMP_OLDEST_ACTIVE_READ 1372
+#define WT_STAT_CONN_TXN_TIMESTAMP_OLDEST_ACTIVE_READ 1370
/*! transaction: transaction sync calls */
-#define WT_STAT_CONN_TXN_SYNC 1373
+#define WT_STAT_CONN_TXN_SYNC 1371
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1374
+#define WT_STAT_CONN_TXN_COMMIT 1372
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1375
+#define WT_STAT_CONN_TXN_ROLLBACK 1373
/*! LSM: sleep for LSM checkpoint throttle */
-#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1376
+#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1374
/*! LSM: sleep for LSM merge throttle */
-#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1377
+#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1375
/*! cache: bytes currently in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_INUSE 1378
+#define WT_STAT_CONN_CACHE_BYTES_INUSE 1376
/*! cache: bytes dirty in the cache cumulative */
-#define WT_STAT_CONN_CACHE_BYTES_DIRTY_TOTAL 1379
+#define WT_STAT_CONN_CACHE_BYTES_DIRTY_TOTAL 1377
/*! cache: bytes read into cache */
-#define WT_STAT_CONN_CACHE_BYTES_READ 1380
+#define WT_STAT_CONN_CACHE_BYTES_READ 1378
/*! cache: bytes written from cache */
-#define WT_STAT_CONN_CACHE_BYTES_WRITE 1381
+#define WT_STAT_CONN_CACHE_BYTES_WRITE 1379
/*! cache: checkpoint blocked page eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 1382
+#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 1380
/*! cache: eviction walk target pages histogram - 0-9 */
-#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT10 1383
+#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT10 1381
/*! cache: eviction walk target pages histogram - 10-31 */
-#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT32 1384
+#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT32 1382
/*! cache: eviction walk target pages histogram - 128 and higher */
-#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_GE128 1385
+#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_GE128 1383
/*! cache: eviction walk target pages histogram - 32-63 */
-#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT64 1386
+#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT64 1384
/*! cache: eviction walk target pages histogram - 64-128 */
-#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT128 1387
+#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT128 1385
/*!
* cache: eviction walk target pages reduced due to history store cache
* pressure
*/
-#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_REDUCED 1388
+#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_REDUCED 1386
/*! cache: eviction walks abandoned */
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ABANDONED 1389
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ABANDONED 1387
/*! cache: eviction walks gave up because they restarted their walk twice */
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STOPPED 1390
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STOPPED 1388
/*!
* cache: eviction walks gave up because they saw too many pages and
* found no candidates
*/
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_GAVE_UP_NO_TARGETS 1391
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_GAVE_UP_NO_TARGETS 1389
/*!
* cache: eviction walks gave up because they saw too many pages and
* found too few candidates
*/
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_GAVE_UP_RATIO 1392
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_GAVE_UP_RATIO 1390
/*! cache: eviction walks reached end of tree */
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ENDED 1393
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ENDED 1391
/*! cache: eviction walks restarted */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK_RESTART 1394
+#define WT_STAT_CONN_CACHE_EVICTION_WALK_RESTART 1392
/*! cache: eviction walks started from root of tree */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK_FROM_ROOT 1395
+#define WT_STAT_CONN_CACHE_EVICTION_WALK_FROM_ROOT 1393
/*! cache: eviction walks started from saved location in tree */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK_SAVED_POS 1396
+#define WT_STAT_CONN_CACHE_EVICTION_WALK_SAVED_POS 1394
/*! cache: hazard pointer blocked page eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1397
+#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1395
/*! cache: history store table insert calls */
-#define WT_STAT_CONN_CACHE_HS_INSERT 1398
+#define WT_STAT_CONN_CACHE_HS_INSERT 1396
/*! cache: history store table insert calls that returned restart */
-#define WT_STAT_CONN_CACHE_HS_INSERT_RESTART 1399
+#define WT_STAT_CONN_CACHE_HS_INSERT_RESTART 1397
/*!
* cache: history store table out-of-order resolved updates that lose
* their durable timestamp
*/
-#define WT_STAT_CONN_CACHE_HS_ORDER_LOSE_DURABLE_TIMESTAMP 1400
+#define WT_STAT_CONN_CACHE_HS_ORDER_LOSE_DURABLE_TIMESTAMP 1398
/*!
* cache: history store table out-of-order updates that were fixed up by
* moving existing records
*/
-#define WT_STAT_CONN_CACHE_HS_ORDER_FIXUP_MOVE 1401
+#define WT_STAT_CONN_CACHE_HS_ORDER_FIXUP_MOVE 1399
/*!
* cache: history store table out-of-order updates that were fixed up
* during insertion
*/
-#define WT_STAT_CONN_CACHE_HS_ORDER_FIXUP_INSERT 1402
+#define WT_STAT_CONN_CACHE_HS_ORDER_FIXUP_INSERT 1400
/*! cache: history store table reads */
-#define WT_STAT_CONN_CACHE_HS_READ 1403
+#define WT_STAT_CONN_CACHE_HS_READ 1401
/*! cache: history store table reads missed */
-#define WT_STAT_CONN_CACHE_HS_READ_MISS 1404
+#define WT_STAT_CONN_CACHE_HS_READ_MISS 1402
/*! cache: history store table reads requiring squashed modifies */
-#define WT_STAT_CONN_CACHE_HS_READ_SQUASH 1405
+#define WT_STAT_CONN_CACHE_HS_READ_SQUASH 1403
/*!
* cache: history store table truncation by rollback to stable to remove
* an unstable update
*/
-#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_RTS_UNSTABLE 1406
+#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_RTS_UNSTABLE 1404
/*!
* cache: history store table truncation by rollback to stable to remove
* an update
*/
-#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_RTS 1407
+#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_RTS 1405
/*! cache: history store table truncation to remove an update */
-#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE 1408
+#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE 1406
/*!
* cache: history store table truncation to remove range of updates due
* to key being removed from the data page during reconciliation
*/
-#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_ONPAGE_REMOVAL 1409
+#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_ONPAGE_REMOVAL 1407
/*!
* cache: history store table truncation to remove range of updates due
* to non timestamped update on data page
*/
-#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_NON_TS 1410
+#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_NON_TS 1408
/*! cache: history store table writes requiring squashed modifies */
-#define WT_STAT_CONN_CACHE_HS_WRITE_SQUASH 1411
+#define WT_STAT_CONN_CACHE_HS_WRITE_SQUASH 1409
/*! cache: in-memory page passed criteria to be split */
-#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1412
+#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1410
/*! cache: in-memory page splits */
-#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1413
+#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1411
/*! cache: internal pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1414
+#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1412
/*! cache: internal pages split during eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1415
+#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1413
/*! cache: leaf pages split during eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1416
+#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1414
/*! cache: modified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1417
+#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1415
/*! cache: overflow pages read into cache */
-#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1418
+#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1416
/*! cache: page split during eviction deepened the tree */
-#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1419
+#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1417
/*! cache: page written requiring history store records */
-#define WT_STAT_CONN_CACHE_WRITE_HS 1420
+#define WT_STAT_CONN_CACHE_WRITE_HS 1418
/*! cache: pages read into cache */
-#define WT_STAT_CONN_CACHE_READ 1421
+#define WT_STAT_CONN_CACHE_READ 1419
/*! cache: pages read into cache after truncate */
-#define WT_STAT_CONN_CACHE_READ_DELETED 1422
+#define WT_STAT_CONN_CACHE_READ_DELETED 1420
/*! cache: pages read into cache after truncate in prepare state */
-#define WT_STAT_CONN_CACHE_READ_DELETED_PREPARED 1423
+#define WT_STAT_CONN_CACHE_READ_DELETED_PREPARED 1421
/*! cache: pages requested from the cache */
-#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1424
+#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1422
/*! cache: pages seen by eviction walk */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1425
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1423
/*! cache: pages written from cache */
-#define WT_STAT_CONN_CACHE_WRITE 1426
+#define WT_STAT_CONN_CACHE_WRITE 1424
/*! cache: pages written requiring in-memory restoration */
-#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1427
+#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1425
/*! cache: tracked dirty bytes in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1428
+#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1426
/*! cache: unmodified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1429
+#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1427
/*! checkpoint-cleanup: pages added for eviction */
-#define WT_STAT_CONN_CC_PAGES_EVICT 1430
+#define WT_STAT_CONN_CC_PAGES_EVICT 1428
/*! checkpoint-cleanup: pages removed */
-#define WT_STAT_CONN_CC_PAGES_REMOVED 1431
+#define WT_STAT_CONN_CC_PAGES_REMOVED 1429
/*! checkpoint-cleanup: pages skipped during tree walk */
-#define WT_STAT_CONN_CC_PAGES_WALK_SKIPPED 1432
+#define WT_STAT_CONN_CC_PAGES_WALK_SKIPPED 1430
/*! checkpoint-cleanup: pages visited */
-#define WT_STAT_CONN_CC_PAGES_VISITED 1433
+#define WT_STAT_CONN_CC_PAGES_VISITED 1431
/*! cursor: Total number of entries skipped by cursor next calls */
-#define WT_STAT_CONN_CURSOR_NEXT_SKIP_TOTAL 1434
+#define WT_STAT_CONN_CURSOR_NEXT_SKIP_TOTAL 1432
/*! cursor: Total number of entries skipped by cursor prev calls */
-#define WT_STAT_CONN_CURSOR_PREV_SKIP_TOTAL 1435
+#define WT_STAT_CONN_CURSOR_PREV_SKIP_TOTAL 1433
/*!
* cursor: Total number of entries skipped to position the history store
* cursor
*/
-#define WT_STAT_CONN_CURSOR_SKIP_HS_CUR_POSITION 1436
+#define WT_STAT_CONN_CURSOR_SKIP_HS_CUR_POSITION 1434
/*!
* cursor: cursor next calls that skip due to a globally visible history
* store tombstone
*/
-#define WT_STAT_CONN_CURSOR_NEXT_HS_TOMBSTONE 1437
+#define WT_STAT_CONN_CURSOR_NEXT_HS_TOMBSTONE 1435
/*!
* cursor: cursor next calls that skip greater than or equal to 100
* entries
*/
-#define WT_STAT_CONN_CURSOR_NEXT_SKIP_GE_100 1438
+#define WT_STAT_CONN_CURSOR_NEXT_SKIP_GE_100 1436
/*! cursor: cursor next calls that skip less than 100 entries */
-#define WT_STAT_CONN_CURSOR_NEXT_SKIP_LT_100 1439
+#define WT_STAT_CONN_CURSOR_NEXT_SKIP_LT_100 1437
/*!
* cursor: cursor prev calls that skip due to a globally visible history
* store tombstone
*/
-#define WT_STAT_CONN_CURSOR_PREV_HS_TOMBSTONE 1440
+#define WT_STAT_CONN_CURSOR_PREV_HS_TOMBSTONE 1438
/*!
* cursor: cursor prev calls that skip greater than or equal to 100
* entries
*/
-#define WT_STAT_CONN_CURSOR_PREV_SKIP_GE_100 1441
+#define WT_STAT_CONN_CURSOR_PREV_SKIP_GE_100 1439
/*! cursor: cursor prev calls that skip less than 100 entries */
-#define WT_STAT_CONN_CURSOR_PREV_SKIP_LT_100 1442
+#define WT_STAT_CONN_CURSOR_PREV_SKIP_LT_100 1440
/*! cursor: open cursor count */
-#define WT_STAT_CONN_CURSOR_OPEN_COUNT 1443
+#define WT_STAT_CONN_CURSOR_OPEN_COUNT 1441
/*! reconciliation: approximate byte size of timestamps in pages written */
-#define WT_STAT_CONN_REC_TIME_WINDOW_BYTES_TS 1444
+#define WT_STAT_CONN_REC_TIME_WINDOW_BYTES_TS 1442
/*!
* reconciliation: approximate byte size of transaction IDs in pages
* written
*/
-#define WT_STAT_CONN_REC_TIME_WINDOW_BYTES_TXN 1445
+#define WT_STAT_CONN_REC_TIME_WINDOW_BYTES_TXN 1443
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1446
+#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1444
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_CONN_REC_PAGES 1447
+#define WT_STAT_CONN_REC_PAGES 1445
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_CONN_REC_PAGES_EVICTION 1448
+#define WT_STAT_CONN_REC_PAGES_EVICTION 1446
/*! reconciliation: pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE 1449
+#define WT_STAT_CONN_REC_PAGE_DELETE 1447
/*!
* reconciliation: pages written including an aggregated newest start
* durable timestamp
*/
-#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_START_DURABLE_TS 1450
+#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_START_DURABLE_TS 1448
/*!
* reconciliation: pages written including an aggregated newest stop
* durable timestamp
*/
-#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_DURABLE_TS 1451
+#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_DURABLE_TS 1449
/*!
* reconciliation: pages written including an aggregated newest stop
* timestamp
*/
-#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_TS 1452
+#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_TS 1450
/*!
* reconciliation: pages written including an aggregated newest stop
* transaction ID
*/
-#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_TXN 1453
+#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_TXN 1451
/*!
* reconciliation: pages written including an aggregated newest
* transaction ID
*/
-#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_TXN 1454
+#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_TXN 1452
/*!
* reconciliation: pages written including an aggregated oldest start
* timestamp
*/
-#define WT_STAT_CONN_REC_TIME_AGGR_OLDEST_START_TS 1455
+#define WT_STAT_CONN_REC_TIME_AGGR_OLDEST_START_TS 1453
/*! reconciliation: pages written including an aggregated prepare */
-#define WT_STAT_CONN_REC_TIME_AGGR_PREPARED 1456
+#define WT_STAT_CONN_REC_TIME_AGGR_PREPARED 1454
/*!
* reconciliation: pages written including at least one start durable
* timestamp
*/
-#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_DURABLE_START_TS 1457
+#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_DURABLE_START_TS 1455
/*!
* reconciliation: pages written including at least one start transaction
* ID
*/
-#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_START_TXN 1458
+#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_START_TXN 1456
/*!
* reconciliation: pages written including at least one stop durable
* timestamp
*/
-#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_DURABLE_STOP_TS 1459
+#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_DURABLE_STOP_TS 1457
/*! reconciliation: pages written including at least one stop timestamp */
-#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_STOP_TS 1460
+#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_STOP_TS 1458
/*!
* reconciliation: pages written including at least one stop transaction
* ID
*/
-#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_STOP_TXN 1461
+#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_STOP_TXN 1459
/*! reconciliation: records written including a start durable timestamp */
-#define WT_STAT_CONN_REC_TIME_WINDOW_DURABLE_START_TS 1462
+#define WT_STAT_CONN_REC_TIME_WINDOW_DURABLE_START_TS 1460
/*! reconciliation: records written including a start timestamp */
-#define WT_STAT_CONN_REC_TIME_WINDOW_START_TS 1463
+#define WT_STAT_CONN_REC_TIME_WINDOW_START_TS 1461
/*! reconciliation: records written including a start transaction ID */
-#define WT_STAT_CONN_REC_TIME_WINDOW_START_TXN 1464
+#define WT_STAT_CONN_REC_TIME_WINDOW_START_TXN 1462
/*! reconciliation: records written including a stop durable timestamp */
-#define WT_STAT_CONN_REC_TIME_WINDOW_DURABLE_STOP_TS 1465
+#define WT_STAT_CONN_REC_TIME_WINDOW_DURABLE_STOP_TS 1463
/*! reconciliation: records written including a stop timestamp */
-#define WT_STAT_CONN_REC_TIME_WINDOW_STOP_TS 1466
+#define WT_STAT_CONN_REC_TIME_WINDOW_STOP_TS 1464
/*! reconciliation: records written including a stop transaction ID */
-#define WT_STAT_CONN_REC_TIME_WINDOW_STOP_TXN 1467
+#define WT_STAT_CONN_REC_TIME_WINDOW_STOP_TXN 1465
/*! session: flush_tier operation calls */
-#define WT_STAT_CONN_FLUSH_TIER 1468
+#define WT_STAT_CONN_FLUSH_TIER 1466
/*! session: tiered storage local retention time (secs) */
-#define WT_STAT_CONN_TIERED_RETENTION 1469
+#define WT_STAT_CONN_TIERED_RETENTION 1467
/*! transaction: race to read prepared update retry */
-#define WT_STAT_CONN_TXN_READ_RACE_PREPARE_UPDATE 1470
+#define WT_STAT_CONN_TXN_READ_RACE_PREPARE_UPDATE 1468
/*!
- * transaction: rollback to stable hs records with stop timestamps older
- * than newer records
+ * transaction: rollback to stable history store records with stop
+ * timestamps older than newer records
*/
-#define WT_STAT_CONN_TXN_RTS_HS_STOP_OLDER_THAN_NEWER_START 1471
+#define WT_STAT_CONN_TXN_RTS_HS_STOP_OLDER_THAN_NEWER_START 1469
/*! transaction: rollback to stable inconsistent checkpoint */
-#define WT_STAT_CONN_TXN_RTS_INCONSISTENT_CKPT 1472
+#define WT_STAT_CONN_TXN_RTS_INCONSISTENT_CKPT 1470
/*! transaction: rollback to stable keys removed */
-#define WT_STAT_CONN_TXN_RTS_KEYS_REMOVED 1473
+#define WT_STAT_CONN_TXN_RTS_KEYS_REMOVED 1471
/*! transaction: rollback to stable keys restored */
-#define WT_STAT_CONN_TXN_RTS_KEYS_RESTORED 1474
+#define WT_STAT_CONN_TXN_RTS_KEYS_RESTORED 1472
/*! transaction: rollback to stable restored tombstones from history store */
-#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_TOMBSTONES 1475
+#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_TOMBSTONES 1473
/*! transaction: rollback to stable restored updates from history store */
-#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_UPDATES 1476
+#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_UPDATES 1474
/*! transaction: rollback to stable sweeping history store keys */
-#define WT_STAT_CONN_TXN_RTS_SWEEP_HS_KEYS 1477
+#define WT_STAT_CONN_TXN_RTS_SWEEP_HS_KEYS 1475
/*! transaction: rollback to stable updates removed from history store */
-#define WT_STAT_CONN_TXN_RTS_HS_REMOVED 1478
+#define WT_STAT_CONN_TXN_RTS_HS_REMOVED 1476
/*! transaction: transaction checkpoints due to obsolete pages */
-#define WT_STAT_CONN_TXN_CHECKPOINT_OBSOLETE_APPLIED 1479
+#define WT_STAT_CONN_TXN_CHECKPOINT_OBSOLETE_APPLIED 1477
/*! transaction: update conflicts */
-#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1480
+#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1478
/*!
* @}
@@ -6771,8 +6761,8 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
/*! transaction: race to read prepared update retry */
#define WT_STAT_DSRC_TXN_READ_RACE_PREPARE_UPDATE 2200
/*!
- * transaction: rollback to stable hs records with stop timestamps older
- * than newer records
+ * transaction: rollback to stable history store records with stop
+ * timestamps older than newer records
*/
#define WT_STAT_DSRC_TXN_RTS_HS_STOP_OLDER_THAN_NEWER_START 2201
/*! transaction: rollback to stable inconsistent checkpoint */
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_row.c b/src/third_party/wiredtiger/src/reconcile/rec_row.c
index 26f360ca4c0..db4bb56c976 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_row.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_row.c
@@ -703,6 +703,7 @@ __wt_rec_row_leaf(
WT_BTREE *btree;
WT_CELL *cell;
WT_CELL_UNPACK_KV *kpack, _kpack, *vpack, _vpack;
+ WT_CURSOR *hs_cursor;
WT_CURSOR_BTREE *cbt;
WT_DECL_ITEM(tmpkey);
WT_DECL_RET;
@@ -720,6 +721,7 @@ __wt_rec_row_leaf(
void *copy;
btree = S2BT(session);
+ hs_cursor = NULL;
page = pageref->page;
slvg_skip = salvage == NULL ? 0 : salvage->skip;
WT_TIME_WINDOW_INIT(&tw);
@@ -914,11 +916,19 @@ __wt_rec_row_leaf(
* ever need to blow away history store content, so we can skip this.
*/
if (!F_ISSET(session, WT_SESSION_NO_DATA_HANDLES)) {
- WT_ERR(__wt_hs_cursor_open(session));
+ /*
+ * FIXME-WT-7053: we will hit the dhandle deadlock if we open multiple
+ * history store cursors in reconciliation. Once it is fixed, we can move
+ * the open and close of the history store cursor inside the delete key
+ * function.
+ */
+ WT_ERR(__wt_curhs_open(session, NULL, &hs_cursor));
WT_ERR(__wt_hs_delete_key_from_ts(
- session, btree->id, tmpkey, WT_TS_NONE, false));
- WT_ERR(__wt_hs_cursor_close(session));
- WT_STAT_CONN_DATA_INCR(session, cache_hs_key_truncate_onpage_removal);
+ session, hs_cursor, btree->id, tmpkey, WT_TS_NONE, false));
+ WT_ERR(hs_cursor->close(hs_cursor));
+ hs_cursor = NULL;
+ WT_STAT_CONN_INCR(session, cache_hs_key_truncate_onpage_removal);
+ WT_STAT_DATA_INCR(session, cache_hs_key_truncate_onpage_removal);
}
}
@@ -1034,6 +1044,8 @@ leaf_insert:
ret = __wt_rec_split_finish(session, r);
err:
+ if (hs_cursor != NULL)
+ WT_TRET(hs_cursor->close(hs_cursor));
__wt_scr_free(session, &tmpkey);
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index 6eec392127e..d0511459385 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -2289,8 +2289,6 @@ __rec_hs_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r)
if (i == r->multi_next)
return (0);
- WT_RET(__wt_hs_cursor_open(session));
-
for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i)
if (multi->supd != NULL) {
WT_ERR(__wt_hs_insert_updates(session, r->page, multi));
@@ -2302,7 +2300,6 @@ __rec_hs_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r)
}
err:
- WT_TRET(__wt_hs_cursor_close(session));
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c
index cec727d8ecd..aec5e3c0f75 100644
--- a/src/third_party/wiredtiger/src/session/session_api.c
+++ b/src/third_party/wiredtiger/src/session/session_api.c
@@ -542,6 +542,9 @@ __wt_open_cursor(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, co
{
WT_DECL_RET;
+ /* We should not open other cursors when there are open history store cursors in the session. */
+ WT_ASSERT(session, strcmp(uri, WT_HS_URI) == 0 || session->hs_cursor_counter == 0);
+
/* We do not cache any subordinate tables/files cursors. */
if (owner == NULL) {
if ((ret = __wt_cursor_cache_get(session, uri, NULL, cfg, cursorp)) == 0)
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index 47605ab42f8..6723f064f17 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -208,7 +208,8 @@ static const char *const __stats_dsrc_desc[] = {
"session: flush_tier operation calls",
"session: tiered storage local retention time (secs)",
"transaction: race to read prepared update retry",
- "transaction: rollback to stable hs records with stop timestamps older than newer records",
+ "transaction: rollback to stable history store records with stop timestamps older than newer "
+ "records",
"transaction: rollback to stable inconsistent checkpoint",
"transaction: rollback to stable keys removed",
"transaction: rollback to stable keys restored",
@@ -1098,12 +1099,8 @@ static const char *const __stats_connection_desc[] = {
"cursor: cursor modify key and value bytes affected",
"cursor: cursor modify value bytes modified",
"cursor: cursor next calls",
- "cursor: cursor next calls that skip due to a globally visible history store tombstone in "
- "rollback to stable",
"cursor: cursor operation restarted",
"cursor: cursor prev calls",
- "cursor: cursor prev calls that skip due to a globally visible history store tombstone in "
- "rollback to stable",
"cursor: cursor remove calls",
"cursor: cursor remove key bytes removed",
"cursor: cursor reserve calls",
@@ -1437,7 +1434,8 @@ static const char *const __stats_connection_desc[] = {
"session: flush_tier operation calls",
"session: tiered storage local retention time (secs)",
"transaction: race to read prepared update retry",
- "transaction: rollback to stable hs records with stop timestamps older than newer records",
+ "transaction: rollback to stable history store records with stop timestamps older than newer "
+ "records",
"transaction: rollback to stable inconsistent checkpoint",
"transaction: rollback to stable keys removed",
"transaction: rollback to stable keys restored",
@@ -1625,10 +1623,8 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->cursor_modify_bytes = 0;
stats->cursor_modify_bytes_touch = 0;
stats->cursor_next = 0;
- stats->cursor_next_hs_tombstone_rts = 0;
stats->cursor_restart = 0;
stats->cursor_prev = 0;
- stats->cursor_prev_hs_tombstone_rts = 0;
stats->cursor_remove = 0;
stats->cursor_remove_bytes = 0;
stats->cursor_reserve = 0;
@@ -2139,10 +2135,8 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *
to->cursor_modify_bytes += WT_STAT_READ(from, cursor_modify_bytes);
to->cursor_modify_bytes_touch += WT_STAT_READ(from, cursor_modify_bytes_touch);
to->cursor_next += WT_STAT_READ(from, cursor_next);
- to->cursor_next_hs_tombstone_rts += WT_STAT_READ(from, cursor_next_hs_tombstone_rts);
to->cursor_restart += WT_STAT_READ(from, cursor_restart);
to->cursor_prev += WT_STAT_READ(from, cursor_prev);
- to->cursor_prev_hs_tombstone_rts += WT_STAT_READ(from, cursor_prev_hs_tombstone_rts);
to->cursor_remove += WT_STAT_READ(from, cursor_remove);
to->cursor_remove_bytes += WT_STAT_READ(from, cursor_remove_bytes);
to->cursor_reserve += WT_STAT_READ(from, cursor_reserve);
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index 0e087eb6029..bf85cf61443 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -721,76 +721,27 @@ __wt_txn_release(WT_SESSION_IMPL *session)
* Append the update older than the prepared update to the update chain
*/
static int
-__txn_append_hs_record(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, WT_ITEM *key, WT_PAGE *page,
+__txn_append_hs_record(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, WT_PAGE *page,
WT_UPDATE *chain, bool commit, WT_UPDATE **fix_updp, bool *upd_appended)
{
- WT_CURSOR_BTREE *hs_cbt;
- WT_DECL_ITEM(hs_key);
WT_DECL_ITEM(hs_value);
WT_DECL_RET;
+ WT_TIME_WINDOW *hs_tw;
WT_UPDATE *tombstone, *upd;
- wt_timestamp_t durable_ts, hs_start_ts, hs_stop_durable_ts;
+ wt_timestamp_t durable_ts, hs_stop_durable_ts;
size_t size, total_size;
- uint64_t hs_counter, type_full;
- uint32_t hs_btree_id;
- int cmp;
+ uint64_t type_full;
char ts_string[2][WT_TS_INT_STRING_SIZE];
WT_ASSERT(session, chain != NULL);
- hs_cbt = (WT_CURSOR_BTREE *)hs_cursor;
*fix_updp = NULL;
*upd_appended = false;
size = total_size = 0;
tombstone = upd = NULL;
- /* Allocate buffers for the data store and history store key. */
- WT_ERR(__wt_scr_alloc(session, 0, &hs_key));
WT_ERR(__wt_scr_alloc(session, 0, &hs_value));
- for (; ret == 0; ret = __wt_hs_cursor_prev(session, hs_cursor)) {
- WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, hs_key, &hs_start_ts, &hs_counter));
-
- /* Stop before crossing over to the next btree */
- if (hs_btree_id != S2BT(session)->id) {
- ret = WT_NOTFOUND;
- goto done;
- }
-
- /*
- * Keys are sorted in an order, skip the ones before the desired key, and bail out if we
- * have crossed over the desired key and not found the record we are looking for.
- */
- WT_ERR(__wt_compare(session, NULL, hs_key, key, &cmp));
- if (cmp != 0) {
- ret = WT_NOTFOUND;
- goto done;
- }
-
- /*
- * If the stop time pair on the tombstone in the history store is already globally visible
- * we can skip it.
- */
- if (!__wt_txn_tw_stop_visible_all(session, &hs_cbt->upd_value->tw))
- break;
- else
- WT_STAT_CONN_INCR(session, cursor_prev_hs_tombstone);
- }
-
- /* We walked off the top of the history store. */
- if (ret == WT_NOTFOUND)
- goto done;
- WT_ERR(ret);
-
- /*
- * As part of the history store search, we never get an exact match based on our search criteria
- * as we always search for a maximum record for that key. Make sure that we set the comparison
- * result as an exact match to remove this key as part of rollback to stable. In case if we
- * don't mark the comparison result as same, later the __wt_row_modify function will not
- * properly remove the update from history store.
- */
- hs_cbt->compare = 0;
-
/* Get current value. */
WT_ERR(hs_cursor->get_value(hs_cursor, &hs_stop_durable_ts, &durable_ts, &type_full, hs_value));
@@ -799,15 +750,16 @@ __txn_append_hs_record(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, WT_ITEM *
/*
* If the history update already has a stop time point and we are committing the prepared update
- * there is no work to do.
+ * there is no work to do. This happens if a deleted key is reinserted by a prepared update.
*/
if (hs_stop_durable_ts != WT_TS_MAX && commit)
goto done;
+ __wt_hs_upd_time_window(hs_cursor, &hs_tw);
WT_ERR(__wt_upd_alloc(session, hs_value, WT_UPDATE_STANDARD, &upd, &size));
- upd->txnid = hs_cbt->upd_value->tw.start_txn;
- upd->durable_ts = hs_cbt->upd_value->tw.durable_start_ts;
- upd->start_ts = hs_cbt->upd_value->tw.start_ts;
+ upd->txnid = hs_tw->start_txn;
+ upd->durable_ts = hs_tw->durable_start_ts;
+ upd->start_ts = hs_tw->start_ts;
*fix_updp = upd;
/*
@@ -831,11 +783,11 @@ __txn_append_hs_record(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, WT_ITEM *
/* If the history store record has a valid stop time point, append it. */
if (hs_stop_durable_ts != WT_TS_MAX) {
- WT_ASSERT(session, hs_cbt->upd_value->tw.stop_ts != WT_TS_MAX);
+ WT_ASSERT(session, hs_tw->stop_ts != WT_TS_MAX);
WT_ERR(__wt_upd_alloc(session, NULL, WT_UPDATE_TOMBSTONE, &tombstone, &size));
- tombstone->durable_ts = hs_cbt->upd_value->tw.durable_stop_ts;
- tombstone->start_ts = hs_cbt->upd_value->tw.stop_ts;
- tombstone->txnid = hs_cbt->upd_value->tw.stop_txn;
+ tombstone->durable_ts = hs_tw->durable_stop_ts;
+ tombstone->start_ts = hs_tw->stop_ts;
+ tombstone->txnid = hs_tw->stop_txn;
tombstone->next = upd;
/*
* Set the flag to indicate that this update has been restored from history store for the
@@ -873,7 +825,6 @@ err:
__wt_free_update_list(session, &upd);
}
done:
- __wt_scr_free(session, &hs_key);
__wt_scr_free(session, &hs_value);
return (ret);
}
@@ -958,15 +909,18 @@ static int
__txn_fixup_prepared_update(
WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, WT_UPDATE *fix_upd, bool commit)
{
- WT_CURSOR_BTREE *hs_cbt;
WT_DECL_RET;
WT_ITEM hs_value;
+ WT_TIME_WINDOW tw;
WT_TXN *txn;
- WT_UPDATE *hs_upd;
uint32_t txn_flags;
+#ifdef HAVE_DIAGNOSTIC
+ uint64_t hs_upd_type;
+ wt_timestamp_t hs_durable_ts, hs_stop_durable_ts;
+#endif
- hs_cbt = (WT_CURSOR_BTREE *)hs_cursor;
txn = session->txn;
+ WT_TIME_WINDOW_INIT(&tw);
/*
* Transaction error and prepare are cleared temporarily as cursor functions are not allowed
@@ -982,33 +936,34 @@ __txn_fixup_prepared_update(
* If the history update already has a stop time point and we are committing the prepared update
* there is no work to do.
*/
- WT_ERR(__wt_upd_alloc_tombstone(session, &hs_upd, NULL));
if (commit) {
- hs_upd->start_ts = txn->commit_timestamp;
- hs_upd->durable_ts = txn->durable_timestamp;
- hs_upd->txnid = txn->id;
+ tw.stop_ts = txn->commit_timestamp;
+ tw.durable_stop_ts = txn->durable_timestamp;
+ tw.stop_txn = txn->id;
+ WT_TIME_WINDOW_SET_START(&tw, fix_upd);
- hs_value.data = fix_upd->data;
- hs_value.size = fix_upd->size;
+#ifdef HAVE_DIAGNOSTIC
+ /* Retrieve the existing update value and stop timestamp. */
+ WT_ERR(hs_cursor->get_value(
+ hs_cursor, &hs_stop_durable_ts, &hs_durable_ts, &hs_upd_type, &hs_value));
+ WT_ASSERT(session, hs_stop_durable_ts == WT_TS_MAX);
+ WT_ASSERT(session, (uint8_t)hs_upd_type == WT_UPDATE_STANDARD);
+#endif
/*
* We need to update the stop durable timestamp stored in the history store value.
*
* Pack the value using cursor api.
*/
- hs_cursor->set_value(hs_cursor, txn->durable_timestamp, fix_upd->durable_ts,
- (uint64_t)fix_upd->type, &hs_value);
- WT_ERR(__wt_upd_alloc(session, &hs_cursor->value, WT_UPDATE_STANDARD, &hs_upd->next, NULL));
- hs_upd->next->durable_ts = fix_upd->durable_ts;
- hs_upd->next->start_ts = fix_upd->start_ts;
- hs_upd->next->txnid = fix_upd->txnid;
+ hs_value.data = fix_upd->data;
+ hs_value.size = fix_upd->size;
+ hs_cursor->set_value(hs_cursor, &tw, tw.durable_stop_ts, tw.durable_start_ts,
+ (uint64_t)WT_UPDATE_STANDARD, &hs_value);
+ WT_ERR(hs_cursor->update(hs_cursor));
+ } else {
+ WT_ERR(hs_cursor->remove(hs_cursor));
}
- WT_ERR(__wt_hs_modify(hs_cbt, hs_upd));
-
- if (0) {
err:
- __wt_free_update_list(session, &hs_upd);
- }
F_SET(txn, txn_flags);
return (ret);
@@ -1128,22 +1083,15 @@ __txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit,
cbt = (WT_CURSOR_BTREE *)(*cursorp);
hs_btree_id = S2BT(session)->id;
/* Open a history store table cursor. */
- WT_ERR(__wt_hs_cursor_open(session));
- hs_cursor = session->hs_cursor;
+ WT_ERR(__wt_curhs_open(session, NULL, &hs_cursor));
+ F_SET(hs_cursor, WT_CURSTD_HS_READ_COMMITTED);
/*
* Scan the history store for the given btree and key with maximum start timestamp to let
* the search point to the last version of the key.
*/
- WT_ERR_NOTFOUND_OK(__wt_hs_cursor_position(
- session, hs_cursor, hs_btree_id, &op->u.op_row.key, WT_TS_MAX, NULL),
- true);
-
- if (ret == 0)
- /* Not found if we cross the tree or key boundary. */
- WT_ERR_NOTFOUND_OK(__txn_append_hs_record(session, hs_cursor, &op->u.op_row.key,
- cbt->ref->page, upd, commit, &fix_upd, &upd_appended),
- true);
+ hs_cursor->set_key(hs_cursor, 4, hs_btree_id, &op->u.op_row.key, WT_TS_MAX, UINT64_MAX);
+ WT_ERR_NOTFOUND_OK(__wt_curhs_search_near_before(session, hs_cursor), true);
if (ret == WT_NOTFOUND && !commit) {
/*
* Allocate a tombstone and prepend it to the row so when we reconcile the update chain
@@ -1156,7 +1104,10 @@ __txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit,
__wt_row_modify(cbt, &cbt->iface.key, NULL, tombstone, WT_UPDATE_INVALID, false));
WT_ERR(ret);
tombstone = NULL;
- } else
+ } else if (ret == 0)
+ WT_ERR(__txn_append_hs_record(
+ session, hs_cursor, cbt->ref->page, upd, commit, &fix_upd, &upd_appended));
+ else
ret = 0;
}
@@ -1212,15 +1163,14 @@ __txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit,
* Fix the history store contents if they exist, when there are no more updates in the update
* list. Only in eviction, it is possible to write an unfinished history store update when the
* prepared updates are written to the data store. When the page is read back into memory, there
- * will be only one uncommitted prepared update. There can be a false positive of fixing history
- * store when handling prepared inserts, but it doesn't cost much.
+ * will be only one uncommitted prepared update.
*/
if (fix_upd != NULL)
WT_ERR(__txn_fixup_prepared_update(session, hs_cursor, fix_upd, commit));
err:
if (hs_cursor != NULL)
- WT_TRET(__wt_hs_cursor_close(session));
+ WT_TRET(hs_cursor->close(hs_cursor));
if (!upd_appended)
__wt_free(session, fix_upd);
__wt_free(session, tombstone);
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index 14383dc3017..2ece30a9f0d 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -276,18 +276,17 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
{
WT_CELL_UNPACK_KV *unpack, _unpack;
WT_CURSOR *hs_cursor;
- WT_CURSOR_BTREE *cbt;
WT_DECL_ITEM(hs_key);
WT_DECL_ITEM(hs_value);
WT_DECL_ITEM(key);
WT_DECL_RET;
WT_ITEM full_value;
- WT_UPDATE *hs_upd, *tombstone, *upd;
+ WT_TIME_WINDOW *hs_tw;
+ WT_UPDATE *tombstone, *upd;
wt_timestamp_t hs_durable_ts, hs_start_ts, hs_stop_durable_ts, newer_hs_durable_ts;
uint64_t hs_counter, type_full;
uint32_t hs_btree_id;
uint8_t type;
- int cmp;
char ts_string[4][WT_TS_INT_STRING_SIZE];
bool valid_update_found;
#ifdef HAVE_DIAGNOSTIC
@@ -295,7 +294,7 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
#endif
hs_cursor = NULL;
- hs_upd = tombstone = upd = NULL;
+ tombstone = upd = NULL;
hs_durable_ts = hs_start_ts = hs_stop_durable_ts = WT_TS_NONE;
hs_btree_id = S2BT(session)->id;
WT_CLEAR(full_value);
@@ -319,9 +318,13 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
newer_hs_durable_ts = unpack->tw.durable_start_ts;
/* Open a history store table cursor. */
- WT_ERR(__wt_hs_cursor_open(session));
- hs_cursor = session->hs_cursor;
- cbt = (WT_CURSOR_BTREE *)hs_cursor;
+ WT_ERR(__wt_curhs_open(session, NULL, &hs_cursor));
+ /*
+ * Rollback-to-stable operates exclusively (i.e., it is the only active operation in the system)
+ * outside the constraints of transactions. Therefore, there is no need for snapshot based
+ * visibility checks.
+ */
+ F_SET(hs_cursor, WT_CURSTD_HS_READ_COMMITTED);
/*
* Scan the history store for the given btree and key with maximum start timestamp to let the
@@ -330,40 +333,11 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
* into data store and removed from history store. If none of the history store records satisfy
* the given timestamp, the key is removed from data store.
*/
- ret = __wt_hs_cursor_position(session, hs_cursor, hs_btree_id, key, WT_TS_MAX, NULL);
- for (; ret == 0; ret = __wt_hs_cursor_prev(session, hs_cursor)) {
+ hs_cursor->set_key(hs_cursor, 4, hs_btree_id, key, WT_TS_MAX, UINT64_MAX);
+ ret = __wt_curhs_search_near_before(session, hs_cursor);
+ for (; ret == 0; ret = hs_cursor->prev(hs_cursor)) {
WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, hs_key, &hs_start_ts, &hs_counter));
- /* Stop before crossing over to the next btree */
- if (hs_btree_id != S2BT(session)->id)
- break;
-
- /*
- * Keys are sorted in an order, skip the ones before the desired key, and bail out if we
- * have crossed over the desired key and not found the record we are looking for.
- */
- WT_ERR(__wt_compare(session, NULL, hs_key, key, &cmp));
- if (cmp != 0)
- break;
-
- /*
- * If the stop time pair on the tombstone in the history store is already globally visible
- * we can skip it.
- */
- if (__wt_txn_tw_stop_visible_all(session, &cbt->upd_value->tw)) {
- WT_STAT_CONN_INCR(session, cursor_prev_hs_tombstone_rts);
- continue;
- }
-
- /*
- * As part of the history store search, we never get an exact match based on our search
- * criteria as we always search for a maximum record for that key. Make sure that we set the
- * comparison result as an exact match to remove this key as part of rollback to stable. In
- * case if we don't mark the comparison result as same, later the __wt_row_modify function
- * will not properly remove the update from history store.
- */
- cbt->compare = 0;
-
/* Get current value and convert to full update if it is a modify. */
WT_ERR(hs_cursor->get_value(
hs_cursor, &hs_stop_durable_ts, &hs_durable_ts, &type_full, hs_value));
@@ -416,16 +390,17 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
* selected update to the update chain. Also it confirms that history store doesn't contains
* any newer version than the current version for the key.
*/
+ /* Retrieve the time window from the history cursor. */
+ __wt_hs_upd_time_window(hs_cursor, &hs_tw);
if (!replace &&
(hs_stop_durable_ts != WT_TS_NONE ||
- !__rollback_check_if_txnid_non_committed(session, cbt->upd_value->tw.stop_txn)) &&
+ !__rollback_check_if_txnid_non_committed(session, hs_tw->stop_txn)) &&
(hs_stop_durable_ts <= rollback_timestamp)) {
__wt_verbose(session, WT_VERB_RECOVERY_RTS(session),
"history store update valid with stop timestamp: %s, stable timestamp: %s, txnid: "
"%" PRIu64 " and type: %" PRIu8,
__wt_timestamp_to_string(hs_stop_durable_ts, ts_string[0]),
- __wt_timestamp_to_string(rollback_timestamp, ts_string[1]),
- cbt->upd_value->tw.stop_txn, type);
+ __wt_timestamp_to_string(rollback_timestamp, ts_string[1]), hs_tw->stop_txn, type);
break;
}
@@ -434,7 +409,7 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
* transaction id.
*/
if ((hs_durable_ts != WT_TS_NONE ||
- !__rollback_check_if_txnid_non_committed(session, cbt->upd_value->tw.start_txn)) &&
+ !__rollback_check_if_txnid_non_committed(session, hs_tw->start_txn)) &&
(hs_durable_ts <= rollback_timestamp)) {
__wt_verbose(session, WT_VERB_RECOVERY_RTS(session),
"history store update valid with start timestamp: %s, durable timestamp: %s, stop "
@@ -442,8 +417,8 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
__wt_timestamp_to_string(hs_start_ts, ts_string[0]),
__wt_timestamp_to_string(hs_durable_ts, ts_string[1]),
__wt_timestamp_to_string(hs_stop_durable_ts, ts_string[2]),
- __wt_timestamp_to_string(rollback_timestamp, ts_string[3]),
- cbt->upd_value->tw.start_txn, type);
+ __wt_timestamp_to_string(rollback_timestamp, ts_string[3]), hs_tw->start_txn, type);
+ WT_ASSERT(session, hs_tw->start_ts < unpack->tw.start_ts);
valid_update_found = true;
break;
}
@@ -455,8 +430,8 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
__wt_timestamp_to_string(hs_start_ts, ts_string[0]),
__wt_timestamp_to_string(hs_durable_ts, ts_string[1]),
__wt_timestamp_to_string(hs_stop_durable_ts, ts_string[2]),
- __wt_timestamp_to_string(rollback_timestamp, ts_string[3]), cbt->upd_value->tw.start_txn,
- cbt->upd_value->tw.stop_txn, type);
+ __wt_timestamp_to_string(rollback_timestamp, ts_string[3]), hs_tw->start_txn,
+ hs_tw->stop_txn, type);
/*
* Start time point of the current record may be used as stop time point of the previous
@@ -468,8 +443,7 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
first_record = false;
#endif
- WT_ERR(__wt_upd_alloc_tombstone(session, &hs_upd, NULL));
- WT_ERR(__wt_hs_modify(cbt, hs_upd));
+ WT_ERR(hs_cursor->remove(hs_cursor));
WT_STAT_CONN_DATA_INCR(session, txn_rts_hs_removed);
WT_STAT_CONN_DATA_INCR(session, cache_hs_key_truncate_rts_unstable);
}
@@ -480,9 +454,10 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
* list. Otherwise remove the key by adding a tombstone.
*/
if (valid_update_found) {
+ /* Retrieve the time window from the history cursor. */
+ __wt_hs_upd_time_window(hs_cursor, &hs_tw);
WT_ASSERT(session,
- cbt->upd_value->tw.start_ts < unpack->tw.start_ts ||
- cbt->upd_value->tw.start_txn < unpack->tw.start_txn);
+ hs_tw->start_ts < unpack->tw.start_ts || hs_tw->start_txn < unpack->tw.start_txn);
WT_ERR(__wt_upd_alloc(session, &full_value, WT_UPDATE_STANDARD, &upd, NULL));
/*
@@ -494,9 +469,9 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
if (F_ISSET(S2C(session), WT_CONN_RECOVERING))
upd->txnid = WT_TXN_NONE;
else
- upd->txnid = cbt->upd_value->tw.start_txn;
- upd->durable_ts = cbt->upd_value->tw.durable_start_ts;
- upd->start_ts = cbt->upd_value->tw.start_ts;
+ upd->txnid = hs_tw->start_txn;
+ upd->durable_ts = hs_tw->durable_start_ts;
+ upd->start_ts = hs_tw->start_ts;
__wt_verbose(session, WT_VERB_RECOVERY_RTS(session),
"update restored from history store txnid: %" PRIu64
", start_ts: %s and durable_ts: %s",
@@ -527,9 +502,9 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
if (F_ISSET(S2C(session), WT_CONN_RECOVERING))
tombstone->txnid = WT_TXN_NONE;
else
- tombstone->txnid = cbt->upd_value->tw.stop_txn;
- tombstone->durable_ts = cbt->upd_value->tw.durable_stop_ts;
- tombstone->start_ts = cbt->upd_value->tw.stop_ts;
+ tombstone->txnid = hs_tw->stop_txn;
+ tombstone->durable_ts = hs_tw->durable_stop_ts;
+ tombstone->start_ts = hs_tw->stop_ts;
__wt_verbose(session, WT_VERB_RECOVERY_RTS(session),
"tombstone restored from history store txnid: %" PRIu64
", start_ts: %s, durable_ts: %s",
@@ -557,8 +532,7 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
/* Finally remove that update from history store. */
if (valid_update_found) {
- WT_ERR(__wt_upd_alloc_tombstone(session, &hs_upd, NULL));
- WT_ERR(__wt_hs_modify(cbt, hs_upd));
+ WT_ERR(hs_cursor->remove(hs_cursor));
WT_STAT_CONN_DATA_INCR(session, txn_rts_hs_removed);
WT_STAT_CONN_DATA_INCR(session, cache_hs_key_truncate_rts);
}
@@ -567,13 +541,13 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
err:
WT_ASSERT(session, tombstone == NULL || upd == tombstone);
__wt_free_update_list(session, &upd);
- __wt_free_update_list(session, &hs_upd);
}
__wt_scr_free(session, &hs_key);
__wt_scr_free(session, &hs_value);
__wt_scr_free(session, &key);
__wt_buf_free(session, &full_value);
- WT_TRET(__wt_hs_cursor_close(session));
+ if (hs_cursor != NULL)
+ WT_TRET(hs_cursor->close(hs_cursor));
return (ret);
}
@@ -1305,74 +1279,44 @@ static int
__rollback_to_stable_btree_hs_truncate(WT_SESSION_IMPL *session, uint32_t btree_id)
{
WT_CURSOR *hs_cursor;
- WT_CURSOR_BTREE *cbt;
WT_DECL_ITEM(hs_key);
WT_DECL_RET;
- WT_ITEM key;
- WT_UPDATE *hs_upd;
wt_timestamp_t hs_start_ts;
uint64_t hs_counter;
uint32_t hs_btree_id;
- int exact;
char ts_string[WT_TS_INT_STRING_SIZE];
hs_cursor = NULL;
- WT_CLEAR(key);
- hs_upd = NULL;
WT_RET(__wt_scr_alloc(session, 0, &hs_key));
/* Open a history store table cursor. */
- WT_ERR(__wt_hs_cursor_open(session));
- hs_cursor = session->hs_cursor;
- cbt = (WT_CURSOR_BTREE *)hs_cursor;
+ WT_ERR(__wt_curhs_open(session, NULL, &hs_cursor));
/* Walk the history store for the given btree. */
- hs_cursor->set_key(hs_cursor, btree_id, &key, WT_TS_NONE, 0);
- ret = __wt_hs_cursor_search_near(session, hs_cursor, &exact);
-
- /*
- * The search should always end up pointing to the start of the required btree or end of the
- * previous btree on success. Move the cursor based on the result.
- */
- WT_ASSERT(session, (ret != 0 || exact != 0));
- if (ret == 0 && exact < 0)
- ret = __wt_hs_cursor_next(session, hs_cursor);
+ hs_cursor->set_key(hs_cursor, 1, btree_id);
+ ret = __wt_curhs_search_near_after(session, hs_cursor);
- for (; ret == 0; ret = __wt_hs_cursor_next(session, hs_cursor)) {
+ for (; ret == 0; ret = hs_cursor->next(hs_cursor)) {
WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, hs_key, &hs_start_ts, &hs_counter));
- /* Stop crossing into the next btree boundary. */
- if (btree_id != hs_btree_id)
- break;
-
- /*
- * If the stop time pair on the tombstone in the history store is already globally visible
- * we can skip it.
- */
- if (__wt_txn_tw_stop_visible_all(session, &cbt->upd_value->tw)) {
- WT_STAT_CONN_INCR(session, cursor_next_hs_tombstone_rts);
- continue;
- }
+ /* We shouldn't cross the btree search space. */
+ WT_ASSERT(session, btree_id == hs_btree_id);
- /* Set this comparison as exact match of the search for later use. */
- cbt->compare = 0;
__wt_verbose(session, WT_VERB_RECOVERY_RTS(session),
"rollback to stable history store cleanup of update with start timestamp: %s",
__wt_timestamp_to_string(hs_start_ts, ts_string));
- WT_ERR(__wt_upd_alloc_tombstone(session, &hs_upd, NULL));
- WT_ERR(__wt_hs_modify(cbt, hs_upd));
+ WT_ERR(hs_cursor->remove(hs_cursor));
WT_STAT_CONN_DATA_INCR(session, txn_rts_hs_removed);
WT_STAT_CONN_DATA_INCR(session, cache_hs_key_truncate_rts);
- hs_upd = NULL;
}
WT_ERR_NOTFOUND_OK(ret, false);
err:
__wt_scr_free(session, &hs_key);
- __wt_free(session, hs_upd);
- WT_TRET(__wt_hs_cursor_close(session));
+ if (hs_cursor != NULL)
+ WT_TRET(hs_cursor->close(hs_cursor));
return (ret);
}
diff --git a/src/third_party/wiredtiger/test/format/t.c b/src/third_party/wiredtiger/test/format/t.c
index 44d14821a92..4cd239a528a 100644
--- a/src/third_party/wiredtiger/test/format/t.c
+++ b/src/third_party/wiredtiger/test/format/t.c
@@ -378,7 +378,7 @@ format_die(void)
testutil_check(__wt_debug_cursor_page(g.page_dump_cursor, g.home_pagedump));
fprintf(stderr, "snapshot-isolation error: Dumping HS to %s\n", g.home_hsdump);
#if WIREDTIGER_VERSION_MAJOR >= 10
- testutil_check(__wt_debug_cursor_tree_hs(g.page_dump_cursor, g.home_hsdump));
+ testutil_check(__wt_debug_cursor_tree_hs(CUR2S(g.page_dump_cursor), g.home_hsdump));
#endif
}
#endif
diff --git a/src/third_party/wiredtiger/test/suite/test_cursor13.py b/src/third_party/wiredtiger/test/suite/test_cursor13.py
index c1d7254c016..75ac0b9a890 100755
--- a/src/third_party/wiredtiger/test/suite/test_cursor13.py
+++ b/src/third_party/wiredtiger/test/suite/test_cursor13.py
@@ -71,7 +71,7 @@ class test_cursor13_base(wttest.WiredTigerTestCase):
if hs_before[0] == hs_after[0] and hs_before[1] == hs_after[1]:
break
- # Fail if we haven't been able to get stable hs stats after too many attempts.
+ # Fail if we haven't been able to get stable history store stats after too many attempts.
# Seems impossible, but better to check than to have an accidental infinite loop.
self.assertNotEqual(i, max_tries - 1)
diff --git a/src/third_party/wiredtiger/test/suite/test_hs05.py b/src/third_party/wiredtiger/test/suite/test_hs05.py
index 0e1e5a84617..5a66d04f546 100644
--- a/src/third_party/wiredtiger/test/suite/test_hs05.py
+++ b/src/third_party/wiredtiger/test/suite/test_hs05.py
@@ -72,7 +72,7 @@ class test_hs05(wttest.WiredTigerTestCase):
score_diff = score_end - score_start
self.pr("After large updates score start: " + str(score_start))
self.pr("After large updates score end: " + str(score_end))
- self.pr("After large updates hs score diff: " + str(score_diff))
+ self.pr("After large updates history store score diff: " + str(score_diff))
def test_checkpoint_hs_reads(self):
# Create a small table.
diff --git a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable11.py b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable11.py
index 58d156b26b1..a6d1f8703d4 100755
--- a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable11.py
+++ b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable11.py
@@ -37,7 +37,7 @@ def timestamp_str(t):
return '%x' % t
# test_rollback_to_stable11.py
-# Test the rollback to stable is retrieving the proper hs update.
+# Test the rollback to stable is retrieving the proper history store update.
class test_rollback_to_stable11(test_rollback_to_stable_base):
session_config = 'isolation=snapshot'
diff --git a/src/third_party/wiredtiger/test/suite/test_util21.py b/src/third_party/wiredtiger/test/suite/test_util21.py
new file mode 100644
index 00000000000..cdd117649db
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_util21.py
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2021 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wttest
+from suite_subprocess import suite_subprocess
+from helper import compare_files
+
+def timestamp_str(t):
+ return '%x' % t
+
+# test_util21.py
+# Ensure that wt dump can dump obsolete data in the history store.
+class test_util21(wttest.WiredTigerTestCase, suite_subprocess):
+ conn_config = 'cache_size=50MB'
+ session_config = 'isolation=snapshot'
+
+ def add_data_with_timestamp(self, uri, value, ts):
+ # Apply a series of updates with commit timestamp.
+ cursor = self.session.open_cursor(uri)
+ for i in range(1, 5):
+ self.session.begin_transaction()
+ cursor[str(i)] = value
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(ts))
+ cursor.close()
+
+ def test_dump_obsolete_data(self):
+ uri = 'table:test_util21'
+ create_params = 'key_format=S,value_format=S'
+ self.session.create(uri, create_params)
+
+ value1 = 'a' * 100
+ value2 = 'b' * 100
+ value3 = 'c' * 100
+ value4 = 'd' * 100
+
+ self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(1))
+
+ self.add_data_with_timestamp(uri, value1, 2)
+ self.add_data_with_timestamp(uri, value2, 3)
+ self.add_data_with_timestamp(uri, value3, 5)
+ self.add_data_with_timestamp(uri, value4, 7)
+ # Perform checkpoint, to clean the dirty pages and place values on disk.
+ self.session.checkpoint()
+
+ # Set stable timestamp, so we don't lose data when closing/opening connection when using wt dump.
+ self.conn.set_timestamp('stable_timestamp=' + timestamp_str(10))
+
+ # Call dump on the values before the oldest timestamp is set
+ self.runWt(['dump', 'file:WiredTigerHS.wt'], outfilename="before_oldest")
+
+ # Set the oldest timestamp and checkpoint; the obsolete data should not be removed as
+ # the pages are clean.
+ self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(6))
+ self.session.checkpoint()
+ self.runWt(['dump', 'file:WiredTigerHS.wt'], outfilename="after_oldest")
+
+ self.assertEqual(True, compare_files(self, "before_oldest", "after_oldest"))
+
+if __name__ == '__main__':
+ wttest.run()