summaryrefslogtreecommitdiff
path: root/src/third_party/wiredtiger/src
diff options
context:
space:
mode:
authorLuke Chen <luke.chen@mongodb.com>2020-11-20 17:30:42 +1100
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-11-20 07:24:19 +0000
commitd0dfd00a48e83bc0e7e17d938f2578970a329304 (patch)
tree07a9a582d7fbfec81e6e82d2ee4b49e6d50d9362 /src/third_party/wiredtiger/src
parentefb1467bec93d1de4aff70e817e10645f2ebfb3f (diff)
downloadmongo-d0dfd00a48e83bc0e7e17d938f2578970a329304.tar.gz
Import wiredtiger: b22e16b7643e0e07c784962899b3a45728536947 from branch mongodb-5.0
ref: d05021d0ee..b22e16b764 for: 4.9.0 WT-6563 Create a reproducer for invalid modification application WT-6672 Don't increase the writegen number until RTS cleans up the checkpoint WT-6859 Implement search_near method for the history store cursor
Diffstat (limited to 'src/third_party/wiredtiger/src')
-rw-r--r--src/third_party/wiredtiger/src/config/config_def.c18
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_api.c9
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_dhandle.c31
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_hs.c331
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_std.c31
-rw-r--r--src/third_party/wiredtiger/src/include/cursor.h10
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h1
-rw-r--r--src/third_party/wiredtiger/src/include/wiredtiger.in55
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_recover.c16
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c11
10 files changed, 473 insertions, 40 deletions
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index 7ec7b27170c..55c59fee565 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -297,6 +297,9 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_log_flush[] = {
{"sync", "string", NULL, "choices=[\"background\",\"off\",\"on\"]", NULL, 0},
{NULL, NULL, NULL, NULL, NULL, 0}};
+static const WT_CONFIG_CHECK confchk_WT_SESSION_open_cursor_debug_subconfigs[] = {
+ {"release_evict", "boolean", NULL, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
+
static const WT_CONFIG_CHECK confchk_WT_SESSION_open_cursor_incremental_subconfigs[] = {
{"consolidate", "boolean", NULL, NULL, NULL, 0}, {"enabled", "boolean", NULL, NULL, NULL, 0},
{"file", "string", NULL, NULL, NULL, 0}, {"force_stop", "boolean", NULL, NULL, NULL, 0},
@@ -308,6 +311,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_open_cursor[] = {
{"append", "boolean", NULL, NULL, NULL, 0}, {"bulk", "string", NULL, NULL, NULL, 0},
{"checkpoint", "string", NULL, NULL, NULL, 0},
{"checkpoint_wait", "boolean", NULL, NULL, NULL, 0},
+ {"debug", "category", NULL, NULL, confchk_WT_SESSION_open_cursor_debug_subconfigs, 1},
{"dump", "string", NULL, "choices=[\"hex\",\"json\",\"pretty\",\"print\"]", NULL, 0},
{"incremental", "category", NULL, NULL, confchk_WT_SESSION_open_cursor_incremental_subconfigs, 7},
{"next_random", "boolean", NULL, NULL, NULL, 0},
@@ -924,13 +928,13 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
{"WT_SESSION.log_flush", "sync=on", confchk_WT_SESSION_log_flush, 1},
{"WT_SESSION.log_printf", "", NULL, 0},
{"WT_SESSION.open_cursor",
- "append=false,bulk=false,checkpoint=,checkpoint_wait=true,dump=,"
- "incremental=(consolidate=false,enabled=false,file=,"
- "force_stop=false,granularity=16MB,src_id=,this_id=),"
- "next_random=false,next_random_sample_size=0,overwrite=true,"
- "raw=false,read_once=false,readonly=false,skip_sort_check=false,"
- "statistics=,target=",
- confchk_WT_SESSION_open_cursor, 15},
+ "append=false,bulk=false,checkpoint=,checkpoint_wait=true,"
+ "debug=(release_evict=false),dump=,incremental=(consolidate=false"
+ ",enabled=false,file=,force_stop=false,granularity=16MB,src_id=,"
+ "this_id=),next_random=false,next_random_sample_size=0,"
+ "overwrite=true,raw=false,read_once=false,readonly=false,"
+ "skip_sort_check=false,statistics=,target=",
+ confchk_WT_SESSION_open_cursor, 16},
{"WT_SESSION.prepare_transaction", "prepare_timestamp=", confchk_WT_SESSION_prepare_transaction,
1},
{"WT_SESSION.query_timestamp", "get=read", confchk_WT_SESSION_query_timestamp, 1},
diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c
index 9b5e2394b61..f3da1531709 100644
--- a/src/third_party/wiredtiger/src/conn/conn_api.c
+++ b/src/third_party/wiredtiger/src/conn/conn_api.c
@@ -2740,7 +2740,14 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, const char *c
WT_ERR(wt_session->salvage(wt_session, WT_METAFILE_URI, NULL));
}
- /* Initialize the connection's base write generation. */
+ /*
+ * Initialize the connection's base write generation.
+ *
+ * We'll overwrite this value after performing rollback to stable; however, we need to set it
+ * here. The logic below will involve opening up the metadata file and if the connection-wide
+ * base write generation is uninitialized, we'll tag the btree with the wrong base write gen and
+ * incorrectly interpret transaction ids during rollback to stable.
+ */
WT_ERR(__wt_metadata_init_base_write_gen(session));
WT_ERR(__wt_metadata_cursor(session, NULL));
diff --git a/src/third_party/wiredtiger/src/conn/conn_dhandle.c b/src/third_party/wiredtiger/src/conn/conn_dhandle.c
index 16a9d7812b5..8727ed1b18b 100644
--- a/src/third_party/wiredtiger/src/conn/conn_dhandle.c
+++ b/src/third_party/wiredtiger/src/conn/conn_dhandle.c
@@ -848,6 +848,37 @@ restart:
}
/*
+ * __wt_dhandle_update_write_gens --
+ *     Walk the connection's data handle list and bring each handle's btree write generation, base
+ *     write generation and run write generation up to at least the connection's base write
+ *     generation.
+ */
+void
+__wt_dhandle_update_write_gens(WT_SESSION_IMPL *session)
+{
+    WT_BTREE *btree;
+    WT_CONNECTION_IMPL *conn;
+    WT_DATA_HANDLE *dhandle;
+
+    conn = S2C(session);
+    dhandle = NULL;
+
+    for (;;) {
+        /* Advance to the next handle; a NULL result means the list is exhausted. */
+        WT_WITH_HANDLE_LIST_WRITE_LOCK(session, WT_DHANDLE_NEXT(session, dhandle, &conn->dhqh, q));
+        if (dhandle == NULL)
+            return;
+
+        btree = (WT_BTREE *)dhandle->handle;
+        WT_ASSERT(session, btree != NULL);
+
+        /*
+         * Reset all three generation numbers to the larger of the btree's current write
+         * generation and the connection-wide base write generation. This is done after rollback
+         * to stable so that the transaction ids of the pages will be reset when loaded from disk
+         * to memory.
+         */
+        btree->run_write_gen = WT_MAX(btree->write_gen, conn->base_write_gen);
+        btree->base_write_gen = btree->run_write_gen;
+        btree->write_gen = btree->base_write_gen;
+    }
+}
+
+/*
* __wt_verbose_dump_handles --
* Dump information about all data handles.
*/
diff --git a/src/third_party/wiredtiger/src/cursor/cur_hs.c b/src/third_party/wiredtiger/src/cursor/cur_hs.c
index 500b9208b98..923b9941d0e 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_hs.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_hs.c
@@ -152,6 +152,7 @@ __curhs_close(WT_CURSOR *cursor)
WT_CURSOR *file_cursor;
WT_CURSOR_HS *hs_cursor;
WT_DECL_RET;
+ WT_ITEM *datastore_key;
WT_SESSION_IMPL *session;
hs_cursor = (WT_CURSOR_HS *)cursor;
@@ -161,6 +162,8 @@ __curhs_close(WT_CURSOR *cursor)
err:
if (file_cursor != NULL)
WT_TRET(file_cursor->close(file_cursor));
+ datastore_key = &hs_cursor->datastore_key;
+ __wt_scr_free(session, &datastore_key);
__wt_cursor_close(cursor);
API_END_RET(session, ret);
@@ -185,6 +188,10 @@ __curhs_reset(WT_CURSOR *cursor)
ret = file_cursor->reset(file_cursor);
F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
WT_TIME_WINDOW_INIT(&hs_cursor->time_window);
+ hs_cursor->btree_id = 0;
+ hs_cursor->datastore_key.data = NULL;
+ hs_cursor->datastore_key.size = 0;
+ hs_cursor->flags = 0;
err:
API_END_RET(session, ret);
@@ -199,15 +206,327 @@ __curhs_set_key(WT_CURSOR *cursor, ...)
{
WT_CURSOR *file_cursor;
WT_CURSOR_HS *hs_cursor;
+ WT_ITEM *datastore_key;
+ WT_SESSION_IMPL *session;
+ wt_timestamp_t start_ts;
+ uint64_t counter;
+ uint32_t arg_count;
va_list ap;
hs_cursor = (WT_CURSOR_HS *)cursor;
file_cursor = hs_cursor->file_cursor;
+ session = CUR2S(cursor);
+ start_ts = WT_TS_NONE;
+ counter = 0;
va_start(ap, cursor);
- file_cursor->set_key(file_cursor, va_arg(ap, uint32_t), va_arg(ap, WT_ITEM *),
- va_arg(ap, wt_timestamp_t), va_arg(ap, uint64_t));
+ arg_count = va_arg(ap, uint32_t);
+
+ WT_ASSERT(session, arg_count >= 1 && arg_count <= 4);
+
+ hs_cursor->btree_id = va_arg(ap, uint32_t);
+ F_SET(hs_cursor, WT_HS_CUR_BTREE_ID_SET);
+ if (arg_count > 1) {
+ datastore_key = va_arg(ap, WT_ITEM *);
+ WT_IGNORE_RET(__wt_buf_set(
+ session, &hs_cursor->datastore_key, datastore_key->data, datastore_key->size));
+ F_SET(hs_cursor, WT_HS_CUR_KEY_SET);
+ } else {
+ hs_cursor->datastore_key.data = NULL;
+ hs_cursor->datastore_key.size = 0;
+ F_CLR(hs_cursor, WT_HS_CUR_KEY_SET);
+ }
+
+ if (arg_count > 2) {
+ start_ts = va_arg(ap, wt_timestamp_t);
+ F_SET(hs_cursor, WT_HS_CUR_TS_SET);
+ } else
+ F_CLR(hs_cursor, WT_HS_CUR_TS_SET);
+
+ if (arg_count > 3) {
+ counter = va_arg(ap, uint64_t);
+ F_SET(hs_cursor, WT_HS_CUR_COUNTER_SET);
+ } else
+ F_CLR(hs_cursor, WT_HS_CUR_COUNTER_SET);
+
va_end(ap);
+
+ file_cursor->set_key(
+ file_cursor, hs_cursor->btree_id, &hs_cursor->datastore_key, start_ts, counter);
+}
+
+/*
+ * __curhs_prev_visible --
+ *     Check the visibility of the current history store record. If it is not visible, find the
+ *     previous visible history store record.
+ *
+ *     Starting from the record the file cursor is currently positioned on, walk backwards until a
+ *     record is accepted. Returns 0 with the file cursor left on that record, WT_NOTFOUND if the
+ *     walk leaves the btree id or data store key set on the history store cursor (or the stop time
+ *     point of a record for the requested key is visible to us), or whatever error the underlying
+ *     cursor calls report.
+ */
+static int
+__curhs_prev_visible(WT_SESSION_IMPL *session, WT_CURSOR_HS *hs_cursor)
+{
+    WT_CURSOR *file_cursor;
+    WT_CURSOR *std_cursor;
+    WT_CURSOR_BTREE *cbt;
+    WT_DECL_ITEM(datastore_key);
+    WT_DECL_RET;
+    wt_timestamp_t start_ts;
+    uint64_t counter;
+    uint32_t btree_id;
+    int cmp;
+
+    file_cursor = hs_cursor->file_cursor;
+    std_cursor = (WT_CURSOR *)hs_cursor;
+    cbt = (WT_CURSOR_BTREE *)file_cursor;
+
+    WT_ERR(__wt_scr_alloc(session, 0, &datastore_key));
+
+    for (; ret == 0; ret = __wt_hs_cursor_prev(session, file_cursor)) {
+        /*
+         * The key getter fills in the item it is handed, so pass the scratch item itself rather
+         * than the address of our local pointer to it.
+         */
+        WT_ERR(file_cursor->get_key(file_cursor, &btree_id, datastore_key, &start_ts, &counter));
+
+        /* Stop before crossing over to a different btree. */
+        if (F_ISSET(hs_cursor, WT_HS_CUR_BTREE_ID_SET) && btree_id != hs_cursor->btree_id) {
+            ret = WT_NOTFOUND;
+            goto done;
+        }
+
+        /*
+         * If a data store key was supplied, bail out as soon as we have moved off that key without
+         * finding the record we are looking for.
+         */
+        if (F_ISSET(hs_cursor, WT_HS_CUR_KEY_SET)) {
+            WT_ERR(__wt_compare(session, NULL, datastore_key, &hs_cursor->datastore_key, &cmp));
+            if (cmp != 0) {
+                ret = WT_NOTFOUND;
+                goto done;
+            }
+        }
+
+        /*
+         * If the stop time pair on the tombstone in the history store is already globally visible
+         * we can skip it.
+         */
+        if (__wt_txn_tw_stop_visible_all(session, &cbt->upd_value->tw)) {
+            WT_STAT_CONN_INCR(session, cursor_prev_hs_tombstone);
+            WT_STAT_DATA_INCR(session, cursor_prev_hs_tombstone);
+            continue;
+        }
+
+        /*
+         * Don't check the visibility of the record if we want to read any history store record that
+         * is not obsolete.
+         */
+        if (F_ISSET(std_cursor, WT_CURSTD_HS_READ_COMMITTED))
+            break;
+
+        if (__wt_txn_tw_stop_visible(session, &cbt->upd_value->tw)) {
+            /*
+             * If the stop time point of a record is visible to us, we won't be able to see anything
+             * for this entire key.
+             */
+            if (F_ISSET(hs_cursor, WT_HS_CUR_KEY_SET)) {
+                ret = WT_NOTFOUND;
+                goto done;
+            } else
+                continue;
+        }
+
+        /* If the start time point is visible to us, let's return that record. */
+        if (__wt_txn_tw_start_visible(session, &cbt->upd_value->tw))
+            break;
+    }
+
+done:
+err:
+    __wt_scr_free(session, &datastore_key);
+    return (ret);
+}
+
+/*
+ * __curhs_next_visible --
+ *     Check the visibility of the current history store record. If it is not visible, find the next
+ *     visible history store record.
+ *
+ *     Starting from the record the file cursor is currently positioned on, walk forwards until a
+ *     record is accepted. Returns 0 with the file cursor left on that record, WT_NOTFOUND if the
+ *     walk leaves the btree id or data store key set on the history store cursor, or whatever
+ *     error the underlying cursor calls report.
+ */
+static int
+__curhs_next_visible(WT_SESSION_IMPL *session, WT_CURSOR_HS *hs_cursor)
+{
+    WT_CURSOR *file_cursor;
+    WT_CURSOR *std_cursor;
+    WT_CURSOR_BTREE *cbt;
+    WT_DECL_ITEM(datastore_key);
+    WT_DECL_RET;
+    wt_timestamp_t start_ts;
+    uint64_t counter;
+    uint32_t btree_id;
+    int cmp;
+
+    file_cursor = hs_cursor->file_cursor;
+    std_cursor = (WT_CURSOR *)hs_cursor;
+    cbt = (WT_CURSOR_BTREE *)file_cursor;
+
+    WT_ERR(__wt_scr_alloc(session, 0, &datastore_key));
+
+    for (; ret == 0; ret = __wt_hs_cursor_next(session, file_cursor)) {
+        /*
+         * The key getter fills in the item it is handed, so pass the scratch item itself rather
+         * than the address of our local pointer to it.
+         */
+        WT_ERR(file_cursor->get_key(file_cursor, &btree_id, datastore_key, &start_ts, &counter));
+
+        /* Stop before crossing over to the next btree. */
+        if (F_ISSET(hs_cursor, WT_HS_CUR_BTREE_ID_SET) && btree_id != hs_cursor->btree_id) {
+            ret = WT_NOTFOUND;
+            goto done;
+        }
+
+        /*
+         * If a data store key was supplied, bail out as soon as we have moved off that key without
+         * finding the record we are looking for.
+         */
+        if (F_ISSET(hs_cursor, WT_HS_CUR_KEY_SET)) {
+            WT_ERR(__wt_compare(session, NULL, datastore_key, &hs_cursor->datastore_key, &cmp));
+            if (cmp != 0) {
+                ret = WT_NOTFOUND;
+                goto done;
+            }
+        }
+
+        /*
+         * If the stop time pair on the tombstone in the history store is already globally visible
+         * we can skip it.
+         */
+        if (__wt_txn_tw_stop_visible_all(session, &cbt->upd_value->tw)) {
+            WT_STAT_CONN_INCR(session, cursor_next_hs_tombstone);
+            WT_STAT_DATA_INCR(session, cursor_next_hs_tombstone);
+            continue;
+        }
+
+        /*
+         * Don't check the visibility of the record if we want to read any history store record that
+         * is not obsolete.
+         */
+        if (F_ISSET(std_cursor, WT_CURSTD_HS_READ_COMMITTED))
+            break;
+
+        /*
+         * If the stop time point of a record is visible to us, check the next one.
+         */
+        if (__wt_txn_tw_stop_visible(session, &cbt->upd_value->tw))
+            continue;
+
+        /* If the start time point is visible to us, let's return that record. */
+        if (__wt_txn_tw_start_visible(session, &cbt->upd_value->tw))
+            break;
+    }
+
+done:
+err:
+    __wt_scr_free(session, &datastore_key);
+    return (ret);
+}
+
+/*
+ * __curhs_search_near --
+ *     WT_CURSOR->search_near method for the hs cursor type.
+ *
+ *     The search key is whatever was last set on the underlying file cursor. When the counter
+ *     field was not supplied the key describes a range, so the cursor is placed at the start of
+ *     that range and moved forwards to the first acceptable record. Otherwise the cursor is placed
+ *     on the closest record that is smaller than or equal to the search key and moved backwards to
+ *     the first acceptable record. On return *exactp holds the comparison of the raw key the file
+ *     cursor landed on against the search key (before any visibility walk). Returns WT_NOTFOUND
+ *     when no suitable record exists.
+ */
+static int
+__curhs_search_near(WT_CURSOR *cursor, int *exactp)
+{
+    WT_CURSOR *file_cursor;
+    WT_CURSOR_HS *hs_cursor;
+    WT_DECL_ITEM(srch_key);
+    WT_DECL_RET;
+    WT_SESSION_IMPL *session;
+    int cmp;
+    int exact;
+
+    hs_cursor = (WT_CURSOR_HS *)cursor;
+    file_cursor = hs_cursor->file_cursor;
+    *exactp = 0;
+    cmp = 0;
+
+    CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, search_near, CUR2BT(file_cursor));
+
+    WT_ERR(__wt_scr_alloc(session, 0, &srch_key));
+    /* At least we have the btree id set. */
+    WT_ASSERT(session, F_ISSET(hs_cursor, WT_HS_CUR_BTREE_ID_SET));
+    /* Keep a private copy of the search key: moving the file cursor changes its key. */
+    WT_ERR(__wt_buf_set(session, srch_key, file_cursor->key.data, file_cursor->key.size));
+    WT_ERR_NOTFOUND_OK(__wt_hs_cursor_search_near(session, file_cursor, &exact), true);
+
+    /* Empty history store is fine. */
+    if (ret == WT_NOTFOUND)
+        goto done;
+
+    /*
+     * There are some key fields missing so we are searching a range of keys. Place the cursor at
+     * the start of the range.
+     */
+    if (!F_ISSET(hs_cursor, WT_HS_CUR_COUNTER_SET)) {
+        /*
+         * If we raced with a history store insert, we may be two or more records away from our
+         * target. Keep iterating forwards until we are on or past our target key.
+         *
+         * We can't use the cursor positioning helper that we use for regular reads since that will
+         * place us at the end of a particular key/timestamp range whereas we want to be placed at
+         * the beginning.
+         */
+        if (exact < 0) {
+            while ((ret = __wt_hs_cursor_next(session, file_cursor)) == 0) {
+                WT_ERR(__wt_compare(session, NULL, &file_cursor->key, srch_key, &cmp));
+                if (cmp >= 0)
+                    break;
+            }
+            /* No entries greater than or equal to the key we searched for. */
+            WT_ERR_NOTFOUND_OK(ret, true);
+            if (ret == WT_NOTFOUND)
+                goto done;
+
+            *exactp = cmp;
+        } else
+            *exactp = 1;
+
+        WT_ERR(__curhs_next_visible(session, hs_cursor));
+    }
+    /* Search the closest match that is smaller or equal to the search key. */
+    else {
+        /*
+         * Because of the special visibility rules for the history store, a new key can appear in
+         * between our search and the set of updates that we're interested in. Keep trying until we
+         * find it.
+         *
+         * There may be no history store entries for the given btree id and record key if they have
+         * been removed by rollback to stable.
+         *
+         * Note that we need to compare the raw key off the cursor to determine where we are in the
+         * history store as opposed to comparing the embedded data store key since the ordering is
+         * not guaranteed to be the same.
+         */
+        if (exact > 0) {
+            /*
+             * It's possible that we may race with a history store insert for another key. So we may
+             * be more than one record away the end of our target key/timestamp range. Keep
+             * iterating backwards until we land on our key.
+             *
+             * The method must be handed the file cursor it belongs to, not the history store
+             * cursor wrapping it.
+             */
+            while ((ret = file_cursor->prev(file_cursor)) == 0) {
+                WT_STAT_CONN_INCR(session, cursor_skip_hs_cur_position);
+                WT_STAT_DATA_INCR(session, cursor_skip_hs_cur_position);
+
+                WT_ERR(__wt_compare(session, NULL, &file_cursor->key, srch_key, &cmp));
+                if (cmp <= 0)
+                    break;
+            }
+            /*
+             * No entries smaller than or equal to the key we searched for. Stop here, as the
+             * forward walk does, rather than running the visibility walk on an unpositioned
+             * cursor.
+             */
+            WT_ERR_NOTFOUND_OK(ret, true);
+            if (ret == WT_NOTFOUND)
+                goto done;
+
+            *exactp = cmp;
+        } else
+            *exactp = -1;
+#ifdef HAVE_DIAGNOSTIC
+        if (ret == 0) {
+            WT_ERR(__wt_compare(session, NULL, &file_cursor->key, srch_key, &cmp));
+            WT_ASSERT(session, cmp <= 0);
+        }
+#endif
+
+        WT_ERR(__curhs_prev_visible(session, hs_cursor));
+    }
+
+done:
+err:
+    __wt_scr_free(session, &srch_key);
+    API_END_RET(session, ret);
}
/*
@@ -356,7 +675,7 @@ __wt_curhs_open(WT_SESSION_IMPL *session, WT_CURSOR *owner, WT_CURSOR **cursorp)
__wt_cursor_notsup, /* prev */
__curhs_reset, /* reset */
__wt_cursor_notsup, /* search */
- __wt_cursor_search_near_notsup, /* search-near */
+ __curhs_search_near, /* search-near */
__curhs_insert, /* insert */
__wt_cursor_modify_value_format_notsup, /* modify */
__wt_cursor_notsup, /* update */
@@ -369,6 +688,7 @@ __wt_curhs_open(WT_SESSION_IMPL *session, WT_CURSOR *owner, WT_CURSOR **cursorp)
WT_CURSOR *cursor;
WT_CURSOR_HS *hs_cursor;
WT_DECL_RET;
+ WT_ITEM *datastore_key;
WT_RET(__wt_calloc_one(session, &hs_cursor));
cursor = (WT_CURSOR *)hs_cursor;
@@ -381,6 +701,11 @@ __wt_curhs_open(WT_SESSION_IMPL *session, WT_CURSOR *owner, WT_CURSOR **cursorp)
WT_ERR(__hs_cursor_open_int(session, &hs_cursor->file_cursor));
WT_ERR(__wt_cursor_init(cursor, WT_HS_URI, owner, NULL, cursorp));
+ WT_TIME_WINDOW_INIT(&hs_cursor->time_window);
+ hs_cursor->btree_id = 0;
+ datastore_key = &hs_cursor->datastore_key;
+ WT_ERR(__wt_scr_alloc(session, 0, &datastore_key));
+ hs_cursor->flags = 0;
WT_TIME_WINDOW_INIT(&hs_cursor->time_window);
diff --git a/src/third_party/wiredtiger/src/cursor/cur_std.c b/src/third_party/wiredtiger/src/cursor/cur_std.c
index 364a6dffbdb..92b8e5c5b3e 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_std.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_std.c
@@ -979,6 +979,33 @@ err:
}
/*
+ * __cursor_config_debug --
+ *     Apply the settings in the "debug" configuration category to a cursor.
+ */
+static int
+__cursor_config_debug(WT_CURSOR *cursor, const char *cfg[])
+{
+    WT_CONFIG_ITEM cval;
+    WT_DECL_RET;
+    WT_SESSION_IMPL *session;
+
+    session = (WT_SESSION_IMPL *)cursor->session;
+
+    /*
+     * A missing option is not an error: reconfigure hands us only the single configuration string
+     * rather than the full stack, so the key may legitimately be absent. Any other failure is
+     * returned to the caller.
+     */
+    ret = __wt_config_gets_def(session, cfg, "debug.release_evict", 0, &cval);
+    if (ret != 0) {
+        WT_RET_NOTFOUND_OK(ret);
+        return (0);
+    }
+
+    /* Mirror the boolean onto the cursor's flag. */
+    if (cval.val != 0)
+        F_SET(cursor, WT_CURSTD_DEBUG_RESET_EVICT);
+    else
+        F_CLR(cursor, WT_CURSTD_DEBUG_RESET_EVICT);
+    return (0);
+}
+
+/*
* __wt_cursor_reconfigure --
* Set runtime-configurable settings.
*/
@@ -988,6 +1015,7 @@ __wt_cursor_reconfigure(WT_CURSOR *cursor, const char *config)
WT_CONFIG_ITEM cval;
WT_DECL_RET;
WT_SESSION_IMPL *session;
+ const char *cfg[] = {config, NULL};
CURSOR_API_CALL(cursor, session, reconfigure, NULL);
@@ -1018,6 +1046,8 @@ __wt_cursor_reconfigure(WT_CURSOR *cursor, const char *config)
} else
WT_ERR_NOTFOUND_OK(ret, false);
+ WT_ERR(__cursor_config_debug(cursor, cfg));
+
err:
API_END_RET(session, ret);
}
@@ -1110,6 +1140,7 @@ __wt_cursor_init(
cursor->update = __wt_cursor_notsup;
F_CLR(cursor, WT_CURSTD_CACHEABLE);
}
+ WT_RET(__cursor_config_debug(cursor, cfg));
/*
* dump If an index cursor is opened with dump, then this function is called on the index files,
diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h
index 2edff95e833..9a737ed158e 100644
--- a/src/third_party/wiredtiger/src/include/cursor.h
+++ b/src/third_party/wiredtiger/src/include/cursor.h
@@ -287,6 +287,16 @@ struct __wt_cursor_hs {
WT_CURSOR *file_cursor; /* Queries of regular history store data */
WT_TIME_WINDOW time_window;
+ uint32_t btree_id;
+ WT_ITEM datastore_key;
+
+ /* AUTOMATIC FLAG VALUE GENERATION START */
+#define WT_HS_CUR_BTREE_ID_SET 0x1u
+#define WT_HS_CUR_COUNTER_SET 0x2u
+#define WT_HS_CUR_KEY_SET 0x4u
+#define WT_HS_CUR_TS_SET 0x8u
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint8_t flags;
};
struct __wt_cursor_index {
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index d902d72ff01..cf8bcdd353e 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -1652,6 +1652,7 @@ extern void __wt_curstat_cache_walk(WT_SESSION_IMPL *session);
extern void __wt_curstat_dsrc_final(WT_CURSOR_STAT *cst);
extern void __wt_curtable_set_key(WT_CURSOR *cursor, ...);
extern void __wt_curtable_set_value(WT_CURSOR *cursor, ...);
+extern void __wt_dhandle_update_write_gens(WT_SESSION_IMPL *session);
extern void __wt_encrypt_size(
WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t incoming_size, size_t *sizep);
extern void __wt_err_func(
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index f5568a8b7e1..f44b4e188c2 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -699,30 +699,31 @@ struct __wt_cursor {
const char *internal_uri;
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_CURSTD_APPEND 0x000001u
-#define WT_CURSTD_BULK 0x000002u
-#define WT_CURSTD_CACHEABLE 0x000004u
-#define WT_CURSTD_CACHED 0x000008u
-#define WT_CURSTD_DEAD 0x000010u
-#define WT_CURSTD_DEBUG_COPY_KEY 0x000020u
-#define WT_CURSTD_DEBUG_COPY_VALUE 0x000040u
-#define WT_CURSTD_DEBUG_RESET_EVICT 0x000080u
-#define WT_CURSTD_DUMP_HEX 0x000100u
-#define WT_CURSTD_DUMP_JSON 0x000200u
-#define WT_CURSTD_DUMP_PRETTY 0x000400u
-#define WT_CURSTD_DUMP_PRINT 0x000800u
-#define WT_CURSTD_IGNORE_TOMBSTONE 0x001000u
-#define WT_CURSTD_JOINED 0x002000u
-#define WT_CURSTD_KEY_EXT 0x004000u /* Key points out of tree. */
-#define WT_CURSTD_KEY_INT 0x008000u /* Key points into tree. */
-#define WT_CURSTD_META_INUSE 0x010000u
-#define WT_CURSTD_OPEN 0x020000u
-#define WT_CURSTD_OVERWRITE 0x040000u
-#define WT_CURSTD_RAW 0x080000u
-#define WT_CURSTD_RAW_SEARCH 0x100000u
-#define WT_CURSTD_UPDATE_LOCAL 0x200000u
-#define WT_CURSTD_VALUE_EXT 0x400000u /* Value points out of tree. */
-#define WT_CURSTD_VALUE_INT 0x800000u /* Value points into tree. */
+#define WT_CURSTD_APPEND 0x0000001u
+#define WT_CURSTD_BULK 0x0000002u
+#define WT_CURSTD_CACHEABLE 0x0000004u
+#define WT_CURSTD_CACHED 0x0000008u
+#define WT_CURSTD_DEAD 0x0000010u
+#define WT_CURSTD_DEBUG_COPY_KEY 0x0000020u
+#define WT_CURSTD_DEBUG_COPY_VALUE 0x0000040u
+#define WT_CURSTD_DEBUG_RESET_EVICT 0x0000080u
+#define WT_CURSTD_DUMP_HEX 0x0000100u
+#define WT_CURSTD_DUMP_JSON 0x0000200u
+#define WT_CURSTD_DUMP_PRETTY 0x0000400u
+#define WT_CURSTD_DUMP_PRINT 0x0000800u
+#define WT_CURSTD_HS_READ_COMMITTED 0x0001000u
+#define WT_CURSTD_IGNORE_TOMBSTONE 0x0002000u
+#define WT_CURSTD_JOINED 0x0004000u
+#define WT_CURSTD_KEY_EXT 0x0008000u /* Key points out of tree. */
+#define WT_CURSTD_KEY_INT 0x0010000u /* Key points into tree. */
+#define WT_CURSTD_META_INUSE 0x0020000u
+#define WT_CURSTD_OPEN 0x0040000u
+#define WT_CURSTD_OVERWRITE 0x0080000u
+#define WT_CURSTD_RAW 0x0100000u
+#define WT_CURSTD_RAW_SEARCH 0x0200000u
+#define WT_CURSTD_UPDATE_LOCAL 0x0400000u
+#define WT_CURSTD_VALUE_EXT 0x0800000u /* Value points out of tree. */
+#define WT_CURSTD_VALUE_INT 0x1000000u /* Value points into tree. */
/* AUTOMATIC FLAG VALUE GENERATION STOP */
#define WT_CURSTD_KEY_SET (WT_CURSTD_KEY_EXT | WT_CURSTD_KEY_INT)
#define WT_CURSTD_VALUE_SET (WT_CURSTD_VALUE_EXT | WT_CURSTD_VALUE_INT)
@@ -867,6 +868,12 @@ struct __wt_session {
* @config{checkpoint, the name of a checkpoint to open (the reserved name
* "WiredTigerCheckpoint" opens the most recent internal checkpoint taken for the object).
* The cursor does not support data modification., a string; default empty.}
+ * @config{debug = (, configure debug specific behavior on a cursor. Generally only used
+ * for internal testing purposes., a set of related configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;release_evict, Configure the cursor to evict the page
+ * positioned on when the reset API is used., a boolean flag; default \c false.}
+ * @config{
+ * ),,}
* @config{dump, configure the cursor for dump format inputs and outputs: "hex" selects a
* simple hexadecimal format\, "json" selects a JSON format with each record formatted as
* fields named by column names if available\, "pretty" selects a human-readable format
diff --git a/src/third_party/wiredtiger/src/txn/txn_recover.c b/src/third_party/wiredtiger/src/txn/txn_recover.c
index 6e042fc9b33..c08c5d457b7 100644
--- a/src/third_party/wiredtiger/src/txn/txn_recover.c
+++ b/src/third_party/wiredtiger/src/txn/txn_recover.c
@@ -548,8 +548,7 @@ err:
/*
* __recovery_setup_file --
- * Set up the recovery slot for a file, track the largest file ID, and update the base write gen
- * based on the file's configuration.
+ * Set up the recovery slot for a file and track the largest file ID.
*/
static int
__recovery_setup_file(WT_RECOVERY *r, const char *uri, const char *config)
@@ -595,8 +594,7 @@ __recovery_setup_file(WT_RECOVERY *r, const char *uri, const char *config)
(WT_IS_MAX_LSN(&r->max_ckpt_lsn) || __wt_log_cmp(&lsn, &r->max_ckpt_lsn) > 0))
WT_ASSIGN_LSN(&r->max_ckpt_lsn, &lsn);
- /* Update the base write gen based on this file's configuration. */
- return (__wt_metadata_update_base_write_gen(r->session, config));
+ return (0);
}
/*
@@ -979,6 +977,16 @@ done:
*/
WT_ERR(session->iface.checkpoint(&session->iface, "force=1"));
+ /* Initialize the connection's base write generation after rollback to stable. */
+ WT_ERR(__wt_metadata_init_base_write_gen(session));
+
+ /*
+ * Update the open dhandles' write generations and base write generations with the connection's
+ * base write generation. The write generations of the pages which are on disk will be
+ * initialized when they are loaded into the cache.
+ */
+ __wt_dhandle_update_write_gens(session);
+
/*
* If we're downgrading and have newer log files, force an archive, no matter what the archive
* setting is.
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index 90ec9389deb..ce904576651 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -333,7 +333,16 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
if (valid_update_found) {
WT_ERR(__wt_upd_alloc(session, &full_value, WT_UPDATE_STANDARD, &upd, NULL));
- upd->txnid = cbt->upd_value->tw.start_txn;
+ /*
+ * Set the transaction id of updates to WT_TXN_NONE when called from recovery, because
+ * the connection's write generation will be initialized after rollback to stable and the
+ * updates in the cache will be problematic. The transaction id of pages which are on
+ * disk will be automatically reset as part of unpacking the cell when loaded into the cache.
+ */
+ if (F_ISSET(S2C(session), WT_CONN_RECOVERING))
+ upd->txnid = WT_TXN_NONE;
+ else
+ upd->txnid = cbt->upd_value->tw.start_txn;
upd->durable_ts = cbt->upd_value->tw.durable_start_ts;
upd->start_ts = cbt->upd_value->tw.start_ts;
__wt_verbose(session, WT_VERB_RTS,