summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Luke Chen <luke.chen@mongodb.com> 2020-05-21 17:27:38 +1000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-05-21 07:53:11 +0000
commit: 86a73ffa9f6d38f878420e4e2da68deb4d01d66b (patch)
tree: 01f990c201cd0d341588d7687efbfbdf959f8d00
parent: 607a0ac8c11e32af57aafe56f8adfd80f5d03b4b (diff)
download: mongo-86a73ffa9f6d38f878420e4e2da68deb4d01d66b.tar.gz
Import wiredtiger: 25c305c94d2ba492841a7c2a270d9a92ea1fb284 from branch mongodb-4.4
ref: 7bf362af19..25c305c94d
for: 4.4.0-rc7

WT-6212 Dump failure message to Evergreen log for checkpoint-stress-test
WT-6232 Fix the logic that incorrectly returns EINVAL for in_mem config with prepared transactions
WT-6238 Fix salvage panic seeing prepared updates restored from disk
WT-6248 Add HS records to page dumps
WT-6285 Only retry reading if the prepared update is restored from the disk
-rw-r--r-- src/third_party/wiredtiger/dist/api_data.py | 5
-rw-r--r-- src/third_party/wiredtiger/import.data | 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_debug.c | 397
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_vrfy.c | 21
-rw-r--r-- src/third_party/wiredtiger/src/config/config_def.c | 12
-rw-r--r-- src/third_party/wiredtiger/src/history/hs.c | 2
-rw-r--r-- src/third_party/wiredtiger/src/include/extern.h | 4
-rw-r--r-- src/third_party/wiredtiger/src/include/txn.i | 17
-rw-r--r-- src/third_party/wiredtiger/src/include/wiredtiger.in | 3
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_visibility.c | 16
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn.c | 6
-rwxr-xr-x src/third_party/wiredtiger/test/evergreen.yml | 9
12 files changed, 255 insertions, 239 deletions
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index 3a428bc8e71..b32f8455b21 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -1386,11 +1386,6 @@ methods = {
Display the contents of on-disk blocks as they are verified,
using the application's message handler, intended for debugging''',
type='boolean'),
- Config('dump_history', 'false', r'''
- Display a key's values along with its time window as
- they are verified against the history store, using the application's
- message handler, intended for debugging''',
- type='boolean'),
Config('dump_layout', 'false', r'''
Display the layout of the files as they are verified, using the
application's message handler, intended for debugging; requires
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 2af6d667182..75d0caac8d8 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-4.4",
- "commit": "7bf362af190a36a31589d3d78eb1cd1a5963b79d"
+ "commit": "25c305c94d2ba492841a7c2a270d9a92ea1fb284"
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c
index c6891085b43..09871831d2b 100644
--- a/src/third_party/wiredtiger/src/btree/bt_debug.c
+++ b/src/third_party/wiredtiger/src/btree/bt_debug.c
@@ -16,6 +16,13 @@ typedef struct __wt_dbg WT_DBG;
struct __wt_dbg {
WT_SESSION_IMPL *session; /* Enclosing session */
+ WT_ITEM *key;
+
+ WT_ITEM *hs_key; /* History store lookups */
+ WT_ITEM *hs_value;
+ uint32_t session_flags;
+ bool hs_is_local, is_owner;
+
/*
* When using the standard event handlers, the debugging output has to do its own message
* handling because its output isn't line-oriented.
@@ -37,7 +44,7 @@ static const /* Output separator */
static int __debug_col_skip(WT_DBG *, WT_INSERT_HEAD *, const char *, bool);
static int __debug_config(WT_SESSION_IMPL *, WT_DBG *, const char *);
-static int __debug_modify(WT_DBG *, WT_UPDATE *, const char *);
+static int __debug_modify(WT_DBG *, const uint8_t *, const char *);
static int __debug_page(WT_DBG *, WT_REF *, uint32_t);
static int __debug_page_col_fix(WT_DBG *, WT_REF *);
static int __debug_page_col_int(WT_DBG *, WT_PAGE *, uint32_t);
@@ -152,19 +159,6 @@ __debug_item_value(WT_DBG *ds, const char *tag, const void *data_arg, size_t siz
}
/*
- * __debug_time_window --
- * Dump a time window, with an optional tag.
- */
-static inline int
-__debug_time_window(WT_DBG *ds, const char *tag, WT_TIME_WINDOW *tw)
-{
- char time_string[WT_TIME_STRING_SIZE];
-
- return (ds->f(ds, "\t%s%s%s\n", tag == NULL ? "" : tag, tag == NULL ? "" : " ",
- __wt_time_window_to_string(tw, time_string)));
-}
-
-/*
* __dmsg_event --
* Send a debug message to the event handler.
*/
@@ -289,6 +283,12 @@ __debug_wrapup(WT_DBG *ds)
session = ds->session;
msg = ds->msg;
+ if (ds->hs_is_local)
+ WT_TRET(__wt_hs_cursor_close(session, ds->session_flags, ds->is_owner));
+
+ __wt_scr_free(session, &ds->key);
+ __wt_scr_free(session, &ds->hs_key);
+ __wt_scr_free(session, &ds->hs_value);
__wt_scr_free(session, &ds->t1);
__wt_scr_free(session, &ds->t2);
@@ -406,6 +406,100 @@ err:
}
/*
+ * __debug_hs_cursor --
+ * Dump information pointed to by a single history store cursor.
+ */
+static int
+__debug_hs_cursor(WT_DBG *ds, WT_CURSOR *hs_cursor)
+{
+ WT_SESSION_IMPL *session;
+ WT_TIME_WINDOW tw;
+ uint64_t hs_counter, hs_upd_type;
+ uint32_t hs_btree_id;
+ char time_string[WT_TIME_STRING_SIZE];
+
+ session = ds->session;
+
+ WT_TIME_WINDOW_INIT(&tw);
+
+ WT_RET(hs_cursor->get_key(hs_cursor, &hs_btree_id, ds->hs_key, &tw.start_ts, &hs_counter));
+ WT_RET(hs_cursor->get_value(
+ hs_cursor, &tw.stop_ts, &tw.durable_start_ts, &hs_upd_type, ds->hs_value));
+
+ switch (hs_upd_type) {
+ case WT_UPDATE_MODIFY:
+ WT_RET(ds->f(ds,
+ "\t"
+ "hs-modify: %s\n",
+ __wt_time_window_to_string(&tw, time_string)));
+ WT_RET(__debug_modify(ds, ds->hs_value->data, "V"));
+ break;
+ case WT_UPDATE_STANDARD:
+ WT_RET(ds->f(ds,
+ "\t"
+ "hs-update: %s\n",
+ __wt_time_window_to_string(&tw, time_string)));
+ WT_RET(__debug_item_value(ds, "V", ds->hs_value->data, ds->hs_value->size));
+ break;
+ default:
+ /*
+ * Currently, we expect only modifies or full values to be exposed by hs_cursors. This means
+ * we can ignore other types for now.
+ */
+ WT_ASSERT(session, hs_upd_type == WT_UPDATE_MODIFY || hs_upd_type == WT_UPDATE_STANDARD);
+ break;
+ }
+ return (0);
+}
+
+/*
+ * __debug_hs_key --
+ * Dump any HS records associated with the key.
+ */
+static int
+__debug_hs_key(WT_DBG *ds)
+{
+ WT_BTREE *btree;
+ WT_CURSOR *hs_cursor;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ wt_timestamp_t older_start_ts;
+ uint64_t hs_counter;
+ uint32_t hs_btree_id;
+ int cmp, exact;
+
+ session = ds->session;
+ btree = S2BT(session);
+ hs_btree_id = btree->id;
+
+ /*
+ * Open a history store cursor positioned at the end of the data store key (the newest record)
+ * and iterate backwards until we reach a different key or btree.
+ */
+ hs_cursor = session->hs_cursor;
+ hs_cursor->set_key(hs_cursor, hs_btree_id, ds->key, WT_TS_MAX, WT_TXN_MAX);
+ ret = hs_cursor->search_near(hs_cursor, &exact);
+
+ /* If we jumped to the next key, go back to the previous key. */
+ if (ret == 0 && exact > 0)
+ ret = hs_cursor->prev(hs_cursor);
+
+ for (; ret == 0; ret = hs_cursor->prev(hs_cursor)) {
+ WT_RET(hs_cursor->get_key(hs_cursor, &hs_btree_id, ds->t1, &older_start_ts, &hs_counter));
+
+ if (hs_btree_id != btree->id)
+ break;
+
+ WT_RET(__wt_compare(session, NULL, ds->key, ds->t1, &cmp));
+ if (cmp != 0)
+ break;
+
+ WT_RET(__debug_hs_cursor(ds, hs_cursor));
+ }
+ return (ret == WT_NOTFOUND ? 0 : ret);
+}
+
+/*
* __debug_cell_int_data --
* Dump a single WT_COL_INT or WT_ROW_INT disk image cell's data in debugging mode.
*/
@@ -454,7 +548,8 @@ __debug_cell_int(WT_DBG *ds, const WT_PAGE_HEADER *dsk, WT_CELL_UNPACK_ADDR *unp
case WT_CELL_ADDR_INT:
case WT_CELL_ADDR_LEAF:
case WT_CELL_ADDR_LEAF_NO:
- WT_RET(ds->f(ds, ", %s", __wt_time_aggregate_to_string(&unpack->ta, time_string)));
+ if (!WT_TIME_AGGREGATE_IS_EMPTY(&unpack->ta))
+ WT_RET(ds->f(ds, ", %s", __wt_time_aggregate_to_string(&unpack->ta, time_string)));
WT_RET(__wt_scr_alloc(session, 128, &buf));
ret = ds->f(ds, ", %s", __wt_addr_string(session, unpack->data, unpack->size, buf));
@@ -484,81 +579,31 @@ __debug_dsk_int(WT_DBG *ds, const WT_PAGE_HEADER *dsk)
}
/*
- * __debug_cell_kv_data --
- * Dump a single WT_COL_VAR or WT_ROW_LEAF disk image cell's data in debugging mode.
+ * __debug_cell_kv --
+ * Dump a single unpacked WT_COL_VAR or WT_ROW_LEAF disk image WT_CELL.
*/
static int
-__debug_cell_kv_data(
+__debug_cell_kv(
WT_DBG *ds, WT_PAGE *page, int page_type, const char *tag, WT_CELL_UNPACK_KV *unpack)
{
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
WT_SESSION_IMPL *session;
+ char time_string[WT_TIME_STRING_SIZE];
const char *p;
session = ds->session;
- /*
- * Column-store references to deleted cells return a NULL cell reference.
- */
+ /* Column-store references to deleted cells return a NULL cell reference. */
if (unpack == NULL)
return (__debug_item(ds, tag, "deleted", strlen("deleted")));
- /*
- * Row-store references to empty cells return a NULL on-page reference.
- */
+ /* Row-store references to empty cells return a NULL on-page reference. */
if (unpack->cell == NULL)
- return (__debug_item(ds, tag, "", 0));
-
- switch (unpack->raw) {
- case WT_CELL_DEL:
- p = __wt_cell_type_string(unpack->raw);
- return (__debug_item(ds, tag, p, strlen(p)));
- }
-
- WT_RET(__wt_scr_alloc(session, 256, &buf));
- WT_ERR(page == NULL ? __wt_dsk_cell_data_ref(session, page_type, unpack, buf) :
- __wt_page_cell_data_ref(session, page, unpack, buf));
-
- switch (unpack->raw) {
- case WT_CELL_KEY:
- case WT_CELL_KEY_OVFL:
- case WT_CELL_KEY_PFX:
- case WT_CELL_KEY_SHORT:
- case WT_CELL_KEY_SHORT_PFX:
- WT_ERR(__debug_item_key(ds, tag, buf->data, buf->size));
- break;
- case WT_CELL_VALUE:
- case WT_CELL_VALUE_COPY:
- case WT_CELL_VALUE_OVFL:
- case WT_CELL_VALUE_SHORT:
- WT_ERR(__debug_item_value(ds, tag, buf->data, buf->size));
- break;
- }
-
-err:
- __wt_scr_free(session, &buf);
- return (ret);
-}
-
-/*
- * __debug_cell_kv --
- * Dump a single unpacked WT_COL_VAR or WT_ROW_LEAF disk image WT_CELL.
- */
-static int
-__debug_cell_kv(WT_DBG *ds, const WT_PAGE_HEADER *dsk, WT_CELL_UNPACK_KV *unpack)
-{
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- char time_string[WT_TIME_STRING_SIZE];
-
- session = ds->session;
+ return (__debug_item(ds, tag, "zero-length", strlen("zero-length")));
WT_RET(ds->f(ds, "\t%s: len %" PRIu32, __wt_cell_type_string(unpack->raw), unpack->size));
- /* Dump cell's per-disk page type information. */
- switch (dsk->type) {
+ /* Dump per-disk page type information. */
+ switch (page_type) {
case WT_PAGE_COL_VAR:
WT_RET(ds->f(ds, ", rle: %" PRIu64, __wt_cell_rle(unpack)));
break;
@@ -572,7 +617,7 @@ __debug_cell_kv(WT_DBG *ds, const WT_PAGE_HEADER *dsk, WT_CELL_UNPACK_KV *unpack
break;
}
- /* Dump timestamps. */
+ /* Dump time window. */
switch (unpack->raw) {
case WT_CELL_DEL:
case WT_CELL_VALUE:
@@ -580,23 +625,48 @@ __debug_cell_kv(WT_DBG *ds, const WT_PAGE_HEADER *dsk, WT_CELL_UNPACK_KV *unpack
case WT_CELL_VALUE_OVFL:
case WT_CELL_VALUE_OVFL_RM:
case WT_CELL_VALUE_SHORT:
- WT_RET(ds->f(ds, ", %s", __wt_time_window_to_string(&unpack->tw, time_string)));
+ if (!WT_TIME_WINDOW_IS_EMPTY(&unpack->tw))
+ WT_RET(ds->f(ds, ", %s", __wt_time_window_to_string(&unpack->tw, time_string)));
break;
}
- /* Dump overflow addresses. */
+ /* Column-store deleted cells. */
+ switch (unpack->raw) {
+ case WT_CELL_DEL:
+ p = __wt_cell_type_string(unpack->raw);
+ return (__debug_item(ds, tag, p, strlen(p)));
+ }
+
+ /* Overflow addresses. */
switch (unpack->raw) {
case WT_CELL_KEY_OVFL:
case WT_CELL_VALUE_OVFL:
- WT_RET(__wt_scr_alloc(session, 128, &buf));
- ret = ds->f(ds, ", %s", __wt_addr_string(session, unpack->data, unpack->size, buf));
- __wt_scr_free(session, &buf);
- WT_RET(ret);
+ WT_RET(ds->f(ds, ", %s", __wt_addr_string(session, unpack->data, unpack->size, ds->t1)));
break;
}
WT_RET(ds->f(ds, "\n"));
- return (__debug_cell_kv_data(ds, NULL, dsk->type, NULL, unpack));
+ WT_RET(page == NULL ? __wt_dsk_cell_data_ref(session, page_type, unpack, ds->t1) :
+ __wt_page_cell_data_ref(session, page, unpack, ds->t1));
+
+ /* Standard key/value cells. */
+ switch (unpack->raw) {
+ case WT_CELL_KEY:
+ case WT_CELL_KEY_OVFL:
+ case WT_CELL_KEY_PFX:
+ case WT_CELL_KEY_SHORT:
+ case WT_CELL_KEY_SHORT_PFX:
+ WT_RET(__debug_item_key(ds, tag, ds->t1->data, ds->t1->size));
+ break;
+ case WT_CELL_VALUE:
+ case WT_CELL_VALUE_COPY:
+ case WT_CELL_VALUE_OVFL:
+ case WT_CELL_VALUE_SHORT:
+ WT_RET(__debug_item_value(ds, tag, ds->t1->data, ds->t1->size));
+ break;
+ }
+
+ return (0);
}
/*
@@ -609,7 +679,7 @@ __debug_dsk_kv(WT_DBG *ds, const WT_PAGE_HEADER *dsk)
WT_CELL_UNPACK_KV unpack;
WT_CELL_FOREACH_KV (ds->session, dsk, unpack) {
- WT_RET(__debug_cell_kv(ds, dsk, &unpack));
+ WT_RET(__debug_cell_kv(ds, NULL, dsk->type, NULL, &unpack));
}
WT_CELL_FOREACH_END;
return (0);
@@ -885,6 +955,8 @@ __wt_debug_cursor_page(void *cursor_arg, const char *ofile)
cbt = cursor_arg;
session = CUR2S(cursor_arg);
+ WT_RET(__wt_debug_cursor_tree_hs(cursor_arg, "/tmp/ohs"));
+
WT_WITH_BTREE(session, CUR2BT(cbt), ret = __wt_debug_page(session, NULL, cbt->ref, ofile));
return (ret);
}
@@ -915,87 +987,6 @@ __wt_debug_cursor_tree_hs(void *cursor_arg, const char *ofile)
}
/*
- * __wt_debug_cursor_hs --
- * Dump information pointed to by a single history store cursor.
- */
-int
-__wt_debug_cursor_hs(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor)
-{
- WT_DBG *ds, _ds;
- WT_DECL_ITEM(hs_key);
- WT_DECL_ITEM(hs_value);
- WT_DECL_RET;
- WT_TIME_WINDOW tw;
- WT_UPDATE *upd;
- uint64_t hs_counter, hs_upd_type_full;
- uint32_t hs_btree_id;
- uint8_t hs_upd_type;
-
- ds = &_ds;
- WT_TIME_WINDOW_INIT(&tw);
-
- WT_ERR(__wt_scr_alloc(session, 0, &hs_key));
- WT_ERR(__wt_scr_alloc(session, 0, &hs_value));
- WT_ERR(__debug_config(session, ds, NULL));
-
- WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, hs_key, &tw.start_ts, &hs_counter));
- WT_ERR(hs_cursor->get_value(
- hs_cursor, &tw.stop_ts, &tw.durable_start_ts, &hs_upd_type_full, hs_value));
- WT_ERR(__debug_time_window(ds, "T", &tw));
-
- hs_upd_type = (uint8_t)hs_upd_type_full;
- switch (hs_upd_type) {
- case WT_UPDATE_MODIFY:
- WT_ERR(__wt_upd_alloc(session, hs_value, hs_upd_type, &upd, NULL));
- WT_ERR(__debug_modify(ds, upd, "\tM "));
- break;
- case WT_UPDATE_STANDARD:
- WT_ERR(__debug_item_value(ds, "V", hs_value->data, hs_value->size));
- break;
- default:
- /*
- * Currently, we expect only modifies or full values to be exposed by hs_cursors. This means
- * we can ignore other types for now.
- */
- WT_ASSERT(session, hs_upd_type == WT_UPDATE_MODIFY || hs_upd_type == WT_UPDATE_STANDARD);
- break;
- }
-
-err:
- __wt_scr_free(session, &hs_key);
- __wt_scr_free(session, &hs_value);
- WT_RET(__debug_wrapup(ds));
-
- return (ret);
-}
-
-/*
- * __wt_debug_key_value --
- * Dump information about a key and/or value.
- */
-int
-__wt_debug_key_value(
- WT_SESSION_IMPL *session, WT_ITEM *key, uint64_t recno, uint64_t rle, WT_CELL_UNPACK_KV *value)
-{
- WT_DBG *ds, _ds;
- WT_DECL_RET;
-
- ds = &_ds;
-
- WT_ERR(__debug_config(session, ds, NULL));
-
- if (key == NULL)
- WT_ERR(ds->f(ds, "\tK {%" PRIu64 " %" PRIu64 "}", recno, rle));
- else
- WT_ERR(__debug_item_key(ds, "K", key->data, key->size));
- WT_ERR(__debug_time_window(ds, "T", &value->tw));
- WT_ERR(__debug_cell_kv_data(ds, NULL, value != NULL ? value->type : 0, "V", value));
-
-err:
- return (__debug_wrapup(ds));
-}
-
-/*
* __debug_tree --
* Dump the in-memory information for a tree.
*/
@@ -1030,6 +1021,17 @@ __debug_page(WT_DBG *ds, WT_REF *ref, uint32_t flags)
session = ds->session;
+ /* Set up history store support. */
+ if (!WT_IS_HS(S2BT(session))) {
+ WT_RET(__wt_scr_alloc(session, 100, &ds->key));
+ WT_RET(__wt_scr_alloc(session, 0, &ds->hs_key));
+ WT_RET(__wt_scr_alloc(session, 0, &ds->hs_value));
+ if (session->hs_cursor == NULL) {
+ WT_RET(__wt_hs_cursor(session, &ds->session_flags, &ds->is_owner));
+ ds->hs_is_local = true;
+ }
+ }
+
/* Dump the page metadata. */
WT_WITH_PAGE_INDEX(session, ret = __debug_page_metadata(ds, ref));
WT_RET(ret);
@@ -1257,12 +1259,15 @@ __debug_page_col_var(WT_DBG *ds, WT_REF *ref)
WT_COL *cip;
WT_INSERT_HEAD *update;
WT_PAGE *page;
+ WT_SESSION_IMPL *session;
uint64_t recno, rle;
uint32_t i;
+ uint8_t *p;
char tag[64];
unpack = &_unpack;
page = ref->page;
+ session = ds->session;
recno = ref->ref_recno;
WT_COL_FOREACH (page, cip, i) {
@@ -1270,7 +1275,14 @@ __debug_page_col_var(WT_DBG *ds, WT_REF *ref)
__wt_cell_unpack_kv(ds->session, page->dsk, cell, unpack);
rle = __wt_cell_rle(unpack);
WT_RET(__wt_snprintf(tag, sizeof(tag), "%" PRIu64 " %" PRIu64, recno, rle));
- WT_RET(__debug_cell_kv_data(ds, page, WT_PAGE_COL_VAR, tag, unpack));
+ WT_RET(__debug_cell_kv(ds, page, WT_PAGE_COL_VAR, tag, unpack));
+
+ if (!WT_IS_HS(S2BT(session))) {
+ p = ds->key->mem;
+ WT_RET(__wt_vpack_uint(&p, 0, recno));
+ ds->key->size = WT_PTRDIFF(p, ds->key->mem);
+ WT_RET(__debug_hs_key(ds));
+ }
if ((update = WT_COL_UPDATE(page, cip)) != NULL)
WT_RET(__debug_col_skip(ds, update, "update", false));
@@ -1326,8 +1338,6 @@ static int
__debug_page_row_leaf(WT_DBG *ds, WT_PAGE *page)
{
WT_CELL_UNPACK_KV *unpack, _unpack;
- WT_DECL_ITEM(key);
- WT_DECL_RET;
WT_INSERT_HEAD *insert;
WT_ROW *rip;
WT_SESSION_IMPL *session;
@@ -1336,32 +1346,31 @@ __debug_page_row_leaf(WT_DBG *ds, WT_PAGE *page)
session = ds->session;
unpack = &_unpack;
- WT_RET(__wt_scr_alloc(session, 256, &key));
/*
* Dump any K/V pairs inserted into the page before the first from-disk key on the page.
*/
if ((insert = WT_ROW_INSERT_SMALLEST(page)) != NULL)
- WT_ERR(__debug_row_skip(ds, insert));
+ WT_RET(__debug_row_skip(ds, insert));
/* Dump the page's K/V pairs. */
WT_ROW_FOREACH (page, rip, i) {
- WT_ERR(__wt_row_leaf_key(session, page, rip, key, false));
- WT_ERR(__debug_item_key(ds, "K", key->data, key->size));
+ WT_RET(__wt_row_leaf_key(session, page, rip, ds->key, false));
+ WT_RET(__debug_item_key(ds, "K", ds->key->data, ds->key->size));
__wt_row_leaf_value_cell(session, page, rip, NULL, unpack);
- WT_ERR(__debug_cell_kv_data(ds, page, WT_PAGE_ROW_LEAF, "V", unpack));
+ WT_RET(__debug_cell_kv(ds, page, WT_PAGE_ROW_LEAF, "V", unpack));
if ((upd = WT_ROW_UPDATE(page, rip)) != NULL)
- WT_ERR(__debug_update(ds, upd, false));
+ WT_RET(__debug_update(ds, upd, false));
+
+ if (!WT_IS_HS(S2BT(session)))
+ WT_RET(__debug_hs_key(ds));
if ((insert = WT_ROW_INSERT(page, rip)) != NULL)
- WT_ERR(__debug_row_skip(ds, insert));
+ WT_RET(__debug_row_skip(ds, insert));
}
-
-err:
- __wt_scr_free(session, &key);
- return (ret);
+ return (0);
}
/*
@@ -1372,10 +1381,21 @@ static int
__debug_col_skip(WT_DBG *ds, WT_INSERT_HEAD *head, const char *tag, bool hexbyte)
{
WT_INSERT *ins;
+ WT_SESSION_IMPL *session;
+ uint8_t *p;
+
+ session = ds->session;
WT_SKIP_FOREACH (ins, head) {
WT_RET(ds->f(ds, "\t%s %" PRIu64 "\n", tag, WT_INSERT_RECNO(ins)));
WT_RET(__debug_update(ds, ins->upd, hexbyte));
+
+ if (!WT_IS_HS(S2BT(session))) {
+ p = ds->key->mem;
+ WT_RET(__wt_vpack_uint(&p, 0, WT_INSERT_RECNO(ins)));
+ ds->key->size = WT_PTRDIFF(p, ds->key->mem);
+ WT_RET(__debug_hs_key(ds));
+ }
}
return (0);
}
@@ -1388,10 +1408,18 @@ static int
__debug_row_skip(WT_DBG *ds, WT_INSERT_HEAD *head)
{
WT_INSERT *ins;
+ WT_SESSION_IMPL *session;
+
+ session = ds->session;
WT_SKIP_FOREACH (ins, head) {
WT_RET(__debug_item_key(ds, "insert", WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins)));
WT_RET(__debug_update(ds, ins->upd, false));
+
+ if (!WT_IS_HS(S2BT(session))) {
+ WT_RET(__wt_buf_set(session, ds->key, WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins)));
+ WT_RET(__debug_hs_key(ds));
+ }
}
return (0);
}
@@ -1401,15 +1429,14 @@ __debug_row_skip(WT_DBG *ds, WT_INSERT_HEAD *head)
* Dump a modify update.
*/
static int
-__debug_modify(WT_DBG *ds, WT_UPDATE *upd, const char *tag)
+__debug_modify(WT_DBG *ds, const uint8_t *data, const char *tag)
{
size_t nentries, data_size, offset, size;
const size_t *p;
- const uint8_t *data;
- p = (size_t *)upd->data;
+ p = (size_t *)data;
memcpy(&nentries, p++, sizeof(size_t));
- data = upd->data + sizeof(size_t) + (nentries * 3 * sizeof(size_t));
+ data += sizeof(size_t) + (nentries * 3 * sizeof(size_t));
WT_RET(ds->f(ds, "%s%" WT_SIZET_FMT ": ", tag != NULL ? tag : "", nentries));
for (; nentries-- > 0; data += data_size) {
@@ -1442,7 +1469,7 @@ __debug_update(WT_DBG *ds, WT_UPDATE *upd, bool hexbyte)
break;
case WT_UPDATE_MODIFY:
WT_RET(ds->f(ds, "\tvalue {modify: "));
- WT_RET(__debug_modify(ds, upd, NULL));
+ WT_RET(__debug_modify(ds, upd->data, NULL));
WT_RET(ds->f(ds, "}\n"));
break;
case WT_UPDATE_RESERVE:
@@ -1544,8 +1571,8 @@ __debug_ref(WT_DBG *ds, WT_REF *ref)
if (F_ISSET(ref, WT_REF_FLAG_READING))
WT_RET(ds->f(ds, ", %s", "reading"));
- if (__wt_ref_addr_copy(session, ref, &addr))
- WT_RET(ds->f(ds, "%s, %s", __wt_time_aggregate_to_string(&addr.ta, time_string),
+ if (__wt_ref_addr_copy(session, ref, &addr) && !WT_TIME_AGGREGATE_IS_EMPTY(&addr.ta))
+ WT_RET(ds->f(ds, ", %s, %s", __wt_time_aggregate_to_string(&addr.ta, time_string),
__wt_addr_string(session, addr.addr, addr.size, ds->t1)));
return (ds->f(ds, "\n"));
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
index 5b0aee10191..10dca448c28 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
@@ -27,7 +27,6 @@ typedef struct {
((vs)->dump_address || (vs)->dump_blocks || (vs)->dump_layout || (vs)->dump_pages)
bool dump_address; /* Configure: dump special */
bool dump_blocks;
- bool dump_history;
bool dump_layout;
bool dump_pages;
@@ -65,9 +64,6 @@ __verify_config(WT_SESSION_IMPL *session, const char *cfg[], WT_VSTUFF *vs)
WT_RET(__wt_config_gets(session, cfg, "dump_blocks", &cval));
vs->dump_blocks = cval.val != 0;
- WT_RET(__wt_config_gets(session, cfg, "dump_history", &cval));
- vs->dump_history = cval.val != 0;
-
WT_RET(__wt_config_gets(session, cfg, "dump_layout", &cval));
vs->dump_layout = cval.val != 0;
@@ -84,7 +80,7 @@ __verify_config(WT_SESSION_IMPL *session, const char *cfg[], WT_VSTUFF *vs)
}
#if !defined(HAVE_DIAGNOSTIC)
- if (vs->dump_blocks || vs->dump_pages || vs->dump_history)
+ if (vs->dump_blocks || vs->dump_pages)
WT_RET_MSG(session, ENOTSUP, "the WiredTiger library was not built in diagnostic mode");
#endif
@@ -807,12 +803,6 @@ __verify_key_hs(
if (cmp != 0)
break;
-#ifdef HAVE_DIAGNOSTIC
- /* Optionally dump historical time windows and values in debug mode. */
- if (vs->dump_history)
- WT_RET(__wt_debug_cursor_hs(session, hs_cursor));
-#endif
-
/* Verify the newer record's start is later than the older record's stop. */
if (newer_start_ts < older_stop_ts) {
WT_RET_MSG(session, WT_ERROR,
@@ -996,11 +986,6 @@ __verify_page_content_leaf(
WT_RET(__wt_row_leaf_key(session, page, rip++, vs->tmp1, false));
WT_RET(__verify_key_hs(session, vs->tmp1, tw->start_ts, vs));
-
-#ifdef HAVE_DIAGNOSTIC
- if (vs->dump_history)
- WT_RET(__wt_debug_key_value(session, vs->tmp1, WT_RECNO_OOB, 0, &unpack));
-#endif
} else if (page->type == WT_PAGE_COL_VAR) {
rle = __wt_cell_rle(&unpack);
p = vs->tmp1->mem;
@@ -1008,10 +993,6 @@ __verify_page_content_leaf(
vs->tmp1->size = WT_PTRDIFF(p, vs->tmp1->mem);
WT_RET(__verify_key_hs(session, vs->tmp1, tw->start_ts, vs));
-#ifdef HAVE_DIAGNOSTIC
- if (vs->dump_history)
- WT_RET(__wt_debug_key_value(session, NULL, recno, rle, &unpack));
-#endif
recno += rle;
vs->records_so_far += rle;
}
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index a29bcbe25d0..d433fb02ff7 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -355,8 +355,8 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_transaction_sync[] = {
static const WT_CONFIG_CHECK confchk_WT_SESSION_verify[] = {
{"dump_address", "boolean", NULL, NULL, NULL, 0}, {"dump_blocks", "boolean", NULL, NULL, NULL, 0},
- {"dump_history", "boolean", NULL, NULL, NULL, 0}, {"dump_layout", "boolean", NULL, NULL, NULL, 0},
- {"dump_offsets", "list", NULL, NULL, NULL, 0}, {"dump_pages", "boolean", NULL, NULL, NULL, 0},
+ {"dump_layout", "boolean", NULL, NULL, NULL, 0}, {"dump_offsets", "list", NULL, NULL, NULL, 0},
+ {"dump_pages", "boolean", NULL, NULL, NULL, 0},
{"stable_timestamp", "boolean", NULL, NULL, NULL, 0}, {"strict", "boolean", NULL, NULL, NULL, 0},
{NULL, NULL, NULL, NULL, NULL, 0}};
@@ -930,10 +930,10 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
{"WT_SESSION.transaction_sync", "timeout_ms=1200000", confchk_WT_SESSION_transaction_sync, 1},
{"WT_SESSION.truncate", "", NULL, 0}, {"WT_SESSION.upgrade", "", NULL, 0},
{"WT_SESSION.verify",
- "dump_address=false,dump_blocks=false,dump_history=false,"
- "dump_layout=false,dump_offsets=,dump_pages=false,"
- "stable_timestamp=false,strict=false",
- confchk_WT_SESSION_verify, 8},
+ "dump_address=false,dump_blocks=false,dump_layout=false,"
+ "dump_offsets=,dump_pages=false,stable_timestamp=false,"
+ "strict=false",
+ confchk_WT_SESSION_verify, 7},
{"colgroup.meta", "app_metadata=,collator=,columns=,source=,type=file", confchk_colgroup_meta, 5},
{"file.config",
"access_pattern_hint=none,allocation_size=4KB,app_metadata=,"
diff --git a/src/third_party/wiredtiger/src/history/hs.c b/src/third_party/wiredtiger/src/history/hs.c
index 15f86d5c69e..b1f5be9ed5e 100644
--- a/src/third_party/wiredtiger/src/history/hs.c
+++ b/src/third_party/wiredtiger/src/history/hs.c
@@ -969,7 +969,7 @@ __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_forma
/*
* After positioning our cursor, we're stepping backwards to find the correct update. Since the
* timestamp is part of the key, our cursor needs to go from the newest record (further in the
- * las) to the oldest (earlier in the las) for a given key.
+ * history store) to the oldest (earlier in the history store) for a given key.
*/
read_timestamp = allow_prepare ? txn->prepare_timestamp : txn_shared->read_timestamp;
WT_ERR_NOTFOUND_OK(
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index a7ee16e9b74..9042f73c20e 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -589,8 +589,6 @@ extern int __wt_debug_addr(WT_SESSION_IMPL *session, const uint8_t *addr, size_t
const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_debug_addr_print(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_debug_cursor_hs(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_debug_cursor_page(void *cursor_arg, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE(
(visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_debug_cursor_tree_hs(void *cursor_arg, const char *ofile)
@@ -598,8 +596,6 @@ extern int __wt_debug_cursor_tree_hs(void *cursor_arg, const char *ofile)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_debug_disk(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, const char *ofile)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_debug_key_value(WT_SESSION_IMPL *session, WT_ITEM *key, uint64_t recno,
- uint64_t rle, WT_CELL_UNPACK_KV *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_debug_mode_config(WT_SESSION_IMPL *session, const char *cfg[])
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_debug_offset(WT_SESSION_IMPL *session, wt_off_t offset, uint32_t size,
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index 575e58b3cae..04389f6100a 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -881,7 +881,8 @@ __wt_txn_read_upd_list(
* Save the prepared update to help us detect if we race with prepared commit or
* rollback.
*/
- if (prepare_updp != NULL && *prepare_updp == NULL)
+ if (prepare_updp != NULL && *prepare_updp == NULL &&
+ F_ISSET(upd, WT_UPDATE_PREPARE_RESTORED_FROM_DISK))
*prepare_updp = upd;
continue;
}
@@ -920,7 +921,6 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint
{
WT_TIME_WINDOW tw;
WT_UPDATE *prepare_upd;
- uint8_t prepare_state;
prepare_upd = NULL;
@@ -999,12 +999,17 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint
cbt->upd_value, false, &cbt->upd_value->buf));
/*
- * Retry if we race with prepared commit or rollback as the reader may have read changed history
- * store content.
+ * Retry if we race with prepared commit or rollback. If we race with prepared rollback, the
+ * value the reader should read may have been removed from the history store and appended to the
+ * data store. If we race with prepared commit, imagine a case we read with timestamp 50 and we
+ * have a prepared update with timestamp 30 and a history store record with timestamp 20,
+ * committing the prepared update will cause the stop timestamp of the history store record
+ * being updated to 30 and the reader not seeing it.
*/
if (prepare_upd != NULL) {
- WT_ORDERED_READ(prepare_state, prepare_upd->prepare_state);
- if (prepare_upd->txnid == WT_TXN_ABORTED || prepare_state == WT_PREPARE_RESOLVED)
+ WT_ASSERT(session, F_ISSET(prepare_upd, WT_UPDATE_PREPARE_RESTORED_FROM_DISK));
+ if (prepare_upd->txnid == WT_TXN_ABORTED ||
+ prepare_upd->prepare_state == WT_PREPARE_RESOLVED)
return (WT_RESTART);
}
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index 0d48a9dc525..771bdb1e919 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -1711,9 +1711,6 @@ struct __wt_session {
* @config{dump_blocks, Display the contents of on-disk blocks as they are verified\, using
* the application's message handler\, intended for debugging., a boolean flag; default \c
* false.}
- * @config{dump_history, Display a key's values along with its time window as they are
- * verified against the history store\, using the application's message handler\, intended
- * for debugging., a boolean flag; default \c false.}
* @config{dump_layout, Display the layout of the files as they are verified\, using the
* application's message handler\, intended for debugging; requires optional support from
* the block manager., a boolean flag; default \c false.}
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
index 9e9dd627be2..758a47c00d4 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
@@ -314,11 +314,11 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
continue;
}
- /* Ignore prepared updates if it is not eviction. */
+ /* Ignore prepared updates if it is checkpoint. */
if (upd->prepare_state == WT_PREPARE_LOCKED ||
upd->prepare_state == WT_PREPARE_INPROGRESS) {
WT_ASSERT(session, upd_select->upd == NULL || upd_select->upd->txnid == upd->txnid);
- if (!F_ISSET(r, WT_REC_EVICT)) {
+ if (F_ISSET(r, WT_REC_CHECKPOINT)) {
has_newer_updates = true;
if (upd->start_ts > max_ts)
max_ts = upd->start_ts;
@@ -330,8 +330,18 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
if (upd->start_ts < r->min_skipped_ts)
r->min_skipped_ts = upd->start_ts;
continue;
- } else
+ } else {
+ /*
+ * For prepared updates written to the data store in salvage, we write the same
+ * prepared value to the data store. If there is still content for that key left in
+ * the history store, rollback to stable will bring it back to the data store.
+ * Otherwise, it removes the key.
+ */
+ WT_ASSERT(session, F_ISSET(r, WT_REC_EVICT) ||
+ (F_ISSET(r, WT_REC_VISIBILITY_ERR) &&
+ F_ISSET(upd, WT_UPDATE_PREPARE_RESTORED_FROM_DISK)));
WT_ASSERT(session, upd->prepare_state == WT_PREPARE_INPROGRESS);
+ }
}
/* Track the first update with non-zero timestamp. */
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index f20bfba99fb..dac020c35f1 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -1503,11 +1503,9 @@ __wt_txn_prepare(WT_SESSION_IMPL *session, const char *cfg[])
* Logged table updates should never be prepared. As these updates are immediately durable,
* it is not possible to roll them back if the prepared transaction is rolled back.
*/
- if (!F_ISSET(op->btree, WT_BTREE_NO_LOGGING) &&
- (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED) ||
- F_ISSET(S2C(session), WT_CONN_IN_MEMORY)))
+ if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED) &&
+ !F_ISSET(op->btree, WT_BTREE_NO_LOGGING))
WT_RET_MSG(session, EINVAL, "transaction prepare is not supported with logged tables");
-
switch (op->type) {
case WT_TXN_OP_NONE:
break;
diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml
index 14d5a06fbb8..36d6a66b785 100755
--- a/src/third_party/wiredtiger/test/evergreen.yml
+++ b/src/third_party/wiredtiger/test/evergreen.yml
@@ -297,7 +297,14 @@ functions:
done
for t in $(seq ${no_of_procs|1}); do
- wait -n || exit $?
+ ret=0
+ wait -n || ret=$?
+ if [ $ret -ne 0 ]; then
+ # Skip the below lines from nohup output file because they are very verbose and
+ # print only the errors to evergreen log file.
+ grep -v "Finished verifying" nohup.out.* | grep -v "Finished a checkpoint" | grep -v "thread starting"
+ fi
+ exit $ret
done
done