diff options
author | Luke Chen <luke.chen@mongodb.com> | 2020-05-21 17:27:38 +1000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-05-21 07:53:11 +0000 |
commit | 86a73ffa9f6d38f878420e4e2da68deb4d01d66b (patch) | |
tree | 01f990c201cd0d341588d7687efbfbdf959f8d00 /src/third_party/wiredtiger | |
parent | 607a0ac8c11e32af57aafe56f8adfd80f5d03b4b (diff) | |
download | mongo-86a73ffa9f6d38f878420e4e2da68deb4d01d66b.tar.gz |
Import wiredtiger: 25c305c94d2ba492841a7c2a270d9a92ea1fb284 from branch mongodb-4.4
ref: 7bf362af19..25c305c94d
for: 4.4.0-rc7
WT-6212 Dump failure message to Evergreen log for checkpoint-stress-test
WT-6232 Fix the logic that incorrectly returns EINVAL for in_mem config with prepared transactions
WT-6238 Fix salvage panic seeing prepared updates restored from disk
WT-6248 Add HS records to page dumps
WT-6285 Only retry reading if the prepared update is restored from the disk
Diffstat (limited to 'src/third_party/wiredtiger')
-rw-r--r-- | src/third_party/wiredtiger/dist/api_data.py | 5 | ||||
-rw-r--r-- | src/third_party/wiredtiger/import.data | 2 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_debug.c | 397 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_vrfy.c | 21 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/config/config_def.c | 12 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/history/hs.c | 2 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/extern.h | 4 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/txn.i | 17 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/wiredtiger.in | 3 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/reconcile/rec_visibility.c | 16 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/txn/txn.c | 6 | ||||
-rwxr-xr-x | src/third_party/wiredtiger/test/evergreen.yml | 9 |
12 files changed, 255 insertions, 239 deletions
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py index 3a428bc8e71..b32f8455b21 100644 --- a/src/third_party/wiredtiger/dist/api_data.py +++ b/src/third_party/wiredtiger/dist/api_data.py @@ -1386,11 +1386,6 @@ methods = { Display the contents of on-disk blocks as they are verified, using the application's message handler, intended for debugging''', type='boolean'), - Config('dump_history', 'false', r''' - Display a key's values along with its time window as - they are verified against the history store, using the application's - message handler, intended for debugging''', - type='boolean'), Config('dump_layout', 'false', r''' Display the layout of the files as they are verified, using the application's message handler, intended for debugging; requires diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 2af6d667182..75d0caac8d8 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-4.4", - "commit": "7bf362af190a36a31589d3d78eb1cd1a5963b79d" + "commit": "25c305c94d2ba492841a7c2a270d9a92ea1fb284" } diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c index c6891085b43..09871831d2b 100644 --- a/src/third_party/wiredtiger/src/btree/bt_debug.c +++ b/src/third_party/wiredtiger/src/btree/bt_debug.c @@ -16,6 +16,13 @@ typedef struct __wt_dbg WT_DBG; struct __wt_dbg { WT_SESSION_IMPL *session; /* Enclosing session */ + WT_ITEM *key; + + WT_ITEM *hs_key; /* History store lookups */ + WT_ITEM *hs_value; + uint32_t session_flags; + bool hs_is_local, is_owner; + /* * When using the standard event handlers, the debugging output has to do its own message * handling because its output isn't line-oriented. 
@@ -37,7 +44,7 @@ static const /* Output separator */ static int __debug_col_skip(WT_DBG *, WT_INSERT_HEAD *, const char *, bool); static int __debug_config(WT_SESSION_IMPL *, WT_DBG *, const char *); -static int __debug_modify(WT_DBG *, WT_UPDATE *, const char *); +static int __debug_modify(WT_DBG *, const uint8_t *, const char *); static int __debug_page(WT_DBG *, WT_REF *, uint32_t); static int __debug_page_col_fix(WT_DBG *, WT_REF *); static int __debug_page_col_int(WT_DBG *, WT_PAGE *, uint32_t); @@ -152,19 +159,6 @@ __debug_item_value(WT_DBG *ds, const char *tag, const void *data_arg, size_t siz } /* - * __debug_time_window -- - * Dump a time window, with an optional tag. - */ -static inline int -__debug_time_window(WT_DBG *ds, const char *tag, WT_TIME_WINDOW *tw) -{ - char time_string[WT_TIME_STRING_SIZE]; - - return (ds->f(ds, "\t%s%s%s\n", tag == NULL ? "" : tag, tag == NULL ? "" : " ", - __wt_time_window_to_string(tw, time_string))); -} - -/* * __dmsg_event -- * Send a debug message to the event handler. */ @@ -289,6 +283,12 @@ __debug_wrapup(WT_DBG *ds) session = ds->session; msg = ds->msg; + if (ds->hs_is_local) + WT_TRET(__wt_hs_cursor_close(session, ds->session_flags, ds->is_owner)); + + __wt_scr_free(session, &ds->key); + __wt_scr_free(session, &ds->hs_key); + __wt_scr_free(session, &ds->hs_value); __wt_scr_free(session, &ds->t1); __wt_scr_free(session, &ds->t2); @@ -406,6 +406,100 @@ err: } /* + * __debug_hs_cursor -- + * Dump information pointed to by a single history store cursor. 
+ */ +static int +__debug_hs_cursor(WT_DBG *ds, WT_CURSOR *hs_cursor) +{ + WT_SESSION_IMPL *session; + WT_TIME_WINDOW tw; + uint64_t hs_counter, hs_upd_type; + uint32_t hs_btree_id; + char time_string[WT_TIME_STRING_SIZE]; + + session = ds->session; + + WT_TIME_WINDOW_INIT(&tw); + + WT_RET(hs_cursor->get_key(hs_cursor, &hs_btree_id, ds->hs_key, &tw.start_ts, &hs_counter)); + WT_RET(hs_cursor->get_value( + hs_cursor, &tw.stop_ts, &tw.durable_start_ts, &hs_upd_type, ds->hs_value)); + + switch (hs_upd_type) { + case WT_UPDATE_MODIFY: + WT_RET(ds->f(ds, + "\t" + "hs-modify: %s\n", + __wt_time_window_to_string(&tw, time_string))); + WT_RET(__debug_modify(ds, ds->hs_value->data, "V")); + break; + case WT_UPDATE_STANDARD: + WT_RET(ds->f(ds, + "\t" + "hs-update: %s\n", + __wt_time_window_to_string(&tw, time_string))); + WT_RET(__debug_item_value(ds, "V", ds->hs_value->data, ds->hs_value->size)); + break; + default: + /* + * Currently, we expect only modifies or full values to be exposed by hs_cursors. This means + * we can ignore other types for now. + */ + WT_ASSERT(session, hs_upd_type == WT_UPDATE_MODIFY || hs_upd_type == WT_UPDATE_STANDARD); + break; + } + return (0); +} + +/* + * __debug_hs_key -- + * Dump any HS records associated with the key. + */ +static int +__debug_hs_key(WT_DBG *ds) +{ + WT_BTREE *btree; + WT_CURSOR *hs_cursor; + WT_DECL_RET; + WT_SESSION_IMPL *session; + wt_timestamp_t older_start_ts; + uint64_t hs_counter; + uint32_t hs_btree_id; + int cmp, exact; + + session = ds->session; + btree = S2BT(session); + hs_btree_id = btree->id; + + /* + * Open a history store cursor positioned at the end of the data store key (the newest record) + * and iterate backwards until we reach a different key or btree. + */ + hs_cursor = session->hs_cursor; + hs_cursor->set_key(hs_cursor, hs_btree_id, ds->key, WT_TS_MAX, WT_TXN_MAX); + ret = hs_cursor->search_near(hs_cursor, &exact); + + /* If we jumped to the next key, go back to the previous key. 
*/ + if (ret == 0 && exact > 0) + ret = hs_cursor->prev(hs_cursor); + + for (; ret == 0; ret = hs_cursor->prev(hs_cursor)) { + WT_RET(hs_cursor->get_key(hs_cursor, &hs_btree_id, ds->t1, &older_start_ts, &hs_counter)); + + if (hs_btree_id != btree->id) + break; + + WT_RET(__wt_compare(session, NULL, ds->key, ds->t1, &cmp)); + if (cmp != 0) + break; + + WT_RET(__debug_hs_cursor(ds, hs_cursor)); + } + return (ret == WT_NOTFOUND ? 0 : ret); +} + +/* * __debug_cell_int_data -- * Dump a single WT_COL_INT or WT_ROW_INT disk image cell's data in debugging mode. */ @@ -454,7 +548,8 @@ __debug_cell_int(WT_DBG *ds, const WT_PAGE_HEADER *dsk, WT_CELL_UNPACK_ADDR *unp case WT_CELL_ADDR_INT: case WT_CELL_ADDR_LEAF: case WT_CELL_ADDR_LEAF_NO: - WT_RET(ds->f(ds, ", %s", __wt_time_aggregate_to_string(&unpack->ta, time_string))); + if (!WT_TIME_AGGREGATE_IS_EMPTY(&unpack->ta)) + WT_RET(ds->f(ds, ", %s", __wt_time_aggregate_to_string(&unpack->ta, time_string))); WT_RET(__wt_scr_alloc(session, 128, &buf)); ret = ds->f(ds, ", %s", __wt_addr_string(session, unpack->data, unpack->size, buf)); @@ -484,81 +579,31 @@ __debug_dsk_int(WT_DBG *ds, const WT_PAGE_HEADER *dsk) } /* - * __debug_cell_kv_data -- - * Dump a single WT_COL_VAR or WT_ROW_LEAF disk image cell's data in debugging mode. + * __debug_cell_kv -- + * Dump a single unpacked WT_COL_VAR or WT_ROW_LEAF disk image WT_CELL. */ static int -__debug_cell_kv_data( +__debug_cell_kv( WT_DBG *ds, WT_PAGE *page, int page_type, const char *tag, WT_CELL_UNPACK_KV *unpack) { - WT_DECL_ITEM(buf); - WT_DECL_RET; WT_SESSION_IMPL *session; + char time_string[WT_TIME_STRING_SIZE]; const char *p; session = ds->session; - /* - * Column-store references to deleted cells return a NULL cell reference. - */ + /* Column-store references to deleted cells return a NULL cell reference. */ if (unpack == NULL) return (__debug_item(ds, tag, "deleted", strlen("deleted"))); - /* - * Row-store references to empty cells return a NULL on-page reference. 
- */ + /* Row-store references to empty cells return a NULL on-page reference. */ if (unpack->cell == NULL) - return (__debug_item(ds, tag, "", 0)); - - switch (unpack->raw) { - case WT_CELL_DEL: - p = __wt_cell_type_string(unpack->raw); - return (__debug_item(ds, tag, p, strlen(p))); - } - - WT_RET(__wt_scr_alloc(session, 256, &buf)); - WT_ERR(page == NULL ? __wt_dsk_cell_data_ref(session, page_type, unpack, buf) : - __wt_page_cell_data_ref(session, page, unpack, buf)); - - switch (unpack->raw) { - case WT_CELL_KEY: - case WT_CELL_KEY_OVFL: - case WT_CELL_KEY_PFX: - case WT_CELL_KEY_SHORT: - case WT_CELL_KEY_SHORT_PFX: - WT_ERR(__debug_item_key(ds, tag, buf->data, buf->size)); - break; - case WT_CELL_VALUE: - case WT_CELL_VALUE_COPY: - case WT_CELL_VALUE_OVFL: - case WT_CELL_VALUE_SHORT: - WT_ERR(__debug_item_value(ds, tag, buf->data, buf->size)); - break; - } - -err: - __wt_scr_free(session, &buf); - return (ret); -} - -/* - * __debug_cell_kv -- - * Dump a single unpacked WT_COL_VAR or WT_ROW_LEAF disk image WT_CELL. - */ -static int -__debug_cell_kv(WT_DBG *ds, const WT_PAGE_HEADER *dsk, WT_CELL_UNPACK_KV *unpack) -{ - WT_DECL_ITEM(buf); - WT_DECL_RET; - WT_SESSION_IMPL *session; - char time_string[WT_TIME_STRING_SIZE]; - - session = ds->session; + return (__debug_item(ds, tag, "zero-length", strlen("zero-length"))); WT_RET(ds->f(ds, "\t%s: len %" PRIu32, __wt_cell_type_string(unpack->raw), unpack->size)); - /* Dump cell's per-disk page type information. */ - switch (dsk->type) { + /* Dump per-disk page type information. */ + switch (page_type) { case WT_PAGE_COL_VAR: WT_RET(ds->f(ds, ", rle: %" PRIu64, __wt_cell_rle(unpack))); break; @@ -572,7 +617,7 @@ __debug_cell_kv(WT_DBG *ds, const WT_PAGE_HEADER *dsk, WT_CELL_UNPACK_KV *unpack break; } - /* Dump timestamps. */ + /* Dump time window. 
*/ switch (unpack->raw) { case WT_CELL_DEL: case WT_CELL_VALUE: @@ -580,23 +625,48 @@ __debug_cell_kv(WT_DBG *ds, const WT_PAGE_HEADER *dsk, WT_CELL_UNPACK_KV *unpack case WT_CELL_VALUE_OVFL: case WT_CELL_VALUE_OVFL_RM: case WT_CELL_VALUE_SHORT: - WT_RET(ds->f(ds, ", %s", __wt_time_window_to_string(&unpack->tw, time_string))); + if (!WT_TIME_WINDOW_IS_EMPTY(&unpack->tw)) + WT_RET(ds->f(ds, ", %s", __wt_time_window_to_string(&unpack->tw, time_string))); break; } - /* Dump overflow addresses. */ + /* Column-store deleted cells. */ + switch (unpack->raw) { + case WT_CELL_DEL: + p = __wt_cell_type_string(unpack->raw); + return (__debug_item(ds, tag, p, strlen(p))); + } + + /* Overflow addresses. */ switch (unpack->raw) { case WT_CELL_KEY_OVFL: case WT_CELL_VALUE_OVFL: - WT_RET(__wt_scr_alloc(session, 128, &buf)); - ret = ds->f(ds, ", %s", __wt_addr_string(session, unpack->data, unpack->size, buf)); - __wt_scr_free(session, &buf); - WT_RET(ret); + WT_RET(ds->f(ds, ", %s", __wt_addr_string(session, unpack->data, unpack->size, ds->t1))); break; } WT_RET(ds->f(ds, "\n")); - return (__debug_cell_kv_data(ds, NULL, dsk->type, NULL, unpack)); + WT_RET(page == NULL ? __wt_dsk_cell_data_ref(session, page_type, unpack, ds->t1) : + __wt_page_cell_data_ref(session, page, unpack, ds->t1)); + + /* Standard key/value cells. 
*/ + switch (unpack->raw) { + case WT_CELL_KEY: + case WT_CELL_KEY_OVFL: + case WT_CELL_KEY_PFX: + case WT_CELL_KEY_SHORT: + case WT_CELL_KEY_SHORT_PFX: + WT_RET(__debug_item_key(ds, tag, ds->t1->data, ds->t1->size)); + break; + case WT_CELL_VALUE: + case WT_CELL_VALUE_COPY: + case WT_CELL_VALUE_OVFL: + case WT_CELL_VALUE_SHORT: + WT_RET(__debug_item_value(ds, tag, ds->t1->data, ds->t1->size)); + break; + } + + return (0); } /* @@ -609,7 +679,7 @@ __debug_dsk_kv(WT_DBG *ds, const WT_PAGE_HEADER *dsk) WT_CELL_UNPACK_KV unpack; WT_CELL_FOREACH_KV (ds->session, dsk, unpack) { - WT_RET(__debug_cell_kv(ds, dsk, &unpack)); + WT_RET(__debug_cell_kv(ds, NULL, dsk->type, NULL, &unpack)); } WT_CELL_FOREACH_END; return (0); @@ -885,6 +955,8 @@ __wt_debug_cursor_page(void *cursor_arg, const char *ofile) cbt = cursor_arg; session = CUR2S(cursor_arg); + WT_RET(__wt_debug_cursor_tree_hs(cursor_arg, "/tmp/ohs")); + WT_WITH_BTREE(session, CUR2BT(cbt), ret = __wt_debug_page(session, NULL, cbt->ref, ofile)); return (ret); } @@ -915,87 +987,6 @@ __wt_debug_cursor_tree_hs(void *cursor_arg, const char *ofile) } /* - * __wt_debug_cursor_hs -- - * Dump information pointed to by a single history store cursor. 
- */ -int -__wt_debug_cursor_hs(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor) -{ - WT_DBG *ds, _ds; - WT_DECL_ITEM(hs_key); - WT_DECL_ITEM(hs_value); - WT_DECL_RET; - WT_TIME_WINDOW tw; - WT_UPDATE *upd; - uint64_t hs_counter, hs_upd_type_full; - uint32_t hs_btree_id; - uint8_t hs_upd_type; - - ds = &_ds; - WT_TIME_WINDOW_INIT(&tw); - - WT_ERR(__wt_scr_alloc(session, 0, &hs_key)); - WT_ERR(__wt_scr_alloc(session, 0, &hs_value)); - WT_ERR(__debug_config(session, ds, NULL)); - - WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, hs_key, &tw.start_ts, &hs_counter)); - WT_ERR(hs_cursor->get_value( - hs_cursor, &tw.stop_ts, &tw.durable_start_ts, &hs_upd_type_full, hs_value)); - WT_ERR(__debug_time_window(ds, "T", &tw)); - - hs_upd_type = (uint8_t)hs_upd_type_full; - switch (hs_upd_type) { - case WT_UPDATE_MODIFY: - WT_ERR(__wt_upd_alloc(session, hs_value, hs_upd_type, &upd, NULL)); - WT_ERR(__debug_modify(ds, upd, "\tM ")); - break; - case WT_UPDATE_STANDARD: - WT_ERR(__debug_item_value(ds, "V", hs_value->data, hs_value->size)); - break; - default: - /* - * Currently, we expect only modifies or full values to be exposed by hs_cursors. This means - * we can ignore other types for now. - */ - WT_ASSERT(session, hs_upd_type == WT_UPDATE_MODIFY || hs_upd_type == WT_UPDATE_STANDARD); - break; - } - -err: - __wt_scr_free(session, &hs_key); - __wt_scr_free(session, &hs_value); - WT_RET(__debug_wrapup(ds)); - - return (ret); -} - -/* - * __wt_debug_key_value -- - * Dump information about a key and/or value. 
- */ -int -__wt_debug_key_value( - WT_SESSION_IMPL *session, WT_ITEM *key, uint64_t recno, uint64_t rle, WT_CELL_UNPACK_KV *value) -{ - WT_DBG *ds, _ds; - WT_DECL_RET; - - ds = &_ds; - - WT_ERR(__debug_config(session, ds, NULL)); - - if (key == NULL) - WT_ERR(ds->f(ds, "\tK {%" PRIu64 " %" PRIu64 "}", recno, rle)); - else - WT_ERR(__debug_item_key(ds, "K", key->data, key->size)); - WT_ERR(__debug_time_window(ds, "T", &value->tw)); - WT_ERR(__debug_cell_kv_data(ds, NULL, value != NULL ? value->type : 0, "V", value)); - -err: - return (__debug_wrapup(ds)); -} - -/* * __debug_tree -- * Dump the in-memory information for a tree. */ @@ -1030,6 +1021,17 @@ __debug_page(WT_DBG *ds, WT_REF *ref, uint32_t flags) session = ds->session; + /* Set up history store support. */ + if (!WT_IS_HS(S2BT(session))) { + WT_RET(__wt_scr_alloc(session, 100, &ds->key)); + WT_RET(__wt_scr_alloc(session, 0, &ds->hs_key)); + WT_RET(__wt_scr_alloc(session, 0, &ds->hs_value)); + if (session->hs_cursor == NULL) { + WT_RET(__wt_hs_cursor(session, &ds->session_flags, &ds->is_owner)); + ds->hs_is_local = true; + } + } + /* Dump the page metadata. 
*/ WT_WITH_PAGE_INDEX(session, ret = __debug_page_metadata(ds, ref)); WT_RET(ret); @@ -1257,12 +1259,15 @@ __debug_page_col_var(WT_DBG *ds, WT_REF *ref) WT_COL *cip; WT_INSERT_HEAD *update; WT_PAGE *page; + WT_SESSION_IMPL *session; uint64_t recno, rle; uint32_t i; + uint8_t *p; char tag[64]; unpack = &_unpack; page = ref->page; + session = ds->session; recno = ref->ref_recno; WT_COL_FOREACH (page, cip, i) { @@ -1270,7 +1275,14 @@ __debug_page_col_var(WT_DBG *ds, WT_REF *ref) __wt_cell_unpack_kv(ds->session, page->dsk, cell, unpack); rle = __wt_cell_rle(unpack); WT_RET(__wt_snprintf(tag, sizeof(tag), "%" PRIu64 " %" PRIu64, recno, rle)); - WT_RET(__debug_cell_kv_data(ds, page, WT_PAGE_COL_VAR, tag, unpack)); + WT_RET(__debug_cell_kv(ds, page, WT_PAGE_COL_VAR, tag, unpack)); + + if (!WT_IS_HS(S2BT(session))) { + p = ds->key->mem; + WT_RET(__wt_vpack_uint(&p, 0, recno)); + ds->key->size = WT_PTRDIFF(p, ds->key->mem); + WT_RET(__debug_hs_key(ds)); + } if ((update = WT_COL_UPDATE(page, cip)) != NULL) WT_RET(__debug_col_skip(ds, update, "update", false)); @@ -1326,8 +1338,6 @@ static int __debug_page_row_leaf(WT_DBG *ds, WT_PAGE *page) { WT_CELL_UNPACK_KV *unpack, _unpack; - WT_DECL_ITEM(key); - WT_DECL_RET; WT_INSERT_HEAD *insert; WT_ROW *rip; WT_SESSION_IMPL *session; @@ -1336,32 +1346,31 @@ __debug_page_row_leaf(WT_DBG *ds, WT_PAGE *page) session = ds->session; unpack = &_unpack; - WT_RET(__wt_scr_alloc(session, 256, &key)); /* * Dump any K/V pairs inserted into the page before the first from-disk key on the page. */ if ((insert = WT_ROW_INSERT_SMALLEST(page)) != NULL) - WT_ERR(__debug_row_skip(ds, insert)); + WT_RET(__debug_row_skip(ds, insert)); /* Dump the page's K/V pairs. 
*/ WT_ROW_FOREACH (page, rip, i) { - WT_ERR(__wt_row_leaf_key(session, page, rip, key, false)); - WT_ERR(__debug_item_key(ds, "K", key->data, key->size)); + WT_RET(__wt_row_leaf_key(session, page, rip, ds->key, false)); + WT_RET(__debug_item_key(ds, "K", ds->key->data, ds->key->size)); __wt_row_leaf_value_cell(session, page, rip, NULL, unpack); - WT_ERR(__debug_cell_kv_data(ds, page, WT_PAGE_ROW_LEAF, "V", unpack)); + WT_RET(__debug_cell_kv(ds, page, WT_PAGE_ROW_LEAF, "V", unpack)); if ((upd = WT_ROW_UPDATE(page, rip)) != NULL) - WT_ERR(__debug_update(ds, upd, false)); + WT_RET(__debug_update(ds, upd, false)); + + if (!WT_IS_HS(S2BT(session))) + WT_RET(__debug_hs_key(ds)); if ((insert = WT_ROW_INSERT(page, rip)) != NULL) - WT_ERR(__debug_row_skip(ds, insert)); + WT_RET(__debug_row_skip(ds, insert)); } - -err: - __wt_scr_free(session, &key); - return (ret); + return (0); } /* @@ -1372,10 +1381,21 @@ static int __debug_col_skip(WT_DBG *ds, WT_INSERT_HEAD *head, const char *tag, bool hexbyte) { WT_INSERT *ins; + WT_SESSION_IMPL *session; + uint8_t *p; + + session = ds->session; WT_SKIP_FOREACH (ins, head) { WT_RET(ds->f(ds, "\t%s %" PRIu64 "\n", tag, WT_INSERT_RECNO(ins))); WT_RET(__debug_update(ds, ins->upd, hexbyte)); + + if (!WT_IS_HS(S2BT(session))) { + p = ds->key->mem; + WT_RET(__wt_vpack_uint(&p, 0, WT_INSERT_RECNO(ins))); + ds->key->size = WT_PTRDIFF(p, ds->key->mem); + WT_RET(__debug_hs_key(ds)); + } } return (0); } @@ -1388,10 +1408,18 @@ static int __debug_row_skip(WT_DBG *ds, WT_INSERT_HEAD *head) { WT_INSERT *ins; + WT_SESSION_IMPL *session; + + session = ds->session; WT_SKIP_FOREACH (ins, head) { WT_RET(__debug_item_key(ds, "insert", WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins))); WT_RET(__debug_update(ds, ins->upd, false)); + + if (!WT_IS_HS(S2BT(session))) { + WT_RET(__wt_buf_set(session, ds->key, WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins))); + WT_RET(__debug_hs_key(ds)); + } } return (0); } @@ -1401,15 +1429,14 @@ __debug_row_skip(WT_DBG *ds, 
WT_INSERT_HEAD *head) * Dump a modify update. */ static int -__debug_modify(WT_DBG *ds, WT_UPDATE *upd, const char *tag) +__debug_modify(WT_DBG *ds, const uint8_t *data, const char *tag) { size_t nentries, data_size, offset, size; const size_t *p; - const uint8_t *data; - p = (size_t *)upd->data; + p = (size_t *)data; memcpy(&nentries, p++, sizeof(size_t)); - data = upd->data + sizeof(size_t) + (nentries * 3 * sizeof(size_t)); + data += sizeof(size_t) + (nentries * 3 * sizeof(size_t)); WT_RET(ds->f(ds, "%s%" WT_SIZET_FMT ": ", tag != NULL ? tag : "", nentries)); for (; nentries-- > 0; data += data_size) { @@ -1442,7 +1469,7 @@ __debug_update(WT_DBG *ds, WT_UPDATE *upd, bool hexbyte) break; case WT_UPDATE_MODIFY: WT_RET(ds->f(ds, "\tvalue {modify: ")); - WT_RET(__debug_modify(ds, upd, NULL)); + WT_RET(__debug_modify(ds, upd->data, NULL)); WT_RET(ds->f(ds, "}\n")); break; case WT_UPDATE_RESERVE: @@ -1544,8 +1571,8 @@ __debug_ref(WT_DBG *ds, WT_REF *ref) if (F_ISSET(ref, WT_REF_FLAG_READING)) WT_RET(ds->f(ds, ", %s", "reading")); - if (__wt_ref_addr_copy(session, ref, &addr)) - WT_RET(ds->f(ds, "%s, %s", __wt_time_aggregate_to_string(&addr.ta, time_string), + if (__wt_ref_addr_copy(session, ref, &addr) && !WT_TIME_AGGREGATE_IS_EMPTY(&addr.ta)) + WT_RET(ds->f(ds, ", %s, %s", __wt_time_aggregate_to_string(&addr.ta, time_string), __wt_addr_string(session, addr.addr, addr.size, ds->t1))); return (ds->f(ds, "\n")); } diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c index 5b0aee10191..10dca448c28 100644 --- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c +++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c @@ -27,7 +27,6 @@ typedef struct { ((vs)->dump_address || (vs)->dump_blocks || (vs)->dump_layout || (vs)->dump_pages) bool dump_address; /* Configure: dump special */ bool dump_blocks; - bool dump_history; bool dump_layout; bool dump_pages; @@ -65,9 +64,6 @@ __verify_config(WT_SESSION_IMPL *session, const char 
*cfg[], WT_VSTUFF *vs) WT_RET(__wt_config_gets(session, cfg, "dump_blocks", &cval)); vs->dump_blocks = cval.val != 0; - WT_RET(__wt_config_gets(session, cfg, "dump_history", &cval)); - vs->dump_history = cval.val != 0; - WT_RET(__wt_config_gets(session, cfg, "dump_layout", &cval)); vs->dump_layout = cval.val != 0; @@ -84,7 +80,7 @@ __verify_config(WT_SESSION_IMPL *session, const char *cfg[], WT_VSTUFF *vs) } #if !defined(HAVE_DIAGNOSTIC) - if (vs->dump_blocks || vs->dump_pages || vs->dump_history) + if (vs->dump_blocks || vs->dump_pages) WT_RET_MSG(session, ENOTSUP, "the WiredTiger library was not built in diagnostic mode"); #endif @@ -807,12 +803,6 @@ __verify_key_hs( if (cmp != 0) break; -#ifdef HAVE_DIAGNOSTIC - /* Optionally dump historical time windows and values in debug mode. */ - if (vs->dump_history) - WT_RET(__wt_debug_cursor_hs(session, hs_cursor)); -#endif - /* Verify the newer record's start is later than the older record's stop. */ if (newer_start_ts < older_stop_ts) { WT_RET_MSG(session, WT_ERROR, @@ -996,11 +986,6 @@ __verify_page_content_leaf( WT_RET(__wt_row_leaf_key(session, page, rip++, vs->tmp1, false)); WT_RET(__verify_key_hs(session, vs->tmp1, tw->start_ts, vs)); - -#ifdef HAVE_DIAGNOSTIC - if (vs->dump_history) - WT_RET(__wt_debug_key_value(session, vs->tmp1, WT_RECNO_OOB, 0, &unpack)); -#endif } else if (page->type == WT_PAGE_COL_VAR) { rle = __wt_cell_rle(&unpack); p = vs->tmp1->mem; @@ -1008,10 +993,6 @@ __verify_page_content_leaf( vs->tmp1->size = WT_PTRDIFF(p, vs->tmp1->mem); WT_RET(__verify_key_hs(session, vs->tmp1, tw->start_ts, vs)); -#ifdef HAVE_DIAGNOSTIC - if (vs->dump_history) - WT_RET(__wt_debug_key_value(session, NULL, recno, rle, &unpack)); -#endif recno += rle; vs->records_so_far += rle; } diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c index a29bcbe25d0..d433fb02ff7 100644 --- a/src/third_party/wiredtiger/src/config/config_def.c +++ 
b/src/third_party/wiredtiger/src/config/config_def.c @@ -355,8 +355,8 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_transaction_sync[] = { static const WT_CONFIG_CHECK confchk_WT_SESSION_verify[] = { {"dump_address", "boolean", NULL, NULL, NULL, 0}, {"dump_blocks", "boolean", NULL, NULL, NULL, 0}, - {"dump_history", "boolean", NULL, NULL, NULL, 0}, {"dump_layout", "boolean", NULL, NULL, NULL, 0}, - {"dump_offsets", "list", NULL, NULL, NULL, 0}, {"dump_pages", "boolean", NULL, NULL, NULL, 0}, + {"dump_layout", "boolean", NULL, NULL, NULL, 0}, {"dump_offsets", "list", NULL, NULL, NULL, 0}, + {"dump_pages", "boolean", NULL, NULL, NULL, 0}, {"stable_timestamp", "boolean", NULL, NULL, NULL, 0}, {"strict", "boolean", NULL, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}}; @@ -930,10 +930,10 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator", {"WT_SESSION.transaction_sync", "timeout_ms=1200000", confchk_WT_SESSION_transaction_sync, 1}, {"WT_SESSION.truncate", "", NULL, 0}, {"WT_SESSION.upgrade", "", NULL, 0}, {"WT_SESSION.verify", - "dump_address=false,dump_blocks=false,dump_history=false," - "dump_layout=false,dump_offsets=,dump_pages=false," - "stable_timestamp=false,strict=false", - confchk_WT_SESSION_verify, 8}, + "dump_address=false,dump_blocks=false,dump_layout=false," + "dump_offsets=,dump_pages=false,stable_timestamp=false," + "strict=false", + confchk_WT_SESSION_verify, 7}, {"colgroup.meta", "app_metadata=,collator=,columns=,source=,type=file", confchk_colgroup_meta, 5}, {"file.config", "access_pattern_hint=none,allocation_size=4KB,app_metadata=," diff --git a/src/third_party/wiredtiger/src/history/hs.c b/src/third_party/wiredtiger/src/history/hs.c index 15f86d5c69e..b1f5be9ed5e 100644 --- a/src/third_party/wiredtiger/src/history/hs.c +++ b/src/third_party/wiredtiger/src/history/hs.c @@ -969,7 +969,7 @@ __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_forma /* * After positioning our cursor, we're 
stepping backwards to find the correct update. Since the * timestamp is part of the key, our cursor needs to go from the newest record (further in the - * las) to the oldest (earlier in the las) for a given key. + * history store) to the oldest (earlier in the history store) for a given key. */ read_timestamp = allow_prepare ? txn->prepare_timestamp : txn_shared->read_timestamp; WT_ERR_NOTFOUND_OK( diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index a7ee16e9b74..9042f73c20e 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -589,8 +589,6 @@ extern int __wt_debug_addr(WT_SESSION_IMPL *session, const uint8_t *addr, size_t const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_debug_addr_print(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_debug_cursor_hs(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor) - WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_debug_cursor_page(void *cursor_arg, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE( (visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_debug_cursor_tree_hs(void *cursor_arg, const char *ofile) @@ -598,8 +596,6 @@ extern int __wt_debug_cursor_tree_hs(void *cursor_arg, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_debug_disk(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_debug_key_value(WT_SESSION_IMPL *session, WT_ITEM *key, uint64_t recno, - uint64_t rle, WT_CELL_UNPACK_KV *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_debug_mode_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int 
__wt_debug_offset(WT_SESSION_IMPL *session, wt_off_t offset, uint32_t size, diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i index 575e58b3cae..04389f6100a 100644 --- a/src/third_party/wiredtiger/src/include/txn.i +++ b/src/third_party/wiredtiger/src/include/txn.i @@ -881,7 +881,8 @@ __wt_txn_read_upd_list( * Save the prepared update to help us detect if we race with prepared commit or * rollback. */ - if (prepare_updp != NULL && *prepare_updp == NULL) + if (prepare_updp != NULL && *prepare_updp == NULL && + F_ISSET(upd, WT_UPDATE_PREPARE_RESTORED_FROM_DISK)) *prepare_updp = upd; continue; } @@ -920,7 +921,6 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint { WT_TIME_WINDOW tw; WT_UPDATE *prepare_upd; - uint8_t prepare_state; prepare_upd = NULL; @@ -999,12 +999,17 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint cbt->upd_value, false, &cbt->upd_value->buf)); /* - * Retry if we race with prepared commit or rollback as the reader may have read changed history - * store content. + * Retry if we race with prepared commit or rollback. If we race with prepared rollback, the + * value the reader should read may have been removed from the history store and appended to the + * data store. If we race with prepared commit, imagine a case we read with timestamp 50 and we + * have a prepared update with timestamp 30 and a history store record with timestamp 20, + * committing the prepared update will cause the stop timestamp of the history store record + * being updated to 30 and the reader not seeing it. 
*/ if (prepare_upd != NULL) { - WT_ORDERED_READ(prepare_state, prepare_upd->prepare_state); - if (prepare_upd->txnid == WT_TXN_ABORTED || prepare_state == WT_PREPARE_RESOLVED) + WT_ASSERT(session, F_ISSET(prepare_upd, WT_UPDATE_PREPARE_RESTORED_FROM_DISK)); + if (prepare_upd->txnid == WT_TXN_ABORTED || + prepare_upd->prepare_state == WT_PREPARE_RESOLVED) return (WT_RESTART); } diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index 0d48a9dc525..771bdb1e919 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -1711,9 +1711,6 @@ struct __wt_session { * @config{dump_blocks, Display the contents of on-disk blocks as they are verified\, using * the application's message handler\, intended for debugging., a boolean flag; default \c * false.} - * @config{dump_history, Display a key's values along with its time window as they are - * verified against the history store\, using the application's message handler\, intended - * for debugging., a boolean flag; default \c false.} * @config{dump_layout, Display the layout of the files as they are verified\, using the * application's message handler\, intended for debugging; requires optional support from * the block manager., a boolean flag; default \c false.} diff --git a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c index 9e9dd627be2..758a47c00d4 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c @@ -314,11 +314,11 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v continue; } - /* Ignore prepared updates if it is not eviction. */ + /* Ignore prepared updates if it is checkpoint. 
*/ if (upd->prepare_state == WT_PREPARE_LOCKED || upd->prepare_state == WT_PREPARE_INPROGRESS) { WT_ASSERT(session, upd_select->upd == NULL || upd_select->upd->txnid == upd->txnid); - if (!F_ISSET(r, WT_REC_EVICT)) { + if (F_ISSET(r, WT_REC_CHECKPOINT)) { has_newer_updates = true; if (upd->start_ts > max_ts) max_ts = upd->start_ts; @@ -330,8 +330,18 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v if (upd->start_ts < r->min_skipped_ts) r->min_skipped_ts = upd->start_ts; continue; - } else + } else { + /* + * For prepared updates written to the data store in salvage, we write the same + * prepared value to the data store. If there is still content for that key left in + * the history store, rollback to stable will bring it back to the data store. + * Otherwise, it removes the key. + */ + WT_ASSERT(session, F_ISSET(r, WT_REC_EVICT) || + (F_ISSET(r, WT_REC_VISIBILITY_ERR) && + F_ISSET(upd, WT_UPDATE_PREPARE_RESTORED_FROM_DISK))); WT_ASSERT(session, upd->prepare_state == WT_PREPARE_INPROGRESS); + } } /* Track the first update with non-zero timestamp. */ diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index f20bfba99fb..dac020c35f1 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ b/src/third_party/wiredtiger/src/txn/txn.c @@ -1503,11 +1503,9 @@ __wt_txn_prepare(WT_SESSION_IMPL *session, const char *cfg[]) * Logged table updates should never be prepared. As these updates are immediately durable, * it is not possible to roll them back if the prepared transaction is rolled back. 
*/ - if (!F_ISSET(op->btree, WT_BTREE_NO_LOGGING) && - (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED) || - F_ISSET(S2C(session), WT_CONN_IN_MEMORY))) + if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED) && + !F_ISSET(op->btree, WT_BTREE_NO_LOGGING)) WT_RET_MSG(session, EINVAL, "transaction prepare is not supported with logged tables"); - switch (op->type) { case WT_TXN_OP_NONE: break; diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml index 14d5a06fbb8..36d6a66b785 100755 --- a/src/third_party/wiredtiger/test/evergreen.yml +++ b/src/third_party/wiredtiger/test/evergreen.yml @@ -297,7 +297,14 @@ functions: done for t in $(seq ${no_of_procs|1}); do - wait -n || exit $? + ret=0 + wait -n || ret=$? + if [ $ret -ne 0 ]; then + # Skip the below lines from nohup output file because they are very verbose and + # print only the errors to evergreen log file. + grep -v "Finished verifying" nohup.out.* | grep -v "Finished a checkpoint" | grep -v "thread starting" + fi + exit $ret done done |