diff options
author | Luke Chen <luke.chen@mongodb.com> | 2020-06-25 14:30:35 +1000 |
---|---|---|
committer | Luke Chen <luke.chen@mongodb.com> | 2020-06-25 15:13:10 +1000 |
commit | dd10c6328d7466a8ec21097094233afb5349d844 (patch) | |
tree | 951de3f9e0b1934f91f066f42f6fed15383f4d31 | |
parent | 77d222a1b6aab0763ef64c9a7712b1aace60bfe8 (diff) | |
download | mongo-dd10c6328d7466a8ec21097094233afb5349d844.tar.gz |
Import wiredtiger: 5a74e438ea34cc6737438f6c99ce2e5e25519a0e from branch mongodb-4.4
ref: 5e6daf7d42..5a74e438ea
for: 4.4.0-rc11
WT-6412 Fix extended stalls being seen during MongoDB performance testing
-rw-r--r-- | src/third_party/wiredtiger/import.data | 2 |
-rw-r--r-- | src/third_party/wiredtiger/src/history/hs.c | 28 |
-rw-r--r-- | src/third_party/wiredtiger/src/include/cursor.h | 4 |
-rw-r--r-- | src/third_party/wiredtiger/src/include/extern.h | 6 |
-rw-r--r-- | src/third_party/wiredtiger/src/include/txn.i | 23 |
-rw-r--r-- | src/third_party/wiredtiger/src/txn/txn.c | 40 |
-rw-r--r-- | src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c | 16 |
7 files changed, 87 insertions, 32 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 6ba5f527c47..93efc061937 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-4.4", - "commit": "5e6daf7d42727e3d86b2603c20852d1426dee55f" + "commit": "5a74e438ea34cc6737438f6c99ce2e5e25519a0e" } diff --git a/src/third_party/wiredtiger/src/history/hs.c b/src/third_party/wiredtiger/src/history/hs.c index e94270eb37c..3b7c9a69de7 100644 --- a/src/third_party/wiredtiger/src/history/hs.c +++ b/src/third_party/wiredtiger/src/history/hs.c @@ -1105,14 +1105,14 @@ err: } /* - * __wt_find_hs_upd -- + * __wt_hs_find_upd -- * Scan the history store for a record the btree cursor wants to position on. Create an update * for the record and return to the caller. The caller may choose to optionally allow prepared * updates to be returned regardless of whether prepare is being ignored globally. Otherwise, a * prepare conflict will be returned upon reading a prepared update. */ int -__wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_format, uint64_t recno, +__wt_hs_find_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_format, uint64_t recno, WT_UPDATE_VALUE *upd_value, bool allow_prepare, WT_ITEM *on_disk_buf) { WT_CURSOR *hs_cursor; @@ -1202,6 +1202,13 @@ __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_forma goto done; /* + * If the stop time pair on the tombstone in the history store is already globally visible + * we can skip it. + */ + if (__wt_txn_visible_all( + session, hs_cbt->upd_value->tw.stop_txn, hs_cbt->upd_value->tw.durable_stop_ts)) + continue; + /* * If the stop time point of a record is visible to us, we won't be able to see anything for * this entire key. Just jump straight to the end. 
*/ @@ -1521,10 +1528,17 @@ __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_ts, &hs_counter)); if (hs_btree_id != btree->id) break; + WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp)); if (cmp != 0) break; - + /* + * If the stop time pair on the tombstone in the history store is already globally visible + * we can skip it. + */ + if (__wt_txn_visible_all( + session, hs_cbt->upd_value->tw.stop_txn, hs_cbt->upd_value->tw.durable_stop_ts)) + continue; /* * If we got here, we've got out-of-order updates in the history store. * @@ -1617,6 +1631,14 @@ __hs_delete_key_from_pos( WT_RET(__wt_compare(session, NULL, &hs_key, key, &cmp)); if (cmp != 0) break; + + /* + * If the stop time pair on the tombstone in the history store is already globally visible + * we can skip it. + */ + if (__wt_txn_visible_all( + session, hs_cbt->upd_value->tw.stop_txn, hs_cbt->upd_value->tw.durable_stop_ts)) + continue; /* * Since we're using internal functions to modify the row structure, we need to manually set * the comparison to an exact match. 
diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h index 95e5c8de1fb..dfcc4f7888f 100644 --- a/src/third_party/wiredtiger/src/include/cursor.h +++ b/src/third_party/wiredtiger/src/include/cursor.h @@ -503,9 +503,5 @@ struct __wt_cursor_table { #define WT_CURSOR_RECNO(cursor) WT_STREQ((cursor)->key_format, "r") -#define WT_CURSOR_IS_DUMP(cursor) \ - F_ISSET(cursor, \ - (WT_CURSTD_DUMP_HEX | WT_CURSTD_DUMP_JSON | WT_CURSTD_DUMP_PRETTY | WT_CURSTD_DUMP_PRINT)) - #define WT_CURSOR_RAW_OK \ (WT_CURSTD_DUMP_HEX | WT_CURSTD_DUMP_PRETTY | WT_CURSTD_DUMP_PRINT | WT_CURSTD_RAW) diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index 31e24f65edd..bfb6647c88f 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -724,9 +724,6 @@ extern int __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path extern int __wt_filename_construct(WT_SESSION_IMPL *session, const char *path, const char *file_prefix, uintmax_t id_1, uint32_t id_2, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_format, - uint64_t recno, WT_UPDATE_VALUE *upd_value, bool allow_prepare, WT_ITEM *on_disk_buf) - WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_fopen(WT_SESSION_IMPL *session, const char *name, uint32_t open_flags, uint32_t flags, WT_FSTREAM **fstrp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_fsync_background(WT_SESSION_IMPL *session) @@ -767,6 +764,9 @@ extern int __wt_hs_cursor_position(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int 
__wt_hs_find_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_format, + uint64_t recno, WT_UPDATE_VALUE *upd_value, bool allow_prepare, WT_ITEM *on_disk_buf) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_hs_get_btree(WT_SESSION_IMPL *session, WT_BTREE **hs_btreep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi) diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i index b99a54532b0..530b40ef9ed 100644 --- a/src/third_party/wiredtiger/src/include/txn.i +++ b/src/third_party/wiredtiger/src/include/txn.i @@ -854,10 +854,12 @@ __wt_txn_read_upd_list( /* * If the cursor is configured to ignore tombstones, copy the timestamps from the tombstones * to the stop time window of the update value being returned to the caller. Caller can - * process the stop time window to decide if there was a tombstone on the update chain. + * process the stop time window to decide if there was a tombstone on the update chain. If + * the time window already has a stop time set then we must've seen a tombstone prior to + * ours in the update list, and therefore don't need to do this again. 
*/ if (type == WT_UPDATE_TOMBSTONE && F_ISSET(&cbt->iface, WT_CURSTD_IGNORE_TOMBSTONE) && - !__wt_txn_upd_visible_all(session, upd)) { + !WT_TIME_WINDOW_HAS_STOP(&cbt->upd_value->tw)) { cbt->upd_value->tw.durable_stop_ts = upd->durable_ts; cbt->upd_value->tw.stop_ts = upd->start_ts; cbt->upd_value->tw.stop_txn = upd->txnid; @@ -918,7 +920,7 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint { WT_TIME_WINDOW tw; WT_UPDATE *prepare_upd; - + bool have_stop_tw; prepare_upd = NULL; WT_RET(__wt_txn_read_upd_list(session, cbt, upd, &prepare_upd)); @@ -933,6 +935,12 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint return (0); } + /* + * When we inspected the update list we may have seen a tombstone leaving us with a valid stop + * time window, we don't want to overwrite this stop time window. + */ + have_stop_tw = WT_TIME_WINDOW_HAS_STOP(&cbt->upd_value->tw); + /* Check the ondisk value. */ if (vpack == NULL) { WT_TIME_WINDOW_INIT(&tw); @@ -949,9 +957,8 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint * return "not found", except scanning the history store during rollback to stable and when we * are told to ignore non-globally visible tombstones. */ - if (__wt_txn_tw_stop_visible(session, &tw) && - (!F_ISSET(&cbt->iface, WT_CURSTD_IGNORE_TOMBSTONE) || - (__wt_txn_tw_stop_visible_all(session, &tw) && !WT_CURSOR_IS_DUMP(&cbt->iface)))) { + if (!have_stop_tw && __wt_txn_tw_stop_visible(session, &tw) && + !F_ISSET(&cbt->iface, WT_CURSTD_IGNORE_TOMBSTONE)) { cbt->upd_value->buf.data = NULL; cbt->upd_value->buf.size = 0; cbt->upd_value->tw.durable_stop_ts = tw.durable_stop_ts; @@ -963,7 +970,7 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint } /* Store the stop time pair of the history store record that is returning. 
*/ - if (WT_TIME_WINDOW_HAS_STOP(&tw) && WT_IS_HS(S2BT(session))) { + if (!have_stop_tw && WT_TIME_WINDOW_HAS_STOP(&tw) && WT_IS_HS(S2BT(session))) { cbt->upd_value->tw.durable_stop_ts = tw.durable_stop_ts; cbt->upd_value->tw.stop_ts = tw.stop_ts; cbt->upd_value->tw.stop_txn = tw.stop_txn; @@ -986,7 +993,7 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint /* If there's no visible update in the update chain or ondisk, check the history store file. */ if (F_ISSET(S2C(session), WT_CONN_HS_OPEN) && !F_ISSET(S2BT(session), WT_BTREE_HS)) - WT_RET_NOTFOUND_OK(__wt_find_hs_upd(session, key, cbt->iface.value_format, recno, + WT_RET_NOTFOUND_OK(__wt_hs_find_upd(session, key, cbt->iface.value_format, recno, cbt->upd_value, false, &cbt->upd_value->buf)); /* diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index b63dadbf9b5..571ec8fd6bc 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ b/src/third_party/wiredtiger/src/txn/txn.c @@ -654,23 +654,37 @@ __txn_append_hs_record(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, WT_ITEM * WT_ERR(__wt_scr_alloc(session, 0, &hs_key)); WT_ERR(__wt_scr_alloc(session, 0, &hs_value)); - WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, hs_key, &hs_start_ts, &hs_counter)); + for (; ret == 0; ret = hs_cursor->prev(hs_cursor)) { + WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, hs_key, &hs_start_ts, &hs_counter)); - /* Not found if we cross the tree boundary. */ - if (hs_btree_id != S2BT(session)->id) { - ret = WT_NOTFOUND; - goto done; + /* Stop before crossing over to the next btree */ + if (hs_btree_id != S2BT(session)->id) { + ret = WT_NOTFOUND; + goto done; + } + + /* + * Keys are sorted in an order, skip the ones before the desired key, and bail out if we + * have crossed over the desired key and not found the record we are looking for. 
+ */ + WT_ERR(__wt_compare(session, NULL, hs_key, key, &cmp)); + if (cmp != 0) { + ret = WT_NOTFOUND; + goto done; + } + + /* + * If the stop time pair on the tombstone in the history store is already globally visible + * we can skip it. + */ + if (!__wt_txn_visible_all( + session, hs_cbt->upd_value->tw.stop_txn, hs_cbt->upd_value->tw.durable_stop_ts)) + break; } - /* - * Keys are sorted in an order, skip the ones before the desired key, and bail out if we have - * crossed over the desired key and not found the record we are looking for. - */ - WT_ERR(__wt_compare(session, NULL, hs_key, key, &cmp)); - if (cmp != 0) { - ret = WT_NOTFOUND; + /* We walked off the top of the history store. */ + if (ret == WT_NOTFOUND) goto done; - } /* * As part of the history store search, we never get an exact match based on our search criteria diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c index afc3508e577..bb4a96431f0 100644 --- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c +++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c @@ -228,6 +228,14 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW break; /* + * If the stop time pair on the tombstone in the history store is already globally visible + * we can skip it. + */ + if (__wt_txn_visible_all( + session, cbt->upd_value->tw.stop_txn, cbt->upd_value->tw.durable_stop_ts)) + continue; + + /* * As part of the history store search, we never get an exact match based on our search * criteria as we always search for a maximum record for that key. Make sure that we set the * comparison result as an exact match to remove this key as part of rollback to stable. 
In @@ -1074,6 +1082,14 @@ __rollback_to_stable_btree_hs_truncate(WT_SESSION_IMPL *session, uint32_t btree_ if (btree_id != hs_btree_id) break; + /* + * If the stop time pair on the tombstone in the history store is already globally visible + * we can skip it. + */ + if (__wt_txn_visible_all( + session, cbt->upd_value->tw.stop_txn, cbt->upd_value->tw.durable_stop_ts)) + continue; + /* Set this comparison as exact match of the search for later use. */ cbt->compare = 0; __wt_verbose(session, WT_VERB_RTS, |