summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Luke Chen <luke.chen@mongodb.com> 2020-06-25 14:30:35 +1000
committer: Luke Chen <luke.chen@mongodb.com> 2020-06-25 15:13:10 +1000
commit: dd10c6328d7466a8ec21097094233afb5349d844 (patch)
tree: 951de3f9e0b1934f91f066f42f6fed15383f4d31
parent: 77d222a1b6aab0763ef64c9a7712b1aace60bfe8 (diff)
download: mongo-dd10c6328d7466a8ec21097094233afb5349d844.tar.gz
Import wiredtiger: 5a74e438ea34cc6737438f6c99ce2e5e25519a0e from branch mongodb-4.4
ref: 5e6daf7d42..5a74e438ea
for: 4.4.0-rc11
WT-6412 Fix extended stalls being seen during MongoDB performance testing
-rw-r--r-- src/third_party/wiredtiger/import.data 2
-rw-r--r-- src/third_party/wiredtiger/src/history/hs.c 28
-rw-r--r-- src/third_party/wiredtiger/src/include/cursor.h 4
-rw-r--r-- src/third_party/wiredtiger/src/include/extern.h 6
-rw-r--r-- src/third_party/wiredtiger/src/include/txn.i 23
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn.c 40
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c 16
7 files changed, 87 insertions, 32 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 6ba5f527c47..93efc061937 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-4.4",
- "commit": "5e6daf7d42727e3d86b2603c20852d1426dee55f"
+ "commit": "5a74e438ea34cc6737438f6c99ce2e5e25519a0e"
}
diff --git a/src/third_party/wiredtiger/src/history/hs.c b/src/third_party/wiredtiger/src/history/hs.c
index e94270eb37c..3b7c9a69de7 100644
--- a/src/third_party/wiredtiger/src/history/hs.c
+++ b/src/third_party/wiredtiger/src/history/hs.c
@@ -1105,14 +1105,14 @@ err:
}
/*
- * __wt_find_hs_upd --
+ * __wt_hs_find_upd --
* Scan the history store for a record the btree cursor wants to position on. Create an update
* for the record and return to the caller. The caller may choose to optionally allow prepared
* updates to be returned regardless of whether prepare is being ignored globally. Otherwise, a
* prepare conflict will be returned upon reading a prepared update.
*/
int
-__wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_format, uint64_t recno,
+__wt_hs_find_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_format, uint64_t recno,
WT_UPDATE_VALUE *upd_value, bool allow_prepare, WT_ITEM *on_disk_buf)
{
WT_CURSOR *hs_cursor;
@@ -1202,6 +1202,13 @@ __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_forma
goto done;
/*
+ * If the stop time pair on the tombstone in the history store is already globally visible
+ * we can skip it.
+ */
+ if (__wt_txn_visible_all(
+ session, hs_cbt->upd_value->tw.stop_txn, hs_cbt->upd_value->tw.durable_stop_ts))
+ continue;
+ /*
* If the stop time point of a record is visible to us, we won't be able to see anything for
* this entire key. Just jump straight to the end.
*/
@@ -1521,10 +1528,17 @@ __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_ts, &hs_counter));
if (hs_btree_id != btree->id)
break;
+
WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp));
if (cmp != 0)
break;
-
+ /*
+ * If the stop time pair on the tombstone in the history store is already globally visible
+ * we can skip it.
+ */
+ if (__wt_txn_visible_all(
+ session, hs_cbt->upd_value->tw.stop_txn, hs_cbt->upd_value->tw.durable_stop_ts))
+ continue;
/*
* If we got here, we've got out-of-order updates in the history store.
*
@@ -1617,6 +1631,14 @@ __hs_delete_key_from_pos(
WT_RET(__wt_compare(session, NULL, &hs_key, key, &cmp));
if (cmp != 0)
break;
+
+ /*
+ * If the stop time pair on the tombstone in the history store is already globally visible
+ * we can skip it.
+ */
+ if (__wt_txn_visible_all(
+ session, hs_cbt->upd_value->tw.stop_txn, hs_cbt->upd_value->tw.durable_stop_ts))
+ continue;
/*
* Since we're using internal functions to modify the row structure, we need to manually set
* the comparison to an exact match.
diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h
index 95e5c8de1fb..dfcc4f7888f 100644
--- a/src/third_party/wiredtiger/src/include/cursor.h
+++ b/src/third_party/wiredtiger/src/include/cursor.h
@@ -503,9 +503,5 @@ struct __wt_cursor_table {
#define WT_CURSOR_RECNO(cursor) WT_STREQ((cursor)->key_format, "r")
-#define WT_CURSOR_IS_DUMP(cursor) \
- F_ISSET(cursor, \
- (WT_CURSTD_DUMP_HEX | WT_CURSTD_DUMP_JSON | WT_CURSTD_DUMP_PRETTY | WT_CURSTD_DUMP_PRINT))
-
#define WT_CURSOR_RAW_OK \
(WT_CURSTD_DUMP_HEX | WT_CURSTD_DUMP_PRETTY | WT_CURSTD_DUMP_PRINT | WT_CURSTD_RAW)
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 31e24f65edd..bfb6647c88f 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -724,9 +724,6 @@ extern int __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path
extern int __wt_filename_construct(WT_SESSION_IMPL *session, const char *path,
const char *file_prefix, uintmax_t id_1, uint32_t id_2, WT_ITEM *buf)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_format,
- uint64_t recno, WT_UPDATE_VALUE *upd_value, bool allow_prepare, WT_ITEM *on_disk_buf)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_fopen(WT_SESSION_IMPL *session, const char *name, uint32_t open_flags,
uint32_t flags, WT_FSTREAM **fstrp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_fsync_background(WT_SESSION_IMPL *session)
@@ -767,6 +764,9 @@ extern int __wt_hs_cursor_position(WT_SESSION_IMPL *session, WT_CURSOR *cursor,
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, uint32_t btree_id,
const WT_ITEM *key, wt_timestamp_t ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_hs_find_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_format,
+ uint64_t recno, WT_UPDATE_VALUE *upd_value, bool allow_prepare, WT_ITEM *on_disk_buf)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_hs_get_btree(WT_SESSION_IMPL *session, WT_BTREE **hs_btreep)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index b99a54532b0..530b40ef9ed 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -854,10 +854,12 @@ __wt_txn_read_upd_list(
/*
* If the cursor is configured to ignore tombstones, copy the timestamps from the tombstones
* to the stop time window of the update value being returned to the caller. Caller can
- * process the stop time window to decide if there was a tombstone on the update chain.
+ * process the stop time window to decide if there was a tombstone on the update chain. If
+ * the time window already has a stop time set then we must've seen a tombstone prior to
+ * ours in the update list, and therefore don't need to do this again.
*/
if (type == WT_UPDATE_TOMBSTONE && F_ISSET(&cbt->iface, WT_CURSTD_IGNORE_TOMBSTONE) &&
- !__wt_txn_upd_visible_all(session, upd)) {
+ !WT_TIME_WINDOW_HAS_STOP(&cbt->upd_value->tw)) {
cbt->upd_value->tw.durable_stop_ts = upd->durable_ts;
cbt->upd_value->tw.stop_ts = upd->start_ts;
cbt->upd_value->tw.stop_txn = upd->txnid;
@@ -918,7 +920,7 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint
{
WT_TIME_WINDOW tw;
WT_UPDATE *prepare_upd;
-
+ bool have_stop_tw;
prepare_upd = NULL;
WT_RET(__wt_txn_read_upd_list(session, cbt, upd, &prepare_upd));
@@ -933,6 +935,12 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint
return (0);
}
+ /*
+ * When we inspected the update list we may have seen a tombstone leaving us with a valid stop
+ * time window, we don't want to overwrite this stop time window.
+ */
+ have_stop_tw = WT_TIME_WINDOW_HAS_STOP(&cbt->upd_value->tw);
+
/* Check the ondisk value. */
if (vpack == NULL) {
WT_TIME_WINDOW_INIT(&tw);
@@ -949,9 +957,8 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint
* return "not found", except scanning the history store during rollback to stable and when we
* are told to ignore non-globally visible tombstones.
*/
- if (__wt_txn_tw_stop_visible(session, &tw) &&
- (!F_ISSET(&cbt->iface, WT_CURSTD_IGNORE_TOMBSTONE) ||
- (__wt_txn_tw_stop_visible_all(session, &tw) && !WT_CURSOR_IS_DUMP(&cbt->iface)))) {
+ if (!have_stop_tw && __wt_txn_tw_stop_visible(session, &tw) &&
+ !F_ISSET(&cbt->iface, WT_CURSTD_IGNORE_TOMBSTONE)) {
cbt->upd_value->buf.data = NULL;
cbt->upd_value->buf.size = 0;
cbt->upd_value->tw.durable_stop_ts = tw.durable_stop_ts;
@@ -963,7 +970,7 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint
}
/* Store the stop time pair of the history store record that is returning. */
- if (WT_TIME_WINDOW_HAS_STOP(&tw) && WT_IS_HS(S2BT(session))) {
+ if (!have_stop_tw && WT_TIME_WINDOW_HAS_STOP(&tw) && WT_IS_HS(S2BT(session))) {
cbt->upd_value->tw.durable_stop_ts = tw.durable_stop_ts;
cbt->upd_value->tw.stop_ts = tw.stop_ts;
cbt->upd_value->tw.stop_txn = tw.stop_txn;
@@ -986,7 +993,7 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint
/* If there's no visible update in the update chain or ondisk, check the history store file. */
if (F_ISSET(S2C(session), WT_CONN_HS_OPEN) && !F_ISSET(S2BT(session), WT_BTREE_HS))
- WT_RET_NOTFOUND_OK(__wt_find_hs_upd(session, key, cbt->iface.value_format, recno,
+ WT_RET_NOTFOUND_OK(__wt_hs_find_upd(session, key, cbt->iface.value_format, recno,
cbt->upd_value, false, &cbt->upd_value->buf));
/*
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index b63dadbf9b5..571ec8fd6bc 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -654,23 +654,37 @@ __txn_append_hs_record(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, WT_ITEM *
WT_ERR(__wt_scr_alloc(session, 0, &hs_key));
WT_ERR(__wt_scr_alloc(session, 0, &hs_value));
- WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, hs_key, &hs_start_ts, &hs_counter));
+ for (; ret == 0; ret = hs_cursor->prev(hs_cursor)) {
+ WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, hs_key, &hs_start_ts, &hs_counter));
- /* Not found if we cross the tree boundary. */
- if (hs_btree_id != S2BT(session)->id) {
- ret = WT_NOTFOUND;
- goto done;
+ /* Stop before crossing over to the next btree */
+ if (hs_btree_id != S2BT(session)->id) {
+ ret = WT_NOTFOUND;
+ goto done;
+ }
+
+ /*
+ * Keys are sorted in an order, skip the ones before the desired key, and bail out if we
+ * have crossed over the desired key and not found the record we are looking for.
+ */
+ WT_ERR(__wt_compare(session, NULL, hs_key, key, &cmp));
+ if (cmp != 0) {
+ ret = WT_NOTFOUND;
+ goto done;
+ }
+
+ /*
+ * If the stop time pair on the tombstone in the history store is already globally visible
+ * we can skip it.
+ */
+ if (!__wt_txn_visible_all(
+ session, hs_cbt->upd_value->tw.stop_txn, hs_cbt->upd_value->tw.durable_stop_ts))
+ break;
}
- /*
- * Keys are sorted in an order, skip the ones before the desired key, and bail out if we have
- * crossed over the desired key and not found the record we are looking for.
- */
- WT_ERR(__wt_compare(session, NULL, hs_key, key, &cmp));
- if (cmp != 0) {
- ret = WT_NOTFOUND;
+ /* We walked off the top of the history store. */
+ if (ret == WT_NOTFOUND)
goto done;
- }
/*
* As part of the history store search, we never get an exact match based on our search criteria
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index afc3508e577..bb4a96431f0 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -228,6 +228,14 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
break;
/*
+ * If the stop time pair on the tombstone in the history store is already globally visible
+ * we can skip it.
+ */
+ if (__wt_txn_visible_all(
+ session, cbt->upd_value->tw.stop_txn, cbt->upd_value->tw.durable_stop_ts))
+ continue;
+
+ /*
* As part of the history store search, we never get an exact match based on our search
* criteria as we always search for a maximum record for that key. Make sure that we set the
* comparison result as an exact match to remove this key as part of rollback to stable. In
@@ -1074,6 +1082,14 @@ __rollback_to_stable_btree_hs_truncate(WT_SESSION_IMPL *session, uint32_t btree_
if (btree_id != hs_btree_id)
break;
+ /*
+ * If the stop time pair on the tombstone in the history store is already globally visible
+ * we can skip it.
+ */
+ if (__wt_txn_visible_all(
+ session, cbt->upd_value->tw.stop_txn, cbt->upd_value->tw.durable_stop_ts))
+ continue;
+
/* Set this comparison as exact match of the search for later use. */
cbt->compare = 0;
__wt_verbose(session, WT_VERB_RTS,