summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Luke Chen <luke.chen@mongodb.com> 2020-08-25 14:14:43 +1000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-08-25 04:27:26 +0000
commit: dba4734316f55fddb4fb3fae6cd541a18ad676bc (patch)
tree: 8181f36427b6f7cd70536959e7b5603e6c71d246
parent: 4f4adc1cf23281036dc4d9b61eb24c538e1d9863 (diff)
download: mongo-dba4734316f55fddb4fb3fae6cd541a18ad676bc.tar.gz
Import wiredtiger: af22169ab22adeb7abba4628ae4173bcf6b5b23d from branch mongodb-4.4
ref: d437e51e78..af22169ab2
for: 4.4.2
WT-6578 Prevent reconciliation from looking past the on-disk value
-rw-r--r-- src/third_party/wiredtiger/import.data | 2
-rw-r--r-- src/third_party/wiredtiger/src/history/hs.c | 17
-rw-r--r-- src/third_party/wiredtiger/src/include/btmem.h | 13
-rw-r--r-- src/third_party/wiredtiger/src/include/extern.h | 4
-rw-r--r-- src/third_party/wiredtiger/src/include/txn.i | 2
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_row.c | 23
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_visibility.c | 19
7 files changed, 67 insertions, 13 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 2f62e46c2e0..d5dcda8618f 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-4.4",
- "commit": "d437e51e78120ab2999ffe40c0b6d46f3f878126"
+ "commit": "af22169ab22adeb7abba4628ae4173bcf6b5b23d"
}
diff --git a/src/third_party/wiredtiger/src/history/hs.c b/src/third_party/wiredtiger/src/history/hs.c
index 56526130622..9ad0f9aab28 100644
--- a/src/third_party/wiredtiger/src/history/hs.c
+++ b/src/third_party/wiredtiger/src/history/hs.c
@@ -1172,7 +1172,7 @@ __wt_hs_cursor_search_near(WT_SESSION_IMPL *session, WT_CURSOR *cursor, int *exa
*/
int
__wt_hs_find_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_format, uint64_t recno,
- WT_UPDATE_VALUE *upd_value, bool allow_prepare, WT_ITEM *on_disk_buf)
+ WT_UPDATE_VALUE *upd_value, bool allow_prepare, WT_ITEM *on_disk_buf, WT_TIME_WINDOW *on_disk_tw)
{
WT_CURSOR *hs_cursor;
WT_CURSOR_BTREE *hs_cbt;
@@ -1354,6 +1354,21 @@ __wt_hs_find_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_forma
break;
}
+ /*
+ * If we find a history store record that either corresponds to the on-disk value or is
+ * newer than it then we should use the on-disk value as the base value and apply our
+ * modifies on top of it.
+ */
+ if (on_disk_tw->start_ts < hs_start_ts_tmp ||
+ (on_disk_tw->start_ts == hs_start_ts_tmp &&
+ on_disk_tw->start_txn <= hs_cbt->upd_value->tw.start_txn)) {
+ /* Fall back to the on-page value as the base value. */
+ orig_hs_value_buf = hs_value;
+ hs_value = on_disk_buf;
+ upd_type = WT_UPDATE_STANDARD;
+ break;
+ }
+
WT_ERR(hs_cursor->get_value(hs_cursor, &hs_stop_durable_ts_tmp, &durable_timestamp_tmp,
&upd_type_full, hs_value));
upd_type = (uint8_t)upd_type_full;
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index ea214244c57..325ba9f05c1 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -1075,12 +1075,13 @@ struct __wt_update {
/* AUTOMATIC FLAG VALUE GENERATION START */
#define WT_UPDATE_CLEARED_HS 0x01u /* Update that cleared the history store. */
-#define WT_UPDATE_HS 0x02u /* Update has been written to history store. */
-#define WT_UPDATE_OBSOLETE 0x04u /* Update that is obsolete. */
-#define WT_UPDATE_PREPARE_RESTORED_FROM_DS 0x08u /* Prepared update restored from data store. */
-#define WT_UPDATE_RESTORED_FAST_TRUNCATE 0x10u /* Fast truncate instantiation */
-#define WT_UPDATE_RESTORED_FROM_DS 0x20u /* Update restored from data store. */
-#define WT_UPDATE_RESTORED_FROM_HS 0x40u /* Update restored from history store. */
+#define WT_UPDATE_DS 0x02u /* Update has been written to the data store. */
+#define WT_UPDATE_HS 0x04u /* Update has been written to history store. */
+#define WT_UPDATE_OBSOLETE 0x08u /* Update that is obsolete. */
+#define WT_UPDATE_PREPARE_RESTORED_FROM_DS 0x10u /* Prepared update restored from data store. */
+#define WT_UPDATE_RESTORED_FAST_TRUNCATE 0x20u /* Fast truncate instantiation */
+#define WT_UPDATE_RESTORED_FROM_DS 0x40u /* Update restored from data store. */
+#define WT_UPDATE_RESTORED_FROM_HS 0x80u /* Update restored from history store. */
/* AUTOMATIC FLAG VALUE GENERATION STOP */
uint8_t flags;
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 51d7bd996dd..7a37bf638e8 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -769,8 +769,8 @@ extern int __wt_hs_cursor_search_near(WT_SESSION_IMPL *session, WT_CURSOR *curso
extern int __wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, uint32_t btree_id,
const WT_ITEM *key, wt_timestamp_t ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_hs_find_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_format,
- uint64_t recno, WT_UPDATE_VALUE *upd_value, bool allow_prepare, WT_ITEM *on_disk_buf)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+ uint64_t recno, WT_UPDATE_VALUE *upd_value, bool allow_prepare, WT_ITEM *on_disk_buf,
+ WT_TIME_WINDOW *on_disk_tw) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_hs_get_btree(WT_SESSION_IMPL *session, WT_BTREE **hs_btreep)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index 3cb29f342fe..450c7213e43 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -994,7 +994,7 @@ retry:
/* If there's no visible update in the update chain or ondisk, check the history store file. */
if (F_ISSET(S2C(session), WT_CONN_HS_OPEN) && !F_ISSET(S2BT(session), WT_BTREE_HS))
WT_RET_NOTFOUND_OK(__wt_hs_find_upd(session, key, cbt->iface.value_format, recno,
- cbt->upd_value, false, &cbt->upd_value->buf));
+ cbt->upd_value, false, &cbt->upd_value->buf, &tw));
/*
* Retry if we race with prepared commit or rollback. If we race with prepared rollback, the
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_row.c b/src/third_party/wiredtiger/src/reconcile/rec_row.c
index 5c7ae157bde..2a050333d18 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_row.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_row.c
@@ -590,6 +590,17 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
continue;
}
+ /*
+ * If we've selected an update, it should be flagged as being destined for the data store.
+ *
+ * If not, it's either because we're not doing a history store reconciliation or because the
+ * update is globally visible (in which case, subsequent updates become irrelevant for
+ * reconciliation).
+ */
+ WT_ASSERT(session,
+ F_ISSET(upd, WT_UPDATE_DS) || !F_ISSET(r, WT_REC_HS) ||
+ __wt_txn_tw_start_visible_all(session, &upd_select.tw));
+
WT_TIME_WINDOW_COPY(&tw, &upd_select.tw);
switch (upd->type) {
@@ -839,6 +850,18 @@ __wt_rec_row_leaf(
r->ovfl_items = true;
}
} else {
+ /*
+ * If we've selected an update, it should be flagged as being destined for the data
+ * store.
+ *
+ * If not, it's either because we're not doing a history store reconciliation or because
+ * the update is globally visible (in which case, subsequent updates become irrelevant
+ * for reconciliation).
+ */
+ WT_ASSERT(session,
+ F_ISSET(upd, WT_UPDATE_DS) || !F_ISSET(r, WT_REC_HS) ||
+ __wt_txn_tw_start_visible_all(session, &upd_select.tw));
+
/* The first time we find an overflow record, discard the underlying blocks. */
if (F_ISSET(vpack, WT_CELL_UNPACK_OVERFLOW) && vpack->raw != WT_CELL_VALUE_OVFL_RM)
WT_ERR(__wt_ovfl_remove(session, page, vpack));
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
index 523ed514415..8e11c5edad4 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
@@ -286,9 +286,15 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
* a concurrent transaction commits or rolls back while we are examining its updates. This
* check is not required for history store updates as they are implicitly committed. As
* prepared transaction IDs are globally visible, need to check the update state as well.
+ *
+ * If an earlier reconciliation chose this update (it is marked as being destined for the
+ * data store), we should select it regardless of visibility if we haven't already selected
+ * one. This is important as it is never ok to shift the on-disk value backwards in the
+ * update chain.
*/
- if (!is_hs_page && (F_ISSET(r, WT_REC_VISIBLE_ALL) ? WT_TXNID_LE(r->last_running, txnid) :
- !__txn_visible_id(session, txnid))) {
+ if (!F_ISSET(upd, WT_UPDATE_DS) && !is_hs_page &&
+ (F_ISSET(r, WT_REC_VISIBLE_ALL) ? WT_TXNID_LE(r->last_running, txnid) :
+ !__txn_visible_id(session, txnid))) {
/*
* Rare case: when applications run at low isolation levels, eviction may see a
* committed update followed by uncommitted updates. Give up in that case because we
@@ -521,6 +527,15 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
upd_select->upd != NULL && upd_select->upd->type == WT_UPDATE_TOMBSTONE ? NULL :
upd_select->upd,
supd_restore, upd_memsize));
+ /*
+ * Mark the selected update (and potentially the tombstone preceding it) as being destined
+ * for the data store. Subsequent reconciliations should know that they can select this
+ * update regardless of visibility.
+ */
+ if (upd_select->upd != NULL)
+ F_SET(upd_select->upd, WT_UPDATE_DS);
+ if (tombstone != NULL)
+ F_SET(tombstone, WT_UPDATE_DS);
upd_saved = upd_select->upd_saved = true;
}