diff options
author | Luke Chen <luke.chen@mongodb.com> | 2020-08-25 14:14:43 +1000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-08-25 04:27:26 +0000 |
commit | dba4734316f55fddb4fb3fae6cd541a18ad676bc (patch) | |
tree | 8181f36427b6f7cd70536959e7b5603e6c71d246 | |
parent | 4f4adc1cf23281036dc4d9b61eb24c538e1d9863 (diff) | |
download | mongo-dba4734316f55fddb4fb3fae6cd541a18ad676bc.tar.gz |
Import wiredtiger: af22169ab22adeb7abba4628ae4173bcf6b5b23d from branch mongodb-4.4
ref: d437e51e78..af22169ab2
for: 4.4.2
WT-6578 Prevent reconciliation from looking past the on-disk value
-rw-r--r-- | src/third_party/wiredtiger/import.data | 2 |
-rw-r--r-- | src/third_party/wiredtiger/src/history/hs.c | 17 |
-rw-r--r-- | src/third_party/wiredtiger/src/include/btmem.h | 13 |
-rw-r--r-- | src/third_party/wiredtiger/src/include/extern.h | 4 |
-rw-r--r-- | src/third_party/wiredtiger/src/include/txn.i | 2 |
-rw-r--r-- | src/third_party/wiredtiger/src/reconcile/rec_row.c | 23 |
-rw-r--r-- | src/third_party/wiredtiger/src/reconcile/rec_visibility.c | 19 |
7 files changed, 67 insertions, 13 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 2f62e46c2e0..d5dcda8618f 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-4.4", - "commit": "d437e51e78120ab2999ffe40c0b6d46f3f878126" + "commit": "af22169ab22adeb7abba4628ae4173bcf6b5b23d" } diff --git a/src/third_party/wiredtiger/src/history/hs.c b/src/third_party/wiredtiger/src/history/hs.c index 56526130622..9ad0f9aab28 100644 --- a/src/third_party/wiredtiger/src/history/hs.c +++ b/src/third_party/wiredtiger/src/history/hs.c @@ -1172,7 +1172,7 @@ __wt_hs_cursor_search_near(WT_SESSION_IMPL *session, WT_CURSOR *cursor, int *exa */ int __wt_hs_find_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_format, uint64_t recno, - WT_UPDATE_VALUE *upd_value, bool allow_prepare, WT_ITEM *on_disk_buf) + WT_UPDATE_VALUE *upd_value, bool allow_prepare, WT_ITEM *on_disk_buf, WT_TIME_WINDOW *on_disk_tw) { WT_CURSOR *hs_cursor; WT_CURSOR_BTREE *hs_cbt; @@ -1354,6 +1354,21 @@ __wt_hs_find_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_forma break; } + /* + * If we find a history store record that either corresponds to the on-disk value or is + * newer than it then we should use the on-disk value as the base value and apply our + * modifies on top of it. + */ + if (on_disk_tw->start_ts < hs_start_ts_tmp || + (on_disk_tw->start_ts == hs_start_ts_tmp && + on_disk_tw->start_txn <= hs_cbt->upd_value->tw.start_txn)) { + /* Fallback to the onpage value as the base value. 
*/ + orig_hs_value_buf = hs_value; + hs_value = on_disk_buf; + upd_type = WT_UPDATE_STANDARD; + break; + } + WT_ERR(hs_cursor->get_value(hs_cursor, &hs_stop_durable_ts_tmp, &durable_timestamp_tmp, &upd_type_full, hs_value)); upd_type = (uint8_t)upd_type_full; diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index ea214244c57..325ba9f05c1 100644 --- a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h @@ -1075,12 +1075,13 @@ struct __wt_update { /* AUTOMATIC FLAG VALUE GENERATION START */ #define WT_UPDATE_CLEARED_HS 0x01u /* Update that cleared the history store. */ -#define WT_UPDATE_HS 0x02u /* Update has been written to history store. */ -#define WT_UPDATE_OBSOLETE 0x04u /* Update that is obsolete. */ -#define WT_UPDATE_PREPARE_RESTORED_FROM_DS 0x08u /* Prepared update restored from data store. */ -#define WT_UPDATE_RESTORED_FAST_TRUNCATE 0x10u /* Fast truncate instantiation */ -#define WT_UPDATE_RESTORED_FROM_DS 0x20u /* Update restored from data store. */ -#define WT_UPDATE_RESTORED_FROM_HS 0x40u /* Update restored from history store. */ +#define WT_UPDATE_DS 0x02u /* Update has been written to the data store. */ +#define WT_UPDATE_HS 0x04u /* Update has been written to history store. */ +#define WT_UPDATE_OBSOLETE 0x08u /* Update that is obsolete. */ +#define WT_UPDATE_PREPARE_RESTORED_FROM_DS 0x10u /* Prepared update restored from data store. */ +#define WT_UPDATE_RESTORED_FAST_TRUNCATE 0x20u /* Fast truncate instantiation */ +#define WT_UPDATE_RESTORED_FROM_DS 0x40u /* Update restored from data store. */ +#define WT_UPDATE_RESTORED_FROM_HS 0x80u /* Update restored from history store. 
*/ /* AUTOMATIC FLAG VALUE GENERATION STOP */ uint8_t flags; diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index 51d7bd996dd..7a37bf638e8 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -769,8 +769,8 @@ extern int __wt_hs_cursor_search_near(WT_SESSION_IMPL *session, WT_CURSOR *curso extern int __wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_hs_find_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_format, - uint64_t recno, WT_UPDATE_VALUE *upd_value, bool allow_prepare, WT_ITEM *on_disk_buf) - WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); + uint64_t recno, WT_UPDATE_VALUE *upd_value, bool allow_prepare, WT_ITEM *on_disk_buf, + WT_TIME_WINDOW *on_disk_tw) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_hs_get_btree(WT_SESSION_IMPL *session, WT_BTREE **hs_btreep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi) diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i index 3cb29f342fe..450c7213e43 100644 --- a/src/third_party/wiredtiger/src/include/txn.i +++ b/src/third_party/wiredtiger/src/include/txn.i @@ -994,7 +994,7 @@ retry: /* If there's no visible update in the update chain or ondisk, check the history store file. */ if (F_ISSET(S2C(session), WT_CONN_HS_OPEN) && !F_ISSET(S2BT(session), WT_BTREE_HS)) WT_RET_NOTFOUND_OK(__wt_hs_find_upd(session, key, cbt->iface.value_format, recno, - cbt->upd_value, false, &cbt->upd_value->buf)); + cbt->upd_value, false, &cbt->upd_value->buf, &tw)); /* * Retry if we race with prepared commit or rollback. 
If we race with prepared rollback, the diff --git a/src/third_party/wiredtiger/src/reconcile/rec_row.c b/src/third_party/wiredtiger/src/reconcile/rec_row.c index 5c7ae157bde..2a050333d18 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_row.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_row.c @@ -590,6 +590,17 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) continue; } + /* + * If we've selected an update, it should be flagged as being destined for the data store. + * + * If not, it's either because we're not doing a history store reconciliation or because the + * update is globally visible (in which case, subsequent updates become irrelevant for + * reconciliation). + */ + WT_ASSERT(session, + F_ISSET(upd, WT_UPDATE_DS) || !F_ISSET(r, WT_REC_HS) || + __wt_txn_tw_start_visible_all(session, &upd_select.tw)); + WT_TIME_WINDOW_COPY(&tw, &upd_select.tw); switch (upd->type) { @@ -839,6 +850,18 @@ __wt_rec_row_leaf( r->ovfl_items = true; } } else { + /* + * If we've selected an update, it should be flagged as being destined for the data + * store. + * + * If not, it's either because we're not doing a history store reconciliation or because + * the update is globally visible (in which case, subsequent updates become irrelevant + * for reconciliation). + */ + WT_ASSERT(session, + F_ISSET(upd, WT_UPDATE_DS) || !F_ISSET(r, WT_REC_HS) || + __wt_txn_tw_start_visible_all(session, &upd_select.tw)); + /* The first time we find an overflow record, discard the underlying blocks. 
*/ if (F_ISSET(vpack, WT_CELL_UNPACK_OVERFLOW) && vpack->raw != WT_CELL_VALUE_OVFL_RM) WT_ERR(__wt_ovfl_remove(session, page, vpack)); diff --git a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c index 523ed514415..8e11c5edad4 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c @@ -286,9 +286,15 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v * a concurrent transaction commits or rolls back while we are examining its updates. This * check is not required for history store updates as they are implicitly committed. As * prepared transaction IDs are globally visible, need to check the update state as well. + * + * If an earlier reconciliation chose this update (it is marked as being destined for the + * data store), we should select it regardless of visibility if we haven't already selected + * one. This is important as it is never ok to shift the on-disk value backwards in the + * update chain. */ - if (!is_hs_page && (F_ISSET(r, WT_REC_VISIBLE_ALL) ? WT_TXNID_LE(r->last_running, txnid) : - !__txn_visible_id(session, txnid))) { + if (!F_ISSET(upd, WT_UPDATE_DS) && !is_hs_page && + (F_ISSET(r, WT_REC_VISIBLE_ALL) ? WT_TXNID_LE(r->last_running, txnid) : + !__txn_visible_id(session, txnid))) { /* * Rare case: when applications run at low isolation levels, eviction may see a * committed update followed by uncommitted updates. Give up in that case because we @@ -521,6 +527,15 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v upd_select->upd != NULL && upd_select->upd->type == WT_UPDATE_TOMBSTONE ? NULL : upd_select->upd, supd_restore, upd_memsize)); + /* + * Mark the selected update (and potentially the tombstone preceding it) as being destined + * for the data store. 
Subsequent reconciliations should know that they can select this + * update regardless of visibility. + */ + if (upd_select->upd != NULL) + F_SET(upd_select->upd, WT_UPDATE_DS); + if (tombstone != NULL) + F_SET(tombstone, WT_UPDATE_DS); upd_saved = upd_select->upd_saved = true; } |