From a28e071d22e8056781b03b1243c3961b054801b6 Mon Sep 17 00:00:00 2001 From: Luke Chen Date: Fri, 21 Aug 2020 18:15:07 +1000 Subject: Import wiredtiger: 850b579d4b526de7f296cb617999ccc20adb4aff from branch mongodb-4.6 ref: 4cacb06e31..850b579d4b for: 4.5.1 WT-5693 Enable test_wt4105_large_doc_small_upd WT-6390 Extend compact02 timeout from 8 => 10 minutes WT-6569 Squash the prepared updates into a single update before writing it to data store WT-6578 Prevent reconciliation from looking past the on-disk value WT-6602 Allow operation timeout ms to be passed to commit and rollback --- src/third_party/wiredtiger/dist/api_data.py | 27 ++- src/third_party/wiredtiger/import.data | 2 +- src/third_party/wiredtiger/src/btree/bt_page.c | 14 ++ src/third_party/wiredtiger/src/config/config_def.c | 11 +- src/third_party/wiredtiger/src/history/hs.c | 17 +- src/third_party/wiredtiger/src/include/btmem.h | 13 +- src/third_party/wiredtiger/src/include/extern.h | 4 +- src/third_party/wiredtiger/src/include/txn.i | 5 +- .../wiredtiger/src/include/wiredtiger.in | 27 ++- src/third_party/wiredtiger/src/reconcile/rec_row.c | 23 +++ .../wiredtiger/src/reconcile/rec_visibility.c | 65 +++++- src/third_party/wiredtiger/src/txn/txn.c | 42 +++- src/third_party/wiredtiger/test/csuite/Makefile.am | 3 +- .../test/csuite/wt4105_large_doc_small_upd/main.c | 2 +- src/third_party/wiredtiger/test/evergreen.yml | 2 + .../wiredtiger/test/suite/test_compact02.py | 2 +- .../wiredtiger/test/suite/test_prepare08.py | 219 +++++++++++++++++++-- .../wiredtiger/test/suite/test_prepare10.py | 2 +- .../test/suite/test_rollback_to_stable10.py | 2 +- .../wiredtiger/test/suite/test_txn21.py | 10 + 20 files changed, 428 insertions(+), 64 deletions(-) (limited to 'src/third_party') diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py index faf364196e4..17bbba20e65 100644 --- a/src/third_party/wiredtiger/dist/api_data.py +++ b/src/third_party/wiredtiger/dist/api_data.py @@ -1450,11 +1450,11 @@ methods = { Config('name', '', r''' name of the transaction for tracing and debugging'''), Config('operation_timeout_ms', '0', r''' - when non-zero, a requested limit on the number of elapsed real time milliseconds taken - to complete database operations in this transaction. Time is measured from the start - of each WiredTiger API call. There is no guarantee any operation will not take longer - than this amount of time. If WiredTiger notices the limit has been exceeded, an operation - may return a WT_ROLLBACK error. Default is to have no limit''', + when non-zero, a requested limit on the time taken to complete operations in this + transaction. Time is measured in real time milliseconds from the start of each WiredTiger + API call. There is no guarantee any operation will not take longer than this amount of time. + If WiredTiger notices the limit has been exceeded, an operation may return a WT_ROLLBACK + error. Default is to have no limit''', min=1), Config('priority', 0, r''' priority of the transaction for resolving conflicts. @@ -1501,6 +1501,13 @@ methods = { current transaction. The value must also not be older than the current stable timestamp. See @ref transaction_timestamps'''), + Config('operation_timeout_ms', '0', r''' + when non-zero, a requested limit on the time taken to complete operations in this + transaction. Time is measured in real time milliseconds from the start of each WiredTiger + API call. There is no guarantee any operation will not take longer than this amount of time. + If WiredTiger notices the limit has been exceeded, an operation may return a WT_ROLLBACK + error. Default is to have no limit''', + min=1), Config('sync', '', r''' override whether to sync log records when the transaction commits, inherited from ::wiredtiger_open \c transaction_sync. @@ -1542,7 +1549,15 @@ methods = { for a transaction. See @ref transaction_timestamps'''), ]), -'WT_SESSION.rollback_transaction' : Method([]), +'WT_SESSION.rollback_transaction' : Method([ + Config('operation_timeout_ms', '0', r''' + when non-zero, a requested limit on the time taken to complete operations in this + transaction. Time is measured in real time milliseconds from the start of each WiredTiger + API call. There is no guarantee any operation will not take longer than this amount of time. + If WiredTiger notices the limit has been exceeded, an operation may return a WT_ROLLBACK + error. Default is to have no limit''', + min=1), +]), 'WT_SESSION.checkpoint' : Method([ Config('drop', '', r''' diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 7eab1388a35..da0e52c702a 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-4.6", - "commit": "4cacb06e31b67cd17e0ecdcc366c3e9466e5ac3a" + "commit": "850b579d4b526de7f296cb617999ccc20adb4aff" } diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c index b2b83eadaad..5a9cc9cea41 100644 --- a/src/third_party/wiredtiger/src/btree/bt_page.c +++ b/src/third_party/wiredtiger/src/btree/bt_page.c @@ -633,6 +633,20 @@ __inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page) tombstone->prepare_state = WT_PREPARE_INPROGRESS; F_SET(tombstone, WT_UPDATE_PREPARE_RESTORED_FROM_DS); F_SET(upd, WT_UPDATE_RESTORED_FROM_DS); + + /* + * Mark the update also as in-progress if the update and tombstone are from same + * transaction by comparing both the transaction and timestamps as the transaction + * information gets lost after restart. + */ + if (unpack.tw.start_ts == unpack.tw.stop_ts && + unpack.tw.durable_start_ts == unpack.tw.durable_stop_ts && + unpack.tw.start_txn == unpack.tw.stop_txn) { + upd->durable_ts = WT_TS_NONE; + upd->prepare_state = WT_PREPARE_INPROGRESS; + F_SET(upd, WT_UPDATE_PREPARE_RESTORED_FROM_DS); + } + tombstone->next = upd; } else { upd->durable_ts = WT_TS_NONE; diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c index 9705cbb618d..b6061c98c11 100644 --- a/src/third_party/wiredtiger/src/config/config_def.c +++ b/src/third_party/wiredtiger/src/config/config_def.c @@ -215,6 +215,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_checkpoint[] = { static const WT_CONFIG_CHECK confchk_WT_SESSION_commit_transaction[] = { {"commit_timestamp", "string", NULL, NULL, NULL, 0}, {"durable_timestamp", "string", NULL, NULL, NULL, 0}, + {"operation_timeout_ms", "int", NULL, "min=1", NULL, 0}, {"sync", "string", NULL, "choices=[\"background\",\"off\",\"on\"]", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}}; @@ -345,6 +346,9 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_reconfigure[] = { NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}}; +static const WT_CONFIG_CHECK confchk_WT_SESSION_rollback_transaction[] = { + {"operation_timeout_ms", "int", NULL, "min=1", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}}; + static const WT_CONFIG_CHECK confchk_WT_SESSION_salvage[] = { {"force", "boolean", NULL, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}}; @@ -887,7 +891,9 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator", confchk_WT_SESSION_checkpoint, 5}, {"WT_SESSION.close", "", NULL, 0}, {"WT_SESSION.commit_transaction", - "commit_timestamp=,durable_timestamp=,sync=", confchk_WT_SESSION_commit_transaction, 3}, + "commit_timestamp=,durable_timestamp=,operation_timeout_ms=0," + "sync=", + confchk_WT_SESSION_commit_transaction, 4}, {"WT_SESSION.compact", "timeout=1200", confchk_WT_SESSION_compact, 1}, {"WT_SESSION.create", "access_pattern_hint=none,allocation_size=4KB,app_metadata=," @@ -939,7 +945,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator", "isolation=read-committed", confchk_WT_SESSION_reconfigure, 3}, {"WT_SESSION.rename", "", NULL, 0}, {"WT_SESSION.reset", "", NULL, 0}, - {"WT_SESSION.rollback_transaction", "", NULL, 0}, + {"WT_SESSION.rollback_transaction", "operation_timeout_ms=0", + confchk_WT_SESSION_rollback_transaction, 1}, {"WT_SESSION.salvage", "force=false", confchk_WT_SESSION_salvage, 1}, {"WT_SESSION.strerror", "", NULL, 0}, {"WT_SESSION.timestamp_transaction", diff --git a/src/third_party/wiredtiger/src/history/hs.c b/src/third_party/wiredtiger/src/history/hs.c index a4380ee13c9..bc941a51cf0 100644 --- a/src/third_party/wiredtiger/src/history/hs.c +++ b/src/third_party/wiredtiger/src/history/hs.c @@ -1160,7 +1160,7 @@ __wt_hs_cursor_search_near(WT_SESSION_IMPL *session, WT_CURSOR *cursor, int *exa */ int __wt_hs_find_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_format, uint64_t recno, - WT_UPDATE_VALUE *upd_value, bool allow_prepare, WT_ITEM *on_disk_buf) + WT_UPDATE_VALUE *upd_value, bool allow_prepare, WT_ITEM *on_disk_buf, WT_TIME_WINDOW *on_disk_tw) { WT_CURSOR *hs_cursor; WT_CURSOR_BTREE *hs_cbt; @@ -1341,6 +1341,21 @@ __wt_hs_find_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_forma break; } + /* + * If we find a history store record that either corresponds to the on-disk value or is + * newer than it then we should use the on-disk value as the base value and apply our + * modifies on top of it. + */ + if (on_disk_tw->start_ts < hs_start_ts_tmp || + (on_disk_tw->start_ts == hs_start_ts_tmp && + on_disk_tw->start_txn <= hs_cbt->upd_value->tw.start_txn)) { + /* Fallback to the onpage value as the base value. */ + orig_hs_value_buf = hs_value; + hs_value = on_disk_buf; + upd_type = WT_UPDATE_STANDARD; + break; + } + WT_ERR(hs_cursor->get_value(hs_cursor, &hs_stop_durable_ts_tmp, &durable_timestamp_tmp, &upd_type_full, hs_value)); upd_type = (uint8_t)upd_type_full; diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index a96d929cb7a..baa8fbbdb50 100644 --- a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h @@ -1075,12 +1075,13 @@ struct __wt_update { /* AUTOMATIC FLAG VALUE GENERATION START */ #define WT_UPDATE_CLEARED_HS 0x01u /* Update that cleared the history store. */ -#define WT_UPDATE_HS 0x02u /* Update has been written to history store. */ -#define WT_UPDATE_OBSOLETE 0x04u /* Update that is obsolete. */ -#define WT_UPDATE_PREPARE_RESTORED_FROM_DS 0x08u /* Prepared update restored from data store. */ -#define WT_UPDATE_RESTORED_FAST_TRUNCATE 0x10u /* Fast truncate instantiation */ -#define WT_UPDATE_RESTORED_FROM_DS 0x20u /* Update restored from data store. */ -#define WT_UPDATE_RESTORED_FROM_HS 0x40u /* Update restored from history store. */ +#define WT_UPDATE_DS 0x02u /* Update has been written to the data store. */ +#define WT_UPDATE_HS 0x04u /* Update has been written to history store. */ +#define WT_UPDATE_OBSOLETE 0x08u /* Update that is obsolete. */ +#define WT_UPDATE_PREPARE_RESTORED_FROM_DS 0x10u /* Prepared update restored from data store. */ +#define WT_UPDATE_RESTORED_FAST_TRUNCATE 0x20u /* Fast truncate instantiation */ +#define WT_UPDATE_RESTORED_FROM_DS 0x40u /* Update restored from data store. */ +#define WT_UPDATE_RESTORED_FROM_HS 0x80u /* Update restored from history store. */ /* AUTOMATIC FLAG VALUE GENERATION STOP */ uint8_t flags; diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index b56dba415d5..c8b4427f972 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -771,8 +771,8 @@ extern int __wt_hs_cursor_search_near(WT_SESSION_IMPL *session, WT_CURSOR *curso extern int __wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_hs_find_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_format, - uint64_t recno, WT_UPDATE_VALUE *upd_value, bool allow_prepare, WT_ITEM *on_disk_buf) - WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); + uint64_t recno, WT_UPDATE_VALUE *upd_value, bool allow_prepare, WT_ITEM *on_disk_buf, + WT_TIME_WINDOW *on_disk_tw) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_hs_get_btree(WT_SESSION_IMPL *session, WT_BTREE **hs_btreep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi) diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i index b1fc99d8ea0..0d98eb33759 100644 --- a/src/third_party/wiredtiger/src/include/txn.i +++ b/src/third_party/wiredtiger/src/include/txn.i @@ -993,7 +993,7 @@ retry: /* If there's no visible update in the update chain or ondisk, check the history store file. */ if (F_ISSET(S2C(session), WT_CONN_HS_OPEN) && !F_ISSET(S2BT(session), WT_BTREE_HS)) WT_RET_NOTFOUND_OK(__wt_hs_find_upd(session, key, cbt->iface.value_format, recno, - cbt->upd_value, false, &cbt->upd_value->buf)); + cbt->upd_value, false, &cbt->upd_value->buf, &tw)); /* * Retry if we race with prepared commit or rollback. If we race with prepared rollback, the @@ -1045,8 +1045,7 @@ __wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[]) WT_ASSERT(session, !F_ISSET(txn, WT_TXN_RUNNING)); - if (cfg != NULL) - WT_RET(__wt_txn_config(session, cfg)); + WT_RET(__wt_txn_config(session, cfg)); /* Allocate a snapshot if required. */ if (txn->isolation == WT_ISO_SNAPSHOT) { diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index 977ad90cdc9..14bdfeb4af0 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -1766,12 +1766,12 @@ struct __wt_session { * \c "read-committed"\, \c "snapshot"; default empty.} * @config{name, name of the transaction for tracing and debugging., a string; default * empty.} - * @config{operation_timeout_ms, when non-zero\, a requested limit on the number of elapsed - * real time milliseconds taken to complete database operations in this transaction. Time - * is measured from the start of each WiredTiger API call. There is no guarantee any - * operation will not take longer than this amount of time. If WiredTiger notices the limit - * has been exceeded\, an operation may return a WT_ROLLBACK error. Default is to have no - * limit., an integer greater than or equal to 1; default \c 0.} + * @config{operation_timeout_ms, when non-zero\, a requested limit on the time taken to + * complete operations in this transaction. Time is measured in real time milliseconds from + * the start of each WiredTiger API call. There is no guarantee any operation will not take + * longer than this amount of time. If WiredTiger notices the limit has been exceeded\, an + * operation may return a WT_ROLLBACK error. Default is to have no limit., an integer + * greater than or equal to 1; default \c 0.} * @config{priority, priority of the transaction for resolving conflicts. Transactions with * higher values are less likely to abort., an integer between -100 and 100; default \c 0.} * @config{read_timestamp, read using the specified timestamp. The supplied value must not @@ -1820,6 +1820,12 @@ struct __wt_session { * supplied value must not be older than the commit timestamp set for the current * transaction. The value must also not be older than the current stable timestamp. See * @ref transaction_timestamps., a string; default empty.} + * @config{operation_timeout_ms, when non-zero\, a requested limit on the time taken to + * complete operations in this transaction. Time is measured in real time milliseconds from + * the start of each WiredTiger API call. There is no guarantee any operation will not take + * longer than this amount of time. If WiredTiger notices the limit has been exceeded\, an + * operation may return a WT_ROLLBACK error. Default is to have no limit., an integer + * greater than or equal to 1; default \c 0.} * @config{sync, override whether to sync log records when the transaction commits\, * inherited from ::wiredtiger_open \c transaction_sync. The \c background setting * initiates a background synchronization intended to be used with a later call to @@ -1869,7 +1875,14 @@ struct __wt_session { * @snippet ex_all.c transaction commit/rollback * * @param session the session handle - * @configempty{WT_SESSION.rollback_transaction, see dist/api_data.py} + * @configstart{WT_SESSION.rollback_transaction, see dist/api_data.py} + * @config{operation_timeout_ms, when non-zero\, a requested limit on the time taken to + * complete operations in this transaction. Time is measured in real time milliseconds from + * the start of each WiredTiger API call. There is no guarantee any operation will not take + * longer than this amount of time. If WiredTiger notices the limit has been exceeded\, an + * operation may return a WT_ROLLBACK error. Default is to have no limit., an integer + * greater than or equal to 1; default \c 0.} + * @configend * @errors */ int __F(rollback_transaction)(WT_SESSION *session, const char *config); diff --git a/src/third_party/wiredtiger/src/reconcile/rec_row.c b/src/third_party/wiredtiger/src/reconcile/rec_row.c index 843d5464218..1e185469c99 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_row.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_row.c @@ -590,6 +590,17 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) continue; } + /* + * If we've selected an update, it should be flagged as being destined for the data store. + * + * If not, it's either because we're not doing a history store reconciliation or because the + * update is globally visible (in which case, subsequent updates become irrelevant for + * reconciliation). + */ + WT_ASSERT(session, + F_ISSET(upd, WT_UPDATE_DS) || !F_ISSET(r, WT_REC_HS) || + __wt_txn_tw_start_visible_all(session, &upd_select.tw)); + WT_TIME_WINDOW_COPY(&tw, &upd_select.tw); switch (upd->type) { @@ -839,6 +850,18 @@ __wt_rec_row_leaf( r->ovfl_items = true; } } else { + /* + * If we've selected an update, it should be flagged as being destined for the data + * store. + * + * If not, it's either because we're not doing a history store reconciliation or because + * the update is globally visible (in which case, subsequent updates become irrelevant + * for reconciliation). + */ + WT_ASSERT(session, + F_ISSET(upd, WT_UPDATE_DS) || !F_ISSET(r, WT_REC_HS) || + __wt_txn_tw_start_visible_all(session, &upd_select.tw)); + /* The first time we find an overflow record, discard the underlying blocks. */ if (F_ISSET(vpack, WT_CELL_UNPACK_OVERFLOW) && vpack->raw != WT_CELL_VALUE_OVFL_RM) WT_ERR(__wt_ovfl_remove(session, page, vpack)); diff --git a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c index e52608ef767..db52be2f55b 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c @@ -232,7 +232,7 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v WT_DECL_RET; WT_PAGE *page; WT_TIME_WINDOW *select_tw; - WT_UPDATE *first_txn_upd, *first_upd, *upd, *last_upd, *tombstone; + WT_UPDATE *first_txn_upd, *first_upd, *upd, *last_upd, *same_txn_valid_upd, *tombstone; wt_timestamp_t max_ts; size_t upd_memsize; uint64_t max_txn, txnid; @@ -249,7 +249,7 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v WT_TIME_WINDOW_INIT(select_tw); page = r->page; - first_txn_upd = upd = last_upd = tombstone = NULL; + first_txn_upd = upd = last_upd = same_txn_valid_upd = tombstone = NULL; upd_memsize = 0; max_ts = WT_TS_NONE; max_txn = WT_TXN_NONE; @@ -287,8 +287,13 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v * a concurrent transaction commits or rolls back while we are examining its updates. This * check is not required for history store updates as they are implicitly committed. As * prepared transaction IDs are globally visible, need to check the update state as well. + * + * If an earlier reconciliation chose this update (it is marked as being destined for the + * data store), we should select it regardless of visibility if we haven't already selected + * one. This is important as it is never ok to shift the on-disk value backwards in the + * update chain. */ - if (!is_hs_page && + if (!F_ISSET(upd, WT_UPDATE_DS) && !is_hs_page && (F_ISSET(r, WT_REC_VISIBLE_ALL) ? WT_TXNID_LE(r->last_running, txnid) : !__txn_visible_id(session, txnid))) { /* @@ -417,12 +422,53 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v /* Find the update this tombstone applies to. */ if (!__wt_txn_upd_visible_all(session, upd)) { - while (upd->next != NULL && upd->next->txnid == WT_TXN_ABORTED) - upd = upd->next; + /* + * Loop until a valid update from a different transaction is found in the update + * list. + */ + while (upd->next != NULL) { + if (upd->next->txnid == WT_TXN_ABORTED) + upd = upd->next; + else if (upd->next->txnid != WT_TXN_NONE && + tombstone->txnid == upd->next->txnid) { + upd = upd->next; + /* Save the latest update from the same transaction. */ + if (same_txn_valid_upd == NULL) + same_txn_valid_upd = upd; + } else + break; + } + WT_ASSERT(session, upd->next == NULL || upd->next->txnid != WT_TXN_ABORTED); if (upd->next == NULL) last_upd = upd; upd_select->upd = upd = upd->next; + + /* + * If there is no on-disk update and any valid update from a different transaction + * is not found in the update list, write the same transaction update itself to disk + * to avoid blocking the eviction. + */ + if (vpack == NULL && upd == NULL) + upd_select->upd = upd = same_txn_valid_upd; + else if (upd != NULL && upd->type == WT_UPDATE_TOMBSTONE) { + /* + * The selected update from a different transaction is also a tombstone, use the + * update from the same transaction as the selected update. + */ + WT_ASSERT(session, + same_txn_valid_upd != NULL && + same_txn_valid_upd->type != WT_UPDATE_TOMBSTONE); + upd_select->upd = upd = same_txn_valid_upd; + } else if (same_txn_valid_upd != NULL && vpack != NULL && + WT_TIME_WINDOW_HAS_STOP(&vpack->tw)) { + /* + * The on-disk version has a valid stop timestamp, use the update from the same + * transaction as the selected update. + */ + WT_ASSERT(session, same_txn_valid_upd->type != WT_UPDATE_TOMBSTONE); + upd_select->upd = upd = same_txn_valid_upd; + } } } if (upd != NULL) @@ -539,6 +585,15 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v upd_select->upd != NULL && upd_select->upd->type == WT_UPDATE_TOMBSTONE ? NULL : upd_select->upd, supd_restore, upd_memsize)); + /* + * Mark the selected update (and potentially the tombstone preceding it) as being destined + * for the data store. Subsequent reconciliations should know that they can select this + * update regardless of visibility. + */ + if (upd_select->upd != NULL) + F_SET(upd_select->upd, WT_UPDATE_DS); + if (tombstone != NULL) + F_SET(tombstone, WT_UPDATE_DS); upd_saved = upd_select->upd_saved = true; } diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index ffdf33b4fa5..6f830b93691 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ b/src/third_party/wiredtiger/src/txn/txn.c @@ -454,6 +454,33 @@ done: return (ret); } +/* + * __txn_config_operation_timeout -- + * Configure a transactions operation timeout duration. + */ +static int +__txn_config_operation_timeout(WT_SESSION_IMPL *session, const char *cfg[]) +{ + WT_CONFIG_ITEM cval; + WT_TXN *txn; + + txn = session->txn; + + if (cfg == NULL) + return (0); + + /* Retrieve the maximum operation time, defaulting to the database-wide configuration. */ + WT_RET(__wt_config_gets(session, cfg, "operation_timeout_ms", &cval)); + + /* + * The default configuration value is 0, we can't tell if they're setting it back to 0 or, if + * the default was automatically passed in. + */ + if (cval.val != 0) + txn->operation_timeout_us = (uint64_t)(cval.val * WT_THOUSAND); + return (0); +} + /* * __wt_txn_config -- * Configure a transaction. @@ -467,6 +494,9 @@ __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[]) txn = session->txn; + if (cfg == NULL) + return (0); + WT_RET(__wt_config_gets_def(session, cfg, "isolation", 0, &cval)); if (cval.len != 0) txn->isolation = WT_STRING_MATCH("snapshot", cval.str, cval.len) ? @@ -474,9 +504,7 @@ __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_STRING_MATCH("read-committed", cval.str, cval.len) ? WT_ISO_READ_COMMITTED : WT_ISO_READ_UNCOMMITTED; - /* Retrieve the maximum operation time, defaulting to the database-wide configuration. */ - WT_RET(__wt_config_gets(session, cfg, "operation_timeout_ms", &cval)); - txn->operation_timeout_us = (uint64_t)(cval.val * WT_THOUSAND); + WT_RET(__txn_config_operation_timeout(session, cfg)); /* * The default sync setting is inherited from the connection, but can be overridden by an @@ -1251,6 +1279,9 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING)); WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) || txn->mod_count == 0); + /* Configure the timeout for this commit operation. */ + WT_ERR(__txn_config_operation_timeout(session, cfg)); + /* * Clear the prepared round up flag if the transaction is not prepared. There is no rounding up * to do in that case. @@ -1647,8 +1678,6 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]) u_int i; bool prepare, readonly; - WT_UNUSED(cfg); - cursor = NULL; txn = session->txn; prepare = F_ISSET(txn, WT_TXN_PREPARE); @@ -1660,6 +1689,9 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]) if (txn->notify != NULL) WT_TRET(txn->notify->notify(txn->notify, (WT_SESSION *)session, txn->id, 0)); + /* Configure the timeout for this rollback operation. */ + WT_RET(__txn_config_operation_timeout(session, cfg)); + /* * Resolving prepared updates is expensive. Sort prepared modifications so all updates for each * page within each file are done at the same time. diff --git a/src/third_party/wiredtiger/test/csuite/Makefile.am b/src/third_party/wiredtiger/test/csuite/Makefile.am index cf58cecc8a6..553738fd418 100644 --- a/src/third_party/wiredtiger/test/csuite/Makefile.am +++ b/src/third_party/wiredtiger/test/csuite/Makefile.am @@ -121,8 +121,7 @@ all_TESTS += test_wt3874_pad_byte_collator test_wt4105_large_doc_small_upd_SOURCES = wt4105_large_doc_small_upd/main.c noinst_PROGRAMS += test_wt4105_large_doc_small_upd -# Temporarily disabled (WT-5579) -# all_TESTS += test_wt4105_large_doc_small_upd +all_TESTS += test_wt4105_large_doc_small_upd test_wt4117_checksum_SOURCES = wt4117_checksum/main.c noinst_PROGRAMS += test_wt4117_checksum diff --git a/src/third_party/wiredtiger/test/csuite/wt4105_large_doc_small_upd/main.c b/src/third_party/wiredtiger/test/csuite/wt4105_large_doc_small_upd/main.c index b56a638c6ed..c831180c130 100644 --- a/src/third_party/wiredtiger/test/csuite/wt4105_large_doc_small_upd/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt4105_large_doc_small_upd/main.c @@ -130,7 +130,7 @@ main(int argc, char *argv[]) modify_entry.offset = offset; modify_entry.size = modify_entry.data.size; /* FIXME-WT-6113: extend timeout to pass the test */ - (void)alarm(7); + (void)alarm(15); testutil_check(c->modify(c, &modify_entry, 1)); (void)alarm(0); testutil_check(session2->commit_transaction(session2, NULL)); diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml index c6cc86249da..8d8893f4bad 100755 --- a/src/third_party/wiredtiger/test/evergreen.yml +++ b/src/third_party/wiredtiger/test/evergreen.yml @@ -556,6 +556,7 @@ tasks: posix_configure_flags: --enable-silent-rules --enable-strict --enable-diagnostic --disable-static - func: "make check all" vars: + smp_command: -j 1 test_env_vars: MSAN_OPTIONS=abort_on_error=1:disable_coredump=0 MSAN_SYMBOLIZER_PATH=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer TESTUTIL_SLOW_MACHINE=1 - name: make-check-asan-test @@ -571,6 +572,7 @@ tasks: posix_configure_flags: --enable-silent-rules --enable-strict --enable-diagnostic --disable-static - func: "make check all" vars: + smp_command: -j 1 test_env_vars: ASAN_OPTIONS=detect_leaks=1:abort_on_error=1:disable_coredump=0 ASAN_SYMBOLIZER_PATH=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer TESTUTIL_SLOW_MACHINE=1 - name: make-check-linux-no-ftruncate-test diff --git a/src/third_party/wiredtiger/test/suite/test_compact02.py b/src/third_party/wiredtiger/test/suite/test_compact02.py index e466aa81a95..0b68b695e38 100644 --- a/src/third_party/wiredtiger/test/suite/test_compact02.py +++ b/src/third_party/wiredtiger/test/suite/test_compact02.py @@ -149,7 +149,7 @@ class test_compact02(wttest.WiredTigerTestCase): # Compact can collide with eviction, if that happens we retry. Wait for # a long time, the check for EBUSY means we're not retrying on any real # errors. - for i in range(1, 80): + for i in range(1, 100): if not self.raisesBusy( lambda: self.session.compact(self.uri, None)): break diff --git a/src/third_party/wiredtiger/test/suite/test_prepare08.py b/src/third_party/wiredtiger/test/suite/test_prepare08.py index 87e9997b110..9b2ac655ede 100644 --- a/src/third_party/wiredtiger/test/suite/test_prepare08.py +++ b/src/third_party/wiredtiger/test/suite/test_prepare08.py @@ -34,12 +34,12 @@ from wtdataset import SimpleDataSet def timestamp_str(t): return '%x' % t -# test_prepare07.py -# Test to ensure prepared tombstones are properly aborted even when they are written +# test_prepare08.py +# Test to ensure prepared tombstones are properly aborted/committed even when they are written # to the data store. class test_prepare08(wttest.WiredTigerTestCase): # Force a small cache. - conn_config = 'cache_size=10MB,eviction_dirty_trigger=80,eviction_updates_trigger=80' + conn_config = 'cache_size=5MB,eviction_dirty_trigger=80,eviction_updates_trigger=80' def updates(self, ds, uri, nrows, value, ts): cursor = self.session.open_cursor(uri) @@ -47,7 +47,16 @@ class test_prepare08(wttest.WiredTigerTestCase): for i in range(1, nrows): cursor.set_key(ds.key(i)) cursor.set_value(value) - self.assertEquals(cursor.insert(), 0) + self.assertEquals(cursor.update(), 0) + self.session.commit_transaction('commit_timestamp=' + timestamp_str(ts)) + cursor.close() + + def removes(self, ds, uri, nrows, ts): + cursor = self.session.open_cursor(uri) + self.session.begin_transaction('isolation=snapshot') + for i in range(1, nrows): + cursor.set_key(ds.key(i)) + self.assertEquals(cursor.remove(), 0) self.session.commit_transaction('commit_timestamp=' + timestamp_str(ts)) cursor.close() @@ -56,50 +65,220 @@ class test_prepare08(wttest.WiredTigerTestCase): self.session.begin_transaction('ignore_prepare=true,read_timestamp=' + timestamp_str(ts)) for i in range(1, nrows): cursor.set_key(ds.key(i)) - self.assertEquals(cursor.search(), 0) - self.assertEquals(cursor.get_value(),value) + if value == None: + self.assertEqual(cursor.search(), wiredtiger.WT_NOTFOUND) + else: + self.assertEquals(cursor.search(), 0) + self.assertEquals(cursor.get_value(),value) self.session.commit_transaction() cursor.close() - def test_prepare(self): + def test_prepare_delete_rollback(self): + nrows = 2000 + # Create a small table. - uri = "table:test" - nrows = 1000 - ds = SimpleDataSet(self, uri, 0, key_format="S", value_format='u') - ds.populate() + uri_1 = "table:test_prepare08_1" + ds_1 = SimpleDataSet(self, uri_1, 0, key_format="S", value_format='u') + ds_1.populate() + + uri_2 = "table:test_prepare08_2" + ds_2 = SimpleDataSet(self, uri_2, 0, key_format="S", value_format='u') + ds_2.populate() value_a = b"aaaaa" * 100 value_b = b"bbbbb" * 100 + value_c = b"ccccc" * 100 + value_d = b"ddddd" * 100 + value_e = b"eeeee" * 100 # Commit some updates along with a prepared update, which is not resolved. self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(10)) self.conn.set_timestamp('stable_timestamp=' + timestamp_str(10)) # Initially load huge data - self.updates(ds, uri, nrows, value_a, 20) + self.updates(ds_1, uri_1, nrows, value_a, 20) + self.updates(ds_2, uri_2, nrows, value_a, 20) + # Add some more updates - self.updates(ds, uri, nrows, value_b, 30) + self.updates(ds_1, uri_1, nrows, value_b, 30) + self.updates(ds_2, uri_2, nrows, value_b, 30) + + # Verify the updates + self.check(ds_1, uri_1, nrows, value_a, 20) + self.check(ds_1, uri_1, nrows, value_b, 30) + + self.check(ds_2, uri_2, nrows, value_a, 20) + self.check(ds_2, uri_2, nrows, value_b, 30) # Checkpoint self.session.checkpoint() - # Remove the updates from a prepare session and and keep it open. + # Remove the updates from a prepare session and keep it open. session_p = self.conn.open_session() - cursor_p = session_p.open_cursor(uri) + cursor_p = session_p.open_cursor(uri_1) session_p.begin_transaction('isolation=snapshot') for i in range(1, nrows): - cursor_p.set_key(ds.key(i)) + cursor_p.set_key(ds_1.key(i)) self.assertEquals(cursor_p.remove(), 0) session_p.prepare_transaction('prepare_timestamp=' + timestamp_str(40)) - self.check(ds, uri, nrows, value_a, 20) - self.check(ds, uri, nrows, value_b, 50) + # Adding more updates to other table should trigger eviction on uri_1 + self.updates(ds_2, uri_2, nrows, value_c, 40) + self.updates(ds_2, uri_2, nrows, value_d, 50) + self.updates(ds_2, uri_2, nrows, value_e, 60) + + self.check(ds_1, uri_1, nrows, value_a, 20) + self.check(ds_1, uri_1, nrows, value_b, 50) #rollback the prepared session session_p.rollback_transaction() - self.check(ds, uri, nrows, value_a, 20) - self.check(ds, uri, nrows, value_b, 50) + self.check(ds_1, uri_1, nrows, value_a, 20) + self.check(ds_1, uri_1, nrows, value_b, 50) + + # close sessions. + cursor_p.close() + session_p.close() + self.session.close() + + def test_prepare_update_delete_commit(self): + nrows = 2000 + + # Create a small table. + uri_1 = "table:test_prepare10_1" + ds_1 = SimpleDataSet(self, uri_1, 0, key_format="S", value_format='u') + ds_1.populate() + + # Create another small table. + uri_2 = "table:test_prepare10_2" + ds_2 = SimpleDataSet(self, uri_2, 0, key_format="S", value_format='u') + ds_2.populate() + + value_a = b"aaaaa" * 100 + value_b = b"bbbbb" * 100 + value_c = b"ccccc" * 100 + value_d = b"ddddd" * 100 + value_e = b"eeeee" * 100 + + # Commit some updates along with a prepared update, which is not resolved. + self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(10)) + self.conn.set_timestamp('stable_timestamp=' + timestamp_str(10)) + + # Initially load huge data + self.updates(ds_1, uri_1, nrows, value_a, 20) + self.updates(ds_2, uri_2, nrows, value_a, 20) + + # Add some more updates + self.updates(ds_1, uri_1, nrows, value_b, 30) + self.updates(ds_2, uri_2, nrows, value_b, 30) + + # Verify the updates + self.check(ds_1, uri_1, nrows, value_a, 20) + self.check(ds_1, uri_1, nrows, value_b, 30) + + self.check(ds_2, uri_2, nrows, value_a, 20) + self.check(ds_2, uri_2, nrows, value_b, 30) + + # Checkpoint + self.session.checkpoint() + + # Remove the updates from a prepare session and keep it open. + session_p = self.conn.open_session() + cursor_p = session_p.open_cursor(uri_1) + session_p.begin_transaction('isolation=snapshot') + for i in range(1, nrows): + cursor_p.set_key(ds_1.key(i)) + cursor_p.set_value(value_c) + self.assertEquals(cursor_p.update(), 0) + cursor_p.set_key(ds_1.key(i)) + self.assertEquals(cursor_p.remove(), 0) + session_p.prepare_transaction('prepare_timestamp=' + timestamp_str(40)) + + # Adding more updates to other table should trigger eviction on uri_1 + self.updates(ds_2, uri_2, nrows, value_c, 40) + self.updates(ds_2, uri_2, nrows, value_d, 50) + self.updates(ds_2, uri_2, nrows, value_e, 60) + + self.check(ds_1, uri_1, nrows, value_a, 20) + self.check(ds_1, uri_1, nrows, value_b, 30) + self.check(ds_1, uri_1, nrows, value_b, 50) + + # Commit the prepared session + session_p.commit_transaction('commit_timestamp=' + timestamp_str(50) + ',durable_timestamp=' + timestamp_str(60)) + + self.check(ds_1, uri_1, nrows, value_a, 20) + self.check(ds_1, uri_1, nrows, value_b, 30) + self.check(ds_1, uri_1, 0, None, 50) + + # close sessions. + cursor_p.close() + session_p.close() + self.session.close() + + def test_prepare_update_delete_commit_with_no_base_update(self): + nrows = 2000 + + # Create a small table. + uri_1 = "table:test_prepare10_1" + ds_1 = SimpleDataSet(self, uri_1, 0, key_format="S", value_format='u') + ds_1.populate() + + # Create another small table. + uri_2 = "table:test_prepare10_2" + ds_2 = SimpleDataSet(self, uri_2, 0, key_format="S", value_format='u') + ds_2.populate() + + value_a = b"aaaaa" * 100 + value_b = b"bbbbb" * 100 + value_c = b"ccccc" * 100 + value_d = b"ddddd" * 100 + value_e = b"eeeee" * 100 + + # Commit some updates along with a prepared update, which is not resolved. + self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(10)) + self.conn.set_timestamp('stable_timestamp=' + timestamp_str(10)) + + # Initially load huge data + self.updates(ds_1, uri_1, nrows, value_a, 20) + self.updates(ds_2, uri_2, nrows, value_a, 20) + + # Remove updates from one table and add some more updates to another table + self.removes(ds_1, uri_1, nrows, 30) + self.updates(ds_2, uri_2, nrows, value_b, 30) + + # Checkpoint + self.session.checkpoint() + + # Remove the updates from a prepare session and and keep it open. + session_p = self.conn.open_session() + cursor_p = session_p.open_cursor(uri_1) + session_p.begin_transaction('isolation=snapshot') + for i in range(1, nrows): + cursor_p.set_key(ds_1.key(i)) + cursor_p.set_value(value_c) + self.assertEquals(cursor_p.update(), 0) + cursor_p.set_key(ds_1.key(i)) + cursor_p.set_value(value_d) + self.assertEquals(cursor_p.update(), 0) + cursor_p.set_key(ds_1.key(i)) + self.assertEquals(cursor_p.remove(), 0) + session_p.prepare_transaction('prepare_timestamp=' + timestamp_str(40)) + + # Adding more updates to other table should trigger eviction on uri_1 + self.updates(ds_2, uri_2, nrows, value_c, 40) + self.updates(ds_2, uri_2, nrows, value_d, 50) + self.updates(ds_2, uri_2, nrows, value_e, 60) + + self.check(ds_1, uri_1, nrows, value_a, 20) + self.check(ds_1, uri_1, 0, None, 30) + self.check(ds_1, uri_1, 0, None, 50) + + # Commit the prepared session + session_p.commit_transaction('commit_timestamp=' + timestamp_str(50) + ',durable_timestamp=' + timestamp_str(60)) + + self.check(ds_1, uri_1, nrows, value_a, 20) + self.check(ds_1, uri_1, 0, None, 30) + self.check(ds_1, uri_1, 0, None, 50) # close sessions. cursor_p.close() diff --git a/src/third_party/wiredtiger/test/suite/test_prepare10.py b/src/third_party/wiredtiger/test/suite/test_prepare10.py index 8a6548d9ee5..42408fac3b0 100644 --- a/src/third_party/wiredtiger/test/suite/test_prepare10.py +++ b/src/third_party/wiredtiger/test/suite/test_prepare10.py @@ -133,7 +133,7 @@ class test_prepare10(wttest.WiredTigerTestCase): cursor3.reset() session3.begin_transaction() - # Remove the updates from a prepare session and and keep it open. + # Insert the updates from a prepare session and keep it open. session_p = self.conn.open_session() cursor_p = session_p.open_cursor(uri) session_p.begin_transaction() diff --git a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable10.py b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable10.py index 05e165fdfcf..1938193b77b 100755 --- a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable10.py +++ b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable10.py @@ -50,7 +50,7 @@ class test_rollback_to_stable10(test_rollback_to_stable_base): scenarios = make_scenarios(prepare_values) def conn_config(self): - config = 'cache_size=5MB,statistics=(all),statistics_log=(json,on_close,wait=1),log=(enabled=true),timing_stress_for_test=[history_store_checkpoint_delay]' + config = 'cache_size=6MB,statistics=(all),statistics_log=(json,on_close,wait=1),log=(enabled=true),timing_stress_for_test=[history_store_checkpoint_delay]' return config def simulate_crash_restart(self, olddir, newdir): diff --git a/src/third_party/wiredtiger/test/suite/test_txn21.py b/src/third_party/wiredtiger/test/suite/test_txn21.py index 5abf907ac84..06db7ce76aa 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn21.py +++ b/src/third_party/wiredtiger/test/suite/test_txn21.py @@ -43,7 +43,17 @@ class test_txn21(wttest.WiredTigerTestCase): # Transaction-level configuration. def test_operation_timeout_txn(self): + # Test during begin. self.session.begin_transaction('operation_timeout_ms=2000') + self.session.rollback_transaction() + + # Test during rollback. + self.session.begin_transaction() + self.session.rollback_transaction('operation_timeout_ms=2000') + + # Test during commit. + self.session.begin_transaction() + self.session.commit_transaction('operation_timeout_ms=2000') if __name__ == '__main__': wttest.run() -- cgit v1.2.1