diff options
3 files changed, 33 insertions, 155 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 701ab11e416..d6e0d1830ed 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-4.4", - "commit": "54336f44ebf1d6012bce4c9105b4f0dc93fb2bd0" + "commit": "21548312740005cfb6f3970ed15f8285f82ab92c" } diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c index 042bdadbbd2..acf0fc07c02 100644 --- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c +++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c @@ -27,7 +27,9 @@ __rollback_delete_hs(WT_SESSION_IMPL *session, WT_ITEM *key, wt_timestamp_t ts) WT_CURSOR *hs_cursor; WT_DECL_ITEM(hs_key); WT_DECL_RET; - WT_TIME_WINDOW *hs_tw; + wt_timestamp_t hs_start_ts; + uint64_t hs_counter; + uint32_t hs_btree_id; /* Open a history store table cursor. */ WT_RET(__wt_curhs_open(session, NULL, &hs_cursor)); @@ -49,14 +51,12 @@ __rollback_delete_hs(WT_SESSION_IMPL *session, WT_ITEM *key, wt_timestamp_t ts) hs_cursor->set_key(hs_cursor, 4, S2BT(session)->id, key, WT_TS_MAX, UINT64_MAX); ret = __wt_curhs_search_near_before(session, hs_cursor); for (; ret == 0; ret = hs_cursor->prev(hs_cursor)) { - /* Retrieve the time window from the history cursor. */ - __wt_hs_upd_time_window(hs_cursor, &hs_tw); - if (hs_tw->start_ts < ts) + WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, hs_key, &hs_start_ts, &hs_counter)); + if (hs_start_ts < ts) break; - WT_ERR(hs_cursor->remove(hs_cursor)); WT_STAT_CONN_DATA_INCR(session, txn_rts_hs_removed); - if (hs_tw->start_ts == ts) + if (hs_start_ts == ts) WT_STAT_CONN_DATA_INCR(session, cache_hs_key_truncate_rts); else WT_STAT_CONN_DATA_INCR(session, cache_hs_key_truncate_rts_unstable); @@ -399,7 +399,7 @@ __rollback_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_REF *ref, WT_ROW *rip, * become obsolete according to the checkpoint. */ if (__rollback_txn_visible_id(session, hs_tw->stop_txn) && - hs_tw->durable_stop_ts <= pinned_ts) { + hs_stop_durable_ts <= pinned_ts) { __wt_verbose(session, WT_VERB_RECOVERY_RTS(session), "history store stop is obsolete with time window: %s and pinned timestamp: %s", __wt_time_window_to_string(hs_tw, tw_string), @@ -421,7 +421,7 @@ __rollback_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_REF *ref, WT_ROW *rip, * the written proper timestamp, so comparing against it with history store shouldn't have * any problem. */ - if (hs_tw->start_ts <= unpack->tw.start_ts || unpack->tw.prepare) { + if (hs_start_ts <= unpack->tw.start_ts || unpack->tw.prepare) { if (type == WT_UPDATE_MODIFY) WT_ERR(__wt_modify_apply_item( session, S2BT(session)->value_format, full_value, hs_value->data)); @@ -431,9 +431,11 @@ __rollback_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_REF *ref, WT_ROW *rip, } } else __wt_verbose(session, WT_VERB_RECOVERY_RTS(session), - "history store update more recent than on-disk update with time window: %s and type: " - "%" PRIu8, - __wt_time_window_to_string(hs_tw, tw_string), type); + "history store update more recent than on-disk update with start timestamp: %s," + " durable timestamp: %s, stop timestamp: %s and type: %" PRIu8, + __wt_timestamp_to_string(hs_start_ts, ts_string[0]), + __wt_timestamp_to_string(hs_durable_ts, ts_string[1]), + __wt_timestamp_to_string(hs_stop_durable_ts, ts_string[2]), type); /* * Verify the history store timestamps are in order. The start timestamp may be equal to the @@ -465,38 +467,32 @@ __rollback_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_REF *ref, WT_ROW *rip, WT_STAT_CONN_DATA_INCR(session, txn_rts_hs_stop_older_than_newer_start); /* - * Validate the timestamps in the key and the cell are same. This must be validated only - * after verifying it's stop time window is not globally visible. The start timestamps of - * the time window are cleared when they are globally visible and there will be no stop - * timestamp in the history store whenever a prepared update is written to the data store. - */ - WT_ASSERT(session, - (hs_tw->start_ts == WT_TS_NONE || hs_tw->start_ts == hs_start_ts) && - (hs_tw->durable_start_ts == WT_TS_NONE || hs_tw->durable_start_ts == hs_durable_ts) && - ((hs_tw->durable_stop_ts == 0 && hs_stop_durable_ts == WT_TS_MAX) || - hs_tw->durable_stop_ts == hs_stop_durable_ts)); - - /* * Stop processing when we find a stable update according to the given timestamp and * transaction id. */ if (__rollback_txn_visible_id(session, hs_tw->start_txn) && - hs_tw->durable_start_ts <= rollback_timestamp) { + hs_durable_ts <= rollback_timestamp) { __wt_verbose(session, WT_VERB_RECOVERY_RTS(session), - "history store update valid with time window: %s, type: %" PRIu8 - " and stable timestamp: %s", - __wt_time_window_to_string(hs_tw, tw_string), type, - __wt_timestamp_to_string(rollback_timestamp, ts_string[0])); + "history store update valid with start timestamp: %s, durable timestamp: %s, stop " + "timestamp: %s, stable timestamp: %s, txnid: %" PRIu64 " and type: %" PRIu8, + __wt_timestamp_to_string(hs_start_ts, ts_string[0]), + __wt_timestamp_to_string(hs_durable_ts, ts_string[1]), + __wt_timestamp_to_string(hs_stop_durable_ts, ts_string[2]), + __wt_timestamp_to_string(rollback_timestamp, ts_string[3]), hs_tw->start_txn, type); WT_ASSERT(session, unpack->tw.prepare || hs_tw->start_ts <= unpack->tw.start_ts); valid_update_found = true; break; } __wt_verbose(session, WT_VERB_RECOVERY_RTS(session), - "history store update aborted with time window: %s, type: %" PRIu8 - " and stable timestamp: %s", - __wt_time_window_to_string(hs_tw, tw_string), type, - __wt_timestamp_to_string(rollback_timestamp, ts_string[3])); + "history store update aborted with start timestamp: %s, durable timestamp: %s, stop " + "timestamp: %s, stable timestamp: %s, start txnid: %" PRIu64 ", stop txnid: %" PRIu64 + " and type: %" PRIu8, + __wt_timestamp_to_string(hs_start_ts, ts_string[0]), + __wt_timestamp_to_string(hs_durable_ts, ts_string[1]), + __wt_timestamp_to_string(hs_stop_durable_ts, ts_string[2]), + __wt_timestamp_to_string(rollback_timestamp, ts_string[3]), hs_tw->start_txn, + hs_tw->stop_txn, type); /* * Start time point of the current record may be used as stop time point of the previous @@ -553,7 +549,7 @@ __rollback_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_REF *ref, WT_ROW *rip, * timestamp and txnid, we need to restore that as well. */ if (__rollback_txn_visible_id(session, hs_tw->stop_txn) && - hs_tw->durable_stop_ts <= rollback_timestamp) { + hs_stop_durable_ts <= rollback_timestamp) { /* * The restoring tombstone timestamp must be zero or less than previous update start * timestamp or the on-disk update is an out of order prepared. @@ -1287,11 +1283,10 @@ __rollback_to_stable_btree_hs_truncate(WT_SESSION_IMPL *session, uint32_t btree_ WT_CURSOR *hs_cursor; WT_DECL_ITEM(hs_key); WT_DECL_RET; - WT_TIME_WINDOW *hs_tw; wt_timestamp_t hs_start_ts; uint64_t hs_counter; uint32_t hs_btree_id; - char tw_string[WT_TIME_STRING_SIZE]; + char ts_string[WT_TS_INT_STRING_SIZE]; hs_cursor = NULL; @@ -1310,12 +1305,9 @@ __rollback_to_stable_btree_hs_truncate(WT_SESSION_IMPL *session, uint32_t btree_ /* We shouldn't cross the btree search space. */ WT_ASSERT(session, btree_id == hs_btree_id); - /* Retrieve the time window from the history cursor. */ - __wt_hs_upd_time_window(hs_cursor, &hs_tw); - __wt_verbose(session, WT_VERB_RECOVERY_RTS(session), - "rollback to stable history store cleanup of update with time window: %s", - __wt_time_window_to_string(hs_tw, tw_string)); + "rollback to stable history store cleanup of update with start timestamp: %s", + __wt_timestamp_to_string(hs_start_ts, ts_string)); WT_ERR(hs_cursor->remove(hs_cursor)); WT_STAT_CONN_DATA_INCR(session, txn_rts_hs_removed); diff --git a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable37.py b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable37.py deleted file mode 100644 index dc8fd321df9..00000000000 --- a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable37.py +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env python -# -# Public Domain 2014-present MongoDB, Inc. -# Public Domain 2008-2014 WiredTiger, Inc. -# -# This is free and unencumbered software released into the public domain. -# -# Anyone is free to copy, modify, publish, use, compile, sell, or -# distribute this software, either in source code form or as a compiled -# binary, for any purpose, commercial or non-commercial, and by any -# means. -# -# In jurisdictions that recognize copyright laws, the author or authors -# of this software dedicate any and all copyright interest in the -# software to the public domain. We make this dedication for the benefit -# of the public at large and to the detriment of our heirs and -# successors. We intend this dedication to be an overt act of -# relinquishment in perpetuity of all present and future rights to this -# software under copyright law. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. - -from wiredtiger import stat -from wtdataset import SimpleDataSet -from wtscenario import make_scenarios -from helper import simulate_crash_restart -from test_rollback_to_stable01 import test_rollback_to_stable_base - -# test_rollback_to_stable37.py -# Test that the rollback to stable to restore proper stable update from history store when a no timestamp -# update has rewritten the history store data. -class test_rollback_to_stable37(test_rollback_to_stable_base): - conn_config = 'cache_size=1GB,statistics=(all),statistics_log=(json,on_close,wait=1),log=(enabled=false)' - - format_values = [ - ('column', dict(key_format='r', value_format='S')), - ('row_integer', dict(key_format='i', value_format='S')), - ] - - scenarios = make_scenarios(format_values) - - def test_rollback_to_stable(self): - uri = 'table:test_rollback_to_stable37' - nrows = 1000 - - if self.value_format == '8t': - value_a = 97 - value_b = 98 - value_c = 99 - value_d = 100 - else: - value_a = 'a' * 10 - value_b = 'b' * 10 - value_c = 'c' * 10 - value_d = 'd' * 10 - - # Create our table. - ds = SimpleDataSet(self, uri, 0, key_format=self.key_format, value_format=self.value_format) - ds.populate() - - # Pin oldest and stable to timestamp 1. - self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(1) + - ',stable_timestamp=' + self.timestamp_str(1)) - - # Insert 300 updates to the same key. - for i in range (20, 320): - if self.value_format == '8t': - self.large_updates(uri, value_a, ds, nrows, False, i) - else: - self.large_updates(uri, value_a + str(i), ds, nrows, False, i) - - old_reader_session = self.conn.open_session() - old_reader_session.begin_transaction('read_timestamp=' + self.timestamp_str(10)) - - self.large_updates(uri, value_b, ds, nrows, False, 2000) - self.check(value_b, uri, nrows,2000) - - self.evict_cursor(uri, nrows, value_b) - - # Insert update without a timestamp. - self.large_updates(uri, value_c, ds, nrows, False, 0) - self.check(value_c, uri, nrows, 0) - - self.evict_cursor(uri, nrows, value_c) - - self.large_updates(uri, value_d, ds, nrows, False, 3000) - self.check(value_d, uri, nrows, 3000) - - old_reader_session.rollback_transaction() - self.session.checkpoint() - - self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(2000)) - self.session.checkpoint() - - self.conn.rollback_to_stable() - - self.check(value_c, uri, nrows, 1000) - self.check(value_c, uri, nrows, 2000) - self.check(value_c, uri, nrows, 3000) - - stat_cursor = self.session.open_cursor('statistics:', None, None) - keys_removed = stat_cursor[stat.conn.txn_rts_keys_removed][2] - stat_cursor.close() - - self.assertEqual(keys_removed, 0) - -if __name__ == '__main__': - wttest.run() |