diff options
author | Luke Chen <luke.chen@mongodb.com> | 2021-06-25 15:59:47 +1000 |
---|---|---|
committer | Luke Chen <luke.chen@mongodb.com> | 2021-06-25 16:00:37 +1000 |
commit | bd09da168ea707379ffed826022dd14a48ca62f0 (patch) | |
tree | bdcfc85d8614bd92f6efd1d71be780cc8c398f2e | |
parent | 28d56a3e4cd38a6060539ea7a2824bd076b58d9d (diff) | |
download | mongo-bd09da168ea707379ffed826022dd14a48ca62f0.tar.gz |
Import wiredtiger: a41345737223a2432b548566b516ea5f3bd06131 from branch mongodb-5.0
ref: 363c7384ed..a413457372
for: 5.0.0-rc5
WT-7675 Query last ckpt timestamp changes without taking checkpoint
WT-7699 Fix RTS handling to abort an out of order prepared transaction
WT-7706 Use same transaction update when on-disk value is an aborted prepared update
WT-7710 Fix to use history store btree to initialise history store cursor
Reverted ticket(s):
WT-7443 Add error message when bulk cursor can't get exclusive access to dhandle
10 files changed, 240 insertions, 85 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 13670219790..8a969307b1b 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-5.0", - "commit": "363c7384edce63df337f110492c6424c5f13a451" + "commit": "a41345737223a2432b548566b516ea5f3bd06131" } diff --git a/src/third_party/wiredtiger/src/cursor/cur_file.c b/src/third_party/wiredtiger/src/cursor/cur_file.c index 878b1449c27..04e53e5b4f2 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_file.c +++ b/src/third_party/wiredtiger/src/cursor/cur_file.c @@ -833,11 +833,6 @@ __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, c session, ret = __wt_session_get_btree_ckpt(session, uri, cfg, flags)); else ret = __wt_session_get_btree_ckpt(session, uri, cfg, flags); - - /* Check whether the exclusive open for a bulk load succeeded. */ - if (bulk && ret == EBUSY) - WT_RET_MSG(session, EBUSY, "bulk-load is only supported on newly created objects"); - WT_RET(ret); WT_ERR(__curfile_create(session, owner, cfg, bulk, bitmap, cursorp)); diff --git a/src/third_party/wiredtiger/src/cursor/cur_hs.c b/src/third_party/wiredtiger/src/cursor/cur_hs.c index 299c06a4fa7..aecc01ade8b 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_hs.c +++ b/src/third_party/wiredtiger/src/cursor/cur_hs.c @@ -1118,7 +1118,9 @@ __wt_curhs_open(WT_SESSION_IMPL *session, WT_CURSOR *owner, WT_CURSOR **cursorp) /* Open the file cursor for operations on the regular history store .*/ WT_ERR(__curhs_file_cursor_open(session, &hs_cursor->file_cursor)); - WT_ERR(__wt_cursor_init(cursor, WT_HS_URI, owner, NULL, cursorp)); + WT_WITH_BTREE(session, CUR2BT(hs_cursor->file_cursor), + ret = __wt_cursor_init(cursor, WT_HS_URI, owner, NULL, cursorp)); + WT_ERR(ret); WT_TIME_WINDOW_INIT(&hs_cursor->time_window); hs_cursor->btree_id = 0; WT_ERR(__wt_scr_alloc(session, 0, &hs_cursor->datastore_key)); diff --git a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c index 0e5523b6454..cc91c5833e0 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c @@ -517,6 +517,16 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v */ WT_ASSERT(session, same_txn_valid_upd->type != WT_UPDATE_TOMBSTONE); upd_select->upd = upd = same_txn_valid_upd; + + } else if (same_txn_valid_upd != NULL && vpack != NULL && vpack->tw.prepare) { + /* + * The on-disk version is from an aborted prepare transaction. Therefore, use + * the update from the same transaction as the selected update. We are sure that + * the on-disk prepared update has been aborted because otherwise we would have + * chosen it as an update this tombstone can be applied to. + */ + WT_ASSERT(session, same_txn_valid_upd->type != WT_UPDATE_TOMBSTONE); + upd_select->upd = upd = same_txn_valid_upd; } } } diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c index c7ebb3654f8..ff051d14564 100644 --- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c +++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c @@ -346,17 +346,17 @@ __rollback_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE *page /* * Do not include history store updates greater than on-disk data store version to construct - * a full update to restore. Include the most recent updates than the on-disk version - * shouldn't be problem as the on-disk version in history store is always a full update. It - * is better to not to include those updates as it unnecessarily increases the rollback to - * stable time. + * a full update to restore except when the on-disk update is prepared. Including more + * recent updates than the on-disk version shouldn't be problem as the on-disk version in + * history store is always a full update. It is better to not to include those updates as it + * unnecessarily increases the rollback to stable time. * * Comparing with timestamps here has no problem unlike in search flow where the timestamps * may be reset during reconciliation. RTS detects an on-disk update is unstable based on * the written proper timestamp, so comparing against it with history store shouldn't have * any problem. */ - if (hs_start_ts <= unpack->tw.start_ts) { + if (hs_start_ts <= unpack->tw.start_ts || unpack->tw.prepare) { if (type == WT_UPDATE_MODIFY) WT_ERR(__wt_modify_apply_item( session, S2BT(session)->value_format, &full_value, hs_value->data)); @@ -423,7 +423,7 @@ __rollback_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE *page __wt_timestamp_to_string(hs_durable_ts, ts_string[1]), __wt_timestamp_to_string(hs_stop_durable_ts, ts_string[2]), __wt_timestamp_to_string(rollback_timestamp, ts_string[3]), hs_tw->start_txn, type); - WT_ASSERT(session, hs_tw->start_ts <= unpack->tw.start_ts); + WT_ASSERT(session, unpack->tw.prepare || hs_tw->start_ts <= unpack->tw.start_ts); valid_update_found = true; break; } diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c index 6acd265fd2d..9fc79a78d72 100644 --- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c +++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c @@ -187,10 +187,11 @@ __txn_global_query_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t *tsp, cons */ if (ts == WT_TS_NONE) return (WT_NOTFOUND); - } else if (WT_STRING_MATCH("last_checkpoint", cval.str, cval.len)) - /* Read-only value forever. No lock needed. */ + } else if (WT_STRING_MATCH("last_checkpoint", cval.str, cval.len)) { + /* Read-only value forever. Make sure we don't used a cached version. */ + WT_BARRIER(); ts = txn_global->last_ckpt_timestamp; - else if (WT_STRING_MATCH("oldest", cval.str, cval.len)) { + } else if (WT_STRING_MATCH("oldest", cval.str, cval.len)) { if (!txn_global->has_oldest_timestamp) return (WT_NOTFOUND); ts = txn_global->oldest_timestamp; diff --git a/src/third_party/wiredtiger/test/suite/test_bulk01.py b/src/third_party/wiredtiger/test/suite/test_bulk01.py index c65ea704a21..a39deac3866 100755 --- a/src/third_party/wiredtiger/test/suite/test_bulk01.py +++ b/src/third_party/wiredtiger/test/suite/test_bulk01.py @@ -211,9 +211,8 @@ class test_bulk_load(wttest.WiredTigerTestCase): cursor = self.session.open_cursor(uri, None) cursor[simple_key(cursor, 1)] = simple_value(cursor, 1) # Don't close the insert cursor, we want EBUSY. - msg = '/bulk-load is only supported on newly created objects/' - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: self.session.open_cursor(uri, None, "bulk"), msg) + self.assertRaises(wiredtiger.WiredTigerError, + lambda: self.session.open_cursor(uri, None, "bulk")) if __name__ == '__main__': wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_bulk03.py b/src/third_party/wiredtiger/test/suite/test_bulk03.py deleted file mode 100755 index 730960264ef..00000000000 --- a/src/third_party/wiredtiger/test/suite/test_bulk03.py +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env python -# -# Public Domain 2014-present MongoDB, Inc. -# Public Domain 2008-2014 WiredTiger, Inc. -# -# This is free and unencumbered software released into the public domain. -# -# Anyone is free to copy, modify, publish, use, compile, sell, or -# distribute this software, either in source code form or as a compiled -# binary, for any purpose, commercial or non-commercial, and by any -# means. -# -# In jurisdictions that recognize copyright laws, the author or authors -# of this software dedicate any and all copyright interest in the -# software to the public domain. We make this dedication for the benefit -# of the public at large and to the detriment of our heirs and -# successors. We intend this dedication to be an overt act of -# relinquishment in perpetuity of all present and future rights to this -# software under copyright law. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -# test_bulk03.py -# This test module is designed to check that colgroup bulk-cursor meets expectations in terms -# of error codes and error messages. -# - -import os -import wiredtiger, wttest -from wtdataset import SimpleDataSet, simple_key, simple_value -from wtscenario import make_scenarios - -# Smoke test bulk-load. -class test_colgroup_bulk_load(wttest.WiredTigerTestCase): - basename = 'test_schema01' - tablename = 'table:' + basename - cgname = 'colgroup:' + basename - err_msg = '/bulk-load is only supported on newly created objects/' - - # Test that bulk-load objects cannot be opened by other cursors. - def test_bulk_load_busy_cols(self): - # Create a table with columns. - self.session.create(self.tablename, 'key_format=5s,value_format=HQ,' + - 'columns=(country,year,population),' + - 'colgroups=(year,population)') - - # Create a column group. - self.session.create(self.cgname + ':year', 'columns=(year)') - self.session.create(self.cgname + ':population', 'columns=(population)') - - # Create a column cursor. - self.session.open_cursor(self.tablename, None) - - # Create a second column cursor in bulk mode. Don't close the insert cursor, we want EBUSY - # and the error message. - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: self.session.open_cursor(self.tablename, None, "bulk"), self.err_msg) - -if __name__ == '__main__': - wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_prepare_hs05.py b/src/third_party/wiredtiger/test/suite/test_prepare_hs05.py new file mode 100644 index 00000000000..ae79bd247f5 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_prepare_hs05.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2020 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import wiredtiger, wttest +from wtscenario import make_scenarios +from wiredtiger import stat, WT_NOTFOUND + +def timestamp_str(t): + return '%x' % t + +# test_prepare_hs05.py +# Test that after aborting prepare transaction, correct update from the history store is restored. +class test_prepare_hs05(wttest.WiredTigerTestCase): + conn_config = 'cache_size=50MB' + session_config = 'isolation=snapshot' + + def test_check_prepare_abort_hs_restore(self): + uri = 'table:test_prepare_hs05' + create_params = 'key_format=S,value_format=S' + self.session.create(uri, create_params) + + value1 = 'a' * 5 + value2 = 'b' * 5 + value3 = 'c' * 5 + + self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(1)) + cursor = self.session.open_cursor(uri) + + key = 1 + + self.session.begin_transaction() + cursor[str(key)] = value1 + cursor.set_key(str(key)) + self.session.commit_transaction('commit_timestamp=' + timestamp_str(2)) + + # Commit update and remove operation in the same transaction. + self.session.begin_transaction() + cursor[str(key)] = value2 + cursor.set_key(str(key)) + cursor.remove() + self.session.commit_transaction('commit_timestamp=' + timestamp_str(3)) + + # Add a prepared update for the key. + self.session.begin_transaction() + cursor[str(key)] = value3 + self.session.prepare_transaction('prepare_timestamp='+ timestamp_str(4)) + + # Try to evict the page with prepared update. This will ensure that prepared update is + # written as the on-disk version and the older versions are moved to the history store. + session2 = self.conn.open_session() + session2.begin_transaction('ignore_prepare=true') + cursor2 = session2.open_cursor(uri, None, "debug=(release_evict=true)") + cursor2.set_key(str(key)) + self.assertEquals(cursor2.search(), WT_NOTFOUND) + cursor2.reset() + + # This should abort the prepared transaction. + self.session.rollback_transaction() + + self.session.checkpoint() + + # We should be able to read the older version of the key from the history store. + self.session.begin_transaction('read_timestamp='+timestamp_str(2)) + cursor.set_key(str(key)) + self.assertEqual(cursor.search(), 0) + self.assertEqual(cursor.get_value(), value1) + self.session.rollback_transaction() + + # The latest version should be marked deleted. + self.session.begin_transaction() + cursor.set_key(str(key)) + self.assertEqual(cursor.search(), WT_NOTFOUND) + self.session.rollback_transaction() diff --git a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable21.py b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable21.py new file mode 100644 index 00000000000..1e898523c39 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable21.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python +# +# Public Domain 2014-present MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +from wiredtiger import stat +from wtscenario import make_scenarios +from helper import simulate_crash_restart +from wtdataset import SimpleDataSet +from test_rollback_to_stable01 import test_rollback_to_stable_base + +def timestamp_str(t): + return '%x' % t + +# test_rollback_to_stable21.py +# Test rollback to stable when an out of order prepared transaction is written to disk +class test_rollback_to_stable21(test_rollback_to_stable_base): + key_format_values = [ + ('column', dict(key_format='r')), + ('integer_row', dict(key_format='i')), + ] + + scenarios = make_scenarios(key_format_values) + + def conn_config(self): + config = 'cache_size=250MB,statistics=(all),statistics_log=(json,on_close,wait=1)' + return config + + def test_rollback_to_stable(self): + nrows = 1000 + + # Prepare transactions for column store table is not yet supported. + if self.key_format == 'r': + self.skipTest('Prepare transactions for column store table is not yet supported') + + # Create a table without logging. + uri = "table:rollback_to_stable21" + ds = SimpleDataSet( + self, uri, 0, key_format=self.key_format, value_format="S", config='log=(enabled=false)') + ds.populate() + + # Pin oldest and stable timestamps to 10. + self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(10) + + ',stable_timestamp=' + timestamp_str(10)) + + valuea = 'a' * 400 + valueb = 'b' * 400 + + cursor = self.session.open_cursor(uri) + self.session.begin_transaction() + for i in range(1, nrows + 1): + cursor[i] = valuea + + self.session.commit_transaction('commit_timestamp=' + timestamp_str(30)) + + self.session.begin_transaction() + for i in range(1, nrows + 1): + cursor[i] = valueb + + cursor.reset() + cursor.close() + self.session.prepare_transaction('prepare_timestamp=' + timestamp_str(20)) + + s = self.conn.open_session() + s.begin_transaction('ignore_prepare = true') + # Configure debug behavior on a cursor to evict the page positioned on when the reset API is used. + evict_cursor = s.open_cursor(uri, None, "debug=(release_evict)") + + for i in range(1, nrows + 1): + evict_cursor.set_key(i) + self.assertEquals(evict_cursor.search(), 0) + self.assertEqual(evict_cursor.get_value(), valuea) + evict_cursor.reset() + + s.rollback_transaction() + self.conn.set_timestamp('stable_timestamp=' + timestamp_str(40)) + s.checkpoint() + + # Rollback the prepared transaction + self.session.rollback_transaction() + + # Simulate a server crash and restart. + self.pr("restart") + simulate_crash_restart(self, ".", "RESTART") + self.pr("restart complete") + + self.check(valuea, uri, nrows, 40) + + stat_cursor = self.session.open_cursor('statistics:', None, None) + hs_removed = stat_cursor[stat.conn.txn_rts_hs_removed][2] + stat_cursor.close() + + self.assertGreater(hs_removed, 0) |