diff options
author | Luke Chen <luke.chen@mongodb.com> | 2022-05-02 05:20:02 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-05-02 05:51:17 +0000 |
commit | aafe6d097dabd495cedae36ebeaa26b0e480e13f (patch) | |
tree | 562c6461f3179e49a9f2fb70d2566256a02afc13 | |
parent | 661b97c827f003a1e067cb9c470a498529368d64 (diff) | |
download | mongo-aafe6d097dabd495cedae36ebeaa26b0e480e13f.tar.gz |
Import wiredtiger: a39fc77503274781b8641e9e845ea37ab1d3fe71 from branch mongodb-master
ref: e3f48f2713..a39fc77503
for: 6.1.0-rc0
WT-9055 Remove out-of-order timestamp support
45 files changed, 476 insertions, 1269 deletions
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py index 74f6f7c6921..8cd75906d5f 100644 --- a/src/third_party/wiredtiger/dist/stat_data.py +++ b/src/third_party/wiredtiger/dist/stat_data.py @@ -239,7 +239,7 @@ conn_stats = [ CacheStat('cache_eviction_empty_score', 'eviction empty score', 'no_clear,no_scale'), CacheStat('cache_eviction_fail', 'pages selected for eviction unable to be evicted'), CacheStat('cache_eviction_fail_active_children_on_an_internal_page', 'pages selected for eviction unable to be evicted because of active children on an internal page'), - CacheStat('cache_eviction_fail_checkpoint_out_of_order_ts', 'pages selected for eviction unable to be evicted because of race between checkpoint and out of order timestamps handling'), + CacheStat('cache_eviction_fail_checkpoint_mm_ts', 'pages selected for eviction unable to be evicted because of race between checkpoint and mixed mode timestamps handling'), CacheStat('cache_eviction_fail_in_reconciliation', 'pages selected for eviction unable to be evicted because of failure in reconciliation'), CacheStat('cache_eviction_force', 'forced eviction - pages selected count'), CacheStat('cache_eviction_force_long_update_list', 'forced eviction - pages selected because of a large number of updates to a single item'), @@ -809,10 +809,10 @@ conn_dsrc_stats = [ CacheStat('cache_bytes_write', 'bytes written from cache', 'size'), CacheStat('cache_eviction_checkpoint', 'checkpoint blocked page eviction'), CacheStat('cache_eviction_blocked_checkpoint_hs', 'checkpoint of history store file blocked non-history store page eviction'), - CacheStat('cache_eviction_blocked_ooo_checkpoint_race_1', 'eviction gave up due to detecting an out of order on disk value behind the last update on the chain'), - CacheStat('cache_eviction_blocked_ooo_checkpoint_race_2', 'eviction gave up due to detecting an out of order tombstone ahead of the selected on disk update'), - CacheStat('cache_eviction_blocked_ooo_checkpoint_race_3', 'eviction gave up due to detecting an out of order tombstone ahead of the selected on disk update after validating the update chain'), - CacheStat('cache_eviction_blocked_ooo_checkpoint_race_4', 'eviction gave up due to detecting out of order timestamps on the update chain after the selected on disk update'), + CacheStat('cache_eviction_blocked_mm_checkpoint_race_1', 'eviction gave up due to detecting a mixed mode on disk value behind the last update on the chain'), + CacheStat('cache_eviction_blocked_mm_checkpoint_race_2', 'eviction gave up due to detecting a mixed mode tombstone ahead of the selected on disk update'), + CacheStat('cache_eviction_blocked_mm_checkpoint_race_3', 'eviction gave up due to detecting a mixed mode tombstone ahead of the selected on disk update after validating the update chain'), + CacheStat('cache_eviction_blocked_mm_checkpoint_race_4', 'eviction gave up due to detecting mixed mode timestamps on the update chain after the selected on disk update'), CacheStat('cache_eviction_clean', 'unmodified pages evicted'), CacheStat('cache_eviction_deepen', 'page split during eviction deepened the tree'), CacheStat('cache_eviction_dirty', 'modified pages evicted'), @@ -841,11 +841,11 @@ conn_dsrc_stats = [ CacheStat('cache_hs_insert_reverse_modify', 'the number of times reverse modify inserted to history store'), CacheStat('cache_hs_key_truncate', 'history store table truncation to remove an update'), CacheStat('cache_hs_key_truncate_onpage_removal', 'history store table truncation to remove range of updates due to key being removed from the data page during reconciliation'), - CacheStat('cache_hs_order_remove', 'history store table truncation to remove range of updates due to out-of-order timestamp update on data page'), + CacheStat('cache_hs_order_remove', 'history store table truncation to remove range of updates due to mixed mode timestamp update on data page'), CacheStat('cache_hs_key_truncate_rts', 'history store table truncation by rollback to stable to remove an update'), CacheStat('cache_hs_key_truncate_rts_unstable', 'history store table truncation by rollback to stable to remove an unstable update'), - CacheStat('cache_hs_order_reinsert', 'history store table out-of-order updates that were fixed up by reinserting with the fixed timestamp'), - CacheStat('cache_hs_order_lose_durable_timestamp', 'history store table out-of-order resolved updates that lose their durable timestamp'), + CacheStat('cache_hs_order_reinsert', 'history store table mixed mode updates that were fixed up by reinserting with the fixed timestamp'), + CacheStat('cache_hs_order_lose_durable_timestamp', 'history store table mixed mode resolved updates that lose their durable timestamp'), CacheStat('cache_hs_read', 'history store table reads'), CacheStat('cache_hs_read_miss', 'history store table reads missed'), CacheStat('cache_hs_read_squash', 'history store table reads requiring squashed modifies'), diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 785f146b60f..d0bad46f318 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-master", - "commit": "e3f48f2713189b973427d19803b705f15cfeec08" + "commit": "a39fc77503274781b8641e9e845ea37ab1d3fe71" } diff --git a/src/third_party/wiredtiger/src/conn/conn_dhandle.c b/src/third_party/wiredtiger/src/conn/conn_dhandle.c index 94ab6df3482..4a7fe673f11 100644 --- a/src/third_party/wiredtiger/src/conn/conn_dhandle.c +++ b/src/third_party/wiredtiger/src/conn/conn_dhandle.c @@ -482,22 +482,14 @@ __conn_dhandle_config_parse_ts(WT_SESSION_IMPL *session) * Timestamp usage configuration: Ignore the "always", "key_consistent" and "ordered" keywords: * "always" and "key_consistent" were never written into databases in the wild, and the default * behavior is the same as "ordered". - * - * FIXME: WT-9055 MongoDB builds for the 6.0 release use ordered as the default behavior, while - * WiredTiger standalone still uses out-of-order as the default. */ WT_RET(__wt_config_gets(session, cfg, "write_timestamp_usage", &cval)); if (WT_STRING_MATCH("mixed_mode", cval.str, cval.len)) LF_SET(WT_DHANDLE_TS_MIXED_MODE); else if (WT_STRING_MATCH("never", cval.str, cval.len)) LF_SET(WT_DHANDLE_TS_NEVER); -#ifdef WT_STANDALONE_BUILD - else if (WT_STRING_MATCH("ordered", cval.str, cval.len)) - LF_SET(WT_DHANDLE_TS_ORDERED); -#else else LF_SET(WT_DHANDLE_TS_ORDERED); -#endif /* Reset the flags. */ dhandle->ts_flags = flags; diff --git a/src/third_party/wiredtiger/src/docs/arch-hs.dox b/src/third_party/wiredtiger/src/docs/arch-hs.dox index 862ce7442d0..d44e9a24d7b 100644 --- a/src/third_party/wiredtiger/src/docs/arch-hs.dox +++ b/src/third_party/wiredtiger/src/docs/arch-hs.dox @@ -85,8 +85,7 @@ isolation level and doesn't hold a valid snapshot. @section arch_hs_reconciliation History store and reconciliation When a dirty page is reconciled on a user file btree, the update chain is examined and the latest committed update is chosen as the on-disk value. All older updates are added to the history store -table assuming they are not yet obsolete. Additionally any out of order timestamps will -be corrected. +table assuming they are not yet obsolete. Additionally any mixed mode timestamps will be applied. Consider the following update chain for a user table with btree id 1000 and data store key "AAA": diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c index 2bc165326df..ea9e8f99b66 100644 --- a/src/third_party/wiredtiger/src/evict/evict_page.c +++ b/src/third_party/wiredtiger/src/evict/evict_page.c @@ -669,7 +669,7 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t evict_flags, bool /* * If checkpoint is running concurrently, set the checkpoint running flag and we will abort the - * eviction if we detect out of order timestamp updates. + * eviction if we detect any mixed mode timestamp updates. */ if (conn->txn_global.checkpoint_running) LF_SET(WT_REC_CHECKPOINT_RUNNING); diff --git a/src/third_party/wiredtiger/src/history/hs_rec.c b/src/third_party/wiredtiger/src/history/hs_rec.c index 6169286024b..14719341a0f 100644 --- a/src/third_party/wiredtiger/src/history/hs_rec.c +++ b/src/third_party/wiredtiger/src/history/hs_rec.c @@ -9,8 +9,8 @@ #include "wt_internal.h" static int __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, - uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool ooo_tombstone, - bool error_on_ooo_ts, uint64_t *hs_counter); + uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool mm_tombstone, + bool error_on_mm_ts, uint64_t *hs_counter); /* * __hs_verbose_cache_stats -- @@ -66,7 +66,7 @@ __hs_verbose_cache_stats(WT_SESSION_IMPL *session, WT_BTREE *btree) */ static int __hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree, const WT_ITEM *key, - const uint8_t type, const WT_ITEM *hs_value, WT_TIME_WINDOW *tw, bool error_on_ooo_ts) + const uint8_t type, const WT_ITEM *hs_value, WT_TIME_WINDOW *tw, bool error_on_mm_ts) { WT_CURSOR_BTREE *hs_cbt; WT_DECL_ITEM(hs_key); @@ -188,10 +188,7 @@ __hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree, * timestamp. */ if (ret == 0) { - /* - * Check if the current history store update's stop timestamp is out of order with respect - * to the update to be inserted before before moving onto the next record. - */ + /* Check if the current history store update's stop timestamp is less than the update. */ if (hs_cbt->upd_value->tw.stop_ts <= tw->start_ts) WT_ERR_NOTFOUND_OK(cursor->next(cursor), true); else @@ -203,10 +200,10 @@ __hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree, /* * It is possible to insert a globally visible update into the history store with larger - * timestamps ahead of it. An example would be a mixed-mode update getting moved to the history + * timestamps ahead of it. An example would be a mixed mode update getting moved to the history * store. This scenario can avoid detection earlier in reconciliation and result in an EBUSY - * being returned as it detects out-of-order timestamps. To prevent this we allow globally - * visible updates to fix history store content even if eviction is running concurrently with a + * being returned as it detects mixed mode timestamps. To prevent this we allow globally visible + * updates to fix history store content even if eviction is running concurrently with a * checkpoint. * * This is safe because global visibility considers the checkpoint transaction id and timestamp @@ -214,13 +211,13 @@ __hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree, * visible to checkpoint and the modifications it makes to the history store will be the same as * what checkpoint would've done. */ - if (error_on_ooo_ts && __wt_txn_tw_start_visible_all(session, tw)) { - error_on_ooo_ts = false; + if (error_on_mm_ts && __wt_txn_tw_start_visible_all(session, tw)) { + error_on_mm_ts = false; } if (ret == 0) WT_ERR(__hs_delete_reinsert_from_pos(session, cursor, btree->id, key, tw->start_ts + 1, - true, false, error_on_ooo_ts, &counter)); + true, false, error_on_mm_ts, &counter)); #ifdef HAVE_DIAGNOSTIC /* @@ -310,10 +307,8 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *mult #define MAX_REVERSE_MODIFY_NUM 16 WT_MODIFY entries[MAX_REVERSE_MODIFY_NUM]; WT_UPDATE_VECTOR updates; - WT_UPDATE_VECTOR out_of_order_ts_updates; WT_SAVE_UPD *list; - WT_UPDATE *first_globally_visible_upd, *fix_ts_upd, *min_ts_upd, *out_of_order_ts_upd; - WT_UPDATE *newest_hs, *non_aborted_upd, *oldest_upd, *prev_upd, *ref_upd, *tombstone, *upd; + WT_UPDATE *newest_hs, *non_ts_upd, *oldest_upd, *prev_upd, *ref_upd, *tombstone, *upd; WT_TIME_WINDOW tw; wt_off_t hs_size; uint64_t insert_cnt, max_hs_size, modify_cnt; @@ -321,30 +316,20 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *mult uint32_t i; uint8_t *p; int nentries; - bool enable_reverse_modify, error_on_ooo_ts, hs_inserted, squashed; + bool enable_reverse_modify, error_on_mm_ts, hs_inserted, squashed; r->cache_write_hs = false; btree = S2BT(session); prev_upd = NULL; WT_TIME_WINDOW_INIT(&tw); insert_cnt = 0; - error_on_ooo_ts = F_ISSET(r, WT_REC_CHECKPOINT_RUNNING); + error_on_mm_ts = F_ISSET(r, WT_REC_CHECKPOINT_RUNNING); cache_hs_insert_full_update = cache_hs_insert_reverse_modify = cache_hs_write_squash = 0; WT_RET(__wt_curhs_open(session, NULL, &hs_cursor)); F_SET(hs_cursor, WT_CURSTD_HS_READ_COMMITTED); __wt_update_vector_init(session, &updates); - /* - * We use another stack to store the out-of-order timestamp updates (including updates without a - * timestamp). We walk the update chain from the newest to the oldest. Once an out-of-order - * timestamp update is detected, and it has a lower timestamp than the head of the stack, it is - * pushed to the stack. When we are inserting updates to the history store, we compare the - * update's timestamp with the head of the stack. If it is larger than the out-of-order - * timestamp, we fix the timestamp by inserting with the out-of-order timestamp. If the update - * we are inserting is the head of the stack, we pop it from the stack. - */ - __wt_update_vector_init(session, &out_of_order_ts_updates); if (!btree->hs_entries) btree->hs_entries = true; @@ -397,10 +382,9 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *mult WT_ERR(__wt_illegal_value(session, r->page->type)); } - newest_hs = first_globally_visible_upd = min_ts_upd = out_of_order_ts_upd = NULL; + non_ts_upd = newest_hs = NULL; ref_upd = list->onpage_upd; - __wt_update_vector_clear(&out_of_order_ts_updates); __wt_update_vector_clear(&updates); /* @@ -438,44 +422,33 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *mult * 4) We have a single tombstone on the chain, it is simply ignored. */ squashed = false; - for (upd = list->onpage_upd, non_aborted_upd = prev_upd = NULL; upd != NULL; - prev_upd = non_aborted_upd, upd = upd->next) { + for (upd = list->onpage_upd, prev_upd = NULL; upd != NULL; upd = upd->next) { if (upd->txnid == WT_TXN_ABORTED) continue; - non_aborted_upd = upd; - - /* Detect out of order timestamp update. */ - if (min_ts_upd != NULL && min_ts_upd->start_ts < upd->start_ts && - out_of_order_ts_upd != min_ts_upd) { + /* Detect any mixed mode timestamp updates. */ + if (prev_upd != NULL && prev_upd->start_ts < upd->start_ts) { + WT_ASSERT(session, prev_upd->start_ts == WT_TS_NONE); /* - * Fail the eviction if we detect out of order timestamps and the error flag is set. - * We cannot modify the history store to fix the out of order timestamp updates as - * it may make the history store checkpoint inconsistent. + * Fail the eviction if we detect mixed mode timestamps and the error flag is set. + * We cannot modify the history store to fix the mixed mode timestamp updates as it + * may make the history store checkpoint inconsistent. */ - if (error_on_ooo_ts) { + if (error_on_mm_ts) { ret = EBUSY; - WT_STAT_CONN_INCR(session, cache_eviction_fail_checkpoint_out_of_order_ts); + WT_STAT_CONN_INCR(session, cache_eviction_fail_checkpoint_mm_ts); goto err; } /* - * Always insert full update to the history store if we detect out of order - * timestamp update. + * Always insert full update to the history store if we detect mixed mode timestamp + * update. */ enable_reverse_modify = false; - WT_ERR(__wt_update_vector_push(&out_of_order_ts_updates, min_ts_upd)); - out_of_order_ts_upd = min_ts_upd; - } else if (upd->prepare_state != WT_PREPARE_INPROGRESS && - (min_ts_upd == NULL || upd->start_ts <= min_ts_upd->start_ts)) - min_ts_upd = upd; + } WT_ERR(__wt_update_vector_push(&updates, upd)); - /* Track the first update that is globally visible. */ - if (first_globally_visible_upd == NULL && __wt_txn_upd_visible_all(session, upd)) - first_globally_visible_upd = upd; - /* * Always insert full update to the history store if we write a prepared update to the * data store. @@ -506,11 +479,13 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *mult squashed = true; } + prev_upd = upd; + /* - * No need to continue if we see the first self contained value after the first globally - * visible value. + * No need to continue if we found a first self contained value that is globally + * visible. */ - if (first_globally_visible_upd != NULL && WT_UPDATE_DATA_VALUE(upd)) + if (__wt_txn_upd_visible_all(session, upd) && WT_UPDATE_DATA_VALUE(upd)) break; /* @@ -519,35 +494,40 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *mult */ if (upd->type == WT_UPDATE_STANDARD && F_ISSET(upd, WT_UPDATE_HS)) break; + + /* + * Save the first non timestamped update in the update chain. This is used to reset all + * the following update timestamps in the chain. + */ + if (non_ts_upd == NULL && upd->start_ts == WT_TS_NONE) { + WT_ASSERT(session, upd->durable_ts == WT_TS_NONE); + non_ts_upd = upd; + } } prev_upd = upd = NULL; WT_ASSERT(session, updates.size > 0); - __wt_update_vector_peek(&updates, &oldest_upd); WT_ASSERT(session, oldest_upd->type == WT_UPDATE_STANDARD || oldest_upd->type == WT_UPDATE_TOMBSTONE); /* - * Fix the history store record here if the oldest update is a tombstone because we don't - * have the cursor placed at the correct place to fix the history store records when - * inserting the first update and it may be skipped if there is nothing to insert to the - * history store. + * Fix the history store record here if the oldest update is a mixed mode tombstone. This + * situation is possible only when the tombstone is globally visible. Delete any updates of + * the key in the history store with higher timestamp. */ - if (oldest_upd->type == WT_UPDATE_TOMBSTONE) { - if (out_of_order_ts_upd != NULL && out_of_order_ts_upd->start_ts < oldest_upd->start_ts) - fix_ts_upd = out_of_order_ts_upd; - else - fix_ts_upd = oldest_upd; + if (oldest_upd->type == WT_UPDATE_TOMBSTONE && oldest_upd->start_ts == WT_TS_NONE) { + WT_ERR(__wt_hs_delete_key_from_ts( + session, hs_cursor, btree->id, key, false, error_on_mm_ts)); - if (!F_ISSET(fix_ts_upd, WT_UPDATE_FIXED_HS)) { - /* Delete and reinsert any update of the key with a higher timestamp. */ - WT_ERR(__wt_hs_delete_key_from_ts(session, hs_cursor, btree->id, key, - fix_ts_upd->start_ts + 1, true, false, error_on_ooo_ts)); - F_SET(fix_ts_upd, WT_UPDATE_FIXED_HS); - } + /* + * Reset the non timestamp update if it is the last update in the chain. Inserting this + * update to the history store will reset any higher timestamps of the key. + */ + if (oldest_upd == non_ts_upd) + non_ts_upd = NULL; } /* Skip if we have nothing to insert to the history store. */ @@ -560,7 +540,6 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *mult /* Construct the oldest full update. */ WT_ERR(__hs_next_upd_full_value(session, &updates, NULL, full_value, &upd)); - hs_inserted = false; /* @@ -579,14 +558,16 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *mult tombstone = NULL; __wt_update_vector_peek(&updates, &prev_upd); - if (out_of_order_ts_updates.size > 0) { - __wt_update_vector_peek(&out_of_order_ts_updates, &out_of_order_ts_upd); - } else - out_of_order_ts_upd = NULL; + /* + * Reset the non timestamped update pointer once all the previous updates are inserted + * into the history store. + */ + if (upd == non_ts_upd) + non_ts_upd = NULL; - if (out_of_order_ts_upd != NULL && out_of_order_ts_upd->start_ts < upd->start_ts) { - tw.durable_start_ts = out_of_order_ts_upd->durable_ts; - tw.start_ts = out_of_order_ts_upd->start_ts; + if (non_ts_upd != NULL) { + tw.durable_start_ts = WT_TS_NONE; + tw.start_ts = WT_TS_NONE; } else { tw.durable_start_ts = upd->durable_ts; tw.start_ts = upd->start_ts; @@ -612,27 +593,9 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *mult */ WT_ASSERT(session, prev_upd->start_ts <= prev_upd->durable_ts); - /* - * Pop from the out of order timestamp updates stack if the previous update or the - * current update is at the head of the stack. We need to check both cases because - * if there is a tombstone older than the out of order timestamp, we would not pop - * it because we skip the tombstone. Pop it when we are inserting it instead. - * - * Here it is assumed that the out of order update is equal to the oldest update - * among the multiple out of order consecutive updates that have same timestamps. - * For instance, U1@10 -> U2@10 -> U3@10 -> U4@20, U3 which is the oldest update - * will be the out of order update. - */ - if (out_of_order_ts_upd != NULL && - (out_of_order_ts_upd == prev_upd || out_of_order_ts_upd == upd)) { - __wt_update_vector_pop(&out_of_order_ts_updates, &out_of_order_ts_upd); - out_of_order_ts_upd = NULL; - } - - if (out_of_order_ts_upd != NULL && - out_of_order_ts_upd->start_ts < prev_upd->start_ts) { - tw.durable_stop_ts = out_of_order_ts_upd->durable_ts; - tw.stop_ts = out_of_order_ts_upd->start_ts; + if (non_ts_upd != NULL) { + tw.durable_stop_ts = WT_TS_NONE; + tw.stop_ts = WT_TS_NONE; } else { tw.durable_stop_ts = prev_upd->durable_ts; tw.stop_ts = prev_upd->start_ts; @@ -643,6 +606,13 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *mult tombstone = prev_upd; } + /* + * Reset the non timestamped update pointer once all the previous updates are inserted + * into the history store. + */ + if (prev_upd == non_ts_upd) + non_ts_upd = NULL; + WT_ERR( __hs_next_upd_full_value(session, &updates, full_value, prev_full_value, &prev_upd)); @@ -705,14 +675,14 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *mult entries, &nentries) == 0) { WT_ERR(__wt_modify_pack(hs_cursor, entries, nentries, &modify_value)); WT_ERR(__hs_insert_record(session, hs_cursor, btree, key, WT_UPDATE_MODIFY, - modify_value, &tw, error_on_ooo_ts)); + modify_value, &tw, error_on_mm_ts)); ++cache_hs_insert_reverse_modify; __wt_scr_free(session, &modify_value); ++modify_cnt; } else { modify_cnt = 0; WT_ERR(__hs_insert_record(session, hs_cursor, btree, key, WT_UPDATE_STANDARD, - full_value, &tw, error_on_ooo_ts)); + full_value, &tw, error_on_mm_ts)); ++cache_hs_insert_full_update; } @@ -731,19 +701,6 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *mult if (upd == newest_hs) break; } - - /* - * In the case that the onpage value is an out of order timestamp update and the update - * older than it is a tombstone, it remains in the stack. - */ - WT_ASSERT(session, out_of_order_ts_updates.size <= 1); -#ifdef HAVE_DIAGNOSTIC - if (out_of_order_ts_updates.size == 1) { - __wt_update_vector_peek(&out_of_order_ts_updates, &upd); - WT_ASSERT(session, - upd->txnid == list->onpage_upd->txnid && upd->start_ts == list->onpage_upd->start_ts); - } -#endif } WT_ERR(__wt_block_manager_named_size(session, WT_HS_FILE, &hs_size)); @@ -767,7 +724,6 @@ err: if (modify_value != NULL) __wt_scr_free(session, &modify_value); __wt_update_vector_free(&updates); - __wt_update_vector_free(&out_of_order_ts_updates); __wt_scr_free(session, &full_value); __wt_scr_free(session, &prev_full_value); @@ -790,7 +746,7 @@ err: */ int __wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint32_t btree_id, - const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool ooo_tombstone, bool error_on_ooo_ts) + const WT_ITEM *key, bool reinsert, bool error_on_mm_ts) { WT_DECL_RET; WT_ITEM hs_key; @@ -799,15 +755,9 @@ __wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint3 uint32_t hs_btree_id; bool hs_read_all_flag; - /* - * If we delete all the updates of the key from the history store, we should not reinsert any - * update except when an out-of-order tombstone is not globally visible yet. - */ - WT_ASSERT(session, ooo_tombstone || ts > WT_TS_NONE || !reinsert); - hs_read_all_flag = F_ISSET(hs_cursor, WT_CURSTD_HS_READ_ALL); - hs_cursor->set_key(hs_cursor, 3, btree_id, key, ts); + hs_cursor->set_key(hs_cursor, 3, btree_id, key, WT_TS_NONE); /* * Setting the flag WT_CURSTD_HS_READ_ALL before searching the history store optimizes the * search routine as we do not skip globally visible tombstones during the search. @@ -823,8 +773,8 @@ __wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint3 ++hs_counter; } - WT_ERR(__hs_delete_reinsert_from_pos(session, hs_cursor, btree_id, key, ts, reinsert, - ooo_tombstone, error_on_ooo_ts, &hs_counter)); + WT_ERR(__hs_delete_reinsert_from_pos( + session, hs_cursor, btree_id, key, WT_TS_NONE, reinsert, true, error_on_mm_ts, &hs_counter)); done: err: @@ -842,7 +792,7 @@ err: */ static int __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint32_t btree_id, - const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool ooo_tombstone, bool error_on_ooo_ts, + const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool mm_tombstone, bool error_on_mm_ts, uint64_t *counter) { WT_CURSOR *hs_insert_cursor; @@ -871,9 +821,9 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui /* * If we delete all the updates of the key from the history store, we should not reinsert any - * update except when an out-of-order tombstone is not globally visible yet. + * update except when a mixed mode tombstone is not globally visible yet. */ - WT_ASSERT(session, ooo_tombstone || ts > WT_TS_NONE || !reinsert); + WT_ASSERT(session, mm_tombstone || ts > WT_TS_NONE || !reinsert); for (; ret == 0; ret = hs_cursor->next(hs_cursor)) { /* Ignore records that are obsolete. */ @@ -902,37 +852,24 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui WT_ERR(ret); /* - * Fail the eviction if we detect out of order timestamps when we've passed the error return - * flag. We cannot modify the history store to fix the out of order timestamp updates as it may - * make the history store checkpoint inconsistent. + * Fail the eviction if we detect mixed mode timestamps when we've passed the error return flag. + * We cannot modify the history store to fix the mixed mode timestamp updates as it may make the + * history store checkpoint inconsistent. */ - if (error_on_ooo_ts) { + if (error_on_mm_ts) { ret = EBUSY; - WT_STAT_CONN_INCR(session, cache_eviction_fail_checkpoint_out_of_order_ts); + WT_STAT_CONN_INCR(session, cache_eviction_fail_checkpoint_mm_ts); goto err; } /* - * The goal of this function is to move out-of-order content to maintain ordering in the + * The goal of this function is to move mixed mode timestamp content to maintain ordering in the * history store. We do this by removing content with higher timestamps and reinserting it - * behind (from search's point of view) the newly inserted update. Even though these updates - * will all have the same timestamp, they cannot be discarded since older readers may need to - * see them after they've been moved due to their transaction id. - * - * For example, if we're inserting an update at timestamp 3 with value ddd: - * btree key ts counter value stop_ts - * 2 foo 5 0 aaa 6 - * 2 foo 6 0 bbb 7 - * 2 foo 7 0 ccc 8 - * - * We want to end up with this: - * btree key ts counter value stop_ts - * 2 foo 3 0 aaa 3 - * 2 foo 3 1 bbb 3 - * 2 foo 3 2 ccc 3 - * 2 foo 3 3 ddd 3 + * with zero timestamp (from search's point of view) the newly inserted update. Even though + * these updates will all have the same timestamp, they cannot be discarded since older readers + * may need to see them after they've been moved due to their transaction id. * - * Another example, if we're inserting an update at timestamp 0 with value ddd: + * For example, if we're inserting an update at timestamp 0 with value ddd: * btree key ts counter value stop_ts * 2 foo 5 0 aaa 6 * 2 foo 6 0 bbb 7 @@ -945,19 +882,19 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui * 2 foo 0 2 ccc 0 * 2 foo 0 3 ddd 0 * - * Another example, if we're inserting an update at timestamp 3 with value ddd - * that is an out of order with a stop timestamp of 6: + * Another example, if we're inserting an update at timestamp 0 with value ddd + * that is an mixed mode update with a stop timestamp of 6: * btree key ts counter value stop_ts - * 2 foo 1 0 aaa 6 + * 2 foo 0 0 aaa 6 * 2 foo 6 0 bbb 7 * 2 foo 7 0 ccc 8 * * We want to end up with this: * btree key ts counter value stop_ts - * 2 foo 1 1 aaa 3 - * 2 foo 3 2 bbb 3 - * 2 foo 3 3 ccc 3 - * 2 foo 3 4 ddd 3 + * 2 foo 0 1 aaa 0 + * 2 foo 0 2 bbb 0 + * 2 foo 0 3 ccc 0 + * 2 foo 0 4 ddd 0 */ for (; ret == 0; ret = hs_cursor->next(hs_cursor)) { /* We shouldn't have crossed the btree and user key search space. */ @@ -968,14 +905,14 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui WT_ASSERT(session, cmp == 0); #endif /* - * If we got here, we've got out-of-order updates in the history store. + * If we got here, we've got mixed mode updates in the history store. * * Our strategy to rectify this is to remove all records for the same key with a timestamp - * higher or equal than the specified timestamp and reinsert them at the smaller timestamp, + * higher or equal than the specified timestamp and reinsert them at the zero timestamp, * which is the timestamp of the update we are about to insert to the history store. * * It is possible that the cursor next call can find an update that was reinserted when it - * had an out of order tombstone with respect to the new update. Continue the search by + * had an mixed mode tombstone with respect to the new update. Continue the search by * ignoring them. */ __wt_hs_upd_time_window(hs_cursor, &twp); @@ -1000,7 +937,7 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui ++cache_hs_order_lose_durable_timestamp; __wt_verbose(session, WT_VERB_TIMESTAMP, - "fixing existing out-of-order updates by moving them; start_ts=%s, " + "fixing existing mixed mode updates by moving them; start_ts=%s, " "durable_start_ts=%s, " "stop_ts=%s, durable_stop_ts=%s, new_ts=%s", __wt_timestamp_to_string(hs_cbt->upd_value->tw.start_ts, ts_string[0]), @@ -1010,11 +947,11 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui __wt_timestamp_to_string(ts, ts_string[4])); /* - * Use the original start time window's timestamps if it isn't out of order with respect - * to the new update. + * Use the original start time window's timestamps if it's timestamp is less than to the + * mixed mode new update. */ if (hs_cbt->upd_value->tw.start_ts >= ts) - hs_insert_tw.start_ts = hs_insert_tw.durable_start_ts = ooo_tombstone ? ts : ts - 1; + hs_insert_tw.start_ts = hs_insert_tw.durable_start_ts = mm_tombstone ? ts : ts - 1; else { hs_insert_tw.start_ts = hs_cbt->upd_value->tw.start_ts; hs_insert_tw.durable_start_ts = hs_cbt->upd_value->tw.durable_start_ts; @@ -1026,7 +963,7 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui * another moved update OR the update itself triggered the correction. In either case, * we should preserve the stop transaction id. */ - hs_insert_tw.stop_ts = hs_insert_tw.durable_stop_ts = ooo_tombstone ? ts : ts - 1; + hs_insert_tw.stop_ts = hs_insert_tw.durable_stop_ts = mm_tombstone ? ts : ts - 1; hs_insert_tw.stop_txn = hs_cbt->upd_value->tw.stop_txn; /* Extract the underlying value for reinsertion. */ @@ -1034,7 +971,7 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui hs_cursor, &tw.durable_stop_ts, &tw.durable_start_ts, &hs_upd_type, &hs_value)); /* Reinsert the update with corrected timestamps. */ - if (ooo_tombstone && hs_ts == ts) + if (mm_tombstone && hs_ts == ts) *counter = hs_counter; /* Insert the value back with different timestamps. */ @@ -1048,7 +985,7 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui ++cache_hs_order_reinsert; } - /* Delete the out-of-order entry. */ + /* Delete the mixed mode entry. */ WT_ERR(hs_cursor->remove(hs_cursor)); ++cache_hs_order_remove; } diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index 8eb9b1c3f49..68c68872900 100644 --- a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h @@ -1195,12 +1195,11 @@ struct __wt_update { /* AUTOMATIC FLAG VALUE GENERATION START 0 */ #define WT_UPDATE_DS 0x01u /* Update has been written to the data store. */ -#define WT_UPDATE_FIXED_HS 0x02u /* Update that fixed the history store. */ -#define WT_UPDATE_HS 0x04u /* Update has been written to history store. */ -#define WT_UPDATE_PREPARE_RESTORED_FROM_DS 0x08u /* Prepared update restored from data store. */ -#define WT_UPDATE_RESTORED_FAST_TRUNCATE 0x10u /* Fast truncate instantiation */ -#define WT_UPDATE_RESTORED_FROM_DS 0x20u /* Update restored from data store. */ -#define WT_UPDATE_RESTORED_FROM_HS 0x40u /* Update restored from history store. */ +#define WT_UPDATE_HS 0x02u /* Update has been written to history store. */ +#define WT_UPDATE_PREPARE_RESTORED_FROM_DS 0x04u /* Prepared update restored from data store. */ +#define WT_UPDATE_RESTORED_FAST_TRUNCATE 0x08u /* Fast truncate instantiation */ +#define WT_UPDATE_RESTORED_FROM_DS 0x10u /* Update restored from data store. */ +#define WT_UPDATE_RESTORED_FROM_HS 0x20u /* Update restored from history store. */ /* AUTOMATIC FLAG VALUE GENERATION STOP 8 */ uint8_t flags; diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index a9d60a50efc..d83f3be685b 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -774,8 +774,8 @@ extern int __wt_hex_to_raw(WT_SESSION_IMPL *session, const char *from, WT_ITEM * extern int __wt_hs_config(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, - uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool ooo_tombstone, - bool error_on_ooo_ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); + uint32_t btree_id, const WT_ITEM *key, bool reinsert, bool error_on_mm_ts) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_hs_find_upd(WT_SESSION_IMPL *session, uint32_t btree_id, WT_ITEM *key, const char *value_format, uint64_t recno, WT_UPDATE_VALUE *upd_value, WT_ITEM *base_value_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -1236,9 +1236,8 @@ extern int __wt_rec_dictionary_init(WT_SESSION_IMPL *session, WT_RECONCILE *r, u WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_rec_dictionary_lookup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *val, WT_REC_DICTIONARY **dpp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_rec_hs_clear_on_tombstone(WT_SESSION_IMPL *session, WT_RECONCILE *r, - wt_timestamp_t ts, uint64_t recno, WT_ITEM *rowkey, bool reinsert) - WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_rec_hs_clear_on_tombstone(WT_SESSION_IMPL *session, WT_RECONCILE *r, uint64_t recno, + WT_ITEM *rowkey, bool reinsert) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_rec_row_leaf(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref, diff --git a/src/third_party/wiredtiger/src/include/reconcile.h b/src/third_party/wiredtiger/src/include/reconcile.h index d9df3ef8ffa..3b5e9e427ac 100644 --- a/src/third_party/wiredtiger/src/include/reconcile.h +++ b/src/third_party/wiredtiger/src/include/reconcile.h @@ -309,8 +309,8 @@ typedef struct { WT_TIME_WINDOW tw; - bool upd_saved; /* An element on the row's update chain was saved */ - bool ooo_tombstone; /* Out-of-order tombstone */ + bool upd_saved; /* An element on the row's update chain was saved */ + bool mm_tombstone; /* Mixed mode tombstone */ } WT_UPDATE_SELECT; /* diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h index 090b7a932d5..91e5ae7f0f5 100644 --- a/src/third_party/wiredtiger/src/include/stat.h +++ b/src/third_party/wiredtiger/src/include/stat.h @@ -396,10 +396,10 @@ struct __wt_connection_stats { int64_t cache_eviction_get_ref_empty2; int64_t cache_eviction_aggressive_set; int64_t cache_eviction_empty_score; - int64_t cache_eviction_blocked_ooo_checkpoint_race_1; - int64_t cache_eviction_blocked_ooo_checkpoint_race_2; - int64_t cache_eviction_blocked_ooo_checkpoint_race_3; - int64_t cache_eviction_blocked_ooo_checkpoint_race_4; + int64_t cache_eviction_blocked_mm_checkpoint_race_1; + int64_t cache_eviction_blocked_mm_checkpoint_race_2; + int64_t cache_eviction_blocked_mm_checkpoint_race_3; + int64_t cache_eviction_blocked_mm_checkpoint_race_4; int64_t cache_eviction_walk_passes; int64_t cache_eviction_queue_empty; int64_t cache_eviction_queue_not_empty; @@ -453,9 +453,9 @@ struct __wt_connection_stats { int64_t cache_hs_insert; int64_t cache_hs_insert_restart; int64_t cache_hs_ondisk_max; - int64_t cache_hs_ondisk; int64_t cache_hs_order_lose_durable_timestamp; int64_t cache_hs_order_reinsert; + int64_t cache_hs_ondisk; int64_t cache_hs_read; int64_t cache_hs_read_miss; int64_t cache_hs_read_squash; @@ -498,7 +498,7 @@ struct __wt_connection_stats { int64_t cache_eviction_fail; int64_t cache_eviction_fail_active_children_on_an_internal_page; int64_t cache_eviction_fail_in_reconciliation; - int64_t cache_eviction_fail_checkpoint_out_of_order_ts; + int64_t cache_eviction_fail_checkpoint_mm_ts; int64_t cache_eviction_walk; int64_t cache_write; int64_t cache_write_restore; @@ -906,10 +906,10 @@ struct __wt_dsrc_stats { int64_t cache_eviction_checkpoint; int64_t cache_eviction_blocked_checkpoint_hs; int64_t cache_eviction_fail; - int64_t cache_eviction_blocked_ooo_checkpoint_race_1; - int64_t cache_eviction_blocked_ooo_checkpoint_race_2; - int64_t cache_eviction_blocked_ooo_checkpoint_race_3; - int64_t cache_eviction_blocked_ooo_checkpoint_race_4; + int64_t cache_eviction_blocked_mm_checkpoint_race_1; + int64_t cache_eviction_blocked_mm_checkpoint_race_2; + int64_t cache_eviction_blocked_mm_checkpoint_race_3; + int64_t cache_eviction_blocked_mm_checkpoint_race_4; int64_t cache_eviction_walk_passes; int64_t cache_eviction_target_page_lt10; int64_t cache_eviction_target_page_lt32; diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index f27e0e79d94..fe4e72ba22d 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -5312,25 +5312,25 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); /*! cache: eviction empty score */ #define WT_STAT_CONN_CACHE_EVICTION_EMPTY_SCORE 1063 /*! - * cache: eviction gave up due to detecting an out of order on disk value + * cache: eviction gave up due to detecting a mixed mode on disk value * behind the last update on the chain */ -#define WT_STAT_CONN_CACHE_EVICTION_BLOCKED_OOO_CHECKPOINT_RACE_1 1064 +#define WT_STAT_CONN_CACHE_EVICTION_BLOCKED_MM_CHECKPOINT_RACE_1 1064 /*! - * cache: eviction gave up due to detecting an out of order tombstone - * ahead of the selected on disk update + * cache: eviction gave up due to detecting a mixed mode tombstone ahead + * of the selected on disk update */ -#define WT_STAT_CONN_CACHE_EVICTION_BLOCKED_OOO_CHECKPOINT_RACE_2 1065 +#define WT_STAT_CONN_CACHE_EVICTION_BLOCKED_MM_CHECKPOINT_RACE_2 1065 /*! - * cache: eviction gave up due to detecting an out of order tombstone - * ahead of the selected on disk update after validating the update chain + * cache: eviction gave up due to detecting a mixed mode tombstone ahead + * of the selected on disk update after validating the update chain */ -#define WT_STAT_CONN_CACHE_EVICTION_BLOCKED_OOO_CHECKPOINT_RACE_3 1066 +#define WT_STAT_CONN_CACHE_EVICTION_BLOCKED_MM_CHECKPOINT_RACE_3 1066 /*! - * cache: eviction gave up due to detecting out of order timestamps on - * the update chain after the selected on disk update + * cache: eviction gave up due to detecting mixed mode timestamps on the + * update chain after the selected on disk update */ -#define WT_STAT_CONN_CACHE_EVICTION_BLOCKED_OOO_CHECKPOINT_RACE_4 1067 +#define WT_STAT_CONN_CACHE_EVICTION_BLOCKED_MM_CHECKPOINT_RACE_4 1067 /*! cache: eviction passes of a file */ #define WT_STAT_CONN_CACHE_EVICTION_WALK_PASSES 1068 /*! cache: eviction server candidate queue empty when topping up */ @@ -5467,18 +5467,18 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_CACHE_HS_INSERT_RESTART 1119 /*! cache: history store table max on-disk size */ #define WT_STAT_CONN_CACHE_HS_ONDISK_MAX 1120 -/*! cache: history store table on-disk size */ -#define WT_STAT_CONN_CACHE_HS_ONDISK 1121 /*! - * cache: history store table out-of-order resolved updates that lose - * their durable timestamp + * cache: history store table mixed mode resolved updates that lose their + * durable timestamp */ -#define WT_STAT_CONN_CACHE_HS_ORDER_LOSE_DURABLE_TIMESTAMP 1122 +#define WT_STAT_CONN_CACHE_HS_ORDER_LOSE_DURABLE_TIMESTAMP 1121 /*! - * cache: history store table out-of-order updates that were fixed up by + * cache: history store table mixed mode updates that were fixed up by * reinserting with the fixed timestamp */ -#define WT_STAT_CONN_CACHE_HS_ORDER_REINSERT 1123 +#define WT_STAT_CONN_CACHE_HS_ORDER_REINSERT 1122 +/*! cache: history store table on-disk size */ +#define WT_STAT_CONN_CACHE_HS_ONDISK 1123 /*! cache: history store table reads */ #define WT_STAT_CONN_CACHE_HS_READ 1124 /*! cache: history store table reads missed */ @@ -5504,7 +5504,7 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_ONPAGE_REMOVAL 1130 /*! * cache: history store table truncation to remove range of updates due - * to out-of-order timestamp update on data page + * to mixed mode timestamp update on data page */ #define WT_STAT_CONN_CACHE_HS_ORDER_REMOVE 1131 /*! cache: history store table writes requiring squashed modifies */ @@ -5586,9 +5586,9 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_CACHE_EVICTION_FAIL_IN_RECONCILIATION 1165 /*! * cache: pages selected for eviction unable to be evicted because of - * race between checkpoint and out of order timestamps handling + * race between checkpoint and mixed mode timestamps handling */ -#define WT_STAT_CONN_CACHE_EVICTION_FAIL_CHECKPOINT_OUT_OF_ORDER_TS 1166 +#define WT_STAT_CONN_CACHE_EVICTION_FAIL_CHECKPOINT_MM_TS 1166 /*! cache: pages walked for eviction */ #define WT_STAT_CONN_CACHE_EVICTION_WALK 1167 /*! cache: pages written from cache */ @@ -6565,25 +6565,25 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); /*! cache: data source pages selected for eviction unable to be evicted */ #define WT_STAT_DSRC_CACHE_EVICTION_FAIL 2051 /*! - * cache: eviction gave up due to detecting an out of order on disk value + * cache: eviction gave up due to detecting a mixed mode on disk value * behind the last update on the chain */ -#define WT_STAT_DSRC_CACHE_EVICTION_BLOCKED_OOO_CHECKPOINT_RACE_1 2052 +#define WT_STAT_DSRC_CACHE_EVICTION_BLOCKED_MM_CHECKPOINT_RACE_1 2052 /*! - * cache: eviction gave up due to detecting an out of order tombstone - * ahead of the selected on disk update + * cache: eviction gave up due to detecting a mixed mode tombstone ahead + * of the selected on disk update */ -#define WT_STAT_DSRC_CACHE_EVICTION_BLOCKED_OOO_CHECKPOINT_RACE_2 2053 +#define WT_STAT_DSRC_CACHE_EVICTION_BLOCKED_MM_CHECKPOINT_RACE_2 2053 /*! - * cache: eviction gave up due to detecting an out of order tombstone - * ahead of the selected on disk update after validating the update chain + * cache: eviction gave up due to detecting a mixed mode tombstone ahead + * of the selected on disk update after validating the update chain */ -#define WT_STAT_DSRC_CACHE_EVICTION_BLOCKED_OOO_CHECKPOINT_RACE_3 2054 +#define WT_STAT_DSRC_CACHE_EVICTION_BLOCKED_MM_CHECKPOINT_RACE_3 2054 /*! - * cache: eviction gave up due to detecting out of order timestamps on - * the update chain after the selected on disk update + * cache: eviction gave up due to detecting mixed mode timestamps on the + * update chain after the selected on disk update */ -#define WT_STAT_DSRC_CACHE_EVICTION_BLOCKED_OOO_CHECKPOINT_RACE_4 2055 +#define WT_STAT_DSRC_CACHE_EVICTION_BLOCKED_MM_CHECKPOINT_RACE_4 2055 /*! cache: eviction walk passes of a file */ #define WT_STAT_DSRC_CACHE_EVICTION_WALK_PASSES 2056 /*! cache: eviction walk target pages histogram - 0-9 */ @@ -6630,12 +6630,12 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); /*! cache: history store table insert calls that returned restart */ #define WT_STAT_DSRC_CACHE_HS_INSERT_RESTART 2073 /*! - * cache: history store table out-of-order resolved updates that lose - * their durable timestamp + * cache: history store table mixed mode resolved updates that lose their + * durable timestamp */ #define WT_STAT_DSRC_CACHE_HS_ORDER_LOSE_DURABLE_TIMESTAMP 2074 /*! - * cache: history store table out-of-order updates that were fixed up by + * cache: history store table mixed mode updates that were fixed up by * reinserting with the fixed timestamp */ #define WT_STAT_DSRC_CACHE_HS_ORDER_REINSERT 2075 @@ -6664,7 +6664,7 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_DSRC_CACHE_HS_KEY_TRUNCATE_ONPAGE_REMOVAL 2082 /*! * cache: history store table truncation to remove range of updates due - * to out-of-order timestamp update on data page + * to mixed mode timestamp update on data page */ #define WT_STAT_DSRC_CACHE_HS_ORDER_REMOVE 2083 /*! cache: history store table writes requiring squashed modifies */ diff --git a/src/third_party/wiredtiger/src/reconcile/rec_col.c b/src/third_party/wiredtiger/src/reconcile/rec_col.c index 166cdc9a871..489ae9e0677 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_col.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_col.c @@ -764,12 +764,11 @@ __wt_rec_col_fix( if (upd->type == WT_UPDATE_TOMBSTONE) { /* - * When an out-of-order or mixed-mode tombstone is getting written to disk, remove any - * historical versions that are greater in the history store for this key. + * When a mixed mode tombstone is getting written to disk, remove any historical + * versions that are greater in the history store for this key. */ - if (upd_select.ooo_tombstone && r->hs_clear_on_tombstone) - WT_ERR(__wt_rec_hs_clear_on_tombstone( - session, r, upd_select.tw.durable_stop_ts, recno, NULL, false)); + if (upd_select.mm_tombstone && r->hs_clear_on_tombstone) + WT_ERR(__wt_rec_hs_clear_on_tombstone(session, r, recno, NULL, false)); val = 0; @@ -782,12 +781,11 @@ __wt_rec_col_fix( /* Write the time window. */ if (!WT_TIME_WINDOW_IS_EMPTY(&upd_select.tw)) { /* - * When an out-of-order or mixed-mode tombstone is getting written to disk, remove - * any historical versions that are greater in the history store for this key. + * When a mixed mode tombstone is getting written to disk, remove any historical + * versions that are greater in the history store for this key. */ - if (upd_select.ooo_tombstone && r->hs_clear_on_tombstone) - WT_ERR(__wt_rec_hs_clear_on_tombstone( - session, r, upd_select.tw.durable_stop_ts, recno, NULL, true)); + if (upd_select.mm_tombstone && r->hs_clear_on_tombstone) + WT_ERR(__wt_rec_hs_clear_on_tombstone(session, r, recno, NULL, true)); WT_ERR(__wt_rec_col_fix_addtw( session, r, (uint32_t)(recno - curstartrecno), &upd_select.tw)); @@ -1422,24 +1420,20 @@ record_loop: data = upd->data; size = upd->size; /* - * When an out-of-order or mixed-mode tombstone is getting written to disk, - * remove any historical versions that are greater in the history store for this - * key. + * When a mixed mode tombstone is getting written to disk, remove any historical + * versions that are greater in the history store for this key. */ - if (upd_select.ooo_tombstone && r->hs_clear_on_tombstone) - WT_ERR(__wt_rec_hs_clear_on_tombstone( - session, r, twp->durable_stop_ts, src_recno, NULL, true)); + if (upd_select.mm_tombstone && r->hs_clear_on_tombstone) + WT_ERR(__wt_rec_hs_clear_on_tombstone(session, r, src_recno, NULL, true)); break; case WT_UPDATE_TOMBSTONE: /* - * When an out-of-order or mixed-mode tombstone is getting written to disk, - * remove any historical versions that are greater in the history store for this - * key. + * When a mixed mode tombstone is getting written to disk, remove any historical + * versions that are greater in the history store for this key. */ - if (upd_select.ooo_tombstone && r->hs_clear_on_tombstone) - WT_ERR(__wt_rec_hs_clear_on_tombstone( - session, r, twp->durable_stop_ts, src_recno, NULL, false)); + if (upd_select.mm_tombstone && r->hs_clear_on_tombstone) + WT_ERR(__wt_rec_hs_clear_on_tombstone(session, r, src_recno, NULL, false)); deleted = true; twp = &clear_tw; diff --git a/src/third_party/wiredtiger/src/reconcile/rec_row.c b/src/third_party/wiredtiger/src/reconcile/rec_row.c index ce944eaf5f6..2400531c8e5 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_row.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_row.c @@ -829,13 +829,12 @@ __wt_rec_row_leaf( /* Take the value from the update. */ WT_ERR(__wt_rec_cell_build_val(session, r, upd->data, upd->size, twp, 0)); /* - * When an out-of-order or mixed-mode tombstone is getting written to disk, remove - * any historical versions that are greater in the history store for that key. + * When a mixed mode tombstone is getting written to disk, remove any historical + * versions that are greater in the history store for that key. */ - if (upd_select.ooo_tombstone && r->hs_clear_on_tombstone) { + if (upd_select.mm_tombstone && r->hs_clear_on_tombstone) { WT_ERR(__wt_row_leaf_key(session, page, rip, tmpkey, true)); - WT_ERR(__wt_rec_hs_clear_on_tombstone( - session, r, twp->durable_stop_ts, WT_RECNO_OOB, tmpkey, true)); + WT_ERR(__wt_rec_hs_clear_on_tombstone(session, r, WT_RECNO_OOB, tmpkey, true)); } dictionary = true; break; @@ -861,13 +860,12 @@ __wt_rec_row_leaf( } /* - * When an out-of-order or mixed-mode tombstone is getting written to disk, remove - * any historical versions that are greater in the history store for this key. + * When a mixed mode tombstone is getting written to disk, remove any historical + * versions that are greater in the history store for this key. */ - if (upd_select.ooo_tombstone && r->hs_clear_on_tombstone) { + if (upd_select.mm_tombstone && r->hs_clear_on_tombstone) { WT_ERR(__wt_row_leaf_key(session, page, rip, tmpkey, true)); - WT_ERR(__wt_rec_hs_clear_on_tombstone( - session, r, twp->durable_stop_ts, WT_RECNO_OOB, tmpkey, false)); + WT_ERR(__wt_rec_hs_clear_on_tombstone(session, r, WT_RECNO_OOB, tmpkey, false)); } /* diff --git a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c index 3266e56110d..2a597ef976c 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c @@ -214,17 +214,17 @@ __rec_need_save_upd( } /* - * __timestamp_out_of_order_fix -- + * __timestamp_mm_fix -- * If we found a tombstone with a time point earlier than the update it applies to, which can - * happen if the application performs operations with timestamps out-of-order, make it invisible + * happen if the application performs operations with timestamps mixed mode, make it invisible * by making the start time point match the stop time point of the tombstone. We don't guarantee * that older readers will be able to continue reading content that has been made invisible by - * out-of-order updates. Note that we carefully don't take this path when the stop time point is + * mixed mode updates. Note that we carefully don't take this path when the stop time point is * equal to the start time point. While unusual, it is permitted for a single transaction to * insert and then remove a record. We don't want to generate a warning in that case. */ static inline bool -__timestamp_out_of_order_fix(WT_SESSION_IMPL *session, WT_TIME_WINDOW *select_tw) +__timestamp_mm_fix(WT_SESSION_IMPL *session, WT_TIME_WINDOW *select_tw) { char time_string[WT_TIME_STRING_SIZE]; @@ -235,8 +235,9 @@ __timestamp_out_of_order_fix(WT_SESSION_IMPL *session, WT_TIME_WINDOW *select_tw WT_ASSERT(session, select_tw->stop_txn >= select_tw->start_txn); if (select_tw->stop_ts < select_tw->start_ts) { + WT_ASSERT(session, select_tw->stop_ts == WT_TS_NONE); __wt_verbose(session, WT_VERB_TIMESTAMP, - "Warning: fixing out-of-order timestamps remove earlier than value; time window %s", + "Warning: fixing mixed mode timestamps remove earlier than value; time window %s", __wt_time_window_to_string(select_tw, time_string)); select_tw->durable_start_ts = select_tw->durable_stop_ts; @@ -266,15 +267,15 @@ __rec_validate_upd_chain(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_UPDATE *s return (0); /* - * No need to check out of order timestamps for any reconciliation that doesn't involve history + * No need to check mixed mode timestamps for any reconciliation that doesn't involve history * store (in-memory database, metadata, and history store reconciliation itself). */ if (!F_ISSET(r, WT_REC_HS)) return (0); /* - * If eviction reconciliation starts before checkpoint, it is fine to evict out of order - * timestamp updates. + * If eviction reconciliation starts before checkpoint, it is fine to evict mixed mode timestamp + * updates. */ if (!F_ISSET(r, WT_REC_CHECKPOINT_RUNNING)) return (0); @@ -285,7 +286,8 @@ __rec_validate_upd_chain(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_UPDATE *s * of the selected update. */ if (select_tw->stop_ts < select_tw->start_ts) { - WT_STAT_CONN_DATA_INCR(session, cache_eviction_blocked_ooo_checkpoint_race_2); + WT_ASSERT(session, select_tw->stop_ts == WT_TS_NONE); + WT_STAT_CONN_DATA_INCR(session, cache_eviction_blocked_mm_checkpoint_race_2); return (EBUSY); } @@ -315,7 +317,8 @@ __rec_validate_upd_chain(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_UPDATE *s /* Validate that the updates older than us have older timestamps. */ if (prev_upd->start_ts < upd->start_ts) { - WT_STAT_CONN_DATA_INCR(session, cache_eviction_blocked_ooo_checkpoint_race_4); + WT_ASSERT(session, prev_upd->start_ts == WT_TS_NONE); + WT_STAT_CONN_DATA_INCR(session, cache_eviction_blocked_mm_checkpoint_race_4); return (EBUSY); } @@ -337,7 +340,7 @@ __rec_validate_upd_chain(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_UPDATE *s } /* - * Check that the on-page time window isn't out-of-order. Don't check against ondisk prepared + * Check that the on-page time window isn't mixed mode. Don't check against ondisk prepared * update. It is either committed or rolled back if we are here. If we haven't seen an update * with the flag WT_UPDATE_RESTORED_FROM_DS we check against the ondisk value. * @@ -357,7 +360,8 @@ __rec_validate_upd_chain(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_UPDATE *s prev_upd->durable_ts >= vpack->tw.durable_stop_ts); if (prev_upd->start_ts < vpack->tw.start_ts || (WT_TIME_WINDOW_HAS_STOP(&vpack->tw) && prev_upd->start_ts < vpack->tw.stop_ts)) { - WT_STAT_CONN_DATA_INCR(session, cache_eviction_blocked_ooo_checkpoint_race_1); + WT_ASSERT(session, prev_upd->start_ts == WT_TS_NONE); + WT_STAT_CONN_DATA_INCR(session, cache_eviction_blocked_mm_checkpoint_race_1); return (EBUSY); } } @@ -387,7 +391,7 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, W */ upd_select->upd = NULL; upd_select->upd_saved = false; - upd_select->ooo_tombstone = false; + upd_select->mm_tombstone = false; select_tw = &upd_select->tw; WT_TIME_WINDOW_INIT(select_tw); @@ -687,8 +691,8 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, W WT_RET(__rec_validate_upd_chain(session, r, onpage_upd, select_tw, vpack)); /* - * Set the flag if the selected tombstone is an out-of-order or mixed mode to an update. Based - * on this flag, the caller functions perform the history store truncation for this key. + * Set the flag if the selected tombstone is a mixed mode to an update. Based on this flag, the + * caller functions perform the history store truncation for this key. */ if (!is_hs_page && tombstone != NULL && !F_ISSET(tombstone, WT_UPDATE_RESTORED_FROM_DS | WT_UPDATE_RESTORED_FROM_HS)) { @@ -697,7 +701,7 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, W /* * The selected update can be the tombstone itself when the tombstone is globally visible. * Compare the tombstone's timestamp with either the next update in the update list or the - * on-disk cell timestamp to determine if the tombstone is an out-of-order or mixed mode. + * on-disk cell timestamp to determine if the tombstone is a mixed mode. */ if (tombstone == upd) { upd = upd->next; @@ -709,20 +713,20 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, W if ((upd != NULL && upd->start_ts > tombstone->start_ts) || (vpack != NULL && vpack->tw.start_ts > tombstone->start_ts)) - upd_select->ooo_tombstone = true; + upd_select->mm_tombstone = true; } /* - * Fixup any out of order timestamps, assert that checkpoint wasn't running when this round of + * Fixup any mixed mode timestamps, assert that checkpoint wasn't running when this round of * reconciliation started. * * Returning EBUSY here is okay as the previous call to validate the update chain wouldn't have * caught the situation where only a tombstone is selected. */ - if (__timestamp_out_of_order_fix(session, select_tw) && F_ISSET(r, WT_REC_HS) && + if (__timestamp_mm_fix(session, select_tw) && F_ISSET(r, WT_REC_HS) && F_ISSET(r, WT_REC_CHECKPOINT_RUNNING)) { /* Catch this case in diagnostic builds. */ - WT_STAT_CONN_DATA_INCR(session, cache_eviction_blocked_ooo_checkpoint_race_3); + WT_STAT_CONN_DATA_INCR(session, cache_eviction_blocked_mm_checkpoint_race_3); WT_ASSERT(session, false); WT_RET(EBUSY); } diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index 8d5cf93255e..fad9e8fd3ad 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -2717,8 +2717,8 @@ err: * history store contents associated with that key. */ int -__wt_rec_hs_clear_on_tombstone(WT_SESSION_IMPL *session, WT_RECONCILE *r, wt_timestamp_t ts, - uint64_t recno, WT_ITEM *rowkey, bool reinsert) +__wt_rec_hs_clear_on_tombstone( + WT_SESSION_IMPL *session, WT_RECONCILE *r, uint64_t recno, WT_ITEM *rowkey, bool reinsert) { WT_BTREE *btree; WT_ITEM hs_recno_key, *key; @@ -2746,13 +2746,12 @@ __wt_rec_hs_clear_on_tombstone(WT_SESSION_IMPL *session, WT_RECONCILE *r, wt_tim /* * From WT_TS_NONE delete/reinsert all the history store content of the key. The test of * WT_REC_CHECKPOINT_RUNNING asks the function to fail with EBUSY if we are trying to evict an - * out of order or mixed-mode update while a checkpoint is in progress; such eviction can race - * with the checkpoint itself and lead to history store inconsistency. (Note: - * WT_REC_CHECKPOINT_RUNNING is set only during evictions, and never in the checkpoint thread - * itself.) + * mixed-mode update while a checkpoint is in progress; such eviction can race with the + * checkpoint itself and lead to history store inconsistency. (Note: WT_REC_CHECKPOINT_RUNNING + * is set only during evictions, and never in the checkpoint thread itself.) */ - WT_RET(__wt_hs_delete_key_from_ts(session, r->hs_cursor, btree->id, key, ts, reinsert, true, - F_ISSET(r, WT_REC_CHECKPOINT_RUNNING))); + WT_RET(__wt_hs_delete_key_from_ts( + session, r->hs_cursor, btree->id, key, reinsert, F_ISSET(r, WT_REC_CHECKPOINT_RUNNING))); /* Fail 0.01% of the time. */ if (F_ISSET(r, WT_REC_EVICT) && diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c index 47e87a8c4d4..e26093d3353 100644 --- a/src/third_party/wiredtiger/src/support/stat.c +++ b/src/third_party/wiredtiger/src/support/stat.c @@ -55,13 +55,13 @@ static const char *const __stats_dsrc_desc[] = { "cache: checkpoint blocked page eviction", "cache: checkpoint of history store file blocked non-history store page eviction", "cache: data source pages selected for eviction unable to be evicted", - "cache: eviction gave up due to detecting an out of order on disk value behind the last update " - "on the chain", - "cache: eviction gave up due to detecting an out of order tombstone ahead of the selected on " - "disk update", - "cache: eviction gave up due to detecting an out of order tombstone ahead of the selected on " - "disk update after validating the update chain", - "cache: eviction gave up due to detecting out of order timestamps on the update chain after the " + "cache: eviction gave up due to detecting a mixed mode on disk value behind the last update on " + "the chain", + "cache: eviction gave up due to detecting a mixed mode tombstone ahead of the selected on disk " + "update", + "cache: eviction gave up due to detecting a mixed mode tombstone ahead of the selected on disk " + "update after validating the update chain", + "cache: eviction gave up due to detecting mixed mode timestamps on the update chain after the " "selected on disk update", "cache: eviction walk passes of a file", "cache: eviction walk target pages histogram - 0-9", @@ -81,9 +81,9 @@ static const char *const __stats_dsrc_desc[] = { "cache: hazard pointer blocked page eviction", "cache: history store table insert calls", "cache: history store table insert calls that returned restart", - "cache: history store table out-of-order resolved updates that lose their durable timestamp", - "cache: history store table out-of-order updates that were fixed up by reinserting with the " - "fixed timestamp", + "cache: history store table mixed mode resolved updates that lose their durable timestamp", + "cache: history store table mixed mode updates that were fixed up by reinserting with the fixed " + "timestamp", "cache: history store table reads", "cache: history store table reads missed", "cache: history store table reads requiring squashed modifies", @@ -92,7 +92,7 @@ static const char *const __stats_dsrc_desc[] = { "cache: history store table truncation to remove an update", "cache: history store table truncation to remove range of updates due to key being removed from " "the data page during reconciliation", - "cache: history store table truncation to remove range of updates due to out-of-order timestamp " + "cache: history store table truncation to remove range of updates due to mixed mode timestamp " "update on data page", "cache: history store table writes requiring squashed modifies", "cache: in-memory page passed criteria to be split", @@ -332,10 +332,10 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats) stats->cache_eviction_checkpoint = 0; stats->cache_eviction_blocked_checkpoint_hs = 0; stats->cache_eviction_fail = 0; - stats->cache_eviction_blocked_ooo_checkpoint_race_1 = 0; - stats->cache_eviction_blocked_ooo_checkpoint_race_2 = 0; - stats->cache_eviction_blocked_ooo_checkpoint_race_3 = 0; - stats->cache_eviction_blocked_ooo_checkpoint_race_4 = 0; + stats->cache_eviction_blocked_mm_checkpoint_race_1 = 0; + stats->cache_eviction_blocked_mm_checkpoint_race_2 = 0; + stats->cache_eviction_blocked_mm_checkpoint_race_3 = 0; + stats->cache_eviction_blocked_mm_checkpoint_race_4 = 0; stats->cache_eviction_walk_passes = 0; stats->cache_eviction_target_page_lt10 = 0; stats->cache_eviction_target_page_lt32 = 0; @@ -584,14 +584,14 @@ __wt_stat_dsrc_aggregate_single(WT_DSRC_STATS *from, WT_DSRC_STATS *to) to->cache_eviction_checkpoint += from->cache_eviction_checkpoint; to->cache_eviction_blocked_checkpoint_hs += from->cache_eviction_blocked_checkpoint_hs; to->cache_eviction_fail += from->cache_eviction_fail; - to->cache_eviction_blocked_ooo_checkpoint_race_1 += - from->cache_eviction_blocked_ooo_checkpoint_race_1; - to->cache_eviction_blocked_ooo_checkpoint_race_2 += - from->cache_eviction_blocked_ooo_checkpoint_race_2; - to->cache_eviction_blocked_ooo_checkpoint_race_3 += - from->cache_eviction_blocked_ooo_checkpoint_race_3; - to->cache_eviction_blocked_ooo_checkpoint_race_4 += - from->cache_eviction_blocked_ooo_checkpoint_race_4; + to->cache_eviction_blocked_mm_checkpoint_race_1 += + from->cache_eviction_blocked_mm_checkpoint_race_1; + to->cache_eviction_blocked_mm_checkpoint_race_2 += + from->cache_eviction_blocked_mm_checkpoint_race_2; + to->cache_eviction_blocked_mm_checkpoint_race_3 += + from->cache_eviction_blocked_mm_checkpoint_race_3; + to->cache_eviction_blocked_mm_checkpoint_race_4 += + from->cache_eviction_blocked_mm_checkpoint_race_4; to->cache_eviction_walk_passes += from->cache_eviction_walk_passes; to->cache_eviction_target_page_lt10 += from->cache_eviction_target_page_lt10; to->cache_eviction_target_page_lt32 += from->cache_eviction_target_page_lt32; @@ -835,14 +835,14 @@ __wt_stat_dsrc_aggregate(WT_DSRC_STATS **from, WT_DSRC_STATS *to) to->cache_eviction_blocked_checkpoint_hs += WT_STAT_READ(from, cache_eviction_blocked_checkpoint_hs); to->cache_eviction_fail += WT_STAT_READ(from, cache_eviction_fail); - to->cache_eviction_blocked_ooo_checkpoint_race_1 += - WT_STAT_READ(from, cache_eviction_blocked_ooo_checkpoint_race_1); - to->cache_eviction_blocked_ooo_checkpoint_race_2 += - WT_STAT_READ(from, cache_eviction_blocked_ooo_checkpoint_race_2); - to->cache_eviction_blocked_ooo_checkpoint_race_3 += - WT_STAT_READ(from, cache_eviction_blocked_ooo_checkpoint_race_3); - to->cache_eviction_blocked_ooo_checkpoint_race_4 += - WT_STAT_READ(from, cache_eviction_blocked_ooo_checkpoint_race_4); + to->cache_eviction_blocked_mm_checkpoint_race_1 += + WT_STAT_READ(from, cache_eviction_blocked_mm_checkpoint_race_1); + to->cache_eviction_blocked_mm_checkpoint_race_2 += + WT_STAT_READ(from, cache_eviction_blocked_mm_checkpoint_race_2); + to->cache_eviction_blocked_mm_checkpoint_race_3 += + WT_STAT_READ(from, cache_eviction_blocked_mm_checkpoint_race_3); + to->cache_eviction_blocked_mm_checkpoint_race_4 += + WT_STAT_READ(from, cache_eviction_blocked_mm_checkpoint_race_4); to->cache_eviction_walk_passes += WT_STAT_READ(from, cache_eviction_walk_passes); to->cache_eviction_target_page_lt10 += WT_STAT_READ(from, cache_eviction_target_page_lt10); to->cache_eviction_target_page_lt32 += WT_STAT_READ(from, cache_eviction_target_page_lt32); @@ -1097,13 +1097,13 @@ static const char *const __stats_connection_desc[] = { "cache: eviction calls to get a page found queue empty after locking", "cache: eviction currently operating in aggressive mode", "cache: eviction empty score", - "cache: eviction gave up due to detecting an out of order on disk value behind the last update " - "on the chain", - "cache: eviction gave up due to detecting an out of order tombstone ahead of the selected on " - "disk update", - "cache: eviction gave up due to detecting an out of order tombstone ahead of the selected on " - "disk update after validating the update chain", - "cache: eviction gave up due to detecting out of order timestamps on the update chain after the " + "cache: eviction gave up due to detecting a mixed mode on disk value behind the last update on " + "the chain", + "cache: eviction gave up due to detecting a mixed mode tombstone ahead of the selected on disk " + "update", + "cache: eviction gave up due to detecting a mixed mode tombstone ahead of the selected on disk " + "update after validating the update chain", + "cache: eviction gave up due to detecting mixed mode timestamps on the update chain after the " "selected on disk update", "cache: eviction passes of a file", "cache: eviction server candidate queue empty when topping up", @@ -1161,10 +1161,10 @@ static const char *const __stats_connection_desc[] = { "cache: history store table insert calls", "cache: history store table insert calls that returned restart", "cache: history store table max on-disk size", + "cache: history store table mixed mode resolved updates that lose their durable timestamp", + "cache: history store table mixed mode updates that were fixed up by reinserting with the fixed " + "timestamp", "cache: history store table on-disk size", - "cache: history store table out-of-order resolved updates that lose their durable timestamp", - "cache: history store table out-of-order updates that were fixed up by reinserting with the " - "fixed timestamp", "cache: history store table reads", "cache: history store table reads missed", "cache: history store table reads requiring squashed modifies", @@ -1173,7 +1173,7 @@ static const char *const __stats_connection_desc[] = { "cache: history store table truncation to remove an update", "cache: history store table truncation to remove range of updates due to key being removed from " "the data page during reconciliation", - "cache: history store table truncation to remove range of updates due to out-of-order timestamp " + "cache: history store table truncation to remove range of updates due to mixed mode timestamp " "update on data page", "cache: history store table writes requiring squashed modifies", "cache: in-memory page passed criteria to be split", @@ -1211,7 +1211,7 @@ static const char *const __stats_connection_desc[] = { "internal page", "cache: pages selected for eviction unable to be evicted because of failure in reconciliation", "cache: pages selected for eviction unable to be evicted because of race between checkpoint and " - "out of order timestamps handling", + "mixed mode timestamps handling", "cache: pages walked for eviction", "cache: pages written from cache", "cache: pages written requiring in-memory restoration", @@ -1668,10 +1668,10 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->cache_eviction_get_ref_empty2 = 0; /* not clearing cache_eviction_aggressive_set */ /* not clearing cache_eviction_empty_score */ - stats->cache_eviction_blocked_ooo_checkpoint_race_1 = 0; - stats->cache_eviction_blocked_ooo_checkpoint_race_2 = 0; - stats->cache_eviction_blocked_ooo_checkpoint_race_3 = 0; - stats->cache_eviction_blocked_ooo_checkpoint_race_4 = 0; + stats->cache_eviction_blocked_mm_checkpoint_race_1 = 0; + stats->cache_eviction_blocked_mm_checkpoint_race_2 = 0; + stats->cache_eviction_blocked_mm_checkpoint_race_3 = 0; + stats->cache_eviction_blocked_mm_checkpoint_race_4 = 0; stats->cache_eviction_walk_passes = 0; stats->cache_eviction_queue_empty = 0; stats->cache_eviction_queue_not_empty = 0; @@ -1725,9 +1725,9 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->cache_hs_insert = 0; stats->cache_hs_insert_restart = 0; /* not clearing cache_hs_ondisk_max */ - /* not clearing cache_hs_ondisk */ stats->cache_hs_order_lose_durable_timestamp = 0; stats->cache_hs_order_reinsert = 0; + /* not clearing cache_hs_ondisk */ stats->cache_hs_read = 0; stats->cache_hs_read_miss = 0; stats->cache_hs_read_squash = 0; @@ -1770,7 +1770,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->cache_eviction_fail = 0; stats->cache_eviction_fail_active_children_on_an_internal_page = 0; stats->cache_eviction_fail_in_reconciliation = 0; - stats->cache_eviction_fail_checkpoint_out_of_order_ts = 0; + stats->cache_eviction_fail_checkpoint_mm_ts = 0; stats->cache_eviction_walk = 0; stats->cache_write = 0; stats->cache_write_restore = 0; @@ -2200,14 +2200,14 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS * to->cache_eviction_get_ref_empty2 += WT_STAT_READ(from, cache_eviction_get_ref_empty2); to->cache_eviction_aggressive_set += WT_STAT_READ(from, cache_eviction_aggressive_set); to->cache_eviction_empty_score += WT_STAT_READ(from, cache_eviction_empty_score); - to->cache_eviction_blocked_ooo_checkpoint_race_1 += - WT_STAT_READ(from, cache_eviction_blocked_ooo_checkpoint_race_1); - to->cache_eviction_blocked_ooo_checkpoint_race_2 += - WT_STAT_READ(from, cache_eviction_blocked_ooo_checkpoint_race_2); - to->cache_eviction_blocked_ooo_checkpoint_race_3 += - WT_STAT_READ(from, cache_eviction_blocked_ooo_checkpoint_race_3); - to->cache_eviction_blocked_ooo_checkpoint_race_4 += - WT_STAT_READ(from, cache_eviction_blocked_ooo_checkpoint_race_4); + to->cache_eviction_blocked_mm_checkpoint_race_1 += + WT_STAT_READ(from, cache_eviction_blocked_mm_checkpoint_race_1); + to->cache_eviction_blocked_mm_checkpoint_race_2 += + WT_STAT_READ(from, cache_eviction_blocked_mm_checkpoint_race_2); + to->cache_eviction_blocked_mm_checkpoint_race_3 += + WT_STAT_READ(from, cache_eviction_blocked_mm_checkpoint_race_3); + to->cache_eviction_blocked_mm_checkpoint_race_4 += + WT_STAT_READ(from, cache_eviction_blocked_mm_checkpoint_race_4); to->cache_eviction_walk_passes += WT_STAT_READ(from, cache_eviction_walk_passes); to->cache_eviction_queue_empty += WT_STAT_READ(from, cache_eviction_queue_empty); to->cache_eviction_queue_not_empty += WT_STAT_READ(from, cache_eviction_queue_not_empty); @@ -2270,10 +2270,10 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS * to->cache_hs_insert += WT_STAT_READ(from, cache_hs_insert); to->cache_hs_insert_restart += WT_STAT_READ(from, cache_hs_insert_restart); to->cache_hs_ondisk_max += WT_STAT_READ(from, cache_hs_ondisk_max); - to->cache_hs_ondisk += WT_STAT_READ(from, cache_hs_ondisk); to->cache_hs_order_lose_durable_timestamp += WT_STAT_READ(from, cache_hs_order_lose_durable_timestamp); to->cache_hs_order_reinsert += WT_STAT_READ(from, cache_hs_order_reinsert); + to->cache_hs_ondisk += WT_STAT_READ(from, cache_hs_ondisk); to->cache_hs_read += WT_STAT_READ(from, cache_hs_read); to->cache_hs_read_miss += WT_STAT_READ(from, cache_hs_read_miss); to->cache_hs_read_squash += WT_STAT_READ(from, cache_hs_read_squash); @@ -2329,8 +2329,8 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS * WT_STAT_READ(from, cache_eviction_fail_active_children_on_an_internal_page); to->cache_eviction_fail_in_reconciliation += WT_STAT_READ(from, cache_eviction_fail_in_reconciliation); - to->cache_eviction_fail_checkpoint_out_of_order_ts += - WT_STAT_READ(from, cache_eviction_fail_checkpoint_out_of_order_ts); + to->cache_eviction_fail_checkpoint_mm_ts += + WT_STAT_READ(from, cache_eviction_fail_checkpoint_mm_ts); to->cache_eviction_walk += WT_STAT_READ(from, cache_eviction_walk); to->cache_write += WT_STAT_READ(from, cache_write); to->cache_write_restore += WT_STAT_READ(from, cache_write_restore); diff --git a/src/third_party/wiredtiger/src/support/timestamp.c b/src/third_party/wiredtiger/src/support/timestamp.c index 0b6a008de9c..2a75c7f965e 100644 --- a/src/third_party/wiredtiger/src/support/timestamp.c +++ b/src/third_party/wiredtiger/src/support/timestamp.c @@ -270,9 +270,9 @@ __wt_time_aggregate_validate( __wt_time_aggregate_to_string(ta, time_string[0])); /* - * In the case of out of order timestamps, we assign the start point to the stop point and - * newest start durable timestamp may be larger than newest stop timestamp. Check whether start - * and stop are equal first. + * In the case of mixed mode timestamps, we assign the start point to the stop point and newest + * start durable timestamp may be larger than newest stop timestamp. Check whether start and + * stop are equal first. */ if (ta->newest_start_durable_ts != ta->newest_stop_durable_ts && ta->newest_start_durable_ts > ta->newest_stop_ts) @@ -429,7 +429,7 @@ __wt_time_value_validate( __wt_time_window_to_string(tw, time_string[0])); /* - * In the case of out of order timestamps, we assign start time point to the stop point and + * In the case of mixed mode timestamps, we assign start time point to the stop point and * durable start timestamp may be larger than stop timestamp. Check whether start and stop are * equal first. */ diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index 96295cd4dd3..1443e7b28d3 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ b/src/third_party/wiredtiger/src/txn/txn.c @@ -848,22 +848,13 @@ __txn_timestamp_usage_check(WT_SESSION_IMPL *session, WT_TXN_OP *op, WT_UPDATE * name = btree->dhandle->name; txn_has_ts = F_ISSET(txn, WT_TXN_HAS_TS_COMMIT | WT_TXN_HAS_TS_DURABLE); - /* - * Skip timestamp usage checks unless a usage configuration is set. - * - * FIXME: WT-9055 Once WT-9055 goes in, there are no more cases where usage configurations are - * not set, as ordered will be the default. - */ - if (!LF_ISSET(WT_DHANDLE_TS_MIXED_MODE | WT_DHANDLE_TS_NEVER | WT_DHANDLE_TS_ORDERED)) - return (0); - /* Timestamps are ignored on logged files. */ if (F_ISSET(btree, WT_BTREE_LOGGED)) return (0); /* * Do not check for timestamp usage in recovery. We don't expect recovery to be using timestamps - * when applying commits, and it is possible that timestamps may be out of order in log replay. + * when applying commits, and it is possible that timestamps may be mixed mode in log replay. */ if (F_ISSET(S2C(session), WT_CONN_RECOVERING)) return (0); diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c index 9da2c938d08..f60f88d8e61 100644 --- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c +++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c @@ -445,16 +445,16 @@ __rollback_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_REF *ref, WT_ROW *rip, /* * Verify the history store timestamps are in order. The start timestamp may be equal to the - * stop timestamp if the original update's commit timestamp is out of order. We may see - * records newer than or equal to the onpage value if eviction runs concurrently with - * checkpoint. In that case, don't verify the first record. + * stop timestamp if the original update's commit timestamp is in order. We may see records + * newer than or equal to the onpage value if eviction runs concurrently with checkpoint. In + * that case, don't verify the first record. * * It is possible during a prepared transaction rollback, the history store update that have * its own stop timestamp doesn't get removed leads to duplicate records in history store * after further operations on that same key. Rollback to stable should ignore such records * for timestamp ordering verification. * - * If we have fixed the out-of-order timestamps, then the newer update reinserted with an + * If we have fixed the mixed mode timestamps, then the newer update reinserted with an * older timestamp may have a durable timestamp that is smaller than the current stop * durable timestamp. * @@ -558,7 +558,7 @@ __rollback_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_REF *ref, WT_ROW *rip, hs_stop_durable_ts <= rollback_timestamp) { /* * The restoring tombstone timestamp must be zero or less than previous update start - * timestamp or the on-disk update is an out of order prepared. + * timestamp. */ WT_ASSERT(session, hs_stop_durable_ts == WT_TS_NONE || hs_stop_durable_ts < newer_hs_durable_ts || diff --git a/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/workload_tracking.h b/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/workload_tracking.h index f60dcbd1fc5..28e24a626d4 100644 --- a/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/workload_tracking.h +++ b/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/workload_tracking.h @@ -41,7 +41,7 @@ #define OPERATION_TRACKING_VALUE_FORMAT WT_UNCHECKED_STRING(iS) #define OPERATION_TRACKING_TABLE_CONFIG \ "key_format=" OPERATION_TRACKING_KEY_FORMAT ",value_format=" OPERATION_TRACKING_VALUE_FORMAT \ - ",log=(enabled=true)" + ",log=(enabled=true),write_timestamp_usage=mixed_mode" /* * Default schema for tracking schema operations on collections (key_format: Collection id / @@ -51,7 +51,7 @@ #define SCHEMA_TRACKING_VALUE_FORMAT WT_UNCHECKED_STRING(i) #define SCHEMA_TRACKING_TABLE_CONFIG \ "key_format=" SCHEMA_TRACKING_KEY_FORMAT ",value_format=" SCHEMA_TRACKING_VALUE_FORMAT \ - ",log=(enabled=true)" + ",log=(enabled=true),write_timestamp_usage=mixed_mode" namespace test_harness { /* Tracking operations. */ diff --git a/src/third_party/wiredtiger/test/suite/test_assert06.py b/src/third_party/wiredtiger/test/suite/test_assert06.py index 0ccac0d01fb..4f0dc6737f5 100644 --- a/src/third_party/wiredtiger/test/suite/test_assert06.py +++ b/src/third_party/wiredtiger/test/suite/test_assert06.py @@ -71,7 +71,7 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess): # Then alter the setting and verify the inconsistent usage is detected. uri = 'file:assert06' self.session.create(uri, - 'key_format={},value_format={}'.format(self.key_format, self.value_format)) + 'key_format={},value_format={},write_timestamp_usage=mixed_mode'.format(self.key_format, self.value_format)) c = self.session.open_cursor(uri) # Insert a data item at timestamp 2. @@ -81,10 +81,9 @@ class test_assert06(wttest.WiredTigerTestCase, suite_subprocess): self.apply_timestamps(2, True) self.session.commit_transaction() - # Modify the data item at timestamp 1, illegally moving the timestamp backward. + # Modify the data item at non timestamp, illegally moving the timestamp backward. self.session.begin_transaction() c[key] = ds.value(2) - self.apply_timestamps(1, True) self.session.commit_transaction() # Insert a non-timestamped item. diff --git a/src/third_party/wiredtiger/test/suite/test_cursor18.py b/src/third_party/wiredtiger/test/suite/test_cursor18.py index 2eb7028758f..8e1faf7a873 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor18.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor18.py @@ -309,7 +309,7 @@ class test_cursor18(wttest.WiredTigerTestCase): version_cursor.set_key(1) self.assertEquals(version_cursor.search(), 0) self.assertEquals(version_cursor.get_key(), 1) - self.verify_value(version_cursor, 1, 0, WT_TS_MAX, WT_TS_MAX, 3, 1, 8, 0, 0) + self.verify_value(version_cursor, 1, 0, WT_TS_MAX, WT_TS_MAX, 3, 1, 4, 0, 0) self.assertEquals(version_cursor.next(), 0) self.assertEquals(version_cursor.get_key(), 1) self.verify_value(version_cursor, 1, 1, 1, 0, 3, 1, 0, 1, 0) diff --git a/src/third_party/wiredtiger/test/suite/test_durable_ts01.py b/src/third_party/wiredtiger/test/suite/test_durable_ts01.py index 6d45643f54b..75db4db0c75 100644 --- a/src/third_party/wiredtiger/test/suite/test_durable_ts01.py +++ b/src/third_party/wiredtiger/test/suite/test_durable_ts01.py @@ -123,7 +123,7 @@ class test_durable_ts01(wttest.WiredTigerTestCase): self.assertEquals(cursor.update(), 0) self.assertEquals(cursor.next(), 0) - session.prepare_transaction('prepare_timestamp=' + self.timestamp_str(200)) + session.prepare_transaction('prepare_timestamp=' + self.timestamp_str(230)) # Set a stable timestamp so that first update value is durable. # (Must be done after preparing since preparing before stable is prohibited.) diff --git a/src/third_party/wiredtiger/test/suite/test_hs11.py b/src/third_party/wiredtiger/test/suite/test_hs11.py index 647a7bfece5..ceb18f14ec9 100755 --- a/src/third_party/wiredtiger/test/suite/test_hs11.py +++ b/src/third_party/wiredtiger/test/suite/test_hs11.py @@ -60,7 +60,7 @@ class test_hs11(wttest.WiredTigerTestCase): def test_non_ts_updates_clears_hs(self): uri = 'table:test_hs11' - create_params = 'key_format={},value_format={}'.format(self.key_format, self.value_format) + create_params = 'key_format={},value_format={},write_timestamp_usage=mixed_mode'.format(self.key_format, self.value_format) self.session.create(uri, create_params) if self.value_format == '8t': diff --git a/src/third_party/wiredtiger/test/suite/test_hs16.py b/src/third_party/wiredtiger/test/suite/test_hs16.py index edfa207a57d..d4d2e5006cd 100644 --- a/src/third_party/wiredtiger/test/suite/test_hs16.py +++ b/src/third_party/wiredtiger/test/suite/test_hs16.py @@ -47,7 +47,7 @@ class test_hs16(wttest.WiredTigerTestCase): def test_hs16(self): uri = 'table:test_hs16' - create_params = 'key_format={}, value_format={}'.format(self.key_format, self.value_format) + create_params = 'key_format={}, value_format={},write_timestamp_usage=mixed_mode'.format(self.key_format, self.value_format) self.session.create(uri, create_params) cursor = self.session.open_cursor(uri) diff --git a/src/third_party/wiredtiger/test/suite/test_hs18.py b/src/third_party/wiredtiger/test/suite/test_hs18.py index 1600431a974..1b4bbc9e666 100644 --- a/src/third_party/wiredtiger/test/suite/test_hs18.py +++ b/src/third_party/wiredtiger/test/suite/test_hs18.py @@ -66,7 +66,7 @@ class test_hs18(wttest.WiredTigerTestCase): def test_base_scenario(self): uri = 'table:test_base_scenario' - format = 'key_format={},value_format={}'.format(self.key_format, self.value_format) + format = 'key_format={},value_format={},write_timestamp_usage=mixed_mode'.format(self.key_format, self.value_format) self.session.create(uri, format) session2 = self.setUpSessionOpen(self.conn) cursor = self.session.open_cursor(uri) @@ -133,7 +133,7 @@ class test_hs18(wttest.WiredTigerTestCase): # Test that we don't get the wrong value if we read with a timestamp originally. def test_read_timestamp_weirdness(self): uri = 'table:test_hs18' - format = 'key_format={},value_format={}'.format(self.key_format, self.value_format) + format = 'key_format={},value_format={},write_timestamp_usage=mixed_mode'.format(self.key_format, self.value_format) self.session.create(uri, format) cursor = self.session.open_cursor(uri) session2 = self.setUpSessionOpen(self.conn) @@ -204,7 +204,7 @@ class test_hs18(wttest.WiredTigerTestCase): # Test that forces us to ignore tombstone in order to not remove the first non timestamped updated. def test_ignore_tombstone(self): uri = 'table:test_ignore_tombstone' - format = 'key_format={},value_format={}'.format(self.key_format, self.value_format) + format = 'key_format={},value_format={},write_timestamp_usage=mixed_mode'.format(self.key_format, self.value_format) self.session.create(uri, format) session2 = self.setUpSessionOpen(self.conn) cursor = self.session.open_cursor(uri) @@ -266,7 +266,7 @@ class test_hs18(wttest.WiredTigerTestCase): # Test older readers for each of the updates moved to the history store. def test_multiple_older_readers(self): uri = 'table:test_multiple_older_readers' - format = 'key_format={},value_format={}'.format(self.key_format, self.value_format) + format = 'key_format={},value_format={},write_timestamp_usage=mixed_mode'.format(self.key_format, self.value_format) self.session.create(uri, format) cursor = self.session.open_cursor(uri) @@ -336,7 +336,7 @@ class test_hs18(wttest.WiredTigerTestCase): def test_multiple_older_readers_with_multiple_mixed_mode(self): uri = 'table:test_multiple_older_readers' - format = 'key_format={},value_format={}'.format(self.key_format, self.value_format) + format = 'key_format={},value_format={},write_timestamp_usage=mixed_mode'.format(self.key_format, self.value_format) self.session.create(uri, format) cursor = self.session.open_cursor(uri) @@ -453,7 +453,7 @@ class test_hs18(wttest.WiredTigerTestCase): return uri = 'table:test_modifies' - format = 'key_format={},value_format={}'.format(self.key_format, self.value_format) + format = 'key_format={},value_format={},write_timestamp_usage=mixed_mode'.format(self.key_format, self.value_format) self.session.create(uri, format) cursor = self.session.open_cursor(uri) session_ts_reader = self.setUpSessionOpen(self.conn) diff --git a/src/third_party/wiredtiger/test/suite/test_hs22.py b/src/third_party/wiredtiger/test/suite/test_hs22.py deleted file mode 100644 index 3cfc223a0b7..00000000000 --- a/src/third_party/wiredtiger/test/suite/test_hs22.py +++ /dev/null @@ -1,288 +0,0 @@ -#!/usr/bin/env python -# -# Public Domain 2014-present MongoDB, Inc. -# Public Domain 2008-2014 WiredTiger, Inc. -# -# This is free and unencumbered software released into the public domain. -# -# Anyone is free to copy, modify, publish, use, compile, sell, or -# distribute this software, either in source code form or as a compiled -# binary, for any purpose, commercial or non-commercial, and by any -# means. -# -# In jurisdictions that recognize copyright laws, the author or authors -# of this software dedicate any and all copyright interest in the -# software to the public domain. We make this dedication for the benefit -# of the public at large and to the detriment of our heirs and -# successors. We intend this dedication to be an overt act of -# relinquishment in perpetuity of all present and future rights to this -# software under copyright law. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. - -import wttest -from wtscenario import make_scenarios - -''' - test_hs22.py - Test the following cases with out of order(OOO) timestamps: - - OOO update followed by a tombstone. - - Multiple OOO updates in a single transaction. - - Most recent OOO updates that require squashing. -''' -class test_hs22(wttest.WiredTigerTestCase): - conn_config = 'cache_size=50MB' - - format_values = [ - ('column', dict(key_format='r', key1=1, key2=2, value_format='S')), - ('column-fix', dict(key_format='r', key1=1, key2=2, value_format='8t')), - ('string-row', dict(key_format='S', key1=str(0), key2=str(1), value_format='S')), - ] - - scenarios = make_scenarios(format_values) - - def test_onpage_out_of_order_timestamp_update(self): - uri = 'table:test_hs22' - format = 'key_format={},value_format={}'.format(self.key_format, self.value_format) - self.session.create(uri, format) - cursor = self.session.open_cursor(uri) - self.conn.set_timestamp( - 'oldest_timestamp=' + self.timestamp_str(1) + - ',stable_timestamp=' + self.timestamp_str(1)) - - key1 = self.key1 - key2 = self.key2 - - if self.value_format == '8t': - value1 = 97 # 'a' - value2 = 98 # 'b' - else: - value1 = 'a' - value2 = 'b' - - # Insert a key. - self.session.begin_transaction() - cursor[key1] = value1 - self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(10)) - - # Remove the key. - self.session.begin_transaction() - cursor.set_key(key1) - self.assertEqual(cursor.remove(), 0) - self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(20)) - - # Do an out of order timestamp - # update and write it to the data - # store later. - self.session.begin_transaction() - cursor[key1] = value2 - self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(15)) - - # Insert another key. - self.session.begin_transaction() - cursor[key2] = value1 - self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(20)) - - # Update the key. - self.session.begin_transaction() - cursor[key2] = value2 - self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(30)) - - # Do a checkpoint to trigger - # history store reconciliation. - self.session.checkpoint() - - evict_cursor = self.session.open_cursor(uri, None, "debug=(release_evict)") - - # Search the key to evict it. - self.session.begin_transaction("read_timestamp=" + self.timestamp_str(15)) - self.assertEqual(evict_cursor[key1], value2) - self.assertEqual(evict_cursor.reset(), 0) - self.session.rollback_transaction() - - # Search the key again to verify the data is still as expected. - self.session.begin_transaction("read_timestamp=" + self.timestamp_str(15)) - self.assertEqual(cursor[key1], value2) - self.session.rollback_transaction() - - def test_out_of_order_timestamp_update_newer_than_tombstone(self): - uri = 'table:test_hs22' - format = 'key_format={},value_format={}'.format(self.key_format, self.value_format) - self.session.create(uri, format) - cursor = self.session.open_cursor(uri) - self.conn.set_timestamp( - 'oldest_timestamp=' + self.timestamp_str(1) + - ',stable_timestamp=' + self.timestamp_str(1)) - - key1 = self.key1 - key2 = self.key2 - - if self.value_format == '8t': - value1 = 97 # 'a' - value2 = 98 # 'b' - else: - value1 = 'a' - value2 = 'b' - - # Insert a key. - self.session.begin_transaction() - cursor[key1] = value1 - self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(10)) - - # Remove a key. - self.session.begin_transaction() - cursor.set_key(key1) - self.assertEqual(cursor.remove(), 0) - self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(20)) - - # Do an out of order timestamp - # update and write it to the - # history store later. - self.session.begin_transaction() - cursor[key1] = value2 - self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(15)) - - # Add another update. - self.session.begin_transaction() - cursor[key1] = value1 - self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(20)) - - # Insert another key. - self.session.begin_transaction() - cursor[key2] = value1 - self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(20)) - - # Update the key. - self.session.begin_transaction() - cursor[key2] = value2 - self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(30)) - - # Do a checkpoint to trigger - # history store reconciliation. - self.session.checkpoint() - - evict_cursor = self.session.open_cursor(uri, None, "debug=(release_evict)") - - # Search the key to evict it. - self.session.begin_transaction("read_timestamp=" + self.timestamp_str(15)) - self.assertEqual(evict_cursor[key1], value2) - self.assertEqual(evict_cursor.reset(), 0) - self.session.rollback_transaction() - - # Search the key again to verify the data is still as expected. - self.session.begin_transaction("read_timestamp=" + self.timestamp_str(15)) - self.assertEqual(cursor[key1], value2) - self.session.rollback_transaction() - - def test_out_of_order_timestamp_update_same_txn(self): - uri = 'table:test_hs22' - format = 'key_format={},value_format={}'.format(self.key_format, self.value_format) - self.session.create(uri, format) - cursor = self.session.open_cursor(uri) - self.conn.set_timestamp( - 'oldest_timestamp=' + self.timestamp_str(1) + - ',stable_timestamp=' + self.timestamp_str(1)) - - key1 = self.key1 - - if self.value_format == '8t': - value1 = 97 # 'a' - value2 = 98 # 'b' - value3 = 99 # 'c' - value4 = 100 # 'd' - else: - value1 = 'a' - value2 = 'b' - value3 = 'c' - value4 = 'd' - - self.session.begin_transaction() - self.session.timestamp_transaction('commit_timestamp=' + self.timestamp_str(2)) - cursor[key1] = value1 - self.session.timestamp_transaction('commit_timestamp=' + self.timestamp_str(3)) - cursor[key1] = value2 - self.session.timestamp_transaction('commit_timestamp=' + self.timestamp_str(4)) - cursor[key1] = value3 - self.session.timestamp_transaction('commit_timestamp=' + self.timestamp_str(3)) - cursor[key1] = value4 - self.session.commit_transaction() - - # Do a checkpoint to trigger - # history store reconciliation. - self.session.checkpoint() - - evict_cursor = self.session.open_cursor(uri, None, "debug=(release_evict)") - - # Search the key to evict it. - self.session.begin_transaction("read_timestamp=" + self.timestamp_str(10)) - self.assertEqual(evict_cursor[key1], value4) - self.assertEqual(evict_cursor.reset(), 0) - self.session.rollback_transaction() - - # Search the key again to verify the data is still as expected. - self.session.begin_transaction("read_timestamp=" + self.timestamp_str(10)) - self.assertEqual(cursor[key1], value4) - self.session.rollback_transaction() - - def test_out_of_order_timestamp_squash_updates(self): - uri = 'table:test_hs22' - format = 'key_format={},value_format={}'.format(self.key_format, self.value_format) - self.session.create(uri, format) - cursor = self.session.open_cursor(uri) - self.conn.set_timestamp( - 'oldest_timestamp=' + self.timestamp_str(1) + - ',stable_timestamp=' + self.timestamp_str(1)) - - key1 = self.key1 - - if self.value_format == '8t': - value1 = 97 # 'a' - value2 = 98 # 'b' - value3 = 99 # 'c' - value4 = 100 # 'd' - value5 = 101 # 'e' - else: - value1 = 'a' - value2 = 'b' - value3 = 'c' - value4 = 'd' - value5 = 'e' - - self.session.begin_transaction() - cursor[key1] = value1 - self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(15)) - - self.session.begin_transaction() - cursor[key1] = value2 - self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(18)) - - self.session.begin_transaction() - cursor.set_key(key1) - cursor.remove() - cursor[key1] = value3 - cursor[key1] = value4 - cursor[key1] = value5 - self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(11)) - - # Do a checkpoint to trigger - # history store reconciliation. - self.session.checkpoint() - - evict_cursor = self.session.open_cursor(uri, None, "debug=(release_evict)") - - # Search the key to evict it. - self.session.begin_transaction("read_timestamp=" + self.timestamp_str(18)) - self.assertEqual(evict_cursor[key1], value5) - self.assertEqual(evict_cursor.reset(), 0) - self.session.rollback_transaction() - - # Search the key again to verify the data is still as expected. - self.session.begin_transaction("read_timestamp=" + self.timestamp_str(18)) - self.assertEqual(cursor[key1], value5) - self.session.rollback_transaction() diff --git a/src/third_party/wiredtiger/test/suite/test_hs23.py b/src/third_party/wiredtiger/test/suite/test_hs23.py deleted file mode 100644 index b251688f594..00000000000 --- a/src/third_party/wiredtiger/test/suite/test_hs23.py +++ /dev/null @@ -1,122 +0,0 @@ -#!/usr/bin/env python -# -# Public Domain 2014-present MongoDB, Inc. -# Public Domain 2008-2014 WiredTiger, Inc. -# -# This is free and unencumbered software released into the public domain. -# -# Anyone is free to copy, modify, publish, use, compile, sell, or -# distribute this software, either in source code form or as a compiled -# binary, for any purpose, commercial or non-commercial, and by any -# means. -# -# In jurisdictions that recognize copyright laws, the author or authors -# of this software dedicate any and all copyright interest in the -# software to the public domain. We make this dedication for the benefit -# of the public at large and to the detriment of our heirs and -# successors. We intend this dedication to be an overt act of -# relinquishment in perpetuity of all present and future rights to this -# software under copyright law. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. - -import wttest -from wtscenario import make_scenarios - -# test_hs23.py -# Test the case that we have update, out of order timestamp -# update, and update again in the same transaction -class test_hs23(wttest.WiredTigerTestCase): - conn_config = 'cache_size=50MB' - - format_values = [ - ('column', dict(key_format='r', key=1, value_format='S')), - ('column-fix', dict(key_format='r', key=1, value_format='8t')), - ('string-row', dict(key_format='S', key=str(0), value_format='S')), - ] - - scenarios = make_scenarios(format_values) - - def test(self): - uri = 'table:test_hs23' - format = 'key_format={},value_format={}'.format(self.key_format, self.value_format) - self.session.create(uri, format) - cursor = self.session.open_cursor(uri) - self.conn.set_timestamp( - 'oldest_timestamp=' + self.timestamp_str(1) + ',stable_timestamp=' + self.timestamp_str(1)) - - key = self.key - - if self.value_format == '8t': - value1 = 97 # 'a' - value2 = 98 # 'b' - value3 = 99 # 'c' - value4 = 100 # 'd' - value5 = 101 # 'e' - else: - value1 = 'a' - value2 = 'b' - value3 = 'c' - value4 = 'd' - value5 = 'e' - - # Insert a key. - self.session.begin_transaction() - cursor[key] = value1 - self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(2)) - - # Update at 10, update at 20, update at 15 (out of order), and - # update at 20 in the same transaction - self.session.begin_transaction() - cursor.set_key(key) - cursor.set_value(value2) - self.session.timestamp_transaction( - 'commit_timestamp=' + self.timestamp_str(10)) - self.assertEquals(cursor.update(), 0) - - cursor.set_key(key) - cursor.set_value(value3) - self.session.timestamp_transaction( - 'commit_timestamp=' + self.timestamp_str(20)) - self.assertEquals(cursor.update(), 0) - - cursor.set_key(key) - cursor.set_value(value4) - self.session.timestamp_transaction( - 'commit_timestamp=' + self.timestamp_str(15)) - self.assertEquals(cursor.update(), 0) - - cursor.set_key(key) - cursor.set_value(value5) - self.session.timestamp_transaction( - 'commit_timestamp=' + self.timestamp_str(20)) - self.assertEquals(cursor.update(), 0) - self.session.commit_transaction() - - # Do a checkpoint to trigger - # history store reconciliation. - self.session.checkpoint() - - evict_cursor = self.session.open_cursor(uri, None, "debug=(release_evict)") - - # Search the key to evict it. - self.session.begin_transaction() - self.assertEqual(evict_cursor[key], value5) - self.assertEqual(evict_cursor.reset(), 0) - self.session.rollback_transaction() - - # Search the latest update - self.session.begin_transaction("read_timestamp=" + self.timestamp_str(20)) - self.assertEqual(cursor[key], value5) - self.session.rollback_transaction() - - # Search the out of order timestamp update - self.session.begin_transaction("read_timestamp=" + self.timestamp_str(15)) - self.assertEqual(cursor[key], value4) - self.session.rollback_transaction() diff --git a/src/third_party/wiredtiger/test/suite/test_hs24.py b/src/third_party/wiredtiger/test/suite/test_hs24.py index 4845b027258..640530ac8ce 100644 --- a/src/third_party/wiredtiger/test/suite/test_hs24.py +++ b/src/third_party/wiredtiger/test/suite/test_hs24.py @@ -33,7 +33,7 @@ from helper import simulate_crash_restart from wtscenario import make_scenarios # test_hs24.py -# Test that out of order timestamp fix racing with checkpointing the history store doesn't create inconsistent checkpoint. +# Test that mixed mode timestamp fix racing with checkpointing the history store doesn't create inconsistent checkpoint. class test_hs24(wttest.WiredTigerTestCase): format_values = [ ('column', dict(key_format='r', value_format='S')), @@ -55,7 +55,7 @@ class test_hs24(wttest.WiredTigerTestCase): numrows = 2000 def moresetup(self): - self.format = 'key_format={},value_format={}'. format(self.key_format, self.value_format) + self.format = 'key_format={},value_format={},write_timestamp_usage=mixed_mode'. format(self.key_format, self.value_format) if self.value_format == '8t': self.value1 = 97 self.value2 = 98 @@ -176,51 +176,3 @@ class test_hs24(wttest.WiredTigerTestCase): session.commit_transaction() cursor.close() session.close() - - def test_out_of_order_ts(self): - self.moresetup() - self.session.create(self.uri, self.format) - self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(1)) - cursor = self.session.open_cursor(self.uri) - for i in range(1, self.numrows + 1): - self.session.begin_transaction() - cursor[i] = self.value1 - self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(2)) - self.session.begin_transaction() - cursor[i] = self.value2 - self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(3)) - self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(2)) - for i in range(1, self.numrows + 1): - self.session.begin_transaction() - cursor[i] = self.value3 - self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(6)) - cursor.close() - thread = threading.Thread(target=self.out_of_order_ts_commits) - thread.start() - self.session.checkpoint() - thread.join() - simulate_crash_restart(self, '.', "RESTART") - cursor = self.session.open_cursor(self.uri) - self.session.begin_transaction('read_timestamp=' + self.timestamp_str(2)) - # Check we can only see the version at timestamp 2, it's either committed by the out of - # order timestamp commit thread before the checkpoint starts or value1. - newer_data_visible = False - for i in range(1, self.numrows + 1): - value = cursor[i] - if not newer_data_visible: - newer_data_visible = value != self.value4 - if newer_data_visible: - self.assertEquals(value, self.value1) - else: - self.assertEquals(value, self.value4) - self.session.rollback_transaction() - - def out_of_order_ts_commits(self): - session = self.setUpSessionOpen(self.conn) - cursor = session.open_cursor(self.uri) - for i in range(1, self.numrows + 1): - session.begin_transaction() - cursor[i] = self.value4 - session.commit_transaction('commit_timestamp=' + self.timestamp_str(4)) - cursor.close() - session.close() diff --git a/src/third_party/wiredtiger/test/suite/test_hs29.py b/src/third_party/wiredtiger/test/suite/test_hs29.py index da81c6a94d3..4540f4b4332 100644 --- a/src/third_party/wiredtiger/test/suite/test_hs29.py +++ b/src/third_party/wiredtiger/test/suite/test_hs29.py @@ -33,7 +33,7 @@ import wttest # occurs: # - The reconciliation process opens one history store cursor. # - The function hs_delete_reinsert_from_pos creates a history store cursor too. This means we need -# an update with an OOO timestamp to trigger that function. +# an update with an mixed mode timestamp which is not globally visible to trigger that function. # - The function wt_rec_hs_clear_on_tombstone creates a history store cursor as well. This means we # need a tombstone to trigger the function, i.e a deleted key. class test_hs29(wttest.WiredTigerTestCase): @@ -42,8 +42,12 @@ class test_hs29(wttest.WiredTigerTestCase): # Create a table. uri = "table:test_hs_cursor" - self.session.create(uri, 'key_format=S,value_format=S') - + self.session.create(uri, 'key_format=S,value_format=S,write_timestamp_usage=mixed_mode') + + # Pin oldest and stable to timestamp 1. + self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(1) + + ',stable_timestamp=' + self.timestamp_str(1)) + # Open one cursor to operate on the table and another one to perform eviction. cursor = self.session.open_cursor(uri) cursor2 = self.session.open_cursor(uri, None, "debug=(release_evict=true)") @@ -76,16 +80,24 @@ class test_hs29(wttest.WiredTigerTestCase): self.assertEqual(cursor2.get_value(), '22') self.assertEqual(cursor2.reset(), 0) + old_reader_session = self.conn.open_session() + old_reader_cursor = old_reader_session.open_cursor(uri) + old_reader_session.begin_transaction('read_timestamp=' + self.timestamp_str(2)) + # Remove the first key without giving a ts. self.session.begin_transaction() cursor.set_key('1') cursor.remove() self.session.commit_transaction() - # Update the second key with out of order timestamp. + # Update the second key with mixed mode timestamp. self.session.begin_transaction() cursor['2'] = '222' - self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(5)) + self.session.commit_transaction() + + # Pin stable to timestamp 20. + self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(20)) + self.session.checkpoint() # Close the connection to trigger a final checkpoint and reconciliation. self.conn.close() diff --git a/src/third_party/wiredtiger/test/suite/test_hs31.py b/src/third_party/wiredtiger/test/suite/test_hs31.py index d793ce11c44..d502d0f6a5a 100644 --- a/src/third_party/wiredtiger/test/suite/test_hs31.py +++ b/src/third_party/wiredtiger/test/suite/test_hs31.py @@ -31,7 +31,7 @@ from wtscenario import make_scenarios from wiredtiger import stat # test_hs31.py -# Ensure that tombstone with out of order timestamp clear the history store records. +# Ensure that tombstone with no timestamp clear the history store records. class test_hs31(wttest.WiredTigerTestCase): conn_config = 'cache_size=5MB,statistics=(all)' format_values = [ @@ -41,17 +41,12 @@ class test_hs31(wttest.WiredTigerTestCase): ('string-row', dict(key_format='S', value_format='S')), ] - ooo_values = [ - ('out-of-order', dict(ooo_value=True)), - ('mixed-mode', dict(ooo_value=False)), - ] - globally_visible_before_ckpt_values = [ ('globally_visible_before_ckpt', dict(globally_visible_before_ckpt=True)), ('no_globally_visible_before_ckpt', dict(globally_visible_before_ckpt=False)), ] - scenarios = make_scenarios(format_values, ooo_values, globally_visible_before_ckpt_values) + scenarios = make_scenarios(format_values, globally_visible_before_ckpt_values) nrows = 1000 def create_key(self, i): @@ -65,9 +60,9 @@ class test_hs31(wttest.WiredTigerTestCase): stat_cursor.close() return val - def test_ooo_tombstone_clear_hs(self): + def test_mm_tombstone_clear_hs(self): uri = 'file:test_hs31' - create_params = 'key_format={},value_format={}'.format(self.key_format, self.value_format) + create_params = 'key_format={},value_format={},write_timestamp_usage=mixed_mode'.format(self.key_format, self.value_format) self.session.create(uri, create_params) if self.value_format == '8t': @@ -101,44 +96,39 @@ class test_hs31(wttest.WiredTigerTestCase): cursor2.reset() self.session.rollback_transaction() - if not self.ooo_value: - self.session.breakpoint() - # Start a long running transaction to stop the oldest id being advanced. - session2 = self.conn.open_session() - session2.begin_transaction() - long_cursor = session2.open_cursor(uri, None) - long_cursor[self.create_key(self.nrows + 10)] = value1 - long_cursor.reset() - long_cursor.close() + self.session.breakpoint() + # Start a long running transaction to stop the oldest id being advanced. + session2 = self.conn.open_session() + session2.begin_transaction() + long_cursor = session2.open_cursor(uri, None) + long_cursor[self.create_key(self.nrows + 10)] = value1 + long_cursor.reset() + long_cursor.close() # Remove the key with an ooo or mm timestamp. for i in range(1, self.nrows): self.session.begin_transaction() cursor.set_key(self.create_key(i)) cursor.remove() - if self.ooo_value: - self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(5)) - else: - self.session.commit_transaction() + self.session.commit_transaction() if not self.globally_visible_before_ckpt: # Reconcile to write the stop time window. self.session.checkpoint() - if not self.ooo_value: - self.session.breakpoint() - # Ensure that old reader can read the history content. - long_cursor = session2.open_cursor(uri, None) - for i in range(1, self.nrows): - long_cursor.set_key(self.create_key(i)) - self.assertEqual(long_cursor.search(), 0) - self.assertEqual(long_cursor.get_value(), value1) - long_cursor.reset() - long_cursor.close() + self.session.breakpoint() + # Ensure that old reader can read the history content. + long_cursor = session2.open_cursor(uri, None) + for i in range(1, self.nrows): + long_cursor.set_key(self.create_key(i)) + self.assertEqual(long_cursor.search(), 0) + self.assertEqual(long_cursor.get_value(), value1) + long_cursor.reset() + long_cursor.close() - # Rollback the long running transaction. - session2.rollback_transaction() - session2.close() + # Rollback the long running transaction. + session2.rollback_transaction() + session2.close() # Pin oldest and stable to timestamp 5 so that the ooo tombstone is globally visible. self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(10) + @@ -179,4 +169,3 @@ class test_hs31(wttest.WiredTigerTestCase): hs_truncate = self.get_stat(stat.conn.cache_hs_key_truncate_onpage_removal) self.assertGreater(hs_truncate, 0) - diff --git a/src/third_party/wiredtiger/test/suite/test_hs_evict_race01.py b/src/third_party/wiredtiger/test/suite/test_hs_evict_race01.py index 50f3761030b..a6b8d6bf7f5 100644 --- a/src/third_party/wiredtiger/test/suite/test_hs_evict_race01.py +++ b/src/third_party/wiredtiger/test/suite/test_hs_evict_race01.py @@ -31,16 +31,16 @@ import wttest, threading from helper import simulate_crash_restart from wtscenario import make_scenarios -# Test a bug that can occur when an out of order update gets insert after a checkpoint begins +# Test a bug that can occur when a mixed mode update gets insert after a checkpoint begins # but before the checkpoint processes the btree. Evict that update before checkpoint but fail the -# eviction due to out of order timestamps. +# eviction due to mixed mode timestamps. # # Without the related change this test would fail as a result of an inconsistent checkpoint. Due to # a flag being set on an update incorrectly. Specific ordering is required to reproduce: # 1. Start a checkpoint, sleep the checkpoint after it takes it snapshot and before it # processes our btree. -# 2. Insert the out of order update. -# 3. Evict the out of order update. +# 2. Insert the mixed mode update. +# 3. Evict the mixed mode update. # 4. Complete the checkpoint. # 5. Simulate a crash. # 6. Read the value and see if it matches the expected value. @@ -59,8 +59,8 @@ class test_hs_evict_race01(wttest.WiredTigerTestCase): value3 = 'ccccc' value4 = 'ddddd' - def test_out_of_order_ts(self): - self.session.create(self.uri, 'key_format={},value_format=S'.format(self.key_format)) + def test_mm_ts(self): + self.session.create(self.uri, 'key_format={},value_format=S,write_timestamp_usage=mixed_mode'.format(self.key_format)) self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(1)) cursor = self.session.open_cursor(self.uri) # Insert a value at timestamp 4 @@ -77,7 +77,7 @@ class test_hs_evict_race01(wttest.WiredTigerTestCase): cursor.close() # Create a thread. - ooo_thread = threading.Thread(target=self.out_of_order_update_and_evict) + ooo_thread = threading.Thread(target=self.no_timestamp_update_and_evict) # Start the thread ooo_thread.start() @@ -91,13 +91,13 @@ class test_hs_evict_race01(wttest.WiredTigerTestCase): self.assertEquals(self.value1, cursor[1]) self.session.rollback_transaction() - def out_of_order_update_and_evict(self): + def no_timestamp_update_and_evict(self): sleep(0.5) session = self.setUpSessionOpen(self.conn) cursor = session.open_cursor(self.uri) session.begin_transaction() cursor[1] = self.value4 - session.commit_transaction('commit_timestamp=' + self.timestamp_str(5)) + session.commit_transaction() cursor.close() sleep(1.5) evict_cursor = session.open_cursor(self.uri, None, "debug=(release_evict)") diff --git a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable11.py b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable11.py index 99b1e3a4d29..9667820c282 100755 --- a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable11.py +++ b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable11.py @@ -77,9 +77,9 @@ class test_rollback_to_stable11(test_rollback_to_stable_base): ',stable_timestamp=' + self.timestamp_str(10)) # Perform several updates. - self.large_updates(uri, value_a, ds, nrows, self.prepare, 20) - self.large_updates(uri, value_a, ds, nrows, self.prepare, 20) - self.large_updates(uri, value_a, ds, nrows, self.prepare, 20) + self.large_updates(uri, value_a, ds, nrows, self.prepare, 12) + self.large_updates(uri, value_a, ds, nrows, self.prepare, 14) + self.large_updates(uri, value_a, ds, nrows, self.prepare, 16) self.large_updates(uri, value_b, ds, nrows, self.prepare, 20) # Verify data is visible and correct. @@ -104,12 +104,12 @@ class test_rollback_to_stable11(test_rollback_to_stable_base): # Perform several updates. self.large_updates(uri, value_c, ds, nrows, self.prepare, 30) - self.large_updates(uri, value_c, ds, nrows, self.prepare, 30) - self.large_updates(uri, value_c, ds, nrows, self.prepare, 30) - self.large_updates(uri, value_d, ds, nrows, self.prepare, 30) + self.large_updates(uri, value_c, ds, nrows, self.prepare, 32) + self.large_updates(uri, value_c, ds, nrows, self.prepare, 34) + self.large_updates(uri, value_d, ds, nrows, self.prepare, 36) # Verify data is visible and correct. - self.check(value_d, uri, nrows, None, 31 if self.prepare else 30) + self.check(value_d, uri, nrows, None, 37 if self.prepare else 36) # Checkpoint to ensure that all the updates are flushed to disk. self.session.checkpoint() diff --git a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable21.py b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable21.py deleted file mode 100644 index d9270a14ff5..00000000000 --- a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable21.py +++ /dev/null @@ -1,264 +0,0 @@ -#!/usr/bin/env python -# -# Public Domain 2014-present MongoDB, Inc. -# Public Domain 2008-2014 WiredTiger, Inc. -# -# This is free and unencumbered software released into the public domain. -# -# Anyone is free to copy, modify, publish, use, compile, sell, or -# distribute this software, either in source code form or as a compiled -# binary, for any purpose, commercial or non-commercial, and by any -# means. -# -# In jurisdictions that recognize copyright laws, the author or authors -# of this software dedicate any and all copyright interest in the -# software to the public domain. We make this dedication for the benefit -# of the public at large and to the detriment of our heirs and -# successors. We intend this dedication to be an overt act of -# relinquishment in perpetuity of all present and future rights to this -# software under copyright law. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -# [TEST_TAGS] -# rollback_to_stable:prepare -# rollback_to_stable:out_of_order_timestamps -# [END_TAGS] - -from wiredtiger import stat, WT_NOTFOUND -from wtscenario import make_scenarios -from helper import simulate_crash_restart -from wtdataset import SimpleDataSet -from test_rollback_to_stable01 import test_rollback_to_stable_base - -# test_rollback_to_stable21.py -# Test rollback to stable when an out of order prepared transaction is written to disk -class test_rollback_to_stable21(test_rollback_to_stable_base): - format_values = [ - ('column', dict(key_format='r', value_format='S')), - ('column_fix', dict(key_format='r', value_format='8t')), - ('row_integer', dict(key_format='i', value_format='S')), - ] - - scenarios = make_scenarios(format_values) - - def conn_config(self): - config = 'cache_size=250MB,statistics=(all),statistics_log=(json,on_close,wait=1)' - return config - - def test_rollback_to_stable(self): - nrows = 1000 - - # Create a table without logging. - uri = "table:rollback_to_stable21" - ds = SimpleDataSet(self, uri, 0, key_format=self.key_format, value_format=self.value_format) - ds.populate() - - if self.value_format == '8t': - valuea = 97 - valueb = 98 - else: - valuea = 'a' * 400 - valueb = 'b' * 400 - - # Pin oldest and stable timestamps to 10. - self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(10) + - ',stable_timestamp=' + self.timestamp_str(10)) - - cursor = self.session.open_cursor(uri) - self.session.begin_transaction() - for i in range(1, nrows + 1): - cursor[i] = valuea - - self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(30)) - - self.session.begin_transaction() - for i in range(1, nrows + 1): - cursor[i] = valueb - - cursor.reset() - cursor.close() - self.session.prepare_transaction('prepare_timestamp=' + self.timestamp_str(20)) - - s = self.conn.open_session() - s.begin_transaction('ignore_prepare = true') - # Configure debug behavior on a cursor to evict the page positioned on when the reset API is used. - evict_cursor = s.open_cursor(uri, None, "debug=(release_evict)") - - for i in range(1, nrows + 1): - evict_cursor.set_key(i) - self.assertEquals(evict_cursor.search(), 0) - self.assertEqual(evict_cursor.get_value(), valuea) - evict_cursor.reset() - - s.rollback_transaction() - self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(40)) - s.checkpoint() - - # Rollback the prepared transaction - self.session.rollback_transaction() - - # Simulate a server crash and restart. - self.pr("restart") - simulate_crash_restart(self, ".", "RESTART") - self.pr("restart complete") - - self.check(valuea, uri, nrows, None, 40) - - stat_cursor = self.session.open_cursor('statistics:', None, None) - hs_removed = stat_cursor[stat.conn.txn_rts_hs_removed][2] - stat_cursor.close() - - self.assertGreater(hs_removed, 0) - - def test_rollback_to_stable_with_different_tombstone(self): - nrows = 1000 - - # Create a table without logging. - uri = "table:rollback_to_stable21" - ds = SimpleDataSet(self, uri, 0, key_format=self.key_format, value_format=self.value_format) - ds.populate() - - if self.value_format == '8t': - valuea = 97 - valueb = 98 - else: - valuea = 'a' * 400 - valueb = 'b' * 400 - - # Pin oldest and stable timestamps to 10. - self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(10) + - ',stable_timestamp=' + self.timestamp_str(10)) - - cursor = self.session.open_cursor(uri) - self.session.begin_transaction() - for i in range(1, nrows + 1): - cursor[i] = valuea - self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(30)) - - self.session.begin_transaction() - for i in range(1, nrows + 1): - cursor.set_key(i) - cursor.remove() - self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(40)) - - self.session.begin_transaction() - for i in range(1, nrows + 1): - cursor[i] = valueb - - cursor.reset() - cursor.close() - self.session.prepare_transaction('prepare_timestamp=' + self.timestamp_str(20)) - - s = self.conn.open_session() - s.begin_transaction('ignore_prepare = true, read_timestamp = ' + self.timestamp_str(30)) - # Configure debug behavior on a cursor to evict the page positioned on when the reset API is used. - evict_cursor = s.open_cursor(uri, None, "debug=(release_evict)") - - for i in range(1, nrows + 1): - evict_cursor.set_key(i) - self.assertEquals(evict_cursor.search(), 0) - self.assertEqual(evict_cursor.get_value(), valuea) - evict_cursor.reset() - - s.rollback_transaction() - self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(40)) - s.checkpoint() - - # Rollback the prepared transaction - self.session.rollback_transaction() - - # Simulate a server crash and restart. - self.pr("restart") - simulate_crash_restart(self, ".", "RESTART") - self.pr("restart complete") - - self.check(valuea, uri, nrows, None, 30) - self.check(valuea, uri, 0, nrows, 40) - - stat_cursor = self.session.open_cursor('statistics:', None, None) - hs_removed = stat_cursor[stat.conn.txn_rts_hs_removed][2] - hs_restored_tombstone = stat_cursor[stat.conn.txn_rts_hs_restore_tombstones][2] - stat_cursor.close() - - self.assertGreater(hs_removed, 0) - self.assertGreater(hs_restored_tombstone, 0) - - def test_rollback_to_stable_with_same_tombstone(self): - nrows = 1000 - - # Create a table without logging. - uri = "table:rollback_to_stable21" - ds = SimpleDataSet(self, uri, 0, key_format=self.key_format, value_format=self.value_format) - ds.populate() - - if self.value_format == '8t': - valuea = 97 - valueb = 98 - else: - valuea = 'a' * 400 - valueb = 'b' * 400 - - # Pin oldest and stable timestamps to 10. - self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(10) + - ',stable_timestamp=' + self.timestamp_str(10)) - - cursor = self.session.open_cursor(uri) - self.session.begin_transaction() - for i in range(1, nrows + 1): - cursor[i] = valuea - cursor.set_key(i) - cursor.remove() - - self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(30)) - - self.session.begin_transaction() - for i in range(1, nrows + 1): - cursor[i] = valueb - - cursor.reset() - cursor.close() - self.session.prepare_transaction('prepare_timestamp=' + self.timestamp_str(20)) - - s = self.conn.open_session() - s.begin_transaction('ignore_prepare = true') - # Configure debug behavior on a cursor to evict the page positioned on when the reset API is used. - evict_cursor = s.open_cursor(uri, None, "debug=(release_evict)") - - for i in range(1, nrows + 1): - evict_cursor.set_key(i) - if self.value_format == '8t': - self.assertEquals(evict_cursor.search(), 0) - self.assertEquals(evict_cursor.get_value(), 0) - else: - self.assertEquals(evict_cursor.search(), WT_NOTFOUND) - evict_cursor.reset() - - s.rollback_transaction() - self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(40)) - s.checkpoint() - - # Rollback the prepared transaction - self.session.rollback_transaction() - - # Simulate a server crash and restart. - self.pr("restart") - simulate_crash_restart(self, ".", "RESTART") - self.pr("restart complete") - - self.check(valuea, uri, 0, nrows, 40) - - stat_cursor = self.session.open_cursor('statistics:', None, None) - hs_removed = stat_cursor[stat.conn.txn_rts_hs_removed][2] - hs_restored_tombstone = stat_cursor[stat.conn.txn_rts_hs_restore_tombstones][2] - stat_cursor.close() - - # The udpate and delete operations are not inserted into the history store as they are not visible - self.assertEquals(hs_removed, 0) - self.assertEquals(hs_restored_tombstone, 0) diff --git a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable27.py b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable27.py index d8e8322335c..91eccc2730a 100644 --- a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable27.py +++ b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable27.py @@ -69,9 +69,10 @@ class test_rollback_to_stable27(test_rollback_to_stable_base): # Create a table. uri = "table:rollback_to_stable27" + mixed_mode=',write_timestamp_usage=mixed_mode' ds_config = ',log=(enabled=false)' if self.in_memory else '' ds = SimpleDataSet(self, uri, 0, - key_format=self.key_format, value_format="S", config=ds_config) + key_format=self.key_format, value_format="S", config = mixed_mode + ds_config) ds.populate() value_a = "aaaaa" * 10 diff --git a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable29.py b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable29.py index c6cecf3ae5d..4e36458cc9a 100755..100644 --- a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable29.py +++ b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable29.py @@ -33,9 +33,9 @@ from helper import simulate_crash_restart from test_rollback_to_stable01 import test_rollback_to_stable_base # test_rollback_to_stable29.py -# Test that the rollback to stable to verify the history store order when an out of order to a tombstone. +# Test that the rollback to stable to verify the history store order when a mixed mode update inserted to a tombstone. class test_rollback_to_stable29(test_rollback_to_stable_base): - conn_config = 'cache_size=25MB,statistics=(all),statistics_log=(json,on_close,wait=1)' + conn_config = 'cache_size=5MB,statistics=(all),statistics_log=(json,on_close,wait=1),log=(enabled=true)' format_values = [ ('column', dict(key_format='r', value_format='S')), @@ -47,7 +47,7 @@ class test_rollback_to_stable29(test_rollback_to_stable_base): def test_rollback_to_stable(self): uri = 'table:test_rollback_to_stable29' - nrows = 100 + nrows = 1000 if self.value_format == '8t': value_a = 97 @@ -61,7 +61,7 @@ class test_rollback_to_stable29(test_rollback_to_stable_base): value_d = 'd' * 100 # Create our table. - ds = SimpleDataSet(self, uri, 0, key_format=self.key_format, value_format=self.value_format) + ds = SimpleDataSet(self, uri, 0, key_format=self.key_format, value_format=self.value_format, config="write_timestamp_usage=mixed_mode") ds.populate() # Pin oldest and stable to timestamp 1. @@ -69,36 +69,46 @@ class test_rollback_to_stable29(test_rollback_to_stable_base): ',stable_timestamp=' + self.timestamp_str(1)) self.large_updates(uri, value_a, ds, nrows, False, 10) + + # Pin oldest and stable to timestamp 10. + self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(10) + + ',stable_timestamp=' + self.timestamp_str(10)) + + old_reader_session = self.conn.open_session() + old_reader_cursor = old_reader_session.open_cursor(uri) + old_reader_session.begin_transaction('read_timestamp=' + self.timestamp_str(10)) + self.large_removes(uri, ds, nrows, False, 30) self.large_updates(uri, value_b, ds, nrows, False, 40) self.check(value_b, uri, nrows, None, 40) + + self.session.checkpoint() + self.evict_cursor(uri, nrows, value_b) + self.large_updates(uri, value_c, ds, nrows, False, 50) self.check(value_c, uri, nrows, None, 50) self.evict_cursor(uri, nrows, value_c) - # Insert an out of order update. - self.session.breakpoint() - self.large_updates(uri, value_d, ds, nrows, False, 20) + # Insert a mixed mode update. + self.large_updates(uri, value_d, ds, nrows, False, 0) - self.check(value_a, uri, nrows, None, 10) + self.check(value_d, uri, nrows, None, 10) self.check(value_d, uri, nrows, None, 40) self.check(value_d, uri, nrows, None, 50) self.check(value_d, uri, nrows, None, 20) - # Pin stable to timestamp 10. - self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(10)) self.session.checkpoint() # Simulate a crash by copying to a new directory(RESTART). simulate_crash_restart(self, ".", "RESTART") - self.check(value_a, uri, nrows, None, 10) + self.check(value_d, uri, nrows, None, 10) stat_cursor = self.session.open_cursor('statistics:', None, None) hs_removed = stat_cursor[stat.conn.txn_rts_hs_removed][2] stat_cursor.close() - self.assertGreaterEqual(hs_removed, 3 * nrows) + self.assertGreaterEqual(hs_removed, 0) if __name__ == '__main__': wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp11.py b/src/third_party/wiredtiger/test/suite/test_timestamp11.py index b11e1144b37..9eacf4f6885 100644 --- a/src/third_party/wiredtiger/test/suite/test_timestamp11.py +++ b/src/third_party/wiredtiger/test/suite/test_timestamp11.py @@ -46,7 +46,7 @@ class test_timestamp11(wttest.WiredTigerTestCase, suite_subprocess): def test_timestamp_range(self): base = 'timestamp11' uri = 'file:' + base - format = 'key_format={},value_format={}'.format(self.key_format, self.value_format) + format = 'key_format={},value_format={},write_timestamp_usage=mixed_mode'.format(self.key_format, self.value_format) self.session.create(uri, format) if self.key_format == 'r': diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp14.py b/src/third_party/wiredtiger/test/suite/test_timestamp14.py index bf1e57d7639..f0d748c09e7 100644 --- a/src/third_party/wiredtiger/test/suite/test_timestamp14.py +++ b/src/third_party/wiredtiger/test/suite/test_timestamp14.py @@ -50,7 +50,7 @@ class test_timestamp14(wttest.WiredTigerTestCase, suite_subprocess): # In the absence of prepared transactions, all_durable is identical to # all_committed so let's enforce the all_durable values instead. all_durable_uri = self.uri + '_all_durable' - format = 'key_format={},value_format={}'.format(self.key_format, self.value_format) + format = 'key_format={},value_format={},write_timestamp_usage=mixed_mode'.format(self.key_format, self.value_format) session1 = self.setUpSessionOpen(self.conn) session2 = self.setUpSessionOpen(self.conn) session1.create(all_durable_uri, format) diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp17.py b/src/third_party/wiredtiger/test/suite/test_timestamp17.py index 688e32d5402..bfcda6d1530 100644 --- a/src/third_party/wiredtiger/test/suite/test_timestamp17.py +++ b/src/third_party/wiredtiger/test/suite/test_timestamp17.py @@ -49,7 +49,7 @@ class test_timestamp17(wttest.WiredTigerTestCase, suite_subprocess): scenarios = make_scenarios(format_values) def test_inconsistent_timestamping(self): - format = 'key_format={},value_format={}'.format(self.key_format, self.value_format) + format = 'key_format={},value_format={},write_timestamp_usage=mixed_mode'.format(self.key_format, self.value_format) self.session.create(self.uri, format) self.session.begin_transaction() cur1 = self.session.open_cursor(self.uri) @@ -64,11 +64,6 @@ class test_timestamp17(wttest.WiredTigerTestCase, suite_subprocess): cur1[1] = 3 self.session.commit_transaction('commit_timestamp=200') - self.session.begin_transaction() - cur1.set_key(1) - cur1.remove() - self.session.commit_transaction('commit_timestamp=100') - # Read before any updates and ensure we cannot find the key or value. # (For FLCS we expect to read zeros since the table extends nontransactionally.) self.session.begin_transaction('read_timestamp=20') @@ -81,46 +76,24 @@ class test_timestamp17(wttest.WiredTigerTestCase, suite_subprocess): self.assertEqual(search_success, wiredtiger.WT_NOTFOUND) self.session.commit_transaction() - # Read at 25 and we should see 1. - self.session.begin_transaction('read_timestamp=25') - cur1.set_key(1) - search_success = cur1.search() - self.assertEqual(search_success, 0) - value1 = cur1.get_value() - self.session.commit_transaction() - self.assertEqual(1, value1) - - # Read at 50 and we should see 2. - self.session.begin_transaction('read_timestamp=50') + # Add a mixed mode tombstone + self.session.begin_transaction() cur1.set_key(1) - search_success = cur1.search() - self.assertEqual(search_success, 0) - value1 = cur1.get_value() + cur1.remove() self.session.commit_transaction() - self.assertEqual(2, value1) - # Read at 100 and we should not find anything. + # Read at 25, 50, 100 and 200 we should not find anything. # (For FLCS, deleted values read as zero.) - self.session.begin_transaction('read_timestamp=100') - cur1.set_key(1) - search_success = cur1.search() - if self.value_format == '8t': - self.assertEqual(search_success, 0) - self.assertEqual(cur1.get_value(), 0) - else: - self.assertEqual(search_success, wiredtiger.WT_NOTFOUND) - self.session.commit_transaction() - - # Read at 200 and we should still not find anything. - self.session.begin_transaction('read_timestamp=200') - cur1.set_key(1) - search_success = cur1.search() - if self.value_format == '8t': - self.assertEqual(search_success, 0) - self.assertEqual(cur1.get_value(), 0) - else: - self.assertEqual(search_success, wiredtiger.WT_NOTFOUND) - self.session.commit_transaction() + for ts in 25, 50, 100, 200: + self.session.begin_transaction('read_timestamp=' + self.timestamp_str(ts)) + cur1.set_key(1) + search_success = cur1.search() + if self.value_format == '8t': + self.assertEqual(search_success, 0) + self.assertEqual(cur1.get_value(), 0) + else: + self.assertEqual(search_success, wiredtiger.WT_NOTFOUND) + self.session.commit_transaction() # Read at 300 for further validation. self.session.begin_transaction('read_timestamp=300') @@ -137,25 +110,29 @@ class test_timestamp17(wttest.WiredTigerTestCase, suite_subprocess): # confirm we see the correct numbers. self.conn.set_timestamp('oldest_timestamp=49') - # Read at 49 and we should see 1. + # Read at 49 and we should not see anything. self.session.begin_transaction('read_timestamp=49') cur1.set_key(1) search_success = cur1.search() - self.assertEqual(search_success, 0) - value1 = cur1.get_value() + if self.value_format == '8t': + self.assertEqual(search_success, 0) + self.assertEqual(cur1.get_value(), 0) + else: + self.assertEqual(search_success, wiredtiger.WT_NOTFOUND) self.session.commit_transaction() - self.assertEqual(1, value1) self.conn.set_timestamp('oldest_timestamp=99') - # Read at 99 and we should see 2. + # Read at 99 and we should not see anything. self.session.begin_transaction('read_timestamp=99') cur1.set_key(1) search_success = cur1.search() - self.assertEqual(search_success, 0) - value1 = cur1.get_value() + if self.value_format == '8t': + self.assertEqual(search_success, 0) + self.assertEqual(cur1.get_value(), 0) + else: + self.assertEqual(search_success, wiredtiger.WT_NOTFOUND) self.session.commit_transaction() - self.assertEqual(2, value1) # Move oldest to the point at which we deleted. self.conn.set_timestamp('oldest_timestamp=100') diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp18.py b/src/third_party/wiredtiger/test/suite/test_timestamp18.py index 8af6ec59104..966688f4d12 100644 --- a/src/third_party/wiredtiger/test/suite/test_timestamp18.py +++ b/src/third_party/wiredtiger/test/suite/test_timestamp18.py @@ -57,7 +57,7 @@ class test_timestamp18(wttest.WiredTigerTestCase): def test_ts_writes_with_non_ts_write(self): uri = 'table:test_timestamp18' - format = 'key_format={},value_format={}'.format(self.key_format, self.value_format) + format = 'key_format={},value_format={},write_timestamp_usage=mixed_mode'.format(self.key_format, self.value_format) self.session.create(uri, format) self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(1)) cursor = self.session.open_cursor(uri) diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp20.py b/src/third_party/wiredtiger/test/suite/test_timestamp20.py index 4eca70f1f30..3f6933785fc 100644 --- a/src/third_party/wiredtiger/test/suite/test_timestamp20.py +++ b/src/third_party/wiredtiger/test/suite/test_timestamp20.py @@ -30,7 +30,7 @@ import wiredtiger, wttest from wtscenario import make_scenarios # test_timestamp20.py -# Exercise fixing up of out-of-order updates in the history store. +# Exercise fixing up of mixed mode updates in the history store. class test_timestamp20(wttest.WiredTigerTestCase): conn_config = 'cache_size=50MB' @@ -44,9 +44,22 @@ class test_timestamp20(wttest.WiredTigerTestCase): def get_key(self, i): return str(i) if self.key_format == 'S' else i + def evict(self, uri): + s = self.conn.open_session() + s.begin_transaction() + # Configure debug behavior on a cursor to evict the page positioned on when the reset API is used. + evict_cursor = s.open_cursor(uri, None, "debug=(release_evict)") + for i in range(1, 10000): + evict_cursor.set_key(self.get_key(i)) + self.assertEquals(evict_cursor.search(), 0) + evict_cursor.reset() + s.rollback_transaction() + evict_cursor.close() + s.close() + def test_timestamp20_standard(self): uri = 'table:test_timestamp20' - format = 'key_format={},value_format={}'.format(self.key_format, self.value_format) + format = 'key_format={},value_format={},write_timestamp_usage=mixed_mode'.format(self.key_format, self.value_format) self.session.create(uri, format) self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(1)) cursor = self.session.open_cursor(uri) @@ -83,23 +96,24 @@ class test_timestamp20(wttest.WiredTigerTestCase): old_reader_cursor = old_reader_session.open_cursor(uri) old_reader_session.begin_transaction('read_timestamp=' + self.timestamp_str(20)) - # Now put two updates out of order. 5 will go to the history store and will trigger a + # Now put two updates mixed mode. no timestamp will go to the history store and will trigger a # correction to the existing contents. for i in range(1, 10000): self.session.begin_transaction() cursor[self.get_key(i)] = value4 - self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(25)) + self.session.commit_transaction() self.session.begin_transaction() cursor[self.get_key(i)] = value5 self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(40)) + self.evict(uri) self.session.begin_transaction('read_timestamp=' + self.timestamp_str(30)) for i in range(1, 10000): self.assertEqual(cursor[self.get_key(i)], value4) self.session.rollback_transaction() for i in range(1, 10000): - self.assertEqual(old_reader_cursor[self.get_key(i)], value2) + self.assertEqual(old_reader_cursor[self.get_key(i)], value3) old_reader_session.rollback_transaction() # In this test we're using modifies since they are more sensitive to corruptions. @@ -113,7 +127,7 @@ class test_timestamp20(wttest.WiredTigerTestCase): return uri = 'table:test_timestamp20' - format = 'key_format={},value_format={}'.format(self.key_format, self.value_format) + format = 'key_format={},value_format={},write_timestamp_usage=mixed_mode'.format(self.key_format, self.value_format) self.session.create(uri, format) self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(1)) cursor = self.session.open_cursor(uri) @@ -157,18 +171,19 @@ class test_timestamp20(wttest.WiredTigerTestCase): self.assertEqual(cursor.modify([wiredtiger.Modify('D', 300, 1)]), 0) self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(40)) - # Now put two updates out of order. 5 will go to the history store and will trigger a + # Now put two updates mixed mode. no timestamp will go to the history store and will trigger a # correction to the existing contents. for i in range(1, 10000): self.session.begin_transaction() cursor[self.get_key(i)] = value2 - self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(25)) + self.session.commit_transaction() self.session.begin_transaction() cursor[self.get_key(i)] = value3 self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(50)) + self.evict(uri) # Open up a new transaction and read at 30. - # We shouldn't be able to see past 5 due to txnid visibility. + # We shouldn't be able to see past no timestamp due to txnid visibility. self.session.begin_transaction('read_timestamp=' + self.timestamp_str(30)) for i in range(1, 10000): self.assertEqual(cursor[self.get_key(i)], value2) @@ -177,6 +192,7 @@ class test_timestamp20(wttest.WiredTigerTestCase): # Put together expected value. expected = list(value1) expected[100] = 'B' + expected[200] = 'C' expected = str().join(expected) # On the other hand, this older transaction SHOULD be able to read past the 5. diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp22.py b/src/third_party/wiredtiger/test/suite/test_timestamp22.py index 525ec06e615..884419ea567 100755 --- a/src/third_party/wiredtiger/test/suite/test_timestamp22.py +++ b/src/third_party/wiredtiger/test/suite/test_timestamp22.py @@ -43,6 +43,7 @@ class test_timestamp22(wttest.WiredTigerTestCase): rand = suite_random.suite_random() oldest_ts = 0 stable_ts = 0 + last_commit_ts = 0 last_durable = 0 SUCCESS = 'success' FAILURE = 'failure' @@ -147,8 +148,11 @@ class test_timestamp22(wttest.WiredTigerTestCase): # It's possible this will succeed, we'll check below. this_commit_ts = self.gen_ts(commit_ts) - # OOD does not work with prepared updates. Hence, the commit ts should always be - # greater than the last durable ts. + # OOO is not allowed. Hence, the commit ts should be greater than + # the last commit and last durable. + if this_commit_ts <= self.last_commit_ts: + this_commit_ts = self.last_commit_ts + 1 + if this_commit_ts <= self.last_durable: this_commit_ts = self.last_durable + 1 @@ -185,6 +189,14 @@ class test_timestamp22(wttest.WiredTigerTestCase): commit_config += ',durable_timestamp=' + self.timestamp_str(durable_ts) cursor = session.open_cursor(self.uri) prepare_ts = self.gen_ts(commit_ts) + + # OOO is not allowed. Hence, the prepare ts should be greater than + # the last commit and last durable. + if prepare_ts <= self.last_durable: + prepare_ts = self.last_durable + 1 + if prepare_ts <= self.last_commit_ts: + prepare_ts = self.last_commit_ts + 1 + prepare_config = 'prepare_timestamp=' + self.timestamp_str(prepare_ts) begin_config = '' if read_ts < 0 else 'read_timestamp=' + self.timestamp_str(read_ts) @@ -285,6 +297,7 @@ class test_timestamp22(wttest.WiredTigerTestCase): with self.expect(ok_commit, 'commit'): session.commit_transaction(commit_config) self.commit_value = value + self.last_commit_ts = commit_ts if do_prepare: self.last_durable = durable_ts if needs_rollback: @@ -417,8 +430,11 @@ class test_timestamp22(wttest.WiredTigerTestCase): else: read_ts = -1 # no read_timestamp used in txn - # OOD does not work with prepared updates. Hence, the commit ts should always be - # greater than the last durable ts. + # OOO is not allowed. Hence, the commit ts should be greater than + # the last commit and last durable. + if commit_ts <= self.last_commit_ts: + commit_ts = self.last_commit_ts + 1 + if commit_ts <= self.last_durable: commit_ts = self.last_durable + 1 diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp26.py b/src/third_party/wiredtiger/test/suite/test_timestamp26.py index 689ad41478a..85f6895eba3 100644 --- a/src/third_party/wiredtiger/test/suite/test_timestamp26.py +++ b/src/third_party/wiredtiger/test/suite/test_timestamp26.py @@ -217,7 +217,7 @@ class test_timestamp26_alter_inconsistent_update(wttest.WiredTigerTestCase): # verify the inconsistent usage is detected. uri = 'table:ts' self.session.create(uri, - 'key_format={},value_format={}'.format(self.key_format, self.value_format)) + 'key_format={},value_format={},write_timestamp_usage=mixed_mode'.format(self.key_format, self.value_format)) c = self.session.open_cursor(uri) key = ds.key(10) @@ -228,9 +228,8 @@ class test_timestamp26_alter_inconsistent_update(wttest.WiredTigerTestCase): c[key] = ds.value(10) self.session.commit_transaction() - # Update the data item at timestamp 1. + # Update the data item with no timestamp. self.session.begin_transaction() - self.session.timestamp_transaction('commit_timestamp=' + self.timestamp_str(1)) c[key] = ds.value(11) self.session.commit_transaction() diff --git a/src/third_party/wiredtiger/test/suite/test_txn26.py b/src/third_party/wiredtiger/test/suite/test_txn26.py index 1765db8a097..c4193073719 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn26.py +++ b/src/third_party/wiredtiger/test/suite/test_txn26.py @@ -35,7 +35,7 @@ from wtscenario import make_scenarios # test_txn26.py # Test that commit should fail if commit timestamp is smaller or equal to the active timestamp. -# Our handling of out of order timestamp relies on this to ensure repeated reads are working as +# Our handling of mixed mode timestamp relies on this to ensure repeated reads are working as # expected. class test_txn26(wttest.WiredTigerTestCase): conn_config = 'cache_size=50MB' diff --git a/src/third_party/wiredtiger/test/test_coverage.md b/src/third_party/wiredtiger/test/test_coverage.md index b186c7ac318..1286a488855 100644 --- a/src/third_party/wiredtiger/test/test_coverage.md +++ b/src/third_party/wiredtiger/test/test_coverage.md @@ -38,8 +38,6 @@ |Recovery||[test_txn04.py](../test/suite/test_txn04.py) |Recovery|Log Files|[test_txn19.py](../test/suite/test_txn19.py) |Rollback To Stable||[test_checkpoint_snapshot03.py](../test/suite/test_checkpoint_snapshot03.py), [test_rollback_to_stable16.py](../test/suite/test_rollback_to_stable16.py), [test_rollback_to_stable18.py](../test/suite/test_rollback_to_stable18.py) -|Rollback To Stable|Out Of Order Timestamps|[test_rollback_to_stable21.py](../test/suite/test_rollback_to_stable21.py) -|Rollback To Stable|Prepare|[test_rollback_to_stable21.py](../test/suite/test_rollback_to_stable21.py) |Salvage|Prepare|[test_prepare_hs03.py](../test/suite/test_prepare_hs03.py) |Schema Api||[test_schema03.py](../test/suite/test_schema03.py) |Session Api|Reconfigure|[test_reconfig04.py](../test/suite/test_reconfig04.py) |