diff options
author | Luke Chen <luke.chen@mongodb.com> | 2021-09-10 18:55:34 +1000 |
---|---|---|
committer | Luke Chen <luke.chen@mongodb.com> | 2021-09-10 18:55:34 +1000 |
commit | bc8d0d1fb0fee5ab2e540122c6042c4d5f16ea93 (patch) | |
tree | 78e2d500487f4bc94a4c2bc36cb8f855ebb51c35 | |
parent | 2b0d538db8c0c9b9d7992d4489ba7171c721dfb7 (diff) | |
download | mongo-bc8d0d1fb0fee5ab2e540122c6042c4d5f16ea93.tar.gz |
Import wiredtiger: 1acbb32edf1cdecd61717d77b87a5072f47fd90b from branch mongodb-5.0
ref: 46476d5bd4..1acbb32edf
for: 5.0.3
WT-7630 Fix a history store entry remove with checkpoint reserved transaction-id
WT-7958 Include recovery in test/checkpoint
WT-8032 Add fail points in reconciliation for history store inserts.
WT-8047 Add mixed mode delete operations to test checkpoint
WT-8056 Fix a bug in RTS that incorrectly restores an update from HS, leading to the key removal
33 files changed, 997 insertions, 223 deletions
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py index 4b4c98f6018..ca9d3f2ed6e 100644 --- a/src/third_party/wiredtiger/dist/api_data.py +++ b/src/third_party/wiredtiger/dist/api_data.py @@ -793,7 +793,9 @@ connection_runtime_config = [ intended for use with internal stress testing of WiredTiger.''', type='list', undoc=True, choices=[ - 'aggressive_sweep', 'backup_rename', 'checkpoint_slow', 'history_store_checkpoint_delay', + 'aggressive_sweep', 'backup_rename', 'checkpoint_reserved_txnid_delay', 'checkpoint_slow', + 'failpoint_history_store_delete_key_from_ts', 'failpoint_history_store_insert_1', + 'failpoint_history_store_insert_2', 'history_store_checkpoint_delay', 'history_store_search', 'history_store_sweep_race', 'prepare_checkpoint_delay', 'split_1', 'split_2', 'split_3', 'split_4', 'split_5', 'split_6', 'split_7', 'split_8']), Config('verbose', '[]', r''' diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index 1b0afc64eda..7ed2f3b6473 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -113,6 +113,7 @@ Decrement Decrypt DeleteFileW Destructor +Deterministically EACCES EAGAIN EB @@ -151,6 +152,7 @@ FTRUNCATE FULLFSYNC FUNCSIG Facebook +Failpoint FindClose FindFirstFile FindNextFileW @@ -784,6 +786,7 @@ extern extlist fadvise fahrenheit +failpoint fallocate fallthrough fblocks diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py index f35e56252a1..05efc9107ca 100644 --- a/src/third_party/wiredtiger/dist/stat_data.py +++ b/src/third_party/wiredtiger/dist/stat_data.py @@ -554,6 +554,8 @@ conn_stats = [ TxnStat('txn_prepare_active', 'prepared transactions currently active'), TxnStat('txn_prepare_commit', 'prepared transactions committed'), TxnStat('txn_prepare_rollback', 'prepared transactions rolled back'), + 
TxnStat('txn_prepare_rollback_do_not_remove_hs_update', 'prepared transactions rolled back and do not remove the history store entry'), + TxnStat('txn_prepare_rollback_fix_hs_update_with_ckpt_reserved_txnid', 'prepared transactions rolled back and fix the history store entry with checkpoint reserved transaction id'), TxnStat('txn_prepared_updates_committed', 'Number of prepared updates committed'), TxnStat('txn_prepared_updates', 'Number of prepared updates'), TxnStat('txn_prepared_updates_key_repeated', 'Number of prepared updates repeated on the same key'), diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 7e2ca6a3e85..f3611add433 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-5.0", - "commit": "46476d5bd43df2a4dadd04104c91038cd867774c" + "commit": "1acbb32edf1cdecd61717d77b87a5072f47fd90b" } diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c index d718239a575..e2205b78047 100644 --- a/src/third_party/wiredtiger/src/btree/bt_debug.c +++ b/src/third_party/wiredtiger/src/btree/bt_debug.c @@ -1110,7 +1110,8 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref) "disk %p", (void *)page->dsk)); if (page->dsk != NULL) - WT_RET(ds->f(ds, ", dsk_mem_size %" PRIu32, page->dsk->mem_size)); + WT_RET(ds->f(ds, ", dsk_mem_size %" PRIu32 ", write_gen: %" PRIu64, page->dsk->mem_size, + page->dsk->write_gen)); WT_RET(ds->f(ds, ", entries %" PRIu32, entries)); WT_RET(ds->f(ds, ", %s", __wt_page_is_modified(page) ? 
"dirty" : "clean")); diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c index a3d9af315a4..cb0a104dc6b 100644 --- a/src/third_party/wiredtiger/src/config/config_def.c +++ b/src/third_party/wiredtiger/src/config/config_def.c @@ -138,10 +138,14 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = { confchk_WT_CONNECTION_reconfigure_tiered_storage_subconfigs, 2}, {"timing_stress_for_test", "list", NULL, "choices=[\"aggressive_sweep\",\"backup_rename\"," - "\"checkpoint_slow\",\"history_store_checkpoint_delay\"," - "\"history_store_search\",\"history_store_sweep_race\"," - "\"prepare_checkpoint_delay\",\"split_1\",\"split_2\",\"split_3\"" - ",\"split_4\",\"split_5\",\"split_6\",\"split_7\",\"split_8\"]", + "\"checkpoint_reserved_txnid_delay\",\"checkpoint_slow\"," + "\"failpoint_history_store_delete_key_from_ts\"," + "\"failpoint_history_store_insert_1\"," + "\"failpoint_history_store_insert_2\"," + "\"history_store_checkpoint_delay\",\"history_store_search\"," + "\"history_store_sweep_race\",\"prepare_checkpoint_delay\"," + "\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\"," + "\"split_6\",\"split_7\",\"split_8\"]", NULL, 0}, {"verbose", "list", NULL, "choices=[\"api\",\"backup\",\"block\",\"checkpoint\"," @@ -863,10 +867,14 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { {"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 6}, {"timing_stress_for_test", "list", NULL, "choices=[\"aggressive_sweep\",\"backup_rename\"," - "\"checkpoint_slow\",\"history_store_checkpoint_delay\"," - "\"history_store_search\",\"history_store_sweep_race\"," - "\"prepare_checkpoint_delay\",\"split_1\",\"split_2\",\"split_3\"" - ",\"split_4\",\"split_5\",\"split_6\",\"split_7\",\"split_8\"]", + "\"checkpoint_reserved_txnid_delay\",\"checkpoint_slow\"," + "\"failpoint_history_store_delete_key_from_ts\"," + "\"failpoint_history_store_insert_1\"," + 
"\"failpoint_history_store_insert_2\"," + "\"history_store_checkpoint_delay\",\"history_store_search\"," + "\"history_store_sweep_race\",\"prepare_checkpoint_delay\"," + "\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\"," + "\"split_6\",\"split_7\",\"split_8\"]", NULL, 0}, {"transaction_sync", "category", NULL, NULL, confchk_wiredtiger_open_transaction_sync_subconfigs, 2}, @@ -941,10 +949,14 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = { {"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 6}, {"timing_stress_for_test", "list", NULL, "choices=[\"aggressive_sweep\",\"backup_rename\"," - "\"checkpoint_slow\",\"history_store_checkpoint_delay\"," - "\"history_store_search\",\"history_store_sweep_race\"," - "\"prepare_checkpoint_delay\",\"split_1\",\"split_2\",\"split_3\"" - ",\"split_4\",\"split_5\",\"split_6\",\"split_7\",\"split_8\"]", + "\"checkpoint_reserved_txnid_delay\",\"checkpoint_slow\"," + "\"failpoint_history_store_delete_key_from_ts\"," + "\"failpoint_history_store_insert_1\"," + "\"failpoint_history_store_insert_2\"," + "\"history_store_checkpoint_delay\",\"history_store_search\"," + "\"history_store_sweep_race\",\"prepare_checkpoint_delay\"," + "\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\"," + "\"split_6\",\"split_7\",\"split_8\"]", NULL, 0}, {"transaction_sync", "category", NULL, NULL, confchk_wiredtiger_open_transaction_sync_subconfigs, 2}, @@ -1016,10 +1028,14 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = { {"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 6}, {"timing_stress_for_test", "list", NULL, "choices=[\"aggressive_sweep\",\"backup_rename\"," - "\"checkpoint_slow\",\"history_store_checkpoint_delay\"," - "\"history_store_search\",\"history_store_sweep_race\"," - "\"prepare_checkpoint_delay\",\"split_1\",\"split_2\",\"split_3\"" - ",\"split_4\",\"split_5\",\"split_6\",\"split_7\",\"split_8\"]", + 
"\"checkpoint_reserved_txnid_delay\",\"checkpoint_slow\"," + "\"failpoint_history_store_delete_key_from_ts\"," + "\"failpoint_history_store_insert_1\"," + "\"failpoint_history_store_insert_2\"," + "\"history_store_checkpoint_delay\",\"history_store_search\"," + "\"history_store_sweep_race\",\"prepare_checkpoint_delay\"," + "\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\"," + "\"split_6\",\"split_7\",\"split_8\"]", NULL, 0}, {"transaction_sync", "category", NULL, NULL, confchk_wiredtiger_open_transaction_sync_subconfigs, 2}, @@ -1089,10 +1105,14 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = { {"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 6}, {"timing_stress_for_test", "list", NULL, "choices=[\"aggressive_sweep\",\"backup_rename\"," - "\"checkpoint_slow\",\"history_store_checkpoint_delay\"," - "\"history_store_search\",\"history_store_sweep_race\"," - "\"prepare_checkpoint_delay\",\"split_1\",\"split_2\",\"split_3\"" - ",\"split_4\",\"split_5\",\"split_6\",\"split_7\",\"split_8\"]", + "\"checkpoint_reserved_txnid_delay\",\"checkpoint_slow\"," + "\"failpoint_history_store_delete_key_from_ts\"," + "\"failpoint_history_store_insert_1\"," + "\"failpoint_history_store_insert_2\"," + "\"history_store_checkpoint_delay\",\"history_store_search\"," + "\"history_store_sweep_race\",\"prepare_checkpoint_delay\"," + "\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\"," + "\"split_6\",\"split_7\",\"split_8\"]", NULL, 0}, {"transaction_sync", "category", NULL, NULL, confchk_wiredtiger_open_transaction_sync_subconfigs, 2}, diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c index a05b4b1c85d..95ca5acbac5 100644 --- a/src/third_party/wiredtiger/src/conn/conn_api.c +++ b/src/third_party/wiredtiger/src/conn/conn_api.c @@ -2124,11 +2124,18 @@ __wt_timing_stress_config(WT_SESSION_IMPL *session, const char *cfg[]) * Each split race delay is controlled using a 
different flag to allow more effective race * condition detection, since enabling all delays at once can lead to an overall slowdown to the * point where race conditions aren't encountered. + * + * Fail points are also defined in this list and will occur randomly when enabled. */ static const WT_NAME_FLAG stress_types[] = { {"aggressive_sweep", WT_TIMING_STRESS_AGGRESSIVE_SWEEP}, {"backup_rename", WT_TIMING_STRESS_BACKUP_RENAME}, + {"checkpoint_reserved_txnid_delay", WT_TIMING_STRESS_CHECKPOINT_RESERVED_TXNID_DELAY}, {"checkpoint_slow", WT_TIMING_STRESS_CHECKPOINT_SLOW}, + {"failpoint_history_delete_key_from_ts", + WT_TIMING_STRESS_FAILPOINT_HISTORY_STORE_DELETE_KEY_FROM_TS}, + {"failpoint_history_store_insert_1", WT_TIMING_STRESS_FAILPOINT_HISTORY_STORE_INSERT_1}, + {"failpoint_history_store_insert_2", WT_TIMING_STRESS_FAILPOINT_HISTORY_STORE_INSERT_2}, {"history_store_checkpoint_delay", WT_TIMING_STRESS_HS_CHECKPOINT_DELAY}, {"history_store_search", WT_TIMING_STRESS_HS_SEARCH}, {"history_store_sweep_race", WT_TIMING_STRESS_HS_SWEEP}, diff --git a/src/third_party/wiredtiger/src/conn/conn_ckpt.c b/src/third_party/wiredtiger/src/conn/conn_ckpt.c index 7ac53585134..221de9ffe54 100644 --- a/src/third_party/wiredtiger/src/conn/conn_ckpt.c +++ b/src/third_party/wiredtiger/src/conn/conn_ckpt.c @@ -240,3 +240,40 @@ __wt_checkpoint_signal(WT_SESSION_IMPL *session, wt_off_t logsize) conn->ckpt_signalled = true; } } + +/* + * __wt_checkpoint_reserved_session_init -- + * Initialize checkpoint reserved session. + */ +int +__wt_checkpoint_reserved_session_init(WT_SESSION_IMPL *session) +{ + WT_CONNECTION_IMPL *conn; + + conn = S2C(session); + + WT_ASSERT(session, conn->ckpt_reserved_session == NULL); + + return (__wt_open_internal_session( + conn, "ckpt-reserved", false, WT_SESSION_NO_RECONCILE, 0, &conn->ckpt_reserved_session)); +} + +/* + * __wt_checkpoint_reserved_session_destroy -- + * Release resources allocated for checkpoint reserved session. 
+ */ +int +__wt_checkpoint_reserved_session_destroy(WT_SESSION_IMPL *session) +{ + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + + conn = S2C(session); + + if (conn->ckpt_reserved_session != NULL) { + WT_TRET(__wt_session_close_internal(conn->ckpt_reserved_session)); + conn->ckpt_reserved_session = NULL; + } + + return (ret); +} diff --git a/src/third_party/wiredtiger/src/conn/conn_open.c b/src/third_party/wiredtiger/src/conn/conn_open.c index b86ca4eb616..343e0046423 100644 --- a/src/third_party/wiredtiger/src/conn/conn_open.c +++ b/src/third_party/wiredtiger/src/conn/conn_open.c @@ -106,6 +106,9 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) /* Close open data handles. */ WT_TRET(__wt_conn_dhandle_discard(session)); + /* Close the checkpoint reserved session. */ + WT_TRET(__wt_checkpoint_reserved_session_destroy(session)); + /* Shut down metadata tracking. */ WT_TRET(__wt_meta_track_destroy(session)); @@ -244,6 +247,9 @@ __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[]) /* Start the optional capacity thread. */ WT_RET(__wt_capacity_server_create(session, cfg)); + /* Initialize checkpoint reserved session, required for the checkpoint operation. */ + WT_RET(__wt_checkpoint_reserved_session_init(session)); + /* Start the optional checkpoint thread. */ WT_RET(__wt_checkpoint_server_create(session, cfg)); diff --git a/src/third_party/wiredtiger/src/history/hs_rec.c b/src/third_party/wiredtiger/src/history/hs_rec.c index 8375de8ffdf..1f927309f94 100644 --- a/src/third_party/wiredtiger/src/history/hs_rec.c +++ b/src/third_party/wiredtiger/src/history/hs_rec.c @@ -151,16 +151,17 @@ __hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree, WT_ERR(__wt_compare(session, NULL, existing_val, hs_value, &cmp)); /* * The same value should not be inserted again unless: - * 1. the previous entry is already deleted (i.e. the stop timestamp is globally + * 1. The previous entry is already deleted (i.e. 
the stop timestamp is globally * visible) - * 2. it came from a different transaction - * 3. it came from the same transaction but with a different timestamp + * 2. It came from a different transaction + * 3. It came from the same transaction but with a different timestamp + * 4. The prepared rollback left the history store entry when checkpoint is in progress. */ if (cmp == 0) { if (!__wt_txn_tw_stop_visible_all(session, &hs_cbt->upd_value->tw) && tw->start_txn != WT_TXN_NONE && tw->start_txn == hs_cbt->upd_value->tw.start_txn && - tw->start_ts == hs_cbt->upd_value->tw.start_ts) { + tw->start_ts == hs_cbt->upd_value->tw.start_ts && tw->start_ts != tw->stop_ts) { /* * If we have issues with duplicate history store records, we want to be able to * distinguish between modifies and full updates. Since modifies are not @@ -169,7 +170,6 @@ __hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree, */ WT_ASSERT(session, type != WT_UPDATE_MODIFY && (uint8_t)upd_type_full_diag != WT_UPDATE_MODIFY); - WT_ASSERT(session, false && "Duplicate values inserted into history store"); } } counter = hs_counter + 1; @@ -286,8 +286,7 @@ __hs_next_upd_full_value(WT_SESSION_IMPL *session, WT_UPDATE_VECTOR *updates, * fails or succeeds, if there is a successful write to history, cache_write_hs is set to true. 
*/ int -__wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, - bool *cache_write_hs, bool checkpoint_running) +__wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *multi) { WT_BTREE *btree, *hs_btree; WT_CURSOR *hs_cursor; @@ -313,9 +312,10 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, uint32_t i; uint8_t *p; int nentries; - bool enable_reverse_modify, hs_inserted, squashed; + bool checkpoint_running, enable_reverse_modify, hs_inserted, squashed; - *cache_write_hs = false; + checkpoint_running = F_ISSET(r, WT_REC_CHECKPOINT_RUNNING); + r->cache_write_hs = false; btree = S2BT(session); prev_upd = NULL; insert_cnt = 0; @@ -366,7 +366,7 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, continue; /* History store table key component: source key. */ - switch (page->type) { + switch (r->page->type) { case WT_PAGE_COL_FIX: case WT_PAGE_COL_VAR: p = key->mem; @@ -375,8 +375,8 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, break; case WT_PAGE_ROW_LEAF: if (list->ins == NULL) { - WT_WITH_BTREE( - session, btree, ret = __wt_row_leaf_key(session, page, list->ripcip, key, false)); + WT_WITH_BTREE(session, btree, + ret = __wt_row_leaf_key(session, r->page, list->ripcip, key, false)); WT_ERR(ret); } else { key->data = WT_INSERT_KEY(list->ins); @@ -384,7 +384,7 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, } break; default: - WT_ERR(__wt_illegal_value(session, page->type)); + WT_ERR(__wt_illegal_value(session, r->page->type)); } first_globally_visible_upd = min_ts_upd = out_of_order_ts_upd = NULL; @@ -645,6 +645,11 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, /* Clear out the insert success flag prior to our insert attempt. */ __wt_curhs_clear_insert_success(hs_cursor); + /* Fail here 0.05% of the time if we are in the eviction path. 
*/ + if (F_ISSET(r, WT_REC_EVICT) && + __wt_failpoint(session, WT_TIMING_STRESS_FAILPOINT_HISTORY_STORE_INSERT_1, 0.05)) + WT_ERR(EBUSY); + /* * Calculate reverse modify and clear the history store records with timestamps when * inserting the first update. Always write on-disk data store updates to the history @@ -717,6 +722,11 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, __wt_update_vector_clear(&updates); } + /* Fail here 0.05% of the time if we are an eviction thread. */ + if (F_ISSET(r, WT_REC_EVICT) && + __wt_failpoint(session, WT_TIMING_STRESS_FAILPOINT_HISTORY_STORE_INSERT_2, 0.05)) + WT_ERR(EBUSY); + WT_ERR(__wt_block_manager_named_size(session, WT_HS_FILE, &hs_size)); hs_btree = __wt_curhs_get_btree(hs_cursor); max_hs_size = hs_btree->file_max; @@ -731,7 +741,7 @@ err: /* cache_write_hs is set to true as there was at least one successful write to history. */ if (insert_cnt > 0) - *cache_write_hs = true; + r->cache_write_hs = true; __wt_scr_free(session, &key); /* modify_value is allocated in __wt_modify_pack. Free it if it is allocated. */ diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h index 68a03cad84b..e40bc2acc27 100644 --- a/src/third_party/wiredtiger/src/include/connection.h +++ b/src/third_party/wiredtiger/src/include/connection.h @@ -473,7 +473,8 @@ struct __wt_connection_impl { uint16_t log_req_min; /* Min required log version */ uint32_t txn_logsync; /* Log sync configuration */ - WT_SESSION_IMPL *meta_ckpt_session; /* Metadata checkpoint session */ + WT_SESSION_IMPL *meta_ckpt_session; /* Metadata checkpoint session */ + WT_SESSION_IMPL *ckpt_reserved_session; /* Checkpoint reserved session */ /* * Is there a data/schema change that needs to be the part of a checkpoint. @@ -594,21 +595,25 @@ struct __wt_connection_impl { * Variable with flags for which subsystems the diagnostic stress timing delays have been requested.
*/ /* AUTOMATIC FLAG VALUE GENERATION START 0 */ -#define WT_TIMING_STRESS_AGGRESSIVE_SWEEP 0x0001u -#define WT_TIMING_STRESS_BACKUP_RENAME 0x0002u -#define WT_TIMING_STRESS_CHECKPOINT_SLOW 0x0004u -#define WT_TIMING_STRESS_HS_CHECKPOINT_DELAY 0x0008u -#define WT_TIMING_STRESS_HS_SEARCH 0x0010u -#define WT_TIMING_STRESS_HS_SWEEP 0x0020u -#define WT_TIMING_STRESS_PREPARE_CHECKPOINT_DELAY 0x0040u -#define WT_TIMING_STRESS_SPLIT_1 0x0080u -#define WT_TIMING_STRESS_SPLIT_2 0x0100u -#define WT_TIMING_STRESS_SPLIT_3 0x0200u -#define WT_TIMING_STRESS_SPLIT_4 0x0400u -#define WT_TIMING_STRESS_SPLIT_5 0x0800u -#define WT_TIMING_STRESS_SPLIT_6 0x1000u -#define WT_TIMING_STRESS_SPLIT_7 0x2000u -#define WT_TIMING_STRESS_SPLIT_8 0x4000u +#define WT_TIMING_STRESS_AGGRESSIVE_SWEEP 0x00001u +#define WT_TIMING_STRESS_BACKUP_RENAME 0x00002u +#define WT_TIMING_STRESS_CHECKPOINT_RESERVED_TXNID_DELAY 0x00004u +#define WT_TIMING_STRESS_CHECKPOINT_SLOW 0x00008u +#define WT_TIMING_STRESS_FAILPOINT_HISTORY_STORE_DELETE_KEY_FROM_TS 0x00010u +#define WT_TIMING_STRESS_FAILPOINT_HISTORY_STORE_INSERT_1 0x00020u +#define WT_TIMING_STRESS_FAILPOINT_HISTORY_STORE_INSERT_2 0x00040u +#define WT_TIMING_STRESS_HS_CHECKPOINT_DELAY 0x00080u +#define WT_TIMING_STRESS_HS_SEARCH 0x00100u +#define WT_TIMING_STRESS_HS_SWEEP 0x00200u +#define WT_TIMING_STRESS_PREPARE_CHECKPOINT_DELAY 0x00400u +#define WT_TIMING_STRESS_SPLIT_1 0x00800u +#define WT_TIMING_STRESS_SPLIT_2 0x01000u +#define WT_TIMING_STRESS_SPLIT_3 0x02000u +#define WT_TIMING_STRESS_SPLIT_4 0x04000u +#define WT_TIMING_STRESS_SPLIT_5 0x08000u +#define WT_TIMING_STRESS_SPLIT_6 0x10000u +#define WT_TIMING_STRESS_SPLIT_7 0x20000u +#define WT_TIMING_STRESS_SPLIT_8 0x40000u /* AUTOMATIC FLAG VALUE GENERATION STOP 64 */ uint64_t timing_stress_flags; diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index 8061ce88008..601eff81c29 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ 
b/src/third_party/wiredtiger/src/include/extern.h @@ -20,6 +20,8 @@ extern bool __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool vi WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern bool __wt_evict_thread_chk(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern bool __wt_failpoint(WT_SESSION_IMPL *session, uint64_t conn_flag, double probability) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern bool __wt_fsync_background_chk(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern bool __wt_gen_active(WT_SESSION_IMPL *session, int which, uint64_t generation) @@ -355,6 +357,10 @@ extern int __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_checkpoint_reserved_session_destroy(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_checkpoint_reserved_session_init(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_checkpoint_server_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session) @@ -779,8 +785,8 @@ extern int __wt_hs_find_upd(WT_SESSION_IMPL *session, uint32_t btree_id, WT_ITEM WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_hs_get_btree(WT_SESSION_IMPL *session, WT_BTREE **hs_btreep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, - bool *cache_write_hs, bool checkpoint_running) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *multi) + 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_hs_modify(WT_CURSOR_BTREE *hs_cbt, WT_UPDATE *hs_upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_hs_open(WT_SESSION_IMPL *session, const char **cfg) diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h index e590bae7f20..4b6b9cec111 100644 --- a/src/third_party/wiredtiger/src/include/stat.h +++ b/src/third_party/wiredtiger/src/include/stat.h @@ -733,6 +733,8 @@ struct __wt_connection_stats { int64_t txn_prepare_commit; int64_t txn_prepare_active; int64_t txn_prepare_rollback; + int64_t txn_prepare_rollback_do_not_remove_hs_update; + int64_t txn_prepare_rollback_fix_hs_update_with_ckpt_reserved_txnid; int64_t txn_query_ts; int64_t txn_read_race_prepare_update; int64_t txn_rts; diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h index 6b84061ff82..5dce2471949 100644 --- a/src/third_party/wiredtiger/src/include/txn.h +++ b/src/third_party/wiredtiger/src/include/txn.h @@ -153,6 +153,8 @@ struct __wt_txn_global { volatile uint32_t checkpoint_id; /* Checkpoint's session ID */ WT_TXN_SHARED checkpoint_txn_shared; /* Checkpoint's txn shared state */ wt_timestamp_t checkpoint_timestamp; /* Checkpoint's timestamp */ + volatile uint64_t checkpoint_reserved_txn_id; /* A transaction ID reserved by checkpoint for + prepared transaction resolution. */ volatile uint64_t debug_ops; /* Debug mode op counter */ uint64_t debug_rollback; /* Debug mode rollback */ diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index e292d1c74fd..5b12a73e017 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -5988,167 +5988,177 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_TXN_PREPARE_ACTIVE 1418 /*! 
transaction: prepared transactions rolled back */ #define WT_STAT_CONN_TXN_PREPARE_ROLLBACK 1419 +/*! + * transaction: prepared transactions rolled back and do not remove the + * history store entry + */ +#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK_DO_NOT_REMOVE_HS_UPDATE 1420 +/*! + * transaction: prepared transactions rolled back and fix the history + * store entry with checkpoint reserved transaction id + */ +#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK_FIX_HS_UPDATE_WITH_CKPT_RESERVED_TXNID 1421 /*! transaction: query timestamp calls */ -#define WT_STAT_CONN_TXN_QUERY_TS 1420 +#define WT_STAT_CONN_TXN_QUERY_TS 1422 /*! transaction: race to read prepared update retry */ -#define WT_STAT_CONN_TXN_READ_RACE_PREPARE_UPDATE 1421 +#define WT_STAT_CONN_TXN_READ_RACE_PREPARE_UPDATE 1423 /*! transaction: rollback to stable calls */ -#define WT_STAT_CONN_TXN_RTS 1422 +#define WT_STAT_CONN_TXN_RTS 1424 /*! * transaction: rollback to stable history store records with stop * timestamps older than newer records */ -#define WT_STAT_CONN_TXN_RTS_HS_STOP_OLDER_THAN_NEWER_START 1423 +#define WT_STAT_CONN_TXN_RTS_HS_STOP_OLDER_THAN_NEWER_START 1425 /*! transaction: rollback to stable inconsistent checkpoint */ -#define WT_STAT_CONN_TXN_RTS_INCONSISTENT_CKPT 1424 +#define WT_STAT_CONN_TXN_RTS_INCONSISTENT_CKPT 1426 /*! transaction: rollback to stable keys removed */ -#define WT_STAT_CONN_TXN_RTS_KEYS_REMOVED 1425 +#define WT_STAT_CONN_TXN_RTS_KEYS_REMOVED 1427 /*! transaction: rollback to stable keys restored */ -#define WT_STAT_CONN_TXN_RTS_KEYS_RESTORED 1426 +#define WT_STAT_CONN_TXN_RTS_KEYS_RESTORED 1428 /*! transaction: rollback to stable pages visited */ -#define WT_STAT_CONN_TXN_RTS_PAGES_VISITED 1427 +#define WT_STAT_CONN_TXN_RTS_PAGES_VISITED 1429 /*! transaction: rollback to stable restored tombstones from history store */ -#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_TOMBSTONES 1428 +#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_TOMBSTONES 1430 /*! 
transaction: rollback to stable restored updates from history store */ -#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_UPDATES 1429 +#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_UPDATES 1431 /*! transaction: rollback to stable skipping delete rle */ -#define WT_STAT_CONN_TXN_RTS_DELETE_RLE_SKIPPED 1430 +#define WT_STAT_CONN_TXN_RTS_DELETE_RLE_SKIPPED 1432 /*! transaction: rollback to stable skipping stable rle */ -#define WT_STAT_CONN_TXN_RTS_STABLE_RLE_SKIPPED 1431 +#define WT_STAT_CONN_TXN_RTS_STABLE_RLE_SKIPPED 1433 /*! transaction: rollback to stable sweeping history store keys */ -#define WT_STAT_CONN_TXN_RTS_SWEEP_HS_KEYS 1432 +#define WT_STAT_CONN_TXN_RTS_SWEEP_HS_KEYS 1434 /*! transaction: rollback to stable tree walk skipping pages */ -#define WT_STAT_CONN_TXN_RTS_TREE_WALK_SKIP_PAGES 1433 +#define WT_STAT_CONN_TXN_RTS_TREE_WALK_SKIP_PAGES 1435 /*! transaction: rollback to stable updates aborted */ -#define WT_STAT_CONN_TXN_RTS_UPD_ABORTED 1434 +#define WT_STAT_CONN_TXN_RTS_UPD_ABORTED 1436 /*! transaction: rollback to stable updates removed from history store */ -#define WT_STAT_CONN_TXN_RTS_HS_REMOVED 1435 +#define WT_STAT_CONN_TXN_RTS_HS_REMOVED 1437 /*! transaction: sessions scanned in each walk of concurrent sessions */ -#define WT_STAT_CONN_TXN_SESSIONS_WALKED 1436 +#define WT_STAT_CONN_TXN_SESSIONS_WALKED 1438 /*! transaction: set timestamp calls */ -#define WT_STAT_CONN_TXN_SET_TS 1437 +#define WT_STAT_CONN_TXN_SET_TS 1439 /*! transaction: set timestamp durable calls */ -#define WT_STAT_CONN_TXN_SET_TS_DURABLE 1438 +#define WT_STAT_CONN_TXN_SET_TS_DURABLE 1440 /*! transaction: set timestamp durable updates */ -#define WT_STAT_CONN_TXN_SET_TS_DURABLE_UPD 1439 +#define WT_STAT_CONN_TXN_SET_TS_DURABLE_UPD 1441 /*! transaction: set timestamp oldest calls */ -#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1440 +#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1442 /*! 
transaction: set timestamp oldest updates */ -#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1441 +#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1443 /*! transaction: set timestamp stable calls */ -#define WT_STAT_CONN_TXN_SET_TS_STABLE 1442 +#define WT_STAT_CONN_TXN_SET_TS_STABLE 1444 /*! transaction: set timestamp stable updates */ -#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1443 +#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1445 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1444 +#define WT_STAT_CONN_TXN_BEGIN 1446 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1445 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1447 /*! * transaction: transaction checkpoint currently running for history * store file */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING_HS 1446 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING_HS 1448 /*! transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1447 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1449 /*! * transaction: transaction checkpoint history store file duration * (usecs) */ -#define WT_STAT_CONN_TXN_HS_CKPT_DURATION 1448 +#define WT_STAT_CONN_TXN_HS_CKPT_DURATION 1450 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1449 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1451 /*! transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1450 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1452 /*! * transaction: transaction checkpoint most recent duration for gathering * all handles (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION 1451 +#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION 1453 /*! 
* transaction: transaction checkpoint most recent duration for gathering * applied handles (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_APPLY 1452 +#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_APPLY 1454 /*! * transaction: transaction checkpoint most recent duration for gathering * skipped handles (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_SKIP 1453 +#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_SKIP 1455 /*! transaction: transaction checkpoint most recent handles applied */ -#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_APPLIED 1454 +#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_APPLIED 1456 /*! transaction: transaction checkpoint most recent handles skipped */ -#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_SKIPPED 1455 +#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_SKIPPED 1457 /*! transaction: transaction checkpoint most recent handles walked */ -#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_WALKED 1456 +#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_WALKED 1458 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1457 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1459 /*! transaction: transaction checkpoint prepare currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RUNNING 1458 +#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RUNNING 1460 /*! transaction: transaction checkpoint prepare max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MAX 1459 +#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MAX 1461 /*! transaction: transaction checkpoint prepare min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MIN 1460 +#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MIN 1462 /*! transaction: transaction checkpoint prepare most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RECENT 1461 +#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RECENT 1463 /*! 
transaction: transaction checkpoint prepare total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_TOTAL 1462 +#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_TOTAL 1464 /*! transaction: transaction checkpoint scrub dirty target */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1463 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1465 /*! transaction: transaction checkpoint scrub time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1464 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1466 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1465 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1467 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1466 +#define WT_STAT_CONN_TXN_CHECKPOINT 1468 /*! transaction: transaction checkpoints due to obsolete pages */ -#define WT_STAT_CONN_TXN_CHECKPOINT_OBSOLETE_APPLIED 1467 +#define WT_STAT_CONN_TXN_CHECKPOINT_OBSOLETE_APPLIED 1469 /*! * transaction: transaction checkpoints skipped because database was * clean */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1468 +#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1470 /*! transaction: transaction failures due to history store */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1469 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1471 /*! * transaction: transaction fsync calls for checkpoint after allocating * the transaction ID */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1470 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1472 /*! * transaction: transaction fsync duration for checkpoint after * allocating the transaction ID (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1471 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1473 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1472 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1474 /*! 
transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1473 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1475 /*! transaction: transaction range of timestamps currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1474 +#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1476 /*! transaction: transaction range of timestamps pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1475 +#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1477 /*! * transaction: transaction range of timestamps pinned by the oldest * active read timestamp */ -#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_READER 1476 +#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_READER 1478 /*! * transaction: transaction range of timestamps pinned by the oldest * timestamp */ -#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1477 +#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1479 /*! transaction: transaction read timestamp of the oldest active reader */ -#define WT_STAT_CONN_TXN_TIMESTAMP_OLDEST_ACTIVE_READ 1478 +#define WT_STAT_CONN_TXN_TIMESTAMP_OLDEST_ACTIVE_READ 1480 /*! transaction: transaction rollback to stable currently running */ -#define WT_STAT_CONN_TXN_ROLLBACK_TO_STABLE_RUNNING 1479 +#define WT_STAT_CONN_TXN_ROLLBACK_TO_STABLE_RUNNING 1481 /*! transaction: transaction walk of concurrent sessions */ -#define WT_STAT_CONN_TXN_WALK_SESSIONS 1480 +#define WT_STAT_CONN_TXN_WALK_SESSIONS 1482 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1481 +#define WT_STAT_CONN_TXN_COMMIT 1483 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1482 +#define WT_STAT_CONN_TXN_ROLLBACK 1484 /*! transaction: update conflicts */ -#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1483 +#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1485 /*! 
* @} diff --git a/src/third_party/wiredtiger/src/reconcile/rec_row.c b/src/third_party/wiredtiger/src/reconcile/rec_row.c index 99d887da573..002086f540e 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_row.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_row.c @@ -931,6 +931,11 @@ __wt_rec_row_leaf( WT_ERR(__wt_hs_delete_key_from_ts(session, hs_cursor, btree->id, tmpkey, WT_TS_NONE, false, F_ISSET(r, WT_REC_CHECKPOINT_RUNNING))); + /* Fail 1% of the time. */ + if (F_ISSET(r, WT_REC_EVICT) && + __wt_failpoint( + session, WT_TIMING_STRESS_FAILPOINT_HISTORY_STORE_DELETE_KEY_FROM_TS, 1)) + WT_ERR(EBUSY); WT_STAT_CONN_INCR(session, cache_hs_key_truncate_onpage_removal); WT_STAT_DATA_INCR(session, cache_hs_key_truncate_onpage_removal); } diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index cf9d1be3175..03b5860c22e 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -2326,8 +2326,7 @@ __rec_hs_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r) for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i) if (multi->supd != NULL) { - WT_ERR(__wt_hs_insert_updates( - session, r->page, multi, &r->cache_write_hs, F_ISSET(r, WT_REC_CHECKPOINT_RUNNING))); + WT_ERR(__wt_hs_insert_updates(session, r, multi)); if (!multi->supd_restore) { __wt_free(session, multi->supd); multi->supd_entries = 0; diff --git a/src/third_party/wiredtiger/src/support/err.c b/src/third_party/wiredtiger/src/support/err.c index 81212ff65da..62ed921edf5 100644 --- a/src/third_party/wiredtiger/src/support/err.c +++ b/src/third_party/wiredtiger/src/support/err.c @@ -437,6 +437,31 @@ __wt_ext_err_printf(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char } /* + * __wt_failpoint -- + * A generic failpoint function, it will return true if the failpoint triggers. Takes a double + * representing the probability of the failpoint occurring. 
Supports percentages with two + * decimal places. + */ +bool +__wt_failpoint(WT_SESSION_IMPL *session, uint64_t conn_flag, double probability) +{ + WT_CONNECTION_IMPL *conn; + uint32_t ratio; + + conn = S2C(session); + /* To support two decimal places we multiply the percent change of occurring by 100. */ + ratio = (uint32_t)(probability * 100); + + WT_ASSERT(session, probability >= 0 && probability <= 100); + + if (FLD_ISSET(conn->timing_stress_flags, conn_flag)) { + if (__wt_random(&session->rnd) % 10000 <= ratio) + return (true); + } + return (false); +} + +/* * __wt_verbose_worker -- * Verbose message. */ diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c index c33d3b60bef..52db5e50094 100644 --- a/src/third_party/wiredtiger/src/support/stat.c +++ b/src/third_party/wiredtiger/src/support/stat.c @@ -1416,6 +1416,9 @@ static const char *const __stats_connection_desc[] = { "transaction: prepared transactions committed", "transaction: prepared transactions currently active", "transaction: prepared transactions rolled back", + "transaction: prepared transactions rolled back and do not remove the history store entry", + "transaction: prepared transactions rolled back and fix the history store entry with checkpoint " + "reserved transaction id", "transaction: query timestamp calls", "transaction: race to read prepared update retry", "transaction: rollback to stable calls", @@ -1942,6 +1945,8 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->txn_prepare_commit = 0; stats->txn_prepare_active = 0; stats->txn_prepare_rollback = 0; + stats->txn_prepare_rollback_do_not_remove_hs_update = 0; + stats->txn_prepare_rollback_fix_hs_update_with_ckpt_reserved_txnid = 0; stats->txn_query_ts = 0; stats->txn_read_race_prepare_update = 0; stats->txn_rts = 0; @@ -2477,6 +2482,10 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS * to->txn_prepare_commit += WT_STAT_READ(from, 
txn_prepare_commit); to->txn_prepare_active += WT_STAT_READ(from, txn_prepare_active); to->txn_prepare_rollback += WT_STAT_READ(from, txn_prepare_rollback); + to->txn_prepare_rollback_do_not_remove_hs_update += + WT_STAT_READ(from, txn_prepare_rollback_do_not_remove_hs_update); + to->txn_prepare_rollback_fix_hs_update_with_ckpt_reserved_txnid += + WT_STAT_READ(from, txn_prepare_rollback_fix_hs_update_with_ckpt_reserved_txnid); to->txn_query_ts += WT_STAT_READ(from, txn_query_ts); to->txn_read_race_prepare_update += WT_STAT_READ(from, txn_read_race_prepare_update); to->txn_rts += WT_STAT_READ(from, txn_rts); diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index 8c0c2bcb305..29c183789e4 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ b/src/third_party/wiredtiger/src/txn/txn.c @@ -866,6 +866,7 @@ __txn_locate_hs_record(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, WT_PAGE * WT_PUBLISH(chain->next, upd); *upd_appended = true; + *fix_updp = upd; __wt_cache_page_inmem_incr(session, page, total_size); if (0) { @@ -992,6 +993,7 @@ __txn_fixup_prepared_update( WT_ITEM hs_value; WT_TIME_WINDOW tw; WT_TXN *txn; + WT_TXN_GLOBAL *txn_global; uint32_t txn_flags; #ifdef HAVE_DIAGNOSTIC uint64_t hs_upd_type; @@ -999,6 +1001,7 @@ __txn_fixup_prepared_update( #endif txn = session->txn; + txn_global = &S2C(session)->txn_global; WT_TIME_WINDOW_INIT(&tw); /* @@ -1008,9 +1011,6 @@ __txn_fixup_prepared_update( txn_flags = FLD_MASK(txn->flags, WT_TXN_ERROR | WT_TXN_PREPARE); F_CLR(txn, txn_flags); - /* The value older than the prepared update in the history store must be a full value. */ - WT_ASSERT(session, fix_upd->type == WT_UPDATE_STANDARD); - /* * If the history update already has a stop time point and we are committing the prepared update * there is no work to do. 
@@ -1038,8 +1038,48 @@ __txn_fixup_prepared_update( hs_cursor->set_value(hs_cursor, &tw, tw.durable_stop_ts, tw.durable_start_ts, (uint64_t)WT_UPDATE_STANDARD, &hs_value); WT_ERR(hs_cursor->update(hs_cursor)); - } else - WT_ERR(hs_cursor->remove(hs_cursor)); + } else { + /* + * Remove the history store entry if a checkpoint is not running, otherwise place a + * tombstone in front of the history store entry if it doesn't have a stop timestamp. + */ + if (txn_global->checkpoint_running) { + /* Don't update the history store entry if the entry already has a stop timestamp. */ + if (fix_upd->type != WT_UPDATE_TOMBSTONE) { + /* + * When the history store's update start transaction id is greater than the + * checkpoint's reserved transaction id, the durable timestamp of this update is + * guaranteed to be greater than the checkpoint timestamp, as such there is no need + * to save this unstable update in the history store. + */ + if (fix_upd->txnid > txn_global->checkpoint_reserved_txn_id) + WT_ERR(hs_cursor->remove(hs_cursor)); + else { + tw.durable_stop_ts = fix_upd->durable_ts; + tw.stop_ts = fix_upd->start_ts; + + /* + * Set the stop transaction id of the time window to the checkpoint reserved + * transaction id. As such the tombstone won't be visible to rollback to stable, + * additionally checkpoint garbage collection cannot clean it up as it greater + * than the globally visible transaction id. 
+ */ + tw.stop_txn = txn_global->checkpoint_reserved_txn_id; + WT_TIME_WINDOW_SET_START(&tw, fix_upd); + + hs_value.data = fix_upd->data; + hs_value.size = fix_upd->size; + hs_cursor->set_value(hs_cursor, &tw, tw.durable_stop_ts, tw.durable_start_ts, + (uint64_t)WT_UPDATE_STANDARD, &hs_value); + WT_ERR(hs_cursor->update(hs_cursor)); + WT_STAT_CONN_INCR( + session, txn_prepare_rollback_fix_hs_update_with_ckpt_reserved_txnid); + } + } else + WT_STAT_CONN_INCR(session, txn_prepare_rollback_do_not_remove_hs_update); + } else + WT_ERR(hs_cursor->remove(hs_cursor)); + } err: F_SET(txn, txn_flags); diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c index c32cd43b0bc..1c5ce6c32ee 100644 --- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c +++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c @@ -628,6 +628,34 @@ __checkpoint_prepare(WT_SESSION_IMPL *session, bool *trackingp, const char *cfg[ __wt_writeunlock(session, &txn_global->rwlock); /* + * Allocate a reserved transaction id that will be used for removing history entries when a + * prepare transaction rollback occurs in parallel to a checkpoint. Ensure that this transaction + * id is published before taking the checkpoint's snapshot. + * + * Other alternatives to solve the issue is by using a transaction id that is allocated after + * the second checkpoint snapshot. This approach has issues of using a stale reserved + * transaction id for the history store updates and the data store page is skipped in the + * checkpoint. To address the use of stale reserved transaction id, all the data store pages + * that have restored prepared updates need to get checkpointed forcefully. + * + * The checkpoint snapshot max can also be used for this purpose, instead of allocating a new + * reserved transaction id. This solution also have to force all the pages with restored + * prepared updates to be part of the current checkpoint. 
Therefore, we think it is better to + * use a dedicated transaction id as the checkpoint snapshot max is allocated to a session and + * used for other operations can lead to confusion when an issue occurs. + */ + if (conn->ckpt_reserved_session != NULL) { + WT_RET(__wt_txn_begin(conn->ckpt_reserved_session, NULL)); + WT_ERR(__wt_txn_id_check(conn->ckpt_reserved_session)); + txn_global->checkpoint_reserved_txn_id = conn->ckpt_reserved_session->txn->id; + + /* Add a one second wait to simulate reserved transaction id race with prepared rollback. */ + tsp.tv_sec = 1; + tsp.tv_nsec = 0; + __checkpoint_timing_stress(session, WT_TIMING_STRESS_CHECKPOINT_RESERVED_TXNID_DELAY, &tsp); + } + + /* * Refresh our snapshot here without publishing our shared ids to the world, doing so prevents * us from racing with the stable timestamp moving ahead of current snapshot. i.e. if the stable * timestamp moves after we begin the checkpoint transaction but before we set the checkpoint @@ -640,6 +668,13 @@ __checkpoint_prepare(WT_SESSION_IMPL *session, bool *trackingp, const char *cfg[ /* Flag as unused for non diagnostic builds. */ WT_UNUSED(original_snap_min); + /* Assert that the checkpoint reserved transaction id not visible in the checkpoint snapshot. 
*/ + WT_ASSERT(session, + conn->ckpt_reserved_session == NULL || + !__wt_txn_visible_id_snapshot(txn_global->checkpoint_reserved_txn_id, + session->txn->snap_min, session->txn->snap_max, session->txn->snapshot, + session->txn->snapshot_count)); + if (use_timestamp) __wt_verbose_timestamp( session, txn_global->checkpoint_timestamp, "Checkpoint requested at stable timestamp"); @@ -657,6 +692,10 @@ __checkpoint_prepare(WT_SESSION_IMPL *session, bool *trackingp, const char *cfg[ __wt_epoch(session, &conn->ckpt_prep_end); WT_STAT_CONN_SET(session, txn_checkpoint_prep_running, 0); + +err: + if (conn->ckpt_reserved_session != NULL) + __wt_txn_release(conn->ckpt_reserved_session); return (ret); } diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c index fb85adc9745..c19f63d84bc 100644 --- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c +++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c @@ -326,12 +326,13 @@ __rollback_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE *page WT_DECL_RET; WT_TIME_WINDOW *hs_tw; WT_UPDATE *tombstone, *upd; - wt_timestamp_t hs_durable_ts, hs_start_ts, hs_stop_durable_ts, newer_hs_durable_ts; + wt_timestamp_t hs_durable_ts, hs_start_ts, hs_stop_durable_ts, newer_hs_durable_ts, pinned_ts; uint64_t hs_counter, type_full; uint32_t hs_btree_id; uint8_t *memp; uint8_t type; char ts_string[4][WT_TS_INT_STRING_SIZE]; + char tw_string[WT_TIME_STRING_SIZE]; bool valid_update_found; #ifdef HAVE_DIAGNOSTIC bool first_record; @@ -386,6 +387,8 @@ __rollback_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE *page WT_ERR(__wt_buf_set(session, full_value, full_value->data, full_value->size)); newer_hs_durable_ts = unpack->tw.durable_start_ts; + __wt_txn_pinned_timestamp(session, &pinned_ts); + /* Open a history store table cursor. 
*/ WT_ERR(__wt_curhs_open(session, NULL, &hs_cursor)); /* @@ -412,6 +415,26 @@ __rollback_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE *page hs_cursor, &hs_stop_durable_ts, &hs_durable_ts, &type_full, hs_value)); type = (uint8_t)type_full; + /* Retrieve the time window from the history cursor. */ + __wt_hs_upd_time_window(hs_cursor, &hs_tw); + + /* + * We have a tombstone on the history update and it is obsolete according to the timestamp + * and txnid, so no need to restore it. These obsolete updates are written to the disk when + * they are not obsolete at the time of reconciliation by an eviction thread and later they + * become obsolete according to the checkpoint. + */ + if (__rollback_txn_visible_id(session, hs_tw->stop_txn) && + hs_stop_durable_ts <= pinned_ts) { + __wt_verbose(session, WT_VERB_RECOVERY_RTS(session), + "history store stop is obsolete with time window: %s and pinned timestamp: %s", + __wt_time_window_to_string(hs_tw, tw_string), + __wt_timestamp_to_string(pinned_ts, ts_string[0])); + WT_ERR(hs_cursor->remove(hs_cursor)); + WT_STAT_CONN_DATA_INCR(session, txn_rts_hs_removed); + continue; + } + /* * Do not include history store updates greater than on-disk data store version to construct * a full update to restore except when the on-disk update is prepared. Including more @@ -446,6 +469,11 @@ __rollback_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE *page * records newer than or equal to the onpage value if eviction runs concurrently with * checkpoint. In that case, don't verify the first record. * + * It is possible during a prepared transaction rollback, the history store update that have + * its own stop timestamp doesn't get removed leads to duplicate records in history store + * after further operations on that same key. Rollback to stable should ignore such records + * for timestamp ordering verification. 
+ * * If we have fixed the out-of-order timestamps, then the newer update reinserted with an * older timestamp may have a durable timestamp that is smaller than the current stop * durable timestamp. @@ -458,14 +486,12 @@ __rollback_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE *page */ WT_ASSERT(session, hs_stop_durable_ts <= newer_hs_durable_ts || hs_start_ts == hs_stop_durable_ts || - hs_start_ts == newer_hs_durable_ts || first_record || hs_stop_durable_ts == WT_TS_MAX); + hs_start_ts == newer_hs_durable_ts || newer_hs_durable_ts == hs_durable_ts || + first_record || hs_stop_durable_ts == WT_TS_MAX); if (hs_stop_durable_ts < newer_hs_durable_ts) WT_STAT_CONN_DATA_INCR(session, txn_rts_hs_stop_older_than_newer_start); - /* Retrieve the time window from the history cursor. */ - __wt_hs_upd_time_window(hs_cursor, &hs_tw); - /* * Stop processing when we find a stable update according to the given timestamp and * transaction id. diff --git a/src/third_party/wiredtiger/test/checkpoint/checkpointer.c b/src/third_party/wiredtiger/test/checkpoint/checkpointer.c index a2445225e2e..1c8b5135d49 100644 --- a/src/third_party/wiredtiger/test/checkpoint/checkpointer.c +++ b/src/third_party/wiredtiger/test/checkpoint/checkpointer.c @@ -33,7 +33,6 @@ static WT_THREAD_RET clock_thread(void *); static int compare_cursors(WT_CURSOR *, const char *, WT_CURSOR *, const char *); static int diagnose_key_error(WT_CURSOR *, int, WT_CURSOR *, int); static int real_checkpointer(void); -static int verify_consistency(WT_SESSION *, char *); /* * set_stable -- @@ -44,7 +43,11 @@ set_stable(void) { char buf[128]; - testutil_check(__wt_snprintf(buf, sizeof(buf), "stable_timestamp=%x", g.ts_stable)); + if (g.race_timetamps) + testutil_check(__wt_snprintf( + buf, sizeof(buf), "stable_timestamp=%x,oldest_timestamp=%x", g.ts_stable, g.ts_stable)); + else + testutil_check(__wt_snprintf(buf, sizeof(buf), "stable_timestamp=%x", g.ts_stable)); testutil_check(g.conn->set_timestamp(g.conn, 
buf)); } @@ -97,7 +100,14 @@ clock_thread(void *arg) while (g.running) { __wt_writelock(session, &g.clock_lock); - ++g.ts_stable; + if (g.prepare) + /* + * Leave a gap between timestamps so prepared insert followed by remove don't overlap + * with stable timestamp. + */ + g.ts_stable += 5; + else + ++g.ts_stable; set_stable(); if (g.ts_stable % 997 == 0) { /* @@ -147,6 +157,7 @@ real_checkpointer(void) { WT_RAND_STATE rnd; WT_SESSION *session; + wt_timestamp_t stable_ts, oldest_ts, verify_ts; uint64_t delay; int ret; char buf[128], timestamp_buf[64]; @@ -154,6 +165,7 @@ real_checkpointer(void) checkpoint_config = "use_timestamp=false"; g.ts_oldest = 0; + verify_ts = WT_TS_NONE; if (g.running == 0) return (log_print_err("Checkpoint thread started stopped\n", EINVAL, 1)); @@ -179,12 +191,18 @@ real_checkpointer(void) * Check for consistency of online data, here we don't expect to see the version at the * checkpoint just a consistent view across all tables. */ - if ((ret = verify_consistency(session, NULL)) != 0) + if ((ret = verify_consistency(session, WT_TS_NONE)) != 0) return (log_print_err("verify_consistency (online)", ret, 1)); if (g.use_timestamps) { - WT_ORDERED_READ(g.ts_oldest, g.ts_stable); testutil_check(g.conn->query_timestamp(g.conn, timestamp_buf, "get=stable")); + testutil_timestamp_parse(timestamp_buf, &stable_ts); + oldest_ts = g.ts_oldest; + if (stable_ts <= oldest_ts) + verify_ts = stable_ts; + else + verify_ts = __wt_random(&rnd) % (stable_ts - oldest_ts + 1) + oldest_ts; + WT_ORDERED_READ(g.ts_oldest, g.ts_stable); } /* Execute a checkpoint */ @@ -201,7 +219,7 @@ real_checkpointer(void) * without timestamps as such we don't perform a verification here in the non-timestamped * scenario. 
*/ - if (g.use_timestamps && (ret = verify_consistency(session, timestamp_buf)) != 0) + if (g.use_timestamps && (ret = verify_consistency(session, verify_ts)) != 0) return (log_print_err("verify_consistency (timestamps)", ret, 1)); /* Advance the oldest timestamp to the most recently set stable timestamp. */ @@ -229,8 +247,8 @@ done: * Open a cursor on each table at the last checkpoint and walk through the tables in parallel. * The key/values should match across all tables. */ -static int -verify_consistency(WT_SESSION *session, char *stable_timestamp) +int +verify_consistency(WT_SESSION *session, wt_timestamp_t verify_ts) { WT_CURSOR **cursors; uint64_t key_count; @@ -244,12 +262,11 @@ verify_consistency(WT_SESSION *session, char *stable_timestamp) if (cursors == NULL) return (log_print_err("verify_consistency", ENOMEM, 1)); - if (stable_timestamp != NULL) { - testutil_check(__wt_snprintf( - cfg_buf, sizeof(cfg_buf), "isolation=snapshot,read_timestamp=%s", stable_timestamp)); - } else { + if (verify_ts != WT_TS_NONE) + testutil_check(__wt_snprintf(cfg_buf, sizeof(cfg_buf), + "isolation=snapshot,read_timestamp=%" PRIx64 ",roundup_timestamps=read", verify_ts)); + else testutil_check(__wt_snprintf(cfg_buf, sizeof(cfg_buf), "isolation=snapshot")); - } testutil_check(session->begin_transaction(session, cfg_buf)); for (i = 0; i < g.ntables; i++) { @@ -267,13 +284,19 @@ verify_consistency(WT_SESSION *session, char *stable_timestamp) } while (ret == 0) { - ret = cursors[0]->next(cursors[0]); + while ((ret = cursors[0]->next(cursors[0])) != 0) { + if (ret == WT_NOTFOUND) + break; + if (ret != WT_PREPARE_CONFLICT) { + (void)log_print_err("cursor->next", ret, 1); + goto err; + } + __wt_yield(); + } + if (ret == 0) ++key_count; - else if (ret != WT_NOTFOUND) { - (void)log_print_err("cursor->next", ret, 1); - goto err; - } + /* * Check to see that all remaining cursors have the same key/value pair. 
*/ @@ -283,10 +306,14 @@ verify_consistency(WT_SESSION *session, char *stable_timestamp) */ if (g.cookies[i].type == LSM) continue; - t_ret = cursors[i]->next(cursors[i]); - if (t_ret != 0 && t_ret != WT_NOTFOUND) { - (void)log_print_err("cursor->next", t_ret, 1); - goto err; + while ((t_ret = cursors[i]->next(cursors[i])) != 0) { + if (t_ret == WT_NOTFOUND) + break; + if (t_ret != WT_PREPARE_CONFLICT) { + (void)log_print_err("cursor->next", t_ret, 1); + goto err; + } + __wt_yield(); } if (ret == WT_NOTFOUND && t_ret == WT_NOTFOUND) @@ -306,8 +333,8 @@ verify_consistency(WT_SESSION *session, char *stable_timestamp) } } } - printf("Finished verifying a %s with %d tables and %" PRIu64 " keys\n", - stable_timestamp != NULL ? "checkpoint" : "snapshot", g.ntables, key_count); + printf("Finished verifying with %d tables and %" PRIu64 " keys at timestamp %" PRIu64 "\n", + g.ntables, key_count, verify_ts); fflush(stdout); err: diff --git a/src/third_party/wiredtiger/test/checkpoint/recovery-test.sh b/src/third_party/wiredtiger/test/checkpoint/recovery-test.sh new file mode 100755 index 00000000000..fc98ff2f463 --- /dev/null +++ b/src/third_party/wiredtiger/test/checkpoint/recovery-test.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash + +set -x + +home=${1:-WT_TEST} +backup=$home.backup +recovery=$home.recovery + +#./t -t r -W 3 -D -X -n 100000 -k 100000 -C cache_size=100MB -h $home > $home.out 2>&1 & +./t -t r -s 2 -m -W 3 -D -p -x -n 100000 -k 100000 -C cache_size=100MB -h $home > $home.out 2>&1 & +pid=$! + +trap "kill -9 $pid" 0 1 2 3 13 15 + +# Wait for the test to start running +while ! 
grep -q "Finished a checkpoint" $home.out && kill -0 $pid ; do + sleep 1 +done + +while kill -STOP $pid ; do + rm -rf $backup $recovery ; mkdir $backup ; mkdir $recovery + # Make sure all threads are stopped before copying files + sleep 1 + cp $home/* $backup + kill -CONT $pid + cp $backup/* $recovery + ./t -t r -D -v -h $recovery || exit 1 +done + +exit 0 diff --git a/src/third_party/wiredtiger/test/checkpoint/smoke.sh b/src/third_party/wiredtiger/test/checkpoint/smoke.sh index 962b1893305..c3398e261d5 100755 --- a/src/third_party/wiredtiger/test/checkpoint/smoke.sh +++ b/src/third_party/wiredtiger/test/checkpoint/smoke.sh @@ -23,10 +23,10 @@ echo "checkpoint: 6 column-store tables, named checkpoint with prepare" $TEST_WRAPPER ./t -c 'TeSt' -T 6 -t c -p echo "checkpoint: column-store tables, stress history store. Sweep and timestamps" -$TEST_WRAPPER ./t -t c -W 3 -r 2 -D -s -x -n 100000 -k 100000 -C cache_size=100MB +$TEST_WRAPPER ./t -t c -W 3 -r 2 -D -s 1 -x -n 100000 -k 100000 -C cache_size=100MB echo "checkpoint: column-store tables, Sweep and timestamps" -$TEST_WRAPPER ./t -t c -W 3 -r 2 -s -x -n 100000 -k 100000 -C cache_size=100MB +$TEST_WRAPPER ./t -t c -W 3 -r 2 -s 1 -x -n 100000 -k 100000 -C cache_size=100MB echo "checkpoint: 6 LSM tables" $TEST_WRAPPER ./t -T 6 -t l @@ -47,13 +47,13 @@ echo "checkpoint: 6 row-store tables, named checkpoint with prepare" $TEST_WRAPPER ./t -c 'TeSt' -T 6 -t r -p echo "checkpoint: row-store tables, stress history store. 
Sweep and timestamps" -$TEST_WRAPPER ./t -t r -W 3 -r 2 -D -s -x -n 100000 -k 100000 -C cache_size=100MB +$TEST_WRAPPER ./t -t r -W 3 -r 2 -D -s 1 -x -n 100000 -k 100000 -C cache_size=100MB echo "checkpoint: row-store tables, Sweep and timestamps" -$TEST_WRAPPER ./t -t r -W 3 -r 2 -s -x -n 100000 -k 100000 -C cache_size=100MB +$TEST_WRAPPER ./t -t r -W 3 -r 2 -s 1 -x -n 100000 -k 100000 -C cache_size=100MB echo "checkpoint: 3 mixed tables, with sweep" -$TEST_WRAPPER ./t -T 3 -t m -W 3 -r 2 -s -n 100000 -k 100000 +$TEST_WRAPPER ./t -T 3 -t m -W 3 -r 2 -s 1 -n 100000 -k 100000 echo "checkpoint: 3 mixed tables, with timestamps" $TEST_WRAPPER ./t -T 3 -t m -W 3 -r 2 -x -n 100000 -k 100000 diff --git a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c index 1d4c99a2b03..924c7c33749 100644 --- a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c +++ b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c @@ -45,9 +45,10 @@ int main(int argc, char *argv[]) { table_type ttype; - int ch, cnt, ret, runs; + int ch, cnt, i, ret, runs; char *working_dir; const char *config_open; + bool verify_only; (void)testutil_set_progname(argv); @@ -63,9 +64,14 @@ main(int argc, char *argv[]) g.ntables = 3; g.nworkers = 1; g.sweep_stress = g.use_timestamps = false; + g.failpoint_hs_delete_key_from_ts = g.failpoint_hs_insert_1 = g.failpoint_hs_insert_2 = false; + g.hs_checkpoint_timing_stress = g.reserved_txnid_timing_stress = false; + g.checkpoint_slow_timing_stress = false; + g.mixed_mode_deletes = false; runs = 1; + verify_only = false; - while ((ch = __wt_getopt(progname, argc, argv, "C:c:Dh:k:l:n:pr:sT:t:W:x")) != EOF) + while ((ch = __wt_getopt(progname, argc, argv, "C:c:Dh:k:l:mn:pr:s:T:t:vW:xX")) != EOF) switch (ch) { case 'c': g.checkpoint_name = __wt_optarg; @@ -88,6 +94,9 @@ main(int argc, char *argv[]) return (EXIT_FAILURE); } break; + case 'm': + g.mixed_mode_deletes = true; + break; case 
'n': /* operations */ g.nops = (u_int)atoi(__wt_optarg); break; @@ -98,7 +107,31 @@ main(int argc, char *argv[]) runs = atoi(__wt_optarg); break; case 's': - g.sweep_stress = true; + switch (__wt_optarg[0]) { + case '1': + g.sweep_stress = true; + break; + case '2': + g.failpoint_hs_delete_key_from_ts = true; + break; + case '3': + g.failpoint_hs_insert_1 = true; + break; + case '4': + g.failpoint_hs_insert_2 = true; + break; + case '5': + g.hs_checkpoint_timing_stress = true; + break; + case '6': + g.reserved_txnid_timing_stress = true; + break; + case '7': + g.checkpoint_slow_timing_stress = true; + break; + default: + return (usage()); + } break; case 't': switch (__wt_optarg[0]) { @@ -121,12 +154,18 @@ main(int argc, char *argv[]) case 'T': g.ntables = atoi(__wt_optarg); break; + case 'v': + verify_only = true; + break; case 'W': g.nworkers = atoi(__wt_optarg); break; case 'x': g.use_timestamps = true; break; + case 'X': + g.use_timestamps = g.race_timetamps = true; + break; default: return (usage()); } @@ -145,7 +184,7 @@ main(int argc, char *argv[]) printf("%s: process %" PRIu64 "\n", progname, (uint64_t)getpid()); for (cnt = 1; (runs == 0 || cnt <= runs) && g.status == 0; ++cnt) { - cleanup(cnt == 1); /* Clean up previous runs */ + cleanup(cnt == 1 && !verify_only); /* Clean up previous runs */ printf(" %d: %d workers, %d tables\n", cnt, g.nworkers, g.ntables); @@ -155,6 +194,16 @@ main(int argc, char *argv[]) break; } + for (i = 0; i < g.ntables; ++i) { + g.cookies[i].id = i; + if (ttype == MIX) + g.cookies[i].type = (table_type)((i % MAX_TABLE_TYPE) + 1); + else + g.cookies[i].type = ttype; + testutil_check(__wt_snprintf( + g.cookies[i].uri, sizeof(g.cookies[i].uri), "%s%04d", URI_BASE, g.cookies[i].id)); + } + g.running = 1; if ((ret = wt_connect(config_open)) != 0) { @@ -162,8 +211,20 @@ main(int argc, char *argv[]) break; } + if (verify_only) { + WT_SESSION *session; + + if ((ret = g.conn->open_session(g.conn, NULL, NULL, &session)) != 0) { + 
(void)log_print_err("conn.open_session", ret, 1); + break; + } + + verify_consistency(session, WT_TS_NONE); + goto run_complete; + } + start_checkpoints(); - if ((ret = start_workers(ttype)) != 0) { + if ((ret = start_workers()) != 0) { (void)log_print_err("Start workers failed", ret, 1); break; } @@ -171,6 +232,7 @@ main(int argc, char *argv[]) g.running = 0; end_checkpoints(); +run_complete: free(g.cookies); g.cookies = NULL; if ((ret = wt_shutdown()) != 0) { @@ -187,7 +249,7 @@ main(int argc, char *argv[]) return (g.status); } -#define DEBUG_MODE_CFG ",debug_mode=(eviction=true,table_logging=true)" +#define DEBUG_MODE_CFG ",debug_mode=(eviction=true,table_logging=true),verbose=(recovery)" /* * wt_connect -- * Configure the WiredTiger connection. @@ -200,6 +262,24 @@ wt_connect(const char *config_open) }; int ret; char config[512]; + char timing_stress_cofing[512]; + bool timing_stress; + + timing_stress = false; + + if (g.sweep_stress || g.failpoint_hs_delete_key_from_ts || g.failpoint_hs_insert_1 || + g.failpoint_hs_insert_2 || g.hs_checkpoint_timing_stress || g.reserved_txnid_timing_stress || + g.checkpoint_slow_timing_stress) { + timing_stress = true; + testutil_check(__wt_snprintf(timing_stress_cofing, sizeof(timing_stress_cofing), + ",timing_stress_for_test=[%s%s%s%s%s%s%s]", g.sweep_stress ? "aggressive_sweep" : "", + g.failpoint_hs_delete_key_from_ts ? "failpoint_history_store_delete_key_from_ts" : "", + g.failpoint_hs_insert_1 ? "failpoint_history_store_insert_1" : "", + g.failpoint_hs_insert_2 ? "failpoint_history_store_insert_2" : "", + g.hs_checkpoint_timing_stress ? "history_store_checkpoint_delay" : "", + g.reserved_txnid_timing_stress ? "checkpoint_reserved_txnid_delay" : "", + g.checkpoint_slow_timing_stress ? "checkpoint_slow" : "")); + } /* * If we want to stress sweep, we have a lot of additional configuration settings to set. 
@@ -208,16 +288,17 @@ wt_connect(const char *config_open) testutil_check(__wt_snprintf(config, sizeof(config), "create,cache_cursors=false,statistics=(fast),statistics_log=(json,wait=1),error_prefix=" "\"%s\",file_manager=(close_handle_minimum=1,close_idle_time=1,close_scan_interval=1)," - "log=(enabled),cache_size=1GB,timing_stress_for_test=(aggressive_sweep)%s%s%s", - progname, g.debug_mode ? DEBUG_MODE_CFG : "", config_open == NULL ? "" : ",", - config_open == NULL ? "" : config_open)); - else + "log=(enabled),cache_size=1GB%s%s%s%s", + progname, timing_stress_cofing, g.debug_mode ? DEBUG_MODE_CFG : "", + config_open == NULL ? "" : ",", config_open == NULL ? "" : config_open)); + else { testutil_check(__wt_snprintf(config, sizeof(config), "create,cache_cursors=false,statistics=(fast),statistics_log=(json,wait=1),error_prefix=" - "\"%s\"%s%s%s", + "\"%s\"%s%s%s%s", progname, g.debug_mode ? DEBUG_MODE_CFG : "", config_open == NULL ? "" : ",", - config_open == NULL ? "" : config_open)); - + config_open == NULL ? "" : config_open, timing_stress ? 
timing_stress_cofing : "")); + } + printf("WT open config: %s\n", config); if ((ret = wiredtiger_open(g.home, &event_handler, config, &g.conn)) != 0) return (log_print_err("wiredtiger_open", ret, 1)); return (0); @@ -338,8 +419,8 @@ static int usage(void) { fprintf(stderr, - "usage: %s [-C wiredtiger-config] [-c checkpoint] [-h home] [-k keys]\n\t[-l log] [-n ops] " - "[-r runs] [-T table-config] [-t f|r|v]\n\t[-W workers]\n", + "usage: %s [-C wiredtiger-config] [-c checkpoint] [-h home] [-k keys]\n\t[-l log] [-m] " + "[-n ops] [-r runs] [-s 1|2|3|4] [-T table-config] [-t f|r|v]\n\t[-W workers]\n", progname); fprintf(stderr, "%s", "\t-C specify wiredtiger_open configuration arguments\n" @@ -347,12 +428,23 @@ usage(void) "\t-h set a database home directory\n" "\t-k set number of keys to load\n" "\t-l specify a log file\n" + "\t-m run with mixed mode delete operations\n" "\t-n set number of operations each thread does\n" "\t-p use prepare\n" "\t-r set number of runs (0 for continuous)\n" + "\t-s specify which timing stress configuration to use ( 1 | 2 | 3 | 4 | 5 | 6 | 7 )\n" + "\t\t1: sweep_stress\n" + "\t\t2: failpoint_hs_delete_key_from_ts\n" + "\t\t3: failpoint_hs_insert_1\n" + "\t\t4: failpoint_hs_insert_2\n" + "\t\t5: hs_checkpoint_timing_stress\n" + "\t\t6: reserved_txnid_timing_stress\n" + "\t\t7: checkpoint_slow_timing_stress\n" "\t-T specify a table configuration\n" "\t-t set a file type ( col | mix | row | lsm )\n" + "\t-v verify only\n" "\t-W set number of worker threads\n" - "\t-x use timestamps\n"); + "\t-x use timestamps\n" + "\t-X race timestamp updates with checkpoints\n"); return (EXIT_FAILURE); } diff --git a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h index 7950fc8bb2e..b3b65c5d828 100644 --- a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h +++ b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h @@ -52,32 +52,41 @@ typedef struct { } COOKIE; 
typedef struct { - char *home; /* Home directory */ - const char *checkpoint_name; /* Checkpoint name */ - WT_CONNECTION *conn; /* WiredTiger connection */ - bool debug_mode; /* History store stress test */ - u_int nkeys; /* Keys to load */ - u_int nops; /* Operations per thread */ - FILE *logfp; /* Message log file. */ - int nworkers; /* Number workers configured */ - int ntables; /* Number tables configured */ - int ntables_created; /* Number tables opened */ - volatile int running; /* Whether to stop */ - int status; /* Exit status */ - bool sweep_stress; /* Sweep stress test */ - u_int ts_oldest; /* Current oldest timestamp */ - u_int ts_stable; /* Current stable timestamp */ - bool use_timestamps; /* Use txn timestamps */ - bool prepare; /* Use prepare transactions */ - COOKIE *cookies; /* Per-thread info */ - WT_RWLOCK clock_lock; /* Clock synchronization */ - wt_thread_t checkpoint_thread; /* Checkpoint thread */ - wt_thread_t clock_thread; /* Clock thread */ + char *home; /* Home directory */ + const char *checkpoint_name; /* Checkpoint name */ + WT_CONNECTION *conn; /* WiredTiger connection */ + bool debug_mode; /* History store stress test */ + u_int nkeys; /* Keys to load */ + u_int nops; /* Operations per thread */ + FILE *logfp; /* Message log file. */ + int nworkers; /* Number workers configured */ + int ntables; /* Number tables configured */ + int ntables_created; /* Number tables opened */ + volatile int running; /* Whether to stop */ + int status; /* Exit status */ + bool sweep_stress; /* Sweep stress test */ + bool failpoint_hs_delete_key_from_ts; /* Failpoint for hs key deletion. */ + bool failpoint_hs_insert_1; /* Failpoint for hs insertion. */ + bool failpoint_hs_insert_2; /* Failpoint for hs insertion. 
*/ + bool hs_checkpoint_timing_stress; /* History store checkpoint timing stress */ + bool reserved_txnid_timing_stress; /* Reserved transaction id timing stress */ + bool checkpoint_slow_timing_stress; /* Checkpoint slow timing stress */ + u_int ts_oldest; /* Current oldest timestamp */ + u_int ts_stable; /* Current stable timestamp */ + bool mixed_mode_deletes; /* Run with mixed mode deletes */ + bool use_timestamps; /* Use txn timestamps */ + bool race_timetamps; /* Async update to oldest timestamp */ + bool prepare; /* Use prepare transactions */ + COOKIE *cookies; /* Per-thread info */ + WT_RWLOCK clock_lock; /* Clock synchronization */ + wt_thread_t checkpoint_thread; /* Checkpoint thread */ + wt_thread_t clock_thread; /* Clock thread */ } GLOBAL; extern GLOBAL g; void end_checkpoints(void); int log_print_err(const char *, int, int); void start_checkpoints(void); -int start_workers(table_type); +int start_workers(void); const char *type_to_string(table_type); +int verify_consistency(WT_SESSION *, wt_timestamp_t); diff --git a/src/third_party/wiredtiger/test/checkpoint/workers.c b/src/third_party/wiredtiger/test/checkpoint/workers.c index 05b9a83b75b..de2798413ee 100644 --- a/src/third_party/wiredtiger/test/checkpoint/workers.c +++ b/src/third_party/wiredtiger/test/checkpoint/workers.c @@ -28,6 +28,9 @@ #include "test_checkpoint.h" +#define MAX_MODIFY_ENTRIES 5 + +static char modify_repl[256]; static int real_worker(void); static WT_THREAD_RET worker(void *); @@ -62,12 +65,25 @@ create_table(WT_SESSION *session, COOKIE *cookie) } /* + * modify_repl_init -- + * Initialize the replacement information. + */ +static void +modify_repl_init(void) +{ + size_t i; + + for (i = 0; i < sizeof(modify_repl); ++i) + modify_repl[i] = "0123456789"[i % 10]; +} + +/* * start_workers -- * Setup the configuration for the tables being populated, then start the worker thread(s) and * wait for them to finish. 
*/ int -start_workers(table_type type) +start_workers(void) { struct timeval start, stop; WT_SESSION *session; @@ -77,6 +93,8 @@ start_workers(table_type type) ret = 0; + modify_repl_init(); + /* Create statistics and thread structures. */ if ((tids = calloc((size_t)(g.nworkers), sizeof(*tids))) == NULL) return (log_print_err("calloc", errno, 1)); @@ -85,16 +103,9 @@ start_workers(table_type type) (void)log_print_err("conn.open_session", ret, 1); goto err; } - /* Setup the cookies */ - for (i = 0; i < g.ntables; ++i) { - g.cookies[i].id = i; - if (type == MIX) - g.cookies[i].type = (table_type)((i % MAX_TABLE_TYPE) + 1); - else - g.cookies[i].type = type; - testutil_check(__wt_snprintf( - g.cookies[i].uri, sizeof(g.cookies[i].uri), "%s%04d", URI_BASE, g.cookies[i].id)); + /* Create tables */ + for (i = 0; i < g.ntables; ++i) { /* Should probably be atomic to avoid races. */ if ((ret = create_table(session, &g.cookies[i])) != 0) goto err; @@ -123,13 +134,55 @@ err: } /* + * modify_build -- + * Generate a set of modify vectors. + */ +static void +modify_build(WT_MODIFY *entries, int *nentriesp, u_int seed) +{ + int i, nentries; + + /* Deterministically generate modifies based on the seed. */ + nentries = (int)seed % MAX_MODIFY_ENTRIES + 1; + for (i = 0; i < nentries; ++i) { + entries[i].data.data = modify_repl + seed % 10; + entries[i].data.size = seed % 8 + 1; + entries[i].offset = seed % 40; + entries[i].size = seed % 10 + 1; + } + + *nentriesp = (int)nentries; +} + +/* + * worker_mm_delete -- + * Delete a key with a mixed mode timestamp. + */ +static inline int +worker_mm_delete(WT_CURSOR *cursor, uint64_t keyno) +{ + int ret; + + cursor->set_key(cursor, keyno); + ret = cursor->search(cursor); + if (ret == 0) + ret = cursor->remove(cursor); + else if (ret == WT_NOTFOUND) + ret = 0; + + return (ret); +} + +/* * worker_op -- * Write operation. 
*/ static inline int worker_op(WT_CURSOR *cursor, uint64_t keyno, u_int new_val) { + WT_MODIFY entries[MAX_MODIFY_ENTRIES]; int cmp, ret; + int nentries; char valuebuf[64]; cursor->set_key(cursor, keyno); @@ -138,7 +191,7 @@ worker_op(WT_CURSOR *cursor, uint64_t keyno, u_int new_val) if ((ret = cursor->search_near(cursor, &cmp)) != 0) { if (ret == WT_NOTFOUND) return (0); - if (ret == WT_ROLLBACK) + if (ret == WT_ROLLBACK || ret == WT_PREPARE_CONFLICT) return (WT_ROLLBACK); return (log_print_err("cursor.search_near", ret, 1)); } @@ -169,13 +222,31 @@ worker_op(WT_CURSOR *cursor, uint64_t keyno, u_int new_val) testutil_check(cursor->reset(cursor)); } else if (new_val % 39 < 10) { if ((ret = cursor->search(cursor)) != 0 && ret != WT_NOTFOUND) { - if (ret == WT_ROLLBACK) + if (ret == WT_ROLLBACK || ret == WT_PREPARE_CONFLICT) return (WT_ROLLBACK); return (log_print_err("cursor.search", ret, 1)); } if (g.sweep_stress) testutil_check(cursor->reset(cursor)); } else { + if (new_val % 39 < 30) { + // Do modify + ret = cursor->search(cursor); + if (ret == 0) { + modify_build(entries, &nentries, new_val); + if ((ret = cursor->modify(cursor, entries, nentries)) != 0) { + if (ret == WT_ROLLBACK) + return (WT_ROLLBACK); + return (log_print_err("cursor.modify", ret, 1)); + } + } else if (ret != WT_NOTFOUND) { + if (ret == WT_ROLLBACK || ret == WT_PREPARE_CONFLICT) + return (WT_ROLLBACK); + return (log_print_err("cursor.search", ret, 1)); + } + } + + // If key doesn't exist, turn modify into an insert. 
testutil_check(__wt_snprintf(valuebuf, sizeof(valuebuf), "%052u", new_val)); cursor->set_value(cursor, valuebuf); if ((ret = cursor->insert(cursor)) != 0) { @@ -220,11 +291,12 @@ real_worker(void) int j, ret, t_ret; char buf[128]; const char *begin_cfg; - bool reopen_cursors, start_txn; + bool reopen_cursors, new_txn, start_txn; ret = t_ret = 0; reopen_cursors = false; start_txn = true; + new_txn = false; if ((cursors = calloc((size_t)(g.ntables), sizeof(WT_CURSOR *))) == NULL) return (log_print_err("malloc", ENOMEM, 1)); @@ -253,9 +325,37 @@ real_worker(void) (void)log_print_err("real_worker:begin_transaction", ret, 1); goto err; } + new_txn = true; start_txn = false; } keyno = __wt_random(&rnd) % g.nkeys + 1; + /* If we have specified to run with mix mode deletes we need to do it in it's own txn. */ + if (g.use_timestamps && g.mixed_mode_deletes && new_txn && __wt_random(&rnd) % 72 == 0) { + new_txn = false; + for (j = 0; ret == 0 && j < g.ntables; j++) { + ret = worker_mm_delete(cursors[j], keyno); + if (ret == WT_ROLLBACK || ret == WT_PREPARE_CONFLICT) + break; + else if (ret != 0) + goto err; + } + + if (ret == 0) { + if ((ret = session->commit_transaction(session, NULL)) != 0) { + (void)log_print_err("real_worker:commit_mm_transaction", ret, 1); + goto err; + } + } else { + if ((ret = session->rollback_transaction(session, NULL)) != 0) { + (void)log_print_err("real_worker:rollback_transaction", ret, 1); + goto err; + } + } + start_txn = true; + continue; + } else + new_txn = false; + for (j = 0; ret == 0 && j < g.ntables; j++) ret = worker_op(cursors[j], keyno, i); if (ret != 0 && ret != WT_ROLLBACK) { @@ -271,6 +371,7 @@ real_worker(void) testutil_check(__wt_snprintf( buf, sizeof(buf), "prepare_timestamp=%x", g.ts_stable + 1)); if ((ret = session->prepare_transaction(session, buf)) != 0) { + __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock); (void)log_print_err("real_worker:prepare_transaction", ret, 1); goto err; } @@ -280,29 +381,45 @@ 
real_worker(void) } else testutil_check(__wt_snprintf( buf, sizeof(buf), "commit_timestamp=%x", g.ts_stable + 1)); - if ((ret = session->commit_transaction(session, buf)) != 0) { - __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock); - (void)log_print_err("real_worker:commit_transaction", ret, 1); - goto err; + + // Commit majority of times + if (next_rnd % 49 != 0) { + if ((ret = session->commit_transaction(session, buf)) != 0) { + __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock); + (void)log_print_err("real_worker:commit_transaction", ret, 1); + goto err; + } + } else { + if ((ret = session->rollback_transaction(session, NULL)) != 0) { + __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock); + (void)log_print_err("real_worker:rollback_transaction", ret, 1); + goto err; + } } __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock); start_txn = true; - /* Occasionally reopen cursors after committing. */ - if (next_rnd % 13 == 0) { + /* Occasionally reopen cursors after transaction finish. */ + if (next_rnd % 13 == 0) reopen_cursors = true; - } } } else { - if ((ret = session->commit_transaction(session, NULL)) != 0) { - (void)log_print_err("real_worker:commit_transaction", ret, 1); - goto err; + // Commit majority of times + if (next_rnd % 49 != 0) { + if ((ret = session->commit_transaction(session, NULL)) != 0) { + (void)log_print_err("real_worker:commit_transaction", ret, 1); + goto err; + } + } else { + if ((ret = session->rollback_transaction(session, NULL)) != 0) { + (void)log_print_err("real_worker:rollback_transaction", ret, 1); + goto err; + } } start_txn = true; } - } else if (next_rnd % 15 == 0) { + } else if (next_rnd % 15 == 0) /* Occasionally reopen cursors during a running transaction. 
*/ reopen_cursors = true; - } } else { if ((ret = session->rollback_transaction(session, NULL)) != 0) { (void)log_print_err("real_worker:rollback_transaction", ret, 1); diff --git a/src/third_party/wiredtiger/test/format/config.c b/src/third_party/wiredtiger/test/format/config.c index 33b8d7a9112..e3c1afea11a 100644 --- a/src/third_party/wiredtiger/test/format/config.c +++ b/src/third_party/wiredtiger/test/format/config.c @@ -384,6 +384,14 @@ config_backward_compatible(void) config_single("disk.mmap_all=off", false); } + if (g.c_timing_stress_checkpoint_reserved_txnid_delay) { + if (config_is_perm("stress.checkpoint_reserved_txnid_delay")) + testutil_die(EINVAL, + "stress.checkpoint_reserved_txnid_delay not supported in backward compatibility " + "mode"); + config_single("stress.checkpoint_reserved_txnid_delay=off", false); + } + if (g.c_timing_stress_hs_sweep) { if (config_is_perm("stress.hs_sweep")) testutil_die(EINVAL, "stress.hs_sweep not supported in backward compatibility mode"); diff --git a/src/third_party/wiredtiger/test/format/config.h b/src/third_party/wiredtiger/test/format/config.h index 07a6e2603ff..c5456810e60 100644 --- a/src/third_party/wiredtiger/test/format/config.h +++ b/src/third_party/wiredtiger/test/format/config.h @@ -301,9 +301,25 @@ static CONFIG c[] = { {"stress.checkpoint", "stress checkpoints", C_BOOL, 2, 0, 0, &g.c_timing_stress_checkpoint, NULL}, /* 2% */ + {"stress.checkpoint_reserved_txnid_delay", "stress checkpoint invisible transaction id delay", + C_BOOL, 2, 0, 0, &g.c_timing_stress_checkpoint_reserved_txnid_delay, NULL}, + + /* 2% */ {"stress.checkpoint_prepare", "stress checkpoint prepare", C_BOOL, 2, 0, 0, &g.c_timing_stress_checkpoint_prepare, NULL}, + /* 30% */ + {"stress.failpoint_hs_delete_key_from_ts", "stress failpoint history store delete key from ts", + C_BOOL, 30, 0, 0, &g.c_timing_stress_failpoint_hs_delete_key_from_ts, NULL}, + + /* 30% */ + {"stress.failpoint_hs_insert_1", "stress failpoint history store insert 
(#1)", C_BOOL, 30, 0, 0, + &g.c_timing_stress_failpoint_hs_insert_1, NULL}, + + /* 30% */ + {"stress.failpoint_hs_insert_2", "stress failpoint history store insert (#2)", C_BOOL, 30, 0, 0, + &g.c_timing_stress_failpoint_hs_insert_2, NULL}, + /* 2% */ {"stress.hs_checkpoint_delay", "stress history store checkpoint delay", C_BOOL, 2, 0, 0, &g.c_timing_stress_hs_checkpoint_delay, NULL}, diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h index 619060e1881..ecfb83a37ce 100644 --- a/src/third_party/wiredtiger/test/format/format.h +++ b/src/third_party/wiredtiger/test/format/format.h @@ -249,6 +249,10 @@ typedef struct { uint32_t c_timer; uint32_t c_timing_stress_aggressive_sweep; uint32_t c_timing_stress_checkpoint; + uint32_t c_timing_stress_checkpoint_reserved_txnid_delay; + uint32_t c_timing_stress_failpoint_hs_delete_key_from_ts; + uint32_t c_timing_stress_failpoint_hs_insert_1; + uint32_t c_timing_stress_failpoint_hs_insert_2; uint32_t c_timing_stress_hs_checkpoint_delay; uint32_t c_timing_stress_hs_search; uint32_t c_timing_stress_hs_sweep; diff --git a/src/third_party/wiredtiger/test/format/wts.c b/src/third_party/wiredtiger/test/format/wts.c index e27f54a42a9..d9f0c81a34c 100644 --- a/src/third_party/wiredtiger/test/format/wts.c +++ b/src/third_party/wiredtiger/test/format/wts.c @@ -264,6 +264,14 @@ create_database(const char *home, WT_CONNECTION **connp) CONFIG_APPEND(p, ",checkpoint_slow"); if (g.c_timing_stress_checkpoint_prepare) CONFIG_APPEND(p, ",prepare_checkpoint_delay"); + if (g.c_timing_stress_checkpoint_reserved_txnid_delay) + CONFIG_APPEND(p, ",checkpoint_reserved_txnid_delay"); + if (g.c_timing_stress_failpoint_hs_delete_key_from_ts) + CONFIG_APPEND(p, ",failpoint_history_store_delete_key_from_ts"); + if (g.c_timing_stress_failpoint_hs_insert_1) + CONFIG_APPEND(p, ",failpoint_history_store_insert_1"); + if (g.c_timing_stress_failpoint_hs_insert_2) + CONFIG_APPEND(p, 
",failpoint_history_store_insert_2"); if (g.c_timing_stress_hs_checkpoint_delay) CONFIG_APPEND(p, ",history_store_checkpoint_delay"); if (g.c_timing_stress_hs_search) @@ -478,6 +486,47 @@ wts_open(const char *home, WT_CONNECTION **connp, WT_SESSION **sessionp, bool al if (enc != NULL) CONFIG_APPEND(p, ",encryption=(name=%s)", enc); + /* + * Timing stress options aren't persisted in the base config and need to be added to the + * configuration for re-open. + */ + CONFIG_APPEND(p, ",timing_stress_for_test=["); + if (g.c_timing_stress_aggressive_sweep) + CONFIG_APPEND(p, ",aggressive_sweep"); + if (g.c_timing_stress_checkpoint) + CONFIG_APPEND(p, ",checkpoint_slow"); + if (g.c_timing_stress_checkpoint_prepare) + CONFIG_APPEND(p, ",prepare_checkpoint_delay"); + if (g.c_timing_stress_failpoint_hs_delete_key_from_ts) + CONFIG_APPEND(p, ",failpoint_history_store_delete_key_from_ts"); + if (g.c_timing_stress_failpoint_hs_insert_1) + CONFIG_APPEND(p, ",failpoint_history_store_insert_1"); + if (g.c_timing_stress_failpoint_hs_insert_2) + CONFIG_APPEND(p, ",failpoint_history_store_insert_2"); + if (g.c_timing_stress_hs_checkpoint_delay) + CONFIG_APPEND(p, ",history_store_checkpoint_delay"); + if (g.c_timing_stress_hs_search) + CONFIG_APPEND(p, ",history_store_search"); + if (g.c_timing_stress_hs_sweep) + CONFIG_APPEND(p, ",history_store_sweep_race"); + if (g.c_timing_stress_split_1) + CONFIG_APPEND(p, ",split_1"); + if (g.c_timing_stress_split_2) + CONFIG_APPEND(p, ",split_2"); + if (g.c_timing_stress_split_3) + CONFIG_APPEND(p, ",split_3"); + if (g.c_timing_stress_split_4) + CONFIG_APPEND(p, ",split_4"); + if (g.c_timing_stress_split_5) + CONFIG_APPEND(p, ",split_5"); + if (g.c_timing_stress_split_6) + CONFIG_APPEND(p, ",split_6"); + if (g.c_timing_stress_split_7) + CONFIG_APPEND(p, ",split_7"); + if (g.c_timing_stress_split_8) + CONFIG_APPEND(p, ",split_8"); + CONFIG_APPEND(p, "]"); + /* If in-memory, there's only a single, shared WT_CONNECTION handle. 
*/ if (g.c_in_memory != 0) conn = g.wts_conn_inmemory; diff --git a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable26.py b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable26.py new file mode 100755 index 00000000000..d9cc9b9a5fa --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable26.py @@ -0,0 +1,166 @@ +#!/usr/bin/env python +# +# Public Domain 2014-present MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+ +import threading, time +from helper import simulate_crash_restart +from test_rollback_to_stable01 import test_rollback_to_stable_base +from wiredtiger import stat, WT_NOTFOUND +from wtdataset import SimpleDataSet +from wtscenario import make_scenarios +from wtthread import checkpoint_thread + +# test_rollback_to_stable26.py +# Test that rollback to stable properly restores the prepare rollback entry +# from the history store. +class test_rollback_to_stable26(test_rollback_to_stable_base): + session_config = 'isolation=snapshot' + + key_format_values = [ + ('column', dict(key_format='r')), + ('integer_row', dict(key_format='i')), + ] + + hs_remove_values = [ + ('no_hs_remove', dict(hs_remove=False)), + ('hs_remove', dict(hs_remove=True)) + ] + + prepare_remove_values = [ + ('no_prepare_remove', dict(prepare_remove=False)), + ('prepare_remove', dict(prepare_remove=True)) + ] + + scenarios = make_scenarios(key_format_values, hs_remove_values, prepare_remove_values) + + def conn_config(self): + config = 'cache_size=10MB,statistics=(all),timing_stress_for_test=[history_store_checkpoint_delay]' + return config + + def evict_cursor(self, uri, nrows): + # Configure debug behavior on a cursor to evict the page it is positioned on when the reset API is used. + evict_cursor = self.session.open_cursor(uri, None, "debug=(release_evict)") + self.session.begin_transaction("ignore_prepare=true") + for i in range (1, nrows + 1): + evict_cursor.set_key(i) + evict_cursor.search() + evict_cursor.reset() + evict_cursor.close() + self.session.rollback_transaction() + + def test_rollback_to_stable(self): + nrows = 10 + + # Create a table without logging. + uri = "table:rollback_to_stable26" + ds = SimpleDataSet( + self, uri, 0, key_format=self.key_format, value_format="S", config='log=(enabled=false)') + ds.populate() + + # Pin oldest and stable to timestamp 10. 
+ self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(10) + + ',stable_timestamp=' + self.timestamp_str(10)) + + value_a = "aaaaa" * 100 + value_b = "bbbbb" * 100 + value_c = "ccccc" * 100 + value_d = "ddddd" * 100 + value_e = "eeeee" * 100 + + self.large_updates(uri, value_a, ds, nrows, False, 20) + self.large_updates(uri, value_b, ds, nrows, False, 30) + + if self.hs_remove: + self.large_removes(uri, ds, nrows, False, 40) + + prepare_session = self.conn.open_session() + prepare_session.begin_transaction() + cursor = prepare_session.open_cursor(uri) + for i in range (1, nrows + 1): + cursor[i] = value_c + if self.prepare_remove: + cursor.set_key(i) + self.assertEqual(cursor.remove(), 0) + cursor.close() + prepare_session.prepare_transaction('prepare_timestamp=' + self.timestamp_str(50)) + + # Verify data is visible and correct. + self.check(value_a, uri, nrows, 20) + self.check(value_b, uri, nrows, 30) + + self.evict_cursor(uri, nrows) + + # Pin stable to timestamp 40. + self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(40)) + + # Create a checkpoint thread + done = threading.Event() + ckpt = checkpoint_thread(self.conn, done) + try: + ckpt.start() + # Sleep for sometime so that checkpoint starts before committing last transaction. + time.sleep(5) + prepare_session.rollback_transaction() + finally: + done.set() + ckpt.join() + + self.large_updates(uri, value_d, ds, nrows, False, 60) + + # Check that the correct data. + self.check(value_a, uri, nrows, 20) + self.check(value_b, uri, nrows, 30) + self.check(value_d, uri, nrows, 60) + + # Simulate a server crash and restart. 
+ simulate_crash_restart(self, ".", "RESTART") + + stat_cursor = self.session.open_cursor('statistics:', None, None) + hs_removed = stat_cursor[stat.conn.txn_rts_hs_removed][2] + hs_restore_updates = stat_cursor[stat.conn.txn_rts_hs_restore_updates][2] + keys_removed = stat_cursor[stat.conn.txn_rts_keys_removed][2] + stat_cursor.close() + + self.assertEqual(keys_removed, 0) + self.assertEqual(hs_restore_updates, nrows) + self.assertEqual(hs_removed, nrows) + + # Check that the data is correct. + self.check(value_a, uri, nrows, 20) + self.check(value_b, uri, nrows, 30) + + self.large_updates(uri, value_e, ds, nrows, False, 60) + + self.evict_cursor(uri, nrows) + + # Check that the data is correct. + self.check(value_a, uri, nrows, 20) + self.check(value_b, uri, nrows, 30) + self.check(value_e, uri, nrows, 60) + +if __name__ == '__main__': + wttest.run() |