summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuke Chen <luke.chen@mongodb.com>2021-09-13 09:56:46 +1000
committerLuke Chen <luke.chen@mongodb.com>2021-09-13 09:56:46 +1000
commit71f9ea50b97cf49ffc52ed33a8c798a77990ba80 (patch)
treea7c45c0300164319d2a6ecda11c7fde508713a5b
parent057ee3d434a3822bc007d7a34524b4eb0a1aa1ea (diff)
downloadmongo-71f9ea50b97cf49ffc52ed33a8c798a77990ba80.tar.gz
Import wiredtiger: 7f9d2bd3eefbb725301d6075db7ce48c3b33e159 from branch mongodb-4.4
ref: 823abc9cb3..7f9d2bd3ee for: 4.4.9 WT-7630 Fix a history store entry remove with checkpoint reserved transaction-id WT-7958 Include recovery in test/checkpoint WT-8032 Add fail points in reconciliation for history store inserts. WT-8047 Add mixed mode delete operations to test checkpoint WT-8056 Fix a bug in RTS that incorrectly restores an update from HS lead to the key removal
-rw-r--r--src/third_party/wiredtiger/dist/api_data.py4
-rw-r--r--src/third_party/wiredtiger/dist/s_string.ok3
-rw-r--r--src/third_party/wiredtiger/dist/stat_data.py2
-rw-r--r--src/third_party/wiredtiger/import.data2
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_debug.c3
-rw-r--r--src/third_party/wiredtiger/src/config/config_def.c60
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_api.c7
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_ckpt.c37
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_open.c6
-rw-r--r--src/third_party/wiredtiger/src/history/hs_rec.c38
-rw-r--r--src/third_party/wiredtiger/src/include/connection.h37
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h10
-rw-r--r--src/third_party/wiredtiger/src/include/stat.h2
-rw-r--r--src/third_party/wiredtiger/src/include/txn.h2
-rw-r--r--src/third_party/wiredtiger/src/include/wiredtiger.in138
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_row.c5
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_write.c3
-rw-r--r--src/third_party/wiredtiger/src/support/err.c25
-rw-r--r--src/third_party/wiredtiger/src/support/stat.c9
-rw-r--r--src/third_party/wiredtiger/src/txn/txn.c50
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_ckpt.c39
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c36
-rw-r--r--src/third_party/wiredtiger/test/checkpoint/checkpointer.c75
-rwxr-xr-xsrc/third_party/wiredtiger/test/checkpoint/recovery-test.sh30
-rwxr-xr-xsrc/third_party/wiredtiger/test/checkpoint/smoke.sh10
-rw-r--r--src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c124
-rw-r--r--src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h53
-rw-r--r--src/third_party/wiredtiger/test/checkpoint/workers.c167
-rw-r--r--src/third_party/wiredtiger/test/format/config.c8
-rw-r--r--src/third_party/wiredtiger/test/format/config.h16
-rw-r--r--src/third_party/wiredtiger/test/format/format.h4
-rw-r--r--src/third_party/wiredtiger/test/format/wts.c49
-rwxr-xr-xsrc/third_party/wiredtiger/test/suite/test_rollback_to_stable26.py166
33 files changed, 997 insertions, 223 deletions
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index 4b4c98f6018..ca9d3f2ed6e 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -793,7 +793,9 @@ connection_runtime_config = [
intended for use with internal stress testing of WiredTiger.''',
type='list', undoc=True,
choices=[
- 'aggressive_sweep', 'backup_rename', 'checkpoint_slow', 'history_store_checkpoint_delay',
+ 'aggressive_sweep', 'backup_rename', 'checkpoint_reserved_txnid_delay', 'checkpoint_slow',
+ 'failpoint_history_store_delete_key_from_ts', 'failpoint_history_store_insert_1',
+ 'failpoint_history_store_insert_2', 'history_store_checkpoint_delay',
'history_store_search', 'history_store_sweep_race', 'prepare_checkpoint_delay', 'split_1',
'split_2', 'split_3', 'split_4', 'split_5', 'split_6', 'split_7', 'split_8']),
Config('verbose', '[]', r'''
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index 1b0afc64eda..7ed2f3b6473 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -113,6 +113,7 @@ Decrement
Decrypt
DeleteFileW
Destructor
+Deterministically
EACCES
EAGAIN
EB
@@ -151,6 +152,7 @@ FTRUNCATE
FULLFSYNC
FUNCSIG
Facebook
+Failpoint
FindClose
FindFirstFile
FindNextFileW
@@ -784,6 +786,7 @@ extern
extlist
fadvise
fahrenheit
+failpoint
fallocate
fallthrough
fblocks
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py
index f35e56252a1..05efc9107ca 100644
--- a/src/third_party/wiredtiger/dist/stat_data.py
+++ b/src/third_party/wiredtiger/dist/stat_data.py
@@ -554,6 +554,8 @@ conn_stats = [
TxnStat('txn_prepare_active', 'prepared transactions currently active'),
TxnStat('txn_prepare_commit', 'prepared transactions committed'),
TxnStat('txn_prepare_rollback', 'prepared transactions rolled back'),
+ TxnStat('txn_prepare_rollback_do_not_remove_hs_update', 'prepared transactions rolled back and do not remove the history store entry'),
+ TxnStat('txn_prepare_rollback_fix_hs_update_with_ckpt_reserved_txnid', 'prepared transactions rolled back and fix the history store entry with checkpoint reserved transaction id'),
TxnStat('txn_prepared_updates_committed', 'Number of prepared updates committed'),
TxnStat('txn_prepared_updates', 'Number of prepared updates'),
TxnStat('txn_prepared_updates_key_repeated', 'Number of prepared updates repeated on the same key'),
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 9b01b52a1e1..fc19d257fb8 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-4.4",
- "commit": "823abc9cb3d952b59276381a6181f5b1a0749cc9"
+ "commit": "7f9d2bd3eefbb725301d6075db7ce48c3b33e159"
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c
index d718239a575..e2205b78047 100644
--- a/src/third_party/wiredtiger/src/btree/bt_debug.c
+++ b/src/third_party/wiredtiger/src/btree/bt_debug.c
@@ -1110,7 +1110,8 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref)
"disk %p",
(void *)page->dsk));
if (page->dsk != NULL)
- WT_RET(ds->f(ds, ", dsk_mem_size %" PRIu32, page->dsk->mem_size));
+ WT_RET(ds->f(ds, ", dsk_mem_size %" PRIu32 ", write_gen: %" PRIu64, page->dsk->mem_size,
+ page->dsk->write_gen));
WT_RET(ds->f(ds, ", entries %" PRIu32, entries));
WT_RET(ds->f(ds, ", %s", __wt_page_is_modified(page) ? "dirty" : "clean"));
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index a3d9af315a4..cb0a104dc6b 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -138,10 +138,14 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
confchk_WT_CONNECTION_reconfigure_tiered_storage_subconfigs, 2},
{"timing_stress_for_test", "list", NULL,
"choices=[\"aggressive_sweep\",\"backup_rename\","
- "\"checkpoint_slow\",\"history_store_checkpoint_delay\","
- "\"history_store_search\",\"history_store_sweep_race\","
- "\"prepare_checkpoint_delay\",\"split_1\",\"split_2\",\"split_3\""
- ",\"split_4\",\"split_5\",\"split_6\",\"split_7\",\"split_8\"]",
+ "\"checkpoint_reserved_txnid_delay\",\"checkpoint_slow\","
+ "\"failpoint_history_store_delete_key_from_ts\","
+ "\"failpoint_history_store_insert_1\","
+ "\"failpoint_history_store_insert_2\","
+ "\"history_store_checkpoint_delay\",\"history_store_search\","
+ "\"history_store_sweep_race\",\"prepare_checkpoint_delay\","
+ "\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\","
+ "\"split_6\",\"split_7\",\"split_8\"]",
NULL, 0},
{"verbose", "list", NULL,
"choices=[\"api\",\"backup\",\"block\",\"checkpoint\","
@@ -863,10 +867,14 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
{"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 6},
{"timing_stress_for_test", "list", NULL,
"choices=[\"aggressive_sweep\",\"backup_rename\","
- "\"checkpoint_slow\",\"history_store_checkpoint_delay\","
- "\"history_store_search\",\"history_store_sweep_race\","
- "\"prepare_checkpoint_delay\",\"split_1\",\"split_2\",\"split_3\""
- ",\"split_4\",\"split_5\",\"split_6\",\"split_7\",\"split_8\"]",
+ "\"checkpoint_reserved_txnid_delay\",\"checkpoint_slow\","
+ "\"failpoint_history_store_delete_key_from_ts\","
+ "\"failpoint_history_store_insert_1\","
+ "\"failpoint_history_store_insert_2\","
+ "\"history_store_checkpoint_delay\",\"history_store_search\","
+ "\"history_store_sweep_race\",\"prepare_checkpoint_delay\","
+ "\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\","
+ "\"split_6\",\"split_7\",\"split_8\"]",
NULL, 0},
{"transaction_sync", "category", NULL, NULL, confchk_wiredtiger_open_transaction_sync_subconfigs,
2},
@@ -941,10 +949,14 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
{"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 6},
{"timing_stress_for_test", "list", NULL,
"choices=[\"aggressive_sweep\",\"backup_rename\","
- "\"checkpoint_slow\",\"history_store_checkpoint_delay\","
- "\"history_store_search\",\"history_store_sweep_race\","
- "\"prepare_checkpoint_delay\",\"split_1\",\"split_2\",\"split_3\""
- ",\"split_4\",\"split_5\",\"split_6\",\"split_7\",\"split_8\"]",
+ "\"checkpoint_reserved_txnid_delay\",\"checkpoint_slow\","
+ "\"failpoint_history_store_delete_key_from_ts\","
+ "\"failpoint_history_store_insert_1\","
+ "\"failpoint_history_store_insert_2\","
+ "\"history_store_checkpoint_delay\",\"history_store_search\","
+ "\"history_store_sweep_race\",\"prepare_checkpoint_delay\","
+ "\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\","
+ "\"split_6\",\"split_7\",\"split_8\"]",
NULL, 0},
{"transaction_sync", "category", NULL, NULL, confchk_wiredtiger_open_transaction_sync_subconfigs,
2},
@@ -1016,10 +1028,14 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
{"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 6},
{"timing_stress_for_test", "list", NULL,
"choices=[\"aggressive_sweep\",\"backup_rename\","
- "\"checkpoint_slow\",\"history_store_checkpoint_delay\","
- "\"history_store_search\",\"history_store_sweep_race\","
- "\"prepare_checkpoint_delay\",\"split_1\",\"split_2\",\"split_3\""
- ",\"split_4\",\"split_5\",\"split_6\",\"split_7\",\"split_8\"]",
+ "\"checkpoint_reserved_txnid_delay\",\"checkpoint_slow\","
+ "\"failpoint_history_store_delete_key_from_ts\","
+ "\"failpoint_history_store_insert_1\","
+ "\"failpoint_history_store_insert_2\","
+ "\"history_store_checkpoint_delay\",\"history_store_search\","
+ "\"history_store_sweep_race\",\"prepare_checkpoint_delay\","
+ "\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\","
+ "\"split_6\",\"split_7\",\"split_8\"]",
NULL, 0},
{"transaction_sync", "category", NULL, NULL, confchk_wiredtiger_open_transaction_sync_subconfigs,
2},
@@ -1089,10 +1105,14 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
{"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 6},
{"timing_stress_for_test", "list", NULL,
"choices=[\"aggressive_sweep\",\"backup_rename\","
- "\"checkpoint_slow\",\"history_store_checkpoint_delay\","
- "\"history_store_search\",\"history_store_sweep_race\","
- "\"prepare_checkpoint_delay\",\"split_1\",\"split_2\",\"split_3\""
- ",\"split_4\",\"split_5\",\"split_6\",\"split_7\",\"split_8\"]",
+ "\"checkpoint_reserved_txnid_delay\",\"checkpoint_slow\","
+ "\"failpoint_history_store_delete_key_from_ts\","
+ "\"failpoint_history_store_insert_1\","
+ "\"failpoint_history_store_insert_2\","
+ "\"history_store_checkpoint_delay\",\"history_store_search\","
+ "\"history_store_sweep_race\",\"prepare_checkpoint_delay\","
+ "\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\","
+ "\"split_6\",\"split_7\",\"split_8\"]",
NULL, 0},
{"transaction_sync", "category", NULL, NULL, confchk_wiredtiger_open_transaction_sync_subconfigs,
2},
diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c
index a05b4b1c85d..95ca5acbac5 100644
--- a/src/third_party/wiredtiger/src/conn/conn_api.c
+++ b/src/third_party/wiredtiger/src/conn/conn_api.c
@@ -2124,11 +2124,18 @@ __wt_timing_stress_config(WT_SESSION_IMPL *session, const char *cfg[])
* Each split race delay is controlled using a different flag to allow more effective race
* condition detection, since enabling all delays at once can lead to an overall slowdown to the
* point where race conditions aren't encountered.
+ *
+ * Fail points are also defined in this list and will occur randomly when enabled.
*/
static const WT_NAME_FLAG stress_types[] = {
{"aggressive_sweep", WT_TIMING_STRESS_AGGRESSIVE_SWEEP},
{"backup_rename", WT_TIMING_STRESS_BACKUP_RENAME},
+ {"checkpoint_reserved_txnid_delay", WT_TIMING_STRESS_CHECKPOINT_RESERVED_TXNID_DELAY},
{"checkpoint_slow", WT_TIMING_STRESS_CHECKPOINT_SLOW},
+ {"failpoint_history_store_delete_key_from_ts",
+ WT_TIMING_STRESS_FAILPOINT_HISTORY_STORE_DELETE_KEY_FROM_TS},
+ {"failpoint_history_store_insert_1", WT_TIMING_STRESS_FAILPOINT_HISTORY_STORE_INSERT_1},
+ {"failpoint_history_store_insert_2", WT_TIMING_STRESS_FAILPOINT_HISTORY_STORE_INSERT_2},
{"history_store_checkpoint_delay", WT_TIMING_STRESS_HS_CHECKPOINT_DELAY},
{"history_store_search", WT_TIMING_STRESS_HS_SEARCH},
{"history_store_sweep_race", WT_TIMING_STRESS_HS_SWEEP},
diff --git a/src/third_party/wiredtiger/src/conn/conn_ckpt.c b/src/third_party/wiredtiger/src/conn/conn_ckpt.c
index 7ac53585134..221de9ffe54 100644
--- a/src/third_party/wiredtiger/src/conn/conn_ckpt.c
+++ b/src/third_party/wiredtiger/src/conn/conn_ckpt.c
@@ -240,3 +240,40 @@ __wt_checkpoint_signal(WT_SESSION_IMPL *session, wt_off_t logsize)
conn->ckpt_signalled = true;
}
}
+
+/*
+ * __wt_checkpoint_reserved_session_init --
+ * Initialize checkpoint reserved session: open the internal session named "ckpt-reserved",
+ * with the WT_SESSION_NO_RECONCILE flag, and store it in the connection's
+ * ckpt_reserved_session field. Returns the result of opening the internal session.
+ */
+int
+__wt_checkpoint_reserved_session_init(WT_SESSION_IMPL *session)
+{
+ WT_CONNECTION_IMPL *conn;
+
+ conn = S2C(session);
+
+ /* The reserved session is expected to be opened only once per connection. */
+ WT_ASSERT(session, conn->ckpt_reserved_session == NULL);
+
+ return (__wt_open_internal_session(
+ conn, "ckpt-reserved", false, WT_SESSION_NO_RECONCILE, 0, &conn->ckpt_reserved_session));
+}
+
+/*
+ * __wt_checkpoint_reserved_session_destroy --
+ * Release resources allocated for checkpoint reserved session: close the session if one
+ * was opened and clear the connection's pointer to it. Nothing is closed when the pointer
+ * is already NULL, so it can be called even if the session was never initialized.
+ */
+int
+__wt_checkpoint_reserved_session_destroy(WT_SESSION_IMPL *session)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+
+ conn = S2C(session);
+
+ if (conn->ckpt_reserved_session != NULL) {
+ WT_TRET(__wt_session_close_internal(conn->ckpt_reserved_session));
+ /* Clear the pointer regardless of the close result so the session is not reused. */
+ conn->ckpt_reserved_session = NULL;
+ }
+
+ return (ret);
+}
diff --git a/src/third_party/wiredtiger/src/conn/conn_open.c b/src/third_party/wiredtiger/src/conn/conn_open.c
index b86ca4eb616..343e0046423 100644
--- a/src/third_party/wiredtiger/src/conn/conn_open.c
+++ b/src/third_party/wiredtiger/src/conn/conn_open.c
@@ -106,6 +106,9 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn)
/* Close open data handles. */
WT_TRET(__wt_conn_dhandle_discard(session));
+ /* Close the checkpoint reserved session. */
+ WT_TRET(__wt_checkpoint_reserved_session_destroy(session));
+
/* Shut down metadata tracking. */
WT_TRET(__wt_meta_track_destroy(session));
@@ -244,6 +247,9 @@ __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[])
/* Start the optional capacity thread. */
WT_RET(__wt_capacity_server_create(session, cfg));
+ /* Initialize checkpoint reserved session, required for the checkpoint operation. */
+ WT_RET(__wt_checkpoint_reserved_session_init(session));
+
/* Start the optional checkpoint thread. */
WT_RET(__wt_checkpoint_server_create(session, cfg));
diff --git a/src/third_party/wiredtiger/src/history/hs_rec.c b/src/third_party/wiredtiger/src/history/hs_rec.c
index 8375de8ffdf..1f927309f94 100644
--- a/src/third_party/wiredtiger/src/history/hs_rec.c
+++ b/src/third_party/wiredtiger/src/history/hs_rec.c
@@ -151,16 +151,17 @@ __hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree,
WT_ERR(__wt_compare(session, NULL, existing_val, hs_value, &cmp));
/*
* The same value should not be inserted again unless:
- * 1. the previous entry is already deleted (i.e. the stop timestamp is globally
+ * 1. The previous entry is already deleted (i.e. the stop timestamp is globally
* visible)
- * 2. it came from a different transaction
- * 3. it came from the same transaction but with a different timestamp
+ * 2. It came from a different transaction
+ * 3. It came from the same transaction but with a different timestamp
+ * 4. The prepared rollback left the history store entry when checkpoint is in progress.
*/
if (cmp == 0) {
if (!__wt_txn_tw_stop_visible_all(session, &hs_cbt->upd_value->tw) &&
tw->start_txn != WT_TXN_NONE &&
tw->start_txn == hs_cbt->upd_value->tw.start_txn &&
- tw->start_ts == hs_cbt->upd_value->tw.start_ts) {
+ tw->start_ts == hs_cbt->upd_value->tw.start_ts && tw->start_ts != tw->stop_ts) {
/*
* If we have issues with duplicate history store records, we want to be able to
* distinguish between modifies and full updates. Since modifies are not
@@ -169,7 +170,6 @@ __hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree,
*/
WT_ASSERT(session,
type != WT_UPDATE_MODIFY && (uint8_t)upd_type_full_diag != WT_UPDATE_MODIFY);
- WT_ASSERT(session, false && "Duplicate values inserted into history store");
}
}
counter = hs_counter + 1;
@@ -286,8 +286,7 @@ __hs_next_upd_full_value(WT_SESSION_IMPL *session, WT_UPDATE_VECTOR *updates,
* fails or succeeds, if there is a successful write to history, cache_write_hs is set to true.
*/
int
-__wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi,
- bool *cache_write_hs, bool checkpoint_running)
+__wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *multi)
{
WT_BTREE *btree, *hs_btree;
WT_CURSOR *hs_cursor;
@@ -313,9 +312,10 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi,
uint32_t i;
uint8_t *p;
int nentries;
- bool enable_reverse_modify, hs_inserted, squashed;
+ bool checkpoint_running, enable_reverse_modify, hs_inserted, squashed;
- *cache_write_hs = false;
+ checkpoint_running = F_ISSET(r, WT_REC_CHECKPOINT_RUNNING);
+ r->cache_write_hs = false;
btree = S2BT(session);
prev_upd = NULL;
insert_cnt = 0;
@@ -366,7 +366,7 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi,
continue;
/* History store table key component: source key. */
- switch (page->type) {
+ switch (r->page->type) {
case WT_PAGE_COL_FIX:
case WT_PAGE_COL_VAR:
p = key->mem;
@@ -375,8 +375,8 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi,
break;
case WT_PAGE_ROW_LEAF:
if (list->ins == NULL) {
- WT_WITH_BTREE(
- session, btree, ret = __wt_row_leaf_key(session, page, list->ripcip, key, false));
+ WT_WITH_BTREE(session, btree,
+ ret = __wt_row_leaf_key(session, r->page, list->ripcip, key, false));
WT_ERR(ret);
} else {
key->data = WT_INSERT_KEY(list->ins);
@@ -384,7 +384,7 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi,
}
break;
default:
- WT_ERR(__wt_illegal_value(session, page->type));
+ WT_ERR(__wt_illegal_value(session, r->page->type));
}
first_globally_visible_upd = min_ts_upd = out_of_order_ts_upd = NULL;
@@ -645,6 +645,11 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi,
/* Clear out the insert success flag prior to our insert attempt. */
__wt_curhs_clear_insert_success(hs_cursor);
+ /* Fail here 0.05% of the time if we are in the eviction path. */
+ if (F_ISSET(r, WT_REC_EVICT) &&
+ __wt_failpoint(session, WT_TIMING_STRESS_FAILPOINT_HISTORY_STORE_INSERT_1, 0.05))
+ WT_ERR(EBUSY);
+
/*
* Calculate reverse modify and clear the history store records with timestamps when
* inserting the first update. Always write on-disk data store updates to the history
@@ -717,6 +722,11 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi,
__wt_update_vector_clear(&updates);
}
+ /* Fail here 0.05% of the time if we are in the eviction path. */
+ if (F_ISSET(r, WT_REC_EVICT) &&
+ __wt_failpoint(session, WT_TIMING_STRESS_FAILPOINT_HISTORY_STORE_INSERT_2, 0.05))
+ WT_ERR(EBUSY);
+
WT_ERR(__wt_block_manager_named_size(session, WT_HS_FILE, &hs_size));
hs_btree = __wt_curhs_get_btree(hs_cursor);
max_hs_size = hs_btree->file_max;
@@ -731,7 +741,7 @@ err:
/* cache_write_hs is set to true as there was at least one successful write to history. */
if (insert_cnt > 0)
- *cache_write_hs = true;
+ r->cache_write_hs = true;
__wt_scr_free(session, &key);
/* modify_value is allocated in __wt_modify_pack. Free it if it is allocated. */
diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h
index 68a03cad84b..e40bc2acc27 100644
--- a/src/third_party/wiredtiger/src/include/connection.h
+++ b/src/third_party/wiredtiger/src/include/connection.h
@@ -473,7 +473,8 @@ struct __wt_connection_impl {
uint16_t log_req_min; /* Min required log version */
uint32_t txn_logsync; /* Log sync configuration */
- WT_SESSION_IMPL *meta_ckpt_session; /* Metadata checkpoint session */
+ WT_SESSION_IMPL *meta_ckpt_session; /* Metadata checkpoint session */
+ WT_SESSION_IMPL *ckpt_reserved_session; /* Checkpoint reserved session */
/*
* Is there a data/schema change that needs to be the part of a checkpoint.
@@ -594,21 +595,25 @@ struct __wt_connection_impl {
* Variable with flags for which subsystems the diagnostic stress timing delays have been requested.
*/
/* AUTOMATIC FLAG VALUE GENERATION START 0 */
-#define WT_TIMING_STRESS_AGGRESSIVE_SWEEP 0x0001u
-#define WT_TIMING_STRESS_BACKUP_RENAME 0x0002u
-#define WT_TIMING_STRESS_CHECKPOINT_SLOW 0x0004u
-#define WT_TIMING_STRESS_HS_CHECKPOINT_DELAY 0x0008u
-#define WT_TIMING_STRESS_HS_SEARCH 0x0010u
-#define WT_TIMING_STRESS_HS_SWEEP 0x0020u
-#define WT_TIMING_STRESS_PREPARE_CHECKPOINT_DELAY 0x0040u
-#define WT_TIMING_STRESS_SPLIT_1 0x0080u
-#define WT_TIMING_STRESS_SPLIT_2 0x0100u
-#define WT_TIMING_STRESS_SPLIT_3 0x0200u
-#define WT_TIMING_STRESS_SPLIT_4 0x0400u
-#define WT_TIMING_STRESS_SPLIT_5 0x0800u
-#define WT_TIMING_STRESS_SPLIT_6 0x1000u
-#define WT_TIMING_STRESS_SPLIT_7 0x2000u
-#define WT_TIMING_STRESS_SPLIT_8 0x4000u
+#define WT_TIMING_STRESS_AGGRESSIVE_SWEEP 0x00001u
+#define WT_TIMING_STRESS_BACKUP_RENAME 0x00002u
+#define WT_TIMING_STRESS_CHECKPOINT_RESERVED_TXNID_DELAY 0x00004u
+#define WT_TIMING_STRESS_CHECKPOINT_SLOW 0x00008u
+#define WT_TIMING_STRESS_FAILPOINT_HISTORY_STORE_DELETE_KEY_FROM_TS 0x00010u
+#define WT_TIMING_STRESS_FAILPOINT_HISTORY_STORE_INSERT_1 0x00020u
+#define WT_TIMING_STRESS_FAILPOINT_HISTORY_STORE_INSERT_2 0x00040u
+#define WT_TIMING_STRESS_HS_CHECKPOINT_DELAY 0x00080u
+#define WT_TIMING_STRESS_HS_SEARCH 0x00100u
+#define WT_TIMING_STRESS_HS_SWEEP 0x00200u
+#define WT_TIMING_STRESS_PREPARE_CHECKPOINT_DELAY 0x00400u
+#define WT_TIMING_STRESS_SPLIT_1 0x00800u
+#define WT_TIMING_STRESS_SPLIT_2 0x01000u
+#define WT_TIMING_STRESS_SPLIT_3 0x02000u
+#define WT_TIMING_STRESS_SPLIT_4 0x04000u
+#define WT_TIMING_STRESS_SPLIT_5 0x08000u
+#define WT_TIMING_STRESS_SPLIT_6 0x10000u
+#define WT_TIMING_STRESS_SPLIT_7 0x20000u
+#define WT_TIMING_STRESS_SPLIT_8 0x40000u
/* AUTOMATIC FLAG VALUE GENERATION STOP 64 */
uint64_t timing_stress_flags;
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 8061ce88008..601eff81c29 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -20,6 +20,8 @@ extern bool __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool vi
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern bool __wt_evict_thread_chk(WT_SESSION_IMPL *session)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern bool __wt_failpoint(WT_SESSION_IMPL *session, uint64_t conn_flag, double probability)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern bool __wt_fsync_background_chk(WT_SESSION_IMPL *session)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern bool __wt_gen_active(WT_SESSION_IMPL *session, int which, uint64_t generation)
@@ -355,6 +357,10 @@ extern int __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[])
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_checkpoint_reserved_session_destroy(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_checkpoint_reserved_session_init(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_checkpoint_server_create(WT_SESSION_IMPL *session, const char *cfg[])
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session)
@@ -779,8 +785,8 @@ extern int __wt_hs_find_upd(WT_SESSION_IMPL *session, uint32_t btree_id, WT_ITEM
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_hs_get_btree(WT_SESSION_IMPL *session, WT_BTREE **hs_btreep)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi,
- bool *cache_write_hs, bool checkpoint_running) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *multi)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_hs_modify(WT_CURSOR_BTREE *hs_cbt, WT_UPDATE *hs_upd)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_hs_open(WT_SESSION_IMPL *session, const char **cfg)
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index e590bae7f20..4b6b9cec111 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -733,6 +733,8 @@ struct __wt_connection_stats {
int64_t txn_prepare_commit;
int64_t txn_prepare_active;
int64_t txn_prepare_rollback;
+ int64_t txn_prepare_rollback_do_not_remove_hs_update;
+ int64_t txn_prepare_rollback_fix_hs_update_with_ckpt_reserved_txnid;
int64_t txn_query_ts;
int64_t txn_read_race_prepare_update;
int64_t txn_rts;
diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h
index 6b84061ff82..5dce2471949 100644
--- a/src/third_party/wiredtiger/src/include/txn.h
+++ b/src/third_party/wiredtiger/src/include/txn.h
@@ -153,6 +153,8 @@ struct __wt_txn_global {
volatile uint32_t checkpoint_id; /* Checkpoint's session ID */
WT_TXN_SHARED checkpoint_txn_shared; /* Checkpoint's txn shared state */
wt_timestamp_t checkpoint_timestamp; /* Checkpoint's timestamp */
+ volatile uint64_t checkpoint_reserved_txn_id; /* A transaction ID reserved by checkpoint for
+ prepared transaction resolution. */
volatile uint64_t debug_ops; /* Debug mode op counter */
uint64_t debug_rollback; /* Debug mode rollback */
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index e292d1c74fd..5b12a73e017 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -5988,167 +5988,177 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_TXN_PREPARE_ACTIVE 1418
/*! transaction: prepared transactions rolled back */
#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK 1419
+/*!
+ * transaction: prepared transactions rolled back and do not remove the
+ * history store entry
+ */
+#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK_DO_NOT_REMOVE_HS_UPDATE 1420
+/*!
+ * transaction: prepared transactions rolled back and fix the history
+ * store entry with checkpoint reserved transaction id
+ */
+#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK_FIX_HS_UPDATE_WITH_CKPT_RESERVED_TXNID 1421
/*! transaction: query timestamp calls */
-#define WT_STAT_CONN_TXN_QUERY_TS 1420
+#define WT_STAT_CONN_TXN_QUERY_TS 1422
/*! transaction: race to read prepared update retry */
-#define WT_STAT_CONN_TXN_READ_RACE_PREPARE_UPDATE 1421
+#define WT_STAT_CONN_TXN_READ_RACE_PREPARE_UPDATE 1423
/*! transaction: rollback to stable calls */
-#define WT_STAT_CONN_TXN_RTS 1422
+#define WT_STAT_CONN_TXN_RTS 1424
/*!
* transaction: rollback to stable history store records with stop
* timestamps older than newer records
*/
-#define WT_STAT_CONN_TXN_RTS_HS_STOP_OLDER_THAN_NEWER_START 1423
+#define WT_STAT_CONN_TXN_RTS_HS_STOP_OLDER_THAN_NEWER_START 1425
/*! transaction: rollback to stable inconsistent checkpoint */
-#define WT_STAT_CONN_TXN_RTS_INCONSISTENT_CKPT 1424
+#define WT_STAT_CONN_TXN_RTS_INCONSISTENT_CKPT 1426
/*! transaction: rollback to stable keys removed */
-#define WT_STAT_CONN_TXN_RTS_KEYS_REMOVED 1425
+#define WT_STAT_CONN_TXN_RTS_KEYS_REMOVED 1427
/*! transaction: rollback to stable keys restored */
-#define WT_STAT_CONN_TXN_RTS_KEYS_RESTORED 1426
+#define WT_STAT_CONN_TXN_RTS_KEYS_RESTORED 1428
/*! transaction: rollback to stable pages visited */
-#define WT_STAT_CONN_TXN_RTS_PAGES_VISITED 1427
+#define WT_STAT_CONN_TXN_RTS_PAGES_VISITED 1429
/*! transaction: rollback to stable restored tombstones from history store */
-#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_TOMBSTONES 1428
+#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_TOMBSTONES 1430
/*! transaction: rollback to stable restored updates from history store */
-#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_UPDATES 1429
+#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_UPDATES 1431
/*! transaction: rollback to stable skipping delete rle */
-#define WT_STAT_CONN_TXN_RTS_DELETE_RLE_SKIPPED 1430
+#define WT_STAT_CONN_TXN_RTS_DELETE_RLE_SKIPPED 1432
/*! transaction: rollback to stable skipping stable rle */
-#define WT_STAT_CONN_TXN_RTS_STABLE_RLE_SKIPPED 1431
+#define WT_STAT_CONN_TXN_RTS_STABLE_RLE_SKIPPED 1433
/*! transaction: rollback to stable sweeping history store keys */
-#define WT_STAT_CONN_TXN_RTS_SWEEP_HS_KEYS 1432
+#define WT_STAT_CONN_TXN_RTS_SWEEP_HS_KEYS 1434
/*! transaction: rollback to stable tree walk skipping pages */
-#define WT_STAT_CONN_TXN_RTS_TREE_WALK_SKIP_PAGES 1433
+#define WT_STAT_CONN_TXN_RTS_TREE_WALK_SKIP_PAGES 1435
/*! transaction: rollback to stable updates aborted */
-#define WT_STAT_CONN_TXN_RTS_UPD_ABORTED 1434
+#define WT_STAT_CONN_TXN_RTS_UPD_ABORTED 1436
/*! transaction: rollback to stable updates removed from history store */
-#define WT_STAT_CONN_TXN_RTS_HS_REMOVED 1435
+#define WT_STAT_CONN_TXN_RTS_HS_REMOVED 1437
/*! transaction: sessions scanned in each walk of concurrent sessions */
-#define WT_STAT_CONN_TXN_SESSIONS_WALKED 1436
+#define WT_STAT_CONN_TXN_SESSIONS_WALKED 1438
/*! transaction: set timestamp calls */
-#define WT_STAT_CONN_TXN_SET_TS 1437
+#define WT_STAT_CONN_TXN_SET_TS 1439
/*! transaction: set timestamp durable calls */
-#define WT_STAT_CONN_TXN_SET_TS_DURABLE 1438
+#define WT_STAT_CONN_TXN_SET_TS_DURABLE 1440
/*! transaction: set timestamp durable updates */
-#define WT_STAT_CONN_TXN_SET_TS_DURABLE_UPD 1439
+#define WT_STAT_CONN_TXN_SET_TS_DURABLE_UPD 1441
/*! transaction: set timestamp oldest calls */
-#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1440
+#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1442
/*! transaction: set timestamp oldest updates */
-#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1441
+#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1443
/*! transaction: set timestamp stable calls */
-#define WT_STAT_CONN_TXN_SET_TS_STABLE 1442
+#define WT_STAT_CONN_TXN_SET_TS_STABLE 1444
/*! transaction: set timestamp stable updates */
-#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1443
+#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1445
/*! transaction: transaction begins */
-#define WT_STAT_CONN_TXN_BEGIN 1444
+#define WT_STAT_CONN_TXN_BEGIN 1446
/*! transaction: transaction checkpoint currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1445
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1447
/*!
* transaction: transaction checkpoint currently running for history
* store file
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING_HS 1446
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING_HS 1448
/*! transaction: transaction checkpoint generation */
-#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1447
+#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1449
/*!
* transaction: transaction checkpoint history store file duration
* (usecs)
*/
-#define WT_STAT_CONN_TXN_HS_CKPT_DURATION 1448
+#define WT_STAT_CONN_TXN_HS_CKPT_DURATION 1450
/*! transaction: transaction checkpoint max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1449
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1451
/*! transaction: transaction checkpoint min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1450
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1452
/*!
* transaction: transaction checkpoint most recent duration for gathering
* all handles (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION 1451
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION 1453
/*!
* transaction: transaction checkpoint most recent duration for gathering
* applied handles (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_APPLY 1452
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_APPLY 1454
/*!
* transaction: transaction checkpoint most recent duration for gathering
* skipped handles (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_SKIP 1453
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_SKIP 1455
/*! transaction: transaction checkpoint most recent handles applied */
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_APPLIED 1454
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_APPLIED 1456
/*! transaction: transaction checkpoint most recent handles skipped */
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_SKIPPED 1455
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_SKIPPED 1457
/*! transaction: transaction checkpoint most recent handles walked */
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_WALKED 1456
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_WALKED 1458
/*! transaction: transaction checkpoint most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1457
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1459
/*! transaction: transaction checkpoint prepare currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RUNNING 1458
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RUNNING 1460
/*! transaction: transaction checkpoint prepare max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MAX 1459
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MAX 1461
/*! transaction: transaction checkpoint prepare min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MIN 1460
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MIN 1462
/*! transaction: transaction checkpoint prepare most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RECENT 1461
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RECENT 1463
/*! transaction: transaction checkpoint prepare total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_TOTAL 1462
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_TOTAL 1464
/*! transaction: transaction checkpoint scrub dirty target */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1463
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1465
/*! transaction: transaction checkpoint scrub time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1464
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1466
/*! transaction: transaction checkpoint total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1465
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1467
/*! transaction: transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1466
+#define WT_STAT_CONN_TXN_CHECKPOINT 1468
/*! transaction: transaction checkpoints due to obsolete pages */
-#define WT_STAT_CONN_TXN_CHECKPOINT_OBSOLETE_APPLIED 1467
+#define WT_STAT_CONN_TXN_CHECKPOINT_OBSOLETE_APPLIED 1469
/*!
* transaction: transaction checkpoints skipped because database was
* clean
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1468
+#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1470
/*! transaction: transaction failures due to history store */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1469
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1471
/*!
* transaction: transaction fsync calls for checkpoint after allocating
* the transaction ID
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1470
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1472
/*!
* transaction: transaction fsync duration for checkpoint after
* allocating the transaction ID (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1471
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1473
/*! transaction: transaction range of IDs currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_RANGE 1472
+#define WT_STAT_CONN_TXN_PINNED_RANGE 1474
/*! transaction: transaction range of IDs currently pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1473
+#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1475
/*! transaction: transaction range of timestamps currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1474
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1476
/*! transaction: transaction range of timestamps pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1475
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1477
/*!
* transaction: transaction range of timestamps pinned by the oldest
* active read timestamp
*/
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_READER 1476
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_READER 1478
/*!
* transaction: transaction range of timestamps pinned by the oldest
* timestamp
*/
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1477
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1479
/*! transaction: transaction read timestamp of the oldest active reader */
-#define WT_STAT_CONN_TXN_TIMESTAMP_OLDEST_ACTIVE_READ 1478
+#define WT_STAT_CONN_TXN_TIMESTAMP_OLDEST_ACTIVE_READ 1480
/*! transaction: transaction rollback to stable currently running */
-#define WT_STAT_CONN_TXN_ROLLBACK_TO_STABLE_RUNNING 1479
+#define WT_STAT_CONN_TXN_ROLLBACK_TO_STABLE_RUNNING 1481
/*! transaction: transaction walk of concurrent sessions */
-#define WT_STAT_CONN_TXN_WALK_SESSIONS 1480
+#define WT_STAT_CONN_TXN_WALK_SESSIONS 1482
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1481
+#define WT_STAT_CONN_TXN_COMMIT 1483
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1482
+#define WT_STAT_CONN_TXN_ROLLBACK 1484
/*! transaction: update conflicts */
-#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1483
+#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1485
/*!
* @}
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_row.c b/src/third_party/wiredtiger/src/reconcile/rec_row.c
index 99d887da573..002086f540e 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_row.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_row.c
@@ -931,6 +931,11 @@ __wt_rec_row_leaf(
WT_ERR(__wt_hs_delete_key_from_ts(session, hs_cursor, btree->id, tmpkey,
WT_TS_NONE, false, F_ISSET(r, WT_REC_CHECKPOINT_RUNNING)));
+ /* Fail 1% of the time. */
+ if (F_ISSET(r, WT_REC_EVICT) &&
+ __wt_failpoint(
+ session, WT_TIMING_STRESS_FAILPOINT_HISTORY_STORE_DELETE_KEY_FROM_TS, 1))
+ WT_ERR(EBUSY);
WT_STAT_CONN_INCR(session, cache_hs_key_truncate_onpage_removal);
WT_STAT_DATA_INCR(session, cache_hs_key_truncate_onpage_removal);
}
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index cf9d1be3175..03b5860c22e 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -2326,8 +2326,7 @@ __rec_hs_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r)
for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i)
if (multi->supd != NULL) {
- WT_ERR(__wt_hs_insert_updates(
- session, r->page, multi, &r->cache_write_hs, F_ISSET(r, WT_REC_CHECKPOINT_RUNNING)));
+ WT_ERR(__wt_hs_insert_updates(session, r, multi));
if (!multi->supd_restore) {
__wt_free(session, multi->supd);
multi->supd_entries = 0;
diff --git a/src/third_party/wiredtiger/src/support/err.c b/src/third_party/wiredtiger/src/support/err.c
index 81212ff65da..62ed921edf5 100644
--- a/src/third_party/wiredtiger/src/support/err.c
+++ b/src/third_party/wiredtiger/src/support/err.c
@@ -437,6 +437,31 @@ __wt_ext_err_printf(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char
}
/*
+ * __wt_failpoint --
+ * A generic failpoint function, it returns true if the failpoint triggers. Takes the connection
+ * timing stress flag that enables the failpoint and the probability of it occurring, given as
+ * a percentage in the range [0, 100] with up to two decimal places.
+ */
+bool
+__wt_failpoint(WT_SESSION_IMPL *session, uint64_t conn_flag, double probability)
+{
+ WT_CONNECTION_IMPL *conn;
+ uint32_t ratio;
+
+ conn = S2C(session);
+ WT_ASSERT(session, probability >= 0 && probability <= 100);
+ /* To support two decimal places we multiply the percent chance of occurring by 100. */
+ ratio = (uint32_t)(probability * 100);
+
+ if (FLD_ISSET(conn->timing_stress_flags, conn_flag)) {
+ /* Strictly less than: ratio of the 10000 outcomes trigger, so 0% never fires. */
+ if (__wt_random(&session->rnd) % 10000 < ratio)
+ return (true);
+ }
+ return (false);
+}
+
+/*
* __wt_verbose_worker --
* Verbose message.
*/
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index c33d3b60bef..52db5e50094 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -1416,6 +1416,9 @@ static const char *const __stats_connection_desc[] = {
"transaction: prepared transactions committed",
"transaction: prepared transactions currently active",
"transaction: prepared transactions rolled back",
+ "transaction: prepared transactions rolled back and do not remove the history store entry",
+ "transaction: prepared transactions rolled back and fix the history store entry with checkpoint "
+ "reserved transaction id",
"transaction: query timestamp calls",
"transaction: race to read prepared update retry",
"transaction: rollback to stable calls",
@@ -1942,6 +1945,8 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->txn_prepare_commit = 0;
stats->txn_prepare_active = 0;
stats->txn_prepare_rollback = 0;
+ stats->txn_prepare_rollback_do_not_remove_hs_update = 0;
+ stats->txn_prepare_rollback_fix_hs_update_with_ckpt_reserved_txnid = 0;
stats->txn_query_ts = 0;
stats->txn_read_race_prepare_update = 0;
stats->txn_rts = 0;
@@ -2477,6 +2482,10 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *
to->txn_prepare_commit += WT_STAT_READ(from, txn_prepare_commit);
to->txn_prepare_active += WT_STAT_READ(from, txn_prepare_active);
to->txn_prepare_rollback += WT_STAT_READ(from, txn_prepare_rollback);
+ to->txn_prepare_rollback_do_not_remove_hs_update +=
+ WT_STAT_READ(from, txn_prepare_rollback_do_not_remove_hs_update);
+ to->txn_prepare_rollback_fix_hs_update_with_ckpt_reserved_txnid +=
+ WT_STAT_READ(from, txn_prepare_rollback_fix_hs_update_with_ckpt_reserved_txnid);
to->txn_query_ts += WT_STAT_READ(from, txn_query_ts);
to->txn_read_race_prepare_update += WT_STAT_READ(from, txn_read_race_prepare_update);
to->txn_rts += WT_STAT_READ(from, txn_rts);
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index 8c0c2bcb305..29c183789e4 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -866,6 +866,7 @@ __txn_locate_hs_record(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, WT_PAGE *
WT_PUBLISH(chain->next, upd);
*upd_appended = true;
+ *fix_updp = upd;
__wt_cache_page_inmem_incr(session, page, total_size);
if (0) {
@@ -992,6 +993,7 @@ __txn_fixup_prepared_update(
WT_ITEM hs_value;
WT_TIME_WINDOW tw;
WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
uint32_t txn_flags;
#ifdef HAVE_DIAGNOSTIC
uint64_t hs_upd_type;
@@ -999,6 +1001,7 @@ __txn_fixup_prepared_update(
#endif
txn = session->txn;
+ txn_global = &S2C(session)->txn_global;
WT_TIME_WINDOW_INIT(&tw);
/*
@@ -1008,9 +1011,6 @@ __txn_fixup_prepared_update(
txn_flags = FLD_MASK(txn->flags, WT_TXN_ERROR | WT_TXN_PREPARE);
F_CLR(txn, txn_flags);
- /* The value older than the prepared update in the history store must be a full value. */
- WT_ASSERT(session, fix_upd->type == WT_UPDATE_STANDARD);
-
/*
* If the history update already has a stop time point and we are committing the prepared update
* there is no work to do.
@@ -1038,8 +1038,48 @@ __txn_fixup_prepared_update(
hs_cursor->set_value(hs_cursor, &tw, tw.durable_stop_ts, tw.durable_start_ts,
(uint64_t)WT_UPDATE_STANDARD, &hs_value);
WT_ERR(hs_cursor->update(hs_cursor));
- } else
- WT_ERR(hs_cursor->remove(hs_cursor));
+ } else {
+ /*
+ * Remove the history store entry if a checkpoint is not running, otherwise place a
+ * tombstone in front of the history store entry if it doesn't have a stop timestamp.
+ */
+ if (txn_global->checkpoint_running) {
+ /* Don't update the history store entry if the entry already has a stop timestamp. */
+ if (fix_upd->type != WT_UPDATE_TOMBSTONE) {
+ /*
+ * When the history store's update start transaction id is greater than the
+ * checkpoint's reserved transaction id, the durable timestamp of this update is
+ * guaranteed to be greater than the checkpoint timestamp, as such there is no need
+ * to save this unstable update in the history store.
+ */
+ if (fix_upd->txnid > txn_global->checkpoint_reserved_txn_id)
+ WT_ERR(hs_cursor->remove(hs_cursor));
+ else {
+ tw.durable_stop_ts = fix_upd->durable_ts;
+ tw.stop_ts = fix_upd->start_ts;
+
+ /*
+ * Set the stop transaction id of the time window to the checkpoint reserved
+ * transaction id. As such the tombstone won't be visible to rollback to stable,
+ * additionally checkpoint garbage collection cannot clean it up as it is greater
+ * than the globally visible transaction id.
+ */
+ tw.stop_txn = txn_global->checkpoint_reserved_txn_id;
+ WT_TIME_WINDOW_SET_START(&tw, fix_upd);
+
+ hs_value.data = fix_upd->data;
+ hs_value.size = fix_upd->size;
+ hs_cursor->set_value(hs_cursor, &tw, tw.durable_stop_ts, tw.durable_start_ts,
+ (uint64_t)WT_UPDATE_STANDARD, &hs_value);
+ WT_ERR(hs_cursor->update(hs_cursor));
+ WT_STAT_CONN_INCR(
+ session, txn_prepare_rollback_fix_hs_update_with_ckpt_reserved_txnid);
+ }
+ } else
+ WT_STAT_CONN_INCR(session, txn_prepare_rollback_do_not_remove_hs_update);
+ } else
+ WT_ERR(hs_cursor->remove(hs_cursor));
+ }
err:
F_SET(txn, txn_flags);
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index c32cd43b0bc..1c5ce6c32ee 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -628,6 +628,34 @@ __checkpoint_prepare(WT_SESSION_IMPL *session, bool *trackingp, const char *cfg[
__wt_writeunlock(session, &txn_global->rwlock);
/*
+ * Allocate a reserved transaction id that will be used for removing history entries when a
+ * prepared transaction rollback occurs in parallel to a checkpoint. Ensure that this transaction
+ * id is published before taking the checkpoint's snapshot.
+ *
+ * Another way to solve the issue is to use a transaction id that is allocated after the
+ * second checkpoint snapshot. This approach has the issue of using a stale reserved
+ * transaction id for the history store updates and the data store page is skipped in the
+ * checkpoint. To address the use of a stale reserved transaction id, all the data store pages
+ * that have restored prepared updates need to get checkpointed forcefully.
+ *
+ * The checkpoint snapshot max can also be used for this purpose, instead of allocating a new
+ * reserved transaction id. This solution also has to force all the pages with restored
+ * prepared updates to be part of the current checkpoint. Therefore, we think it is better to
+ * use a dedicated transaction id, as the checkpoint snapshot max is allocated to a session and
+ * used for other operations, which can lead to confusion when an issue occurs.
+ */
+ if (conn->ckpt_reserved_session != NULL) {
+ WT_RET(__wt_txn_begin(conn->ckpt_reserved_session, NULL));
+ WT_ERR(__wt_txn_id_check(conn->ckpt_reserved_session));
+ txn_global->checkpoint_reserved_txn_id = conn->ckpt_reserved_session->txn->id;
+
+ /* Add a one second wait to simulate reserved transaction id race with prepared rollback. */
+ tsp.tv_sec = 1;
+ tsp.tv_nsec = 0;
+ __checkpoint_timing_stress(session, WT_TIMING_STRESS_CHECKPOINT_RESERVED_TXNID_DELAY, &tsp);
+ }
+
+ /*
* Refresh our snapshot here without publishing our shared ids to the world, doing so prevents
* us from racing with the stable timestamp moving ahead of current snapshot. i.e. if the stable
* timestamp moves after we begin the checkpoint transaction but before we set the checkpoint
@@ -640,6 +668,13 @@ __checkpoint_prepare(WT_SESSION_IMPL *session, bool *trackingp, const char *cfg[
/* Flag as unused for non diagnostic builds. */
WT_UNUSED(original_snap_min);
+ /* Assert that the checkpoint reserved transaction id is not visible in the checkpoint snapshot. */
+ WT_ASSERT(session,
+ conn->ckpt_reserved_session == NULL ||
+ !__wt_txn_visible_id_snapshot(txn_global->checkpoint_reserved_txn_id,
+ session->txn->snap_min, session->txn->snap_max, session->txn->snapshot,
+ session->txn->snapshot_count));
+
if (use_timestamp)
__wt_verbose_timestamp(
session, txn_global->checkpoint_timestamp, "Checkpoint requested at stable timestamp");
@@ -657,6 +692,10 @@ __checkpoint_prepare(WT_SESSION_IMPL *session, bool *trackingp, const char *cfg[
__wt_epoch(session, &conn->ckpt_prep_end);
WT_STAT_CONN_SET(session, txn_checkpoint_prep_running, 0);
+
+err:
+ if (conn->ckpt_reserved_session != NULL)
+ __wt_txn_release(conn->ckpt_reserved_session);
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index fb85adc9745..c19f63d84bc 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -326,12 +326,13 @@ __rollback_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE *page
WT_DECL_RET;
WT_TIME_WINDOW *hs_tw;
WT_UPDATE *tombstone, *upd;
- wt_timestamp_t hs_durable_ts, hs_start_ts, hs_stop_durable_ts, newer_hs_durable_ts;
+ wt_timestamp_t hs_durable_ts, hs_start_ts, hs_stop_durable_ts, newer_hs_durable_ts, pinned_ts;
uint64_t hs_counter, type_full;
uint32_t hs_btree_id;
uint8_t *memp;
uint8_t type;
char ts_string[4][WT_TS_INT_STRING_SIZE];
+ char tw_string[WT_TIME_STRING_SIZE];
bool valid_update_found;
#ifdef HAVE_DIAGNOSTIC
bool first_record;
@@ -386,6 +387,8 @@ __rollback_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE *page
WT_ERR(__wt_buf_set(session, full_value, full_value->data, full_value->size));
newer_hs_durable_ts = unpack->tw.durable_start_ts;
+ __wt_txn_pinned_timestamp(session, &pinned_ts);
+
/* Open a history store table cursor. */
WT_ERR(__wt_curhs_open(session, NULL, &hs_cursor));
/*
@@ -412,6 +415,26 @@ __rollback_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE *page
hs_cursor, &hs_stop_durable_ts, &hs_durable_ts, &type_full, hs_value));
type = (uint8_t)type_full;
+ /* Retrieve the time window from the history cursor. */
+ __wt_hs_upd_time_window(hs_cursor, &hs_tw);
+
+ /*
+ * We have a tombstone on the history update and it is obsolete according to the timestamp
+ * and txnid, so no need to restore it. These obsolete updates are written to the disk when
+ * they are not obsolete at the time of reconciliation by an eviction thread and later they
+ * become obsolete according to the checkpoint.
+ */
+ if (__rollback_txn_visible_id(session, hs_tw->stop_txn) &&
+ hs_stop_durable_ts <= pinned_ts) {
+ __wt_verbose(session, WT_VERB_RECOVERY_RTS(session),
+ "history store stop is obsolete with time window: %s and pinned timestamp: %s",
+ __wt_time_window_to_string(hs_tw, tw_string),
+ __wt_timestamp_to_string(pinned_ts, ts_string[0]));
+ WT_ERR(hs_cursor->remove(hs_cursor));
+ WT_STAT_CONN_DATA_INCR(session, txn_rts_hs_removed);
+ continue;
+ }
+
/*
* Do not include history store updates greater than on-disk data store version to construct
* a full update to restore except when the on-disk update is prepared. Including more
@@ -446,6 +469,11 @@ __rollback_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE *page
* records newer than or equal to the onpage value if eviction runs concurrently with
* checkpoint. In that case, don't verify the first record.
*
+ * It is possible that during a prepared transaction rollback, a history store update that has
+ * its own stop timestamp doesn't get removed, which leads to duplicate records in the history
+ * store after further operations on that same key. Rollback to stable should ignore such
+ * records for timestamp ordering verification.
+ *
* If we have fixed the out-of-order timestamps, then the newer update reinserted with an
* older timestamp may have a durable timestamp that is smaller than the current stop
* durable timestamp.
@@ -458,14 +486,12 @@ __rollback_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE *page
*/
WT_ASSERT(session,
hs_stop_durable_ts <= newer_hs_durable_ts || hs_start_ts == hs_stop_durable_ts ||
- hs_start_ts == newer_hs_durable_ts || first_record || hs_stop_durable_ts == WT_TS_MAX);
+ hs_start_ts == newer_hs_durable_ts || newer_hs_durable_ts == hs_durable_ts ||
+ first_record || hs_stop_durable_ts == WT_TS_MAX);
if (hs_stop_durable_ts < newer_hs_durable_ts)
WT_STAT_CONN_DATA_INCR(session, txn_rts_hs_stop_older_than_newer_start);
- /* Retrieve the time window from the history cursor. */
- __wt_hs_upd_time_window(hs_cursor, &hs_tw);
-
/*
* Stop processing when we find a stable update according to the given timestamp and
* transaction id.
diff --git a/src/third_party/wiredtiger/test/checkpoint/checkpointer.c b/src/third_party/wiredtiger/test/checkpoint/checkpointer.c
index a2445225e2e..1c8b5135d49 100644
--- a/src/third_party/wiredtiger/test/checkpoint/checkpointer.c
+++ b/src/third_party/wiredtiger/test/checkpoint/checkpointer.c
@@ -33,7 +33,6 @@ static WT_THREAD_RET clock_thread(void *);
static int compare_cursors(WT_CURSOR *, const char *, WT_CURSOR *, const char *);
static int diagnose_key_error(WT_CURSOR *, int, WT_CURSOR *, int);
static int real_checkpointer(void);
-static int verify_consistency(WT_SESSION *, char *);
/*
* set_stable --
@@ -44,7 +43,11 @@ set_stable(void)
{
char buf[128];
- testutil_check(__wt_snprintf(buf, sizeof(buf), "stable_timestamp=%x", g.ts_stable));
+ if (g.race_timetamps)
+ testutil_check(__wt_snprintf(
+ buf, sizeof(buf), "stable_timestamp=%x,oldest_timestamp=%x", g.ts_stable, g.ts_stable));
+ else
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "stable_timestamp=%x", g.ts_stable));
testutil_check(g.conn->set_timestamp(g.conn, buf));
}
@@ -97,7 +100,14 @@ clock_thread(void *arg)
while (g.running) {
__wt_writelock(session, &g.clock_lock);
- ++g.ts_stable;
+ if (g.prepare)
+ /*
+ * Leave a gap between timestamps so prepared insert followed by remove don't overlap
+ * with stable timestamp.
+ */
+ g.ts_stable += 5;
+ else
+ ++g.ts_stable;
set_stable();
if (g.ts_stable % 997 == 0) {
/*
@@ -147,6 +157,7 @@ real_checkpointer(void)
{
WT_RAND_STATE rnd;
WT_SESSION *session;
+ wt_timestamp_t stable_ts, oldest_ts, verify_ts;
uint64_t delay;
int ret;
char buf[128], timestamp_buf[64];
@@ -154,6 +165,7 @@ real_checkpointer(void)
checkpoint_config = "use_timestamp=false";
g.ts_oldest = 0;
+ verify_ts = WT_TS_NONE;
if (g.running == 0)
return (log_print_err("Checkpoint thread started stopped\n", EINVAL, 1));
@@ -179,12 +191,18 @@ real_checkpointer(void)
* Check for consistency of online data, here we don't expect to see the version at the
* checkpoint just a consistent view across all tables.
*/
- if ((ret = verify_consistency(session, NULL)) != 0)
+ if ((ret = verify_consistency(session, WT_TS_NONE)) != 0)
return (log_print_err("verify_consistency (online)", ret, 1));
if (g.use_timestamps) {
- WT_ORDERED_READ(g.ts_oldest, g.ts_stable);
testutil_check(g.conn->query_timestamp(g.conn, timestamp_buf, "get=stable"));
+ testutil_timestamp_parse(timestamp_buf, &stable_ts);
+ oldest_ts = g.ts_oldest;
+ if (stable_ts <= oldest_ts)
+ verify_ts = stable_ts;
+ else
+ verify_ts = __wt_random(&rnd) % (stable_ts - oldest_ts + 1) + oldest_ts;
+ WT_ORDERED_READ(g.ts_oldest, g.ts_stable);
}
/* Execute a checkpoint */
@@ -201,7 +219,7 @@ real_checkpointer(void)
* without timestamps as such we don't perform a verification here in the non-timestamped
* scenario.
*/
- if (g.use_timestamps && (ret = verify_consistency(session, timestamp_buf)) != 0)
+ if (g.use_timestamps && (ret = verify_consistency(session, verify_ts)) != 0)
return (log_print_err("verify_consistency (timestamps)", ret, 1));
/* Advance the oldest timestamp to the most recently set stable timestamp. */
@@ -229,8 +247,8 @@ done:
* Open a cursor on each table at the last checkpoint and walk through the tables in parallel.
* The key/values should match across all tables.
*/
-static int
-verify_consistency(WT_SESSION *session, char *stable_timestamp)
+int
+verify_consistency(WT_SESSION *session, wt_timestamp_t verify_ts)
{
WT_CURSOR **cursors;
uint64_t key_count;
@@ -244,12 +262,11 @@ verify_consistency(WT_SESSION *session, char *stable_timestamp)
if (cursors == NULL)
return (log_print_err("verify_consistency", ENOMEM, 1));
- if (stable_timestamp != NULL) {
- testutil_check(__wt_snprintf(
- cfg_buf, sizeof(cfg_buf), "isolation=snapshot,read_timestamp=%s", stable_timestamp));
- } else {
+ if (verify_ts != WT_TS_NONE)
+ testutil_check(__wt_snprintf(cfg_buf, sizeof(cfg_buf),
+ "isolation=snapshot,read_timestamp=%" PRIx64 ",roundup_timestamps=read", verify_ts));
+ else
testutil_check(__wt_snprintf(cfg_buf, sizeof(cfg_buf), "isolation=snapshot"));
- }
testutil_check(session->begin_transaction(session, cfg_buf));
for (i = 0; i < g.ntables; i++) {
@@ -267,13 +284,19 @@ verify_consistency(WT_SESSION *session, char *stable_timestamp)
}
while (ret == 0) {
- ret = cursors[0]->next(cursors[0]);
+ while ((ret = cursors[0]->next(cursors[0])) != 0) {
+ if (ret == WT_NOTFOUND)
+ break;
+ if (ret != WT_PREPARE_CONFLICT) {
+ (void)log_print_err("cursor->next", ret, 1);
+ goto err;
+ }
+ __wt_yield();
+ }
+
if (ret == 0)
++key_count;
- else if (ret != WT_NOTFOUND) {
- (void)log_print_err("cursor->next", ret, 1);
- goto err;
- }
+
/*
* Check to see that all remaining cursors have the same key/value pair.
*/
@@ -283,10 +306,14 @@ verify_consistency(WT_SESSION *session, char *stable_timestamp)
*/
if (g.cookies[i].type == LSM)
continue;
- t_ret = cursors[i]->next(cursors[i]);
- if (t_ret != 0 && t_ret != WT_NOTFOUND) {
- (void)log_print_err("cursor->next", t_ret, 1);
- goto err;
+ while ((t_ret = cursors[i]->next(cursors[i])) != 0) {
+ if (t_ret == WT_NOTFOUND)
+ break;
+ if (t_ret != WT_PREPARE_CONFLICT) {
+ (void)log_print_err("cursor->next", t_ret, 1);
+ goto err;
+ }
+ __wt_yield();
}
if (ret == WT_NOTFOUND && t_ret == WT_NOTFOUND)
@@ -306,8 +333,8 @@ verify_consistency(WT_SESSION *session, char *stable_timestamp)
}
}
}
- printf("Finished verifying a %s with %d tables and %" PRIu64 " keys\n",
- stable_timestamp != NULL ? "checkpoint" : "snapshot", g.ntables, key_count);
+ printf("Finished verifying with %d tables and %" PRIu64 " keys at timestamp %" PRIu64 "\n",
+ g.ntables, key_count, verify_ts);
fflush(stdout);
err:
diff --git a/src/third_party/wiredtiger/test/checkpoint/recovery-test.sh b/src/third_party/wiredtiger/test/checkpoint/recovery-test.sh
new file mode 100755
index 00000000000..fc98ff2f463
--- /dev/null
+++ b/src/third_party/wiredtiger/test/checkpoint/recovery-test.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+
+set -x
+
+home=${1:-WT_TEST}
+backup=$home.backup
+recovery=$home.recovery
+
+#./t -t r -W 3 -D -X -n 100000 -k 100000 -C cache_size=100MB -h $home > $home.out 2>&1 &
+./t -t r -s 2 -m -W 3 -D -p -x -n 100000 -k 100000 -C cache_size=100MB -h $home > $home.out 2>&1 &
+pid=$!
+
+trap "kill -9 $pid" 0 1 2 3 13 15
+
+# Wait for the test to start running
+while ! grep -q "Finished a checkpoint" $home.out && kill -0 $pid ; do
+ sleep 1
+done
+
+while kill -STOP $pid ; do
+ rm -rf $backup $recovery ; mkdir $backup ; mkdir $recovery
+ # Make sure all threads are stopped before copying files
+ sleep 1
+ cp $home/* $backup
+ kill -CONT $pid
+ cp $backup/* $recovery
+ ./t -t r -D -v -h $recovery || exit 1
+done
+
+exit 0
diff --git a/src/third_party/wiredtiger/test/checkpoint/smoke.sh b/src/third_party/wiredtiger/test/checkpoint/smoke.sh
index 962b1893305..c3398e261d5 100755
--- a/src/third_party/wiredtiger/test/checkpoint/smoke.sh
+++ b/src/third_party/wiredtiger/test/checkpoint/smoke.sh
@@ -23,10 +23,10 @@ echo "checkpoint: 6 column-store tables, named checkpoint with prepare"
$TEST_WRAPPER ./t -c 'TeSt' -T 6 -t c -p
echo "checkpoint: column-store tables, stress history store. Sweep and timestamps"
-$TEST_WRAPPER ./t -t c -W 3 -r 2 -D -s -x -n 100000 -k 100000 -C cache_size=100MB
+$TEST_WRAPPER ./t -t c -W 3 -r 2 -D -s 1 -x -n 100000 -k 100000 -C cache_size=100MB
echo "checkpoint: column-store tables, Sweep and timestamps"
-$TEST_WRAPPER ./t -t c -W 3 -r 2 -s -x -n 100000 -k 100000 -C cache_size=100MB
+$TEST_WRAPPER ./t -t c -W 3 -r 2 -s 1 -x -n 100000 -k 100000 -C cache_size=100MB
echo "checkpoint: 6 LSM tables"
$TEST_WRAPPER ./t -T 6 -t l
@@ -47,13 +47,13 @@ echo "checkpoint: 6 row-store tables, named checkpoint with prepare"
$TEST_WRAPPER ./t -c 'TeSt' -T 6 -t r -p
echo "checkpoint: row-store tables, stress history store. Sweep and timestamps"
-$TEST_WRAPPER ./t -t r -W 3 -r 2 -D -s -x -n 100000 -k 100000 -C cache_size=100MB
+$TEST_WRAPPER ./t -t r -W 3 -r 2 -D -s 1 -x -n 100000 -k 100000 -C cache_size=100MB
echo "checkpoint: row-store tables, Sweep and timestamps"
-$TEST_WRAPPER ./t -t r -W 3 -r 2 -s -x -n 100000 -k 100000 -C cache_size=100MB
+$TEST_WRAPPER ./t -t r -W 3 -r 2 -s 1 -x -n 100000 -k 100000 -C cache_size=100MB
echo "checkpoint: 3 mixed tables, with sweep"
-$TEST_WRAPPER ./t -T 3 -t m -W 3 -r 2 -s -n 100000 -k 100000
+$TEST_WRAPPER ./t -T 3 -t m -W 3 -r 2 -s 1 -n 100000 -k 100000
echo "checkpoint: 3 mixed tables, with timestamps"
$TEST_WRAPPER ./t -T 3 -t m -W 3 -r 2 -x -n 100000 -k 100000
diff --git a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c
index 1d4c99a2b03..924c7c33749 100644
--- a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c
+++ b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c
@@ -45,9 +45,10 @@ int
main(int argc, char *argv[])
{
table_type ttype;
- int ch, cnt, ret, runs;
+ int ch, cnt, i, ret, runs;
char *working_dir;
const char *config_open;
+ bool verify_only;
(void)testutil_set_progname(argv);
@@ -63,9 +64,14 @@ main(int argc, char *argv[])
g.ntables = 3;
g.nworkers = 1;
g.sweep_stress = g.use_timestamps = false;
+ g.failpoint_hs_delete_key_from_ts = g.failpoint_hs_insert_1 = g.failpoint_hs_insert_2 = false;
+ g.hs_checkpoint_timing_stress = g.reserved_txnid_timing_stress = false;
+ g.checkpoint_slow_timing_stress = false;
+ g.mixed_mode_deletes = false;
runs = 1;
+ verify_only = false;
- while ((ch = __wt_getopt(progname, argc, argv, "C:c:Dh:k:l:n:pr:sT:t:W:x")) != EOF)
+ while ((ch = __wt_getopt(progname, argc, argv, "C:c:Dh:k:l:mn:pr:s:T:t:vW:xX")) != EOF)
switch (ch) {
case 'c':
g.checkpoint_name = __wt_optarg;
@@ -88,6 +94,9 @@ main(int argc, char *argv[])
return (EXIT_FAILURE);
}
break;
+ case 'm':
+ g.mixed_mode_deletes = true;
+ break;
case 'n': /* operations */
g.nops = (u_int)atoi(__wt_optarg);
break;
@@ -98,7 +107,31 @@ main(int argc, char *argv[])
runs = atoi(__wt_optarg);
break;
case 's':
- g.sweep_stress = true;
+ switch (__wt_optarg[0]) {
+ case '1':
+ g.sweep_stress = true;
+ break;
+ case '2':
+ g.failpoint_hs_delete_key_from_ts = true;
+ break;
+ case '3':
+ g.failpoint_hs_insert_1 = true;
+ break;
+ case '4':
+ g.failpoint_hs_insert_2 = true;
+ break;
+ case '5':
+ g.hs_checkpoint_timing_stress = true;
+ break;
+ case '6':
+ g.reserved_txnid_timing_stress = true;
+ break;
+ case '7':
+ g.checkpoint_slow_timing_stress = true;
+ break;
+ default:
+ return (usage());
+ }
break;
case 't':
switch (__wt_optarg[0]) {
@@ -121,12 +154,18 @@ main(int argc, char *argv[])
case 'T':
g.ntables = atoi(__wt_optarg);
break;
+ case 'v':
+ verify_only = true;
+ break;
case 'W':
g.nworkers = atoi(__wt_optarg);
break;
case 'x':
g.use_timestamps = true;
break;
+ case 'X':
+ g.use_timestamps = g.race_timetamps = true;
+ break;
default:
return (usage());
}
@@ -145,7 +184,7 @@ main(int argc, char *argv[])
printf("%s: process %" PRIu64 "\n", progname, (uint64_t)getpid());
for (cnt = 1; (runs == 0 || cnt <= runs) && g.status == 0; ++cnt) {
- cleanup(cnt == 1); /* Clean up previous runs */
+ cleanup(cnt == 1 && !verify_only); /* Clean up previous runs */
printf(" %d: %d workers, %d tables\n", cnt, g.nworkers, g.ntables);
@@ -155,6 +194,16 @@ main(int argc, char *argv[])
break;
}
+ for (i = 0; i < g.ntables; ++i) {
+ g.cookies[i].id = i;
+ if (ttype == MIX)
+ g.cookies[i].type = (table_type)((i % MAX_TABLE_TYPE) + 1);
+ else
+ g.cookies[i].type = ttype;
+ testutil_check(__wt_snprintf(
+ g.cookies[i].uri, sizeof(g.cookies[i].uri), "%s%04d", URI_BASE, g.cookies[i].id));
+ }
+
g.running = 1;
if ((ret = wt_connect(config_open)) != 0) {
@@ -162,8 +211,20 @@ main(int argc, char *argv[])
break;
}
+ if (verify_only) {
+ WT_SESSION *session;
+
+ if ((ret = g.conn->open_session(g.conn, NULL, NULL, &session)) != 0) {
+ (void)log_print_err("conn.open_session", ret, 1);
+ break;
+ }
+
+ verify_consistency(session, WT_TS_NONE);
+ goto run_complete;
+ }
+
start_checkpoints();
- if ((ret = start_workers(ttype)) != 0) {
+ if ((ret = start_workers()) != 0) {
(void)log_print_err("Start workers failed", ret, 1);
break;
}
@@ -171,6 +232,7 @@ main(int argc, char *argv[])
g.running = 0;
end_checkpoints();
+run_complete:
free(g.cookies);
g.cookies = NULL;
if ((ret = wt_shutdown()) != 0) {
@@ -187,7 +249,7 @@ main(int argc, char *argv[])
return (g.status);
}
-#define DEBUG_MODE_CFG ",debug_mode=(eviction=true,table_logging=true)"
+#define DEBUG_MODE_CFG ",debug_mode=(eviction=true,table_logging=true),verbose=(recovery)"
/*
* wt_connect --
* Configure the WiredTiger connection.
@@ -200,6 +262,24 @@ wt_connect(const char *config_open)
};
int ret;
char config[512];
+ char timing_stress_cofing[512];
+ bool timing_stress;
+
+ timing_stress = false;
+
+ if (g.sweep_stress || g.failpoint_hs_delete_key_from_ts || g.failpoint_hs_insert_1 ||
+ g.failpoint_hs_insert_2 || g.hs_checkpoint_timing_stress || g.reserved_txnid_timing_stress ||
+ g.checkpoint_slow_timing_stress) {
+ timing_stress = true;
+ testutil_check(__wt_snprintf(timing_stress_cofing, sizeof(timing_stress_cofing),
+ ",timing_stress_for_test=[%s%s%s%s%s%s%s]", g.sweep_stress ? "aggressive_sweep" : "",
+ g.failpoint_hs_delete_key_from_ts ? "failpoint_history_store_delete_key_from_ts" : "",
+ g.failpoint_hs_insert_1 ? "failpoint_history_store_insert_1" : "",
+ g.failpoint_hs_insert_2 ? "failpoint_history_store_insert_2" : "",
+ g.hs_checkpoint_timing_stress ? "history_store_checkpoint_delay" : "",
+ g.reserved_txnid_timing_stress ? "checkpoint_reserved_txnid_delay" : "",
+ g.checkpoint_slow_timing_stress ? "checkpoint_slow" : ""));
+ }
/*
* If we want to stress sweep, we have a lot of additional configuration settings to set.
@@ -208,16 +288,17 @@ wt_connect(const char *config_open)
testutil_check(__wt_snprintf(config, sizeof(config),
"create,cache_cursors=false,statistics=(fast),statistics_log=(json,wait=1),error_prefix="
"\"%s\",file_manager=(close_handle_minimum=1,close_idle_time=1,close_scan_interval=1),"
- "log=(enabled),cache_size=1GB,timing_stress_for_test=(aggressive_sweep)%s%s%s",
- progname, g.debug_mode ? DEBUG_MODE_CFG : "", config_open == NULL ? "" : ",",
- config_open == NULL ? "" : config_open));
- else
+ "log=(enabled),cache_size=1GB%s%s%s%s",
+ progname, timing_stress_cofing, g.debug_mode ? DEBUG_MODE_CFG : "",
+ config_open == NULL ? "" : ",", config_open == NULL ? "" : config_open));
+ else {
testutil_check(__wt_snprintf(config, sizeof(config),
"create,cache_cursors=false,statistics=(fast),statistics_log=(json,wait=1),error_prefix="
- "\"%s\"%s%s%s",
+ "\"%s\"%s%s%s%s",
progname, g.debug_mode ? DEBUG_MODE_CFG : "", config_open == NULL ? "" : ",",
- config_open == NULL ? "" : config_open));
-
+ config_open == NULL ? "" : config_open, timing_stress ? timing_stress_cofing : ""));
+ }
+ printf("WT open config: %s\n", config);
if ((ret = wiredtiger_open(g.home, &event_handler, config, &g.conn)) != 0)
return (log_print_err("wiredtiger_open", ret, 1));
return (0);
@@ -338,8 +419,8 @@ static int
usage(void)
{
fprintf(stderr,
- "usage: %s [-C wiredtiger-config] [-c checkpoint] [-h home] [-k keys]\n\t[-l log] [-n ops] "
- "[-r runs] [-T table-config] [-t f|r|v]\n\t[-W workers]\n",
+ "usage: %s [-C wiredtiger-config] [-c checkpoint] [-h home] [-k keys]\n\t[-l log] [-m] "
+ "[-n ops] [-r runs] [-s 1|2|3|4] [-T table-config] [-t f|r|v]\n\t[-W workers]\n",
progname);
fprintf(stderr, "%s",
"\t-C specify wiredtiger_open configuration arguments\n"
@@ -347,12 +428,23 @@ usage(void)
"\t-h set a database home directory\n"
"\t-k set number of keys to load\n"
"\t-l specify a log file\n"
+ "\t-m run with mixed mode delete operations\n"
"\t-n set number of operations each thread does\n"
"\t-p use prepare\n"
"\t-r set number of runs (0 for continuous)\n"
+ "\t-s specify which timing stress configuration to use ( 1 | 2 | 3 | 4 | 5 | 6 | 7 )\n"
+ "\t\t1: sweep_stress\n"
+ "\t\t2: failpoint_hs_delete_key_from_ts\n"
+ "\t\t3: failpoint_hs_insert_1\n"
+ "\t\t4: failpoint_hs_insert_2\n"
+ "\t\t5: hs_checkpoint_timing_stress\n"
+ "\t\t6: reserved_txnid_timing_stress\n"
+ "\t\t7: checkpoint_slow_timing_stress\n"
"\t-T specify a table configuration\n"
"\t-t set a file type ( col | mix | row | lsm )\n"
+ "\t-v verify only\n"
"\t-W set number of worker threads\n"
- "\t-x use timestamps\n");
+ "\t-x use timestamps\n"
+ "\t-X race timestamp updates with checkpoints\n");
return (EXIT_FAILURE);
}
diff --git a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h
index 7950fc8bb2e..b3b65c5d828 100644
--- a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h
+++ b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h
@@ -52,32 +52,41 @@ typedef struct {
} COOKIE;
typedef struct {
- char *home; /* Home directory */
- const char *checkpoint_name; /* Checkpoint name */
- WT_CONNECTION *conn; /* WiredTiger connection */
- bool debug_mode; /* History store stress test */
- u_int nkeys; /* Keys to load */
- u_int nops; /* Operations per thread */
- FILE *logfp; /* Message log file. */
- int nworkers; /* Number workers configured */
- int ntables; /* Number tables configured */
- int ntables_created; /* Number tables opened */
- volatile int running; /* Whether to stop */
- int status; /* Exit status */
- bool sweep_stress; /* Sweep stress test */
- u_int ts_oldest; /* Current oldest timestamp */
- u_int ts_stable; /* Current stable timestamp */
- bool use_timestamps; /* Use txn timestamps */
- bool prepare; /* Use prepare transactions */
- COOKIE *cookies; /* Per-thread info */
- WT_RWLOCK clock_lock; /* Clock synchronization */
- wt_thread_t checkpoint_thread; /* Checkpoint thread */
- wt_thread_t clock_thread; /* Clock thread */
+ char *home; /* Home directory */
+ const char *checkpoint_name; /* Checkpoint name */
+ WT_CONNECTION *conn; /* WiredTiger connection */
+ bool debug_mode; /* History store stress test */
+ u_int nkeys; /* Keys to load */
+ u_int nops; /* Operations per thread */
+ FILE *logfp; /* Message log file. */
+ int nworkers; /* Number workers configured */
+ int ntables; /* Number tables configured */
+ int ntables_created; /* Number tables opened */
+ volatile int running; /* Whether to stop */
+ int status; /* Exit status */
+ bool sweep_stress; /* Sweep stress test */
+ bool failpoint_hs_delete_key_from_ts; /* Failpoint for hs key deletion. */
+ bool failpoint_hs_insert_1; /* Failpoint for hs insertion. */
+ bool failpoint_hs_insert_2; /* Failpoint for hs insertion. */
+ bool hs_checkpoint_timing_stress; /* History store checkpoint timing stress */
+ bool reserved_txnid_timing_stress; /* Reserved transaction id timing stress */
+ bool checkpoint_slow_timing_stress; /* Checkpoint slow timing stress */
+ u_int ts_oldest; /* Current oldest timestamp */
+ u_int ts_stable; /* Current stable timestamp */
+ bool mixed_mode_deletes; /* Run with mixed mode deletes */
+ bool use_timestamps; /* Use txn timestamps */
+ bool race_timetamps; /* Async update to oldest timestamp */
+ bool prepare; /* Use prepare transactions */
+ COOKIE *cookies; /* Per-thread info */
+ WT_RWLOCK clock_lock; /* Clock synchronization */
+ wt_thread_t checkpoint_thread; /* Checkpoint thread */
+ wt_thread_t clock_thread; /* Clock thread */
} GLOBAL;
extern GLOBAL g;
void end_checkpoints(void);
int log_print_err(const char *, int, int);
void start_checkpoints(void);
-int start_workers(table_type);
+int start_workers(void);
const char *type_to_string(table_type);
+int verify_consistency(WT_SESSION *, wt_timestamp_t);
diff --git a/src/third_party/wiredtiger/test/checkpoint/workers.c b/src/third_party/wiredtiger/test/checkpoint/workers.c
index 05b9a83b75b..de2798413ee 100644
--- a/src/third_party/wiredtiger/test/checkpoint/workers.c
+++ b/src/third_party/wiredtiger/test/checkpoint/workers.c
@@ -28,6 +28,9 @@
#include "test_checkpoint.h"
+#define MAX_MODIFY_ENTRIES 5
+
+static char modify_repl[256];
static int real_worker(void);
static WT_THREAD_RET worker(void *);
@@ -62,12 +65,25 @@ create_table(WT_SESSION *session, COOKIE *cookie)
}
/*
+ * modify_repl_init --
+ * Initialize the replacement information.
+ */
+static void
+modify_repl_init(void)
+{
+ size_t i;
+
+ for (i = 0; i < sizeof(modify_repl); ++i)
+ modify_repl[i] = "0123456789"[i % 10];
+}
+
+/*
* start_workers --
* Setup the configuration for the tables being populated, then start the worker thread(s) and
* wait for them to finish.
*/
int
-start_workers(table_type type)
+start_workers(void)
{
struct timeval start, stop;
WT_SESSION *session;
@@ -77,6 +93,8 @@ start_workers(table_type type)
ret = 0;
+ modify_repl_init();
+
/* Create statistics and thread structures. */
if ((tids = calloc((size_t)(g.nworkers), sizeof(*tids))) == NULL)
return (log_print_err("calloc", errno, 1));
@@ -85,16 +103,9 @@ start_workers(table_type type)
(void)log_print_err("conn.open_session", ret, 1);
goto err;
}
- /* Setup the cookies */
- for (i = 0; i < g.ntables; ++i) {
- g.cookies[i].id = i;
- if (type == MIX)
- g.cookies[i].type = (table_type)((i % MAX_TABLE_TYPE) + 1);
- else
- g.cookies[i].type = type;
- testutil_check(__wt_snprintf(
- g.cookies[i].uri, sizeof(g.cookies[i].uri), "%s%04d", URI_BASE, g.cookies[i].id));
+ /* Create tables */
+ for (i = 0; i < g.ntables; ++i) {
/* Should probably be atomic to avoid races. */
if ((ret = create_table(session, &g.cookies[i])) != 0)
goto err;
@@ -123,13 +134,55 @@ err:
}
/*
+ * modify_build --
+ * Generate a set of modify vectors.
+ */
+static void
+modify_build(WT_MODIFY *entries, int *nentriesp, u_int seed)
+{
+ int i, nentries;
+
+ /* Deterministically generate modifies based on the seed. */
+ nentries = (int)seed % MAX_MODIFY_ENTRIES + 1;
+ for (i = 0; i < nentries; ++i) {
+ entries[i].data.data = modify_repl + seed % 10;
+ entries[i].data.size = seed % 8 + 1;
+ entries[i].offset = seed % 40;
+ entries[i].size = seed % 10 + 1;
+ }
+
+ *nentriesp = (int)nentries;
+}
+
+/*
+ * worker_mm_delete --
+ * Delete a key with a mixed mode timestamp.
+ */
+static inline int
+worker_mm_delete(WT_CURSOR *cursor, uint64_t keyno)
+{
+ int ret;
+
+ cursor->set_key(cursor, keyno);
+ ret = cursor->search(cursor);
+ if (ret == 0)
+ ret = cursor->remove(cursor);
+ else if (ret == WT_NOTFOUND)
+ ret = 0;
+
+ return (ret);
+}
+
+/*
* worker_op --
* Write operation.
*/
static inline int
worker_op(WT_CURSOR *cursor, uint64_t keyno, u_int new_val)
{
+ WT_MODIFY entries[MAX_MODIFY_ENTRIES];
int cmp, ret;
+ int nentries;
char valuebuf[64];
cursor->set_key(cursor, keyno);
@@ -138,7 +191,7 @@ worker_op(WT_CURSOR *cursor, uint64_t keyno, u_int new_val)
if ((ret = cursor->search_near(cursor, &cmp)) != 0) {
if (ret == WT_NOTFOUND)
return (0);
- if (ret == WT_ROLLBACK)
+ if (ret == WT_ROLLBACK || ret == WT_PREPARE_CONFLICT)
return (WT_ROLLBACK);
return (log_print_err("cursor.search_near", ret, 1));
}
@@ -169,13 +222,31 @@ worker_op(WT_CURSOR *cursor, uint64_t keyno, u_int new_val)
testutil_check(cursor->reset(cursor));
} else if (new_val % 39 < 10) {
if ((ret = cursor->search(cursor)) != 0 && ret != WT_NOTFOUND) {
- if (ret == WT_ROLLBACK)
+ if (ret == WT_ROLLBACK || ret == WT_PREPARE_CONFLICT)
return (WT_ROLLBACK);
return (log_print_err("cursor.search", ret, 1));
}
if (g.sweep_stress)
testutil_check(cursor->reset(cursor));
} else {
+ if (new_val % 39 < 30) {
+ // Do modify
+ ret = cursor->search(cursor);
+ if (ret == 0) {
+ modify_build(entries, &nentries, new_val);
+ if ((ret = cursor->modify(cursor, entries, nentries)) != 0) {
+ if (ret == WT_ROLLBACK)
+ return (WT_ROLLBACK);
+ return (log_print_err("cursor.modify", ret, 1));
+ }
+ } else if (ret != WT_NOTFOUND) {
+ if (ret == WT_ROLLBACK || ret == WT_PREPARE_CONFLICT)
+ return (WT_ROLLBACK);
+ return (log_print_err("cursor.search", ret, 1));
+ }
+ }
+
+ // If key doesn't exist, turn modify into an insert.
testutil_check(__wt_snprintf(valuebuf, sizeof(valuebuf), "%052u", new_val));
cursor->set_value(cursor, valuebuf);
if ((ret = cursor->insert(cursor)) != 0) {
@@ -220,11 +291,12 @@ real_worker(void)
int j, ret, t_ret;
char buf[128];
const char *begin_cfg;
- bool reopen_cursors, start_txn;
+ bool reopen_cursors, new_txn, start_txn;
ret = t_ret = 0;
reopen_cursors = false;
start_txn = true;
+ new_txn = false;
if ((cursors = calloc((size_t)(g.ntables), sizeof(WT_CURSOR *))) == NULL)
return (log_print_err("malloc", ENOMEM, 1));
@@ -253,9 +325,37 @@ real_worker(void)
(void)log_print_err("real_worker:begin_transaction", ret, 1);
goto err;
}
+ new_txn = true;
start_txn = false;
}
keyno = __wt_random(&rnd) % g.nkeys + 1;
+ /* If we have specified to run with mixed mode deletes we need to do it in its own txn. */
+ if (g.use_timestamps && g.mixed_mode_deletes && new_txn && __wt_random(&rnd) % 72 == 0) {
+ new_txn = false;
+ for (j = 0; ret == 0 && j < g.ntables; j++) {
+ ret = worker_mm_delete(cursors[j], keyno);
+ if (ret == WT_ROLLBACK || ret == WT_PREPARE_CONFLICT)
+ break;
+ else if (ret != 0)
+ goto err;
+ }
+
+ if (ret == 0) {
+ if ((ret = session->commit_transaction(session, NULL)) != 0) {
+ (void)log_print_err("real_worker:commit_mm_transaction", ret, 1);
+ goto err;
+ }
+ } else {
+ if ((ret = session->rollback_transaction(session, NULL)) != 0) {
+ (void)log_print_err("real_worker:rollback_transaction", ret, 1);
+ goto err;
+ }
+ }
+ start_txn = true;
+ continue;
+ } else
+ new_txn = false;
+
for (j = 0; ret == 0 && j < g.ntables; j++)
ret = worker_op(cursors[j], keyno, i);
if (ret != 0 && ret != WT_ROLLBACK) {
@@ -271,6 +371,7 @@ real_worker(void)
testutil_check(__wt_snprintf(
buf, sizeof(buf), "prepare_timestamp=%x", g.ts_stable + 1));
if ((ret = session->prepare_transaction(session, buf)) != 0) {
+ __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock);
(void)log_print_err("real_worker:prepare_transaction", ret, 1);
goto err;
}
@@ -280,29 +381,45 @@ real_worker(void)
} else
testutil_check(__wt_snprintf(
buf, sizeof(buf), "commit_timestamp=%x", g.ts_stable + 1));
- if ((ret = session->commit_transaction(session, buf)) != 0) {
- __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock);
- (void)log_print_err("real_worker:commit_transaction", ret, 1);
- goto err;
+
+ // Commit the majority of the time
+ if (next_rnd % 49 != 0) {
+ if ((ret = session->commit_transaction(session, buf)) != 0) {
+ __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock);
+ (void)log_print_err("real_worker:commit_transaction", ret, 1);
+ goto err;
+ }
+ } else {
+ if ((ret = session->rollback_transaction(session, NULL)) != 0) {
+ __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock);
+ (void)log_print_err("real_worker:rollback_transaction", ret, 1);
+ goto err;
+ }
}
__wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock);
start_txn = true;
- /* Occasionally reopen cursors after committing. */
- if (next_rnd % 13 == 0) {
+ /* Occasionally reopen cursors after transaction finish. */
+ if (next_rnd % 13 == 0)
reopen_cursors = true;
- }
}
} else {
- if ((ret = session->commit_transaction(session, NULL)) != 0) {
- (void)log_print_err("real_worker:commit_transaction", ret, 1);
- goto err;
+ // Commit the majority of the time
+ if (next_rnd % 49 != 0) {
+ if ((ret = session->commit_transaction(session, NULL)) != 0) {
+ (void)log_print_err("real_worker:commit_transaction", ret, 1);
+ goto err;
+ }
+ } else {
+ if ((ret = session->rollback_transaction(session, NULL)) != 0) {
+ (void)log_print_err("real_worker:rollback_transaction", ret, 1);
+ goto err;
+ }
}
start_txn = true;
}
- } else if (next_rnd % 15 == 0) {
+ } else if (next_rnd % 15 == 0)
/* Occasionally reopen cursors during a running transaction. */
reopen_cursors = true;
- }
} else {
if ((ret = session->rollback_transaction(session, NULL)) != 0) {
(void)log_print_err("real_worker:rollback_transaction", ret, 1);
diff --git a/src/third_party/wiredtiger/test/format/config.c b/src/third_party/wiredtiger/test/format/config.c
index 33b8d7a9112..e3c1afea11a 100644
--- a/src/third_party/wiredtiger/test/format/config.c
+++ b/src/third_party/wiredtiger/test/format/config.c
@@ -384,6 +384,14 @@ config_backward_compatible(void)
config_single("disk.mmap_all=off", false);
}
+ if (g.c_timing_stress_checkpoint_reserved_txnid_delay) {
+ if (config_is_perm("stress.checkpoint_reserved_txnid_delay"))
+ testutil_die(EINVAL,
+ "stress.checkpoint_reserved_txnid_delay not supported in backward compatibility "
+ "mode");
+ config_single("stress.checkpoint_reserved_txnid_delay=off", false);
+ }
+
if (g.c_timing_stress_hs_sweep) {
if (config_is_perm("stress.hs_sweep"))
testutil_die(EINVAL, "stress.hs_sweep not supported in backward compatibility mode");
diff --git a/src/third_party/wiredtiger/test/format/config.h b/src/third_party/wiredtiger/test/format/config.h
index 07a6e2603ff..c5456810e60 100644
--- a/src/third_party/wiredtiger/test/format/config.h
+++ b/src/third_party/wiredtiger/test/format/config.h
@@ -301,9 +301,25 @@ static CONFIG c[] = {
{"stress.checkpoint", "stress checkpoints", C_BOOL, 2, 0, 0, &g.c_timing_stress_checkpoint, NULL},
/* 2% */
+ {"stress.checkpoint_reserved_txnid_delay", "stress checkpoint invisible transaction id delay",
+ C_BOOL, 2, 0, 0, &g.c_timing_stress_checkpoint_reserved_txnid_delay, NULL},
+
+ /* 2% */
{"stress.checkpoint_prepare", "stress checkpoint prepare", C_BOOL, 2, 0, 0,
&g.c_timing_stress_checkpoint_prepare, NULL},
+ /* 30% */
+ {"stress.failpoint_hs_delete_key_from_ts", "stress failpoint history store delete key from ts",
+ C_BOOL, 30, 0, 0, &g.c_timing_stress_failpoint_hs_delete_key_from_ts, NULL},
+
+ /* 30% */
+ {"stress.failpoint_hs_insert_1", "stress failpoint history store insert (#1)", C_BOOL, 30, 0, 0,
+ &g.c_timing_stress_failpoint_hs_insert_1, NULL},
+
+ /* 30% */
+ {"stress.failpoint_hs_insert_2", "stress failpoint history store insert (#2)", C_BOOL, 30, 0, 0,
+ &g.c_timing_stress_failpoint_hs_insert_2, NULL},
+
/* 2% */
{"stress.hs_checkpoint_delay", "stress history store checkpoint delay", C_BOOL, 2, 0, 0,
&g.c_timing_stress_hs_checkpoint_delay, NULL},
diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h
index 619060e1881..ecfb83a37ce 100644
--- a/src/third_party/wiredtiger/test/format/format.h
+++ b/src/third_party/wiredtiger/test/format/format.h
@@ -249,6 +249,10 @@ typedef struct {
uint32_t c_timer;
uint32_t c_timing_stress_aggressive_sweep;
uint32_t c_timing_stress_checkpoint;
+ uint32_t c_timing_stress_checkpoint_reserved_txnid_delay;
+ uint32_t c_timing_stress_failpoint_hs_delete_key_from_ts;
+ uint32_t c_timing_stress_failpoint_hs_insert_1;
+ uint32_t c_timing_stress_failpoint_hs_insert_2;
uint32_t c_timing_stress_hs_checkpoint_delay;
uint32_t c_timing_stress_hs_search;
uint32_t c_timing_stress_hs_sweep;
diff --git a/src/third_party/wiredtiger/test/format/wts.c b/src/third_party/wiredtiger/test/format/wts.c
index e27f54a42a9..d9f0c81a34c 100644
--- a/src/third_party/wiredtiger/test/format/wts.c
+++ b/src/third_party/wiredtiger/test/format/wts.c
@@ -264,6 +264,14 @@ create_database(const char *home, WT_CONNECTION **connp)
CONFIG_APPEND(p, ",checkpoint_slow");
if (g.c_timing_stress_checkpoint_prepare)
CONFIG_APPEND(p, ",prepare_checkpoint_delay");
+ if (g.c_timing_stress_checkpoint_reserved_txnid_delay)
+ CONFIG_APPEND(p, ",checkpoint_reserved_txnid_delay");
+ if (g.c_timing_stress_failpoint_hs_delete_key_from_ts)
+ CONFIG_APPEND(p, ",failpoint_history_store_delete_key_from_ts");
+ if (g.c_timing_stress_failpoint_hs_insert_1)
+ CONFIG_APPEND(p, ",failpoint_history_store_insert_1");
+ if (g.c_timing_stress_failpoint_hs_insert_2)
+ CONFIG_APPEND(p, ",failpoint_history_store_insert_2");
if (g.c_timing_stress_hs_checkpoint_delay)
CONFIG_APPEND(p, ",history_store_checkpoint_delay");
if (g.c_timing_stress_hs_search)
@@ -478,6 +486,47 @@ wts_open(const char *home, WT_CONNECTION **connp, WT_SESSION **sessionp, bool al
if (enc != NULL)
CONFIG_APPEND(p, ",encryption=(name=%s)", enc);
+ /*
+ * Timing stress options aren't persisted in the base config and need to be added to the
+ * configuration for re-open.
+ */
+ CONFIG_APPEND(p, ",timing_stress_for_test=[");
+ if (g.c_timing_stress_aggressive_sweep)
+ CONFIG_APPEND(p, ",aggressive_sweep");
+ if (g.c_timing_stress_checkpoint)
+ CONFIG_APPEND(p, ",checkpoint_slow");
+ if (g.c_timing_stress_checkpoint_prepare)
+ CONFIG_APPEND(p, ",prepare_checkpoint_delay");
+ if (g.c_timing_stress_failpoint_hs_delete_key_from_ts)
+ CONFIG_APPEND(p, ",failpoint_history_store_delete_key_from_ts");
+ if (g.c_timing_stress_failpoint_hs_insert_1)
+ CONFIG_APPEND(p, ",failpoint_history_store_insert_1");
+ if (g.c_timing_stress_failpoint_hs_insert_2)
+ CONFIG_APPEND(p, ",failpoint_history_store_insert_2");
+ if (g.c_timing_stress_hs_checkpoint_delay)
+ CONFIG_APPEND(p, ",history_store_checkpoint_delay");
+ if (g.c_timing_stress_hs_search)
+ CONFIG_APPEND(p, ",history_store_search");
+ if (g.c_timing_stress_hs_sweep)
+ CONFIG_APPEND(p, ",history_store_sweep_race");
+ if (g.c_timing_stress_split_1)
+ CONFIG_APPEND(p, ",split_1");
+ if (g.c_timing_stress_split_2)
+ CONFIG_APPEND(p, ",split_2");
+ if (g.c_timing_stress_split_3)
+ CONFIG_APPEND(p, ",split_3");
+ if (g.c_timing_stress_split_4)
+ CONFIG_APPEND(p, ",split_4");
+ if (g.c_timing_stress_split_5)
+ CONFIG_APPEND(p, ",split_5");
+ if (g.c_timing_stress_split_6)
+ CONFIG_APPEND(p, ",split_6");
+ if (g.c_timing_stress_split_7)
+ CONFIG_APPEND(p, ",split_7");
+ if (g.c_timing_stress_split_8)
+ CONFIG_APPEND(p, ",split_8");
+ CONFIG_APPEND(p, "]");
+
/* If in-memory, there's only a single, shared WT_CONNECTION handle. */
if (g.c_in_memory != 0)
conn = g.wts_conn_inmemory;
diff --git a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable26.py b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable26.py
new file mode 100755
index 00000000000..d9cc9b9a5fa
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable26.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import threading, time
+from helper import simulate_crash_restart
+from test_rollback_to_stable01 import test_rollback_to_stable_base
+from wiredtiger import stat, WT_NOTFOUND
+from wtdataset import SimpleDataSet
+from wtscenario import make_scenarios
+from wtthread import checkpoint_thread
+
+# test_rollback_to_stable26.py
+# Test that rollback to stable properly restores the prepare rollback entry
+# from the history store.
+class test_rollback_to_stable26(test_rollback_to_stable_base):
+    """Rollback-to-stable test built around a prepared transaction that gets rolled back.
+
+    Scenarios are the product of the key format, hs_remove (remove every key at
+    timestamp 40 before the prepared transaction starts) and prepare_remove (the
+    prepared transaction removes each key right after writing it).
+    """
+
+    session_config = 'isolation=snapshot'
+
+    # Key formats to run against.
+    key_format_values = [
+        ('column', {'key_format': 'r'}),
+        ('integer_row', {'key_format': 'i'}),
+    ]
+
+    # Whether the keys are removed at timestamp 40 before the prepared transaction.
+    hs_remove_values = [
+        ('no_hs_remove', {'hs_remove': False}),
+        ('hs_remove', {'hs_remove': True}),
+    ]
+
+    # Whether the prepared transaction removes each key after updating it.
+    prepare_remove_values = [
+        ('no_prepare_remove', {'prepare_remove': False}),
+        ('prepare_remove', {'prepare_remove': True}),
+    ]
+
+    scenarios = make_scenarios(key_format_values, hs_remove_values, prepare_remove_values)
+
+    def conn_config(self):
+        """Return the connection configuration string for this test."""
+        return 'cache_size=10MB,statistics=(all),timing_stress_for_test=[history_store_checkpoint_delay]'
+
+    def evict_cursor(self, uri, nrows):
+        """Search keys 1..nrows with a release_evict cursor, resetting it after every search.
+
+        The searches run inside an ignore_prepare transaction, which is rolled
+        back once the cursor has been closed.
+        """
+        session = self.session
+        # With this debug setting, reset() evicts the page the cursor is positioned on.
+        cursor = session.open_cursor(uri, None, "debug=(release_evict)")
+        session.begin_transaction("ignore_prepare=true")
+        for key in range(1, nrows + 1):
+            cursor.set_key(key)
+            cursor.search()
+            cursor.reset()
+        cursor.close()
+        session.rollback_transaction()
+
+    def test_rollback_to_stable(self):
+        """Roll back a prepared transaction while a checkpoint thread runs, then crash and restart.
+
+        After the restart the txn_rts_* connection statistics are checked and the
+        values written at timestamps 20 and 30 must still be readable.
+        """
+        nrows = 10
+        uri = "table:rollback_to_stable26"
+
+        # The table is created with logging disabled.
+        ds = SimpleDataSet(
+            self, uri, 0, key_format=self.key_format, value_format="S",
+            config='log=(enabled=false)')
+        ds.populate()
+
+        # Start with both the oldest and the stable timestamp pinned at 10.
+        oldest_config = 'oldest_timestamp=' + self.timestamp_str(10)
+        stable_config = ',stable_timestamp=' + self.timestamp_str(10)
+        self.conn.set_timestamp(oldest_config + stable_config)
+
+        value_a, value_b, value_c, value_d, value_e = (
+            "aaaaa" * 100, "bbbbb" * 100, "ccccc" * 100, "ddddd" * 100, "eeeee" * 100)
+
+        # Two committed generations of data, at timestamps 20 and 30.
+        for value, ts in ((value_a, 20), (value_b, 30)):
+            self.large_updates(uri, value, ds, nrows, False, ts)
+
+        if self.hs_remove:
+            self.large_removes(uri, ds, nrows, False, 40)
+
+        # Write every key in a separate session and leave that transaction prepared at 50.
+        prepare_session = self.conn.open_session()
+        prepare_session.begin_transaction()
+        prepare_cursor = prepare_session.open_cursor(uri)
+        for key in range(1, nrows + 1):
+            prepare_cursor[key] = value_c
+            if self.prepare_remove:
+                prepare_cursor.set_key(key)
+                self.assertEqual(prepare_cursor.remove(), 0)
+        prepare_cursor.close()
+        prepare_session.prepare_transaction('prepare_timestamp=' + self.timestamp_str(50))
+
+        # The committed data must be visible and correct.
+        for value, ts in ((value_a, 20), (value_b, 30)):
+            self.check(value, uri, nrows, ts)
+
+        self.evict_cursor(uri, nrows)
+
+        # Move the stable timestamp forward to 40.
+        self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(40))
+
+        # Run a checkpoint thread while the prepared transaction is rolled back.
+        done = threading.Event()
+        ckpt = checkpoint_thread(self.conn, done)
+        try:
+            ckpt.start()
+            # Sleep for some time so that the checkpoint starts before the
+            # prepared transaction is rolled back.
+            time.sleep(5)
+            prepare_session.rollback_transaction()
+        finally:
+            done.set()
+            ckpt.join()
+
+        self.large_updates(uri, value_d, ds, nrows, False, 60)
+
+        # Check that the data is correct at every committed timestamp.
+        for value, ts in ((value_a, 20), (value_b, 30), (value_d, 60)):
+            self.check(value, uri, nrows, ts)
+
+        # Simulate a server crash and restart.
+        simulate_crash_restart(self, ".", "RESTART")
+
+        # Read the rollback-to-stable statistics from the reopened connection.
+        stats = self.session.open_cursor('statistics:', None, None)
+        hs_removed = stats[stat.conn.txn_rts_hs_removed][2]
+        hs_restore_updates = stats[stat.conn.txn_rts_hs_restore_updates][2]
+        keys_removed = stats[stat.conn.txn_rts_keys_removed][2]
+        stats.close()
+
+        self.assertEqual(keys_removed, 0)
+        self.assertEqual(hs_restore_updates, nrows)
+        self.assertEqual(hs_removed, nrows)
+
+        # Check that the data from timestamps 20 and 30 is still correct after the restart.
+        for value, ts in ((value_a, 20), (value_b, 30)):
+            self.check(value, uri, nrows, ts)
+
+        self.large_updates(uri, value_e, ds, nrows, False, 60)
+
+        self.evict_cursor(uri, nrows)
+
+        # Check that the data is correct once more after the pages have been evicted.
+        for value, ts in ((value_a, 20), (value_b, 30), (value_e, 60)):
+            self.check(value, uri, nrows, ts)
+
+if __name__ == '__main__':
+    # This file never imports wttest at module scope, so bring it in here;
+    # otherwise running the file directly fails with a NameError.
+    import wttest
+    wttest.run()