summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Luke Chen <luke.chen@mongodb.com> 2020-06-18 17:04:28 +1000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-06-18 07:16:47 +0000
commit 0c9d5beb318953a4b41ddd615a2be07cc3e08e84 (patch)
tree 0ea4f859c3771df0158a29ad3dd1a3ced5d8e9c7
parent c7108af024e67563eb32a312eac7fb5d5bd9009f (diff)
download mongo-0c9d5beb318953a4b41ddd615a2be07cc3e08e84.tar.gz
Import wiredtiger: 3998a1f701bfc67afeceeef68624fbeb58daa468 from branch mongodb-4.4
ref: 5faf7b26eb..3998a1f701
for: 4.4.0-rc11

WT-6417 Fix not restoring tombstone in rollback to stable
WT-6434 Configure tests to avoid rollback due to cache pressure
WT-6435 Disable dirty eviction in some tests sensitive to stats
-rw-r--r-- src/third_party/wiredtiger/dist/stat_data.py 2
-rw-r--r-- src/third_party/wiredtiger/import.data 2
-rw-r--r-- src/third_party/wiredtiger/src/include/stat.h 2
-rw-r--r-- src/third_party/wiredtiger/src/include/wiredtiger.in 97
-rw-r--r-- src/third_party/wiredtiger/src/support/stat.c 12
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c 107
-rw-r--r-- src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c 18
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_hs05.py 6
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_rollback_to_stable13.py 127
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_stat05.py 4
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_txn13.py 2
11 files changed, 278 insertions, 101 deletions
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py
index c1406f22f95..d9f476d14bc 100644
--- a/src/third_party/wiredtiger/dist/stat_data.py
+++ b/src/third_party/wiredtiger/dist/stat_data.py
@@ -652,6 +652,8 @@ connection_stats = [
TxnStat('txn_rollback', 'transactions rolled back'),
TxnStat('txn_rts', 'rollback to stable calls'),
TxnStat('txn_rts_hs_removed', 'rollback to stable updates removed from history store'),
+ TxnStat('txn_rts_hs_restore_tombstones', 'rollback to stable restored tombstones from history store'),
+ TxnStat('txn_rts_hs_stop_older_than_newer_start', 'rollback to stable hs records with stop timestamps older than newer records'),
TxnStat('txn_rts_keys_removed', 'rollback to stable keys removed'),
TxnStat('txn_rts_keys_restored', 'rollback to stable keys restored'),
TxnStat('txn_rts_pages_visited', 'rollback to stable pages visited'),
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index a6a732aab81..eb8474c7ca6 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-4.4",
- "commit": "5faf7b26eb9d311b8a7575a16c757078772eb02d"
+ "commit": "3998a1f701bfc67afeceeef68624fbeb58daa468"
}
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index eb90627015d..a158e26e0fc 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -712,9 +712,11 @@ struct __wt_connection_stats {
int64_t txn_read_queue_inserts;
int64_t txn_read_queue_len;
int64_t txn_rts;
+ int64_t txn_rts_hs_stop_older_than_newer_start;
int64_t txn_rts_keys_removed;
int64_t txn_rts_keys_restored;
int64_t txn_rts_pages_visited;
+ int64_t txn_rts_hs_restore_tombstones;
int64_t txn_rts_skip_interal_pages_walk;
int64_t txn_rts_sweep_hs_keys;
int64_t txn_rts_upd_aborted;
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index 26ed8a8cd9c..24325bba4bc 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -5904,114 +5904,121 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_TXN_READ_QUEUE_LEN 1415
/*! transaction: rollback to stable calls */
#define WT_STAT_CONN_TXN_RTS 1416
+/*!
+ * transaction: rollback to stable hs records with stop timestamps older
+ * than newer records
+ */
+#define WT_STAT_CONN_TXN_RTS_HS_STOP_OLDER_THAN_NEWER_START 1417
/*! transaction: rollback to stable keys removed */
-#define WT_STAT_CONN_TXN_RTS_KEYS_REMOVED 1417
+#define WT_STAT_CONN_TXN_RTS_KEYS_REMOVED 1418
/*! transaction: rollback to stable keys restored */
-#define WT_STAT_CONN_TXN_RTS_KEYS_RESTORED 1418
+#define WT_STAT_CONN_TXN_RTS_KEYS_RESTORED 1419
/*! transaction: rollback to stable pages visited */
-#define WT_STAT_CONN_TXN_RTS_PAGES_VISITED 1419
+#define WT_STAT_CONN_TXN_RTS_PAGES_VISITED 1420
+/*! transaction: rollback to stable restored tombstones from history store */
+#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_TOMBSTONES 1421
/*! transaction: rollback to stable skipping internal pages tree walk */
-#define WT_STAT_CONN_TXN_RTS_SKIP_INTERAL_PAGES_WALK 1420
+#define WT_STAT_CONN_TXN_RTS_SKIP_INTERAL_PAGES_WALK 1422
/*! transaction: rollback to stable sweeping history store keys */
-#define WT_STAT_CONN_TXN_RTS_SWEEP_HS_KEYS 1421
+#define WT_STAT_CONN_TXN_RTS_SWEEP_HS_KEYS 1423
/*! transaction: rollback to stable updates aborted */
-#define WT_STAT_CONN_TXN_RTS_UPD_ABORTED 1422
+#define WT_STAT_CONN_TXN_RTS_UPD_ABORTED 1424
/*! transaction: rollback to stable updates removed from history store */
-#define WT_STAT_CONN_TXN_RTS_HS_REMOVED 1423
+#define WT_STAT_CONN_TXN_RTS_HS_REMOVED 1425
/*! transaction: set timestamp calls */
-#define WT_STAT_CONN_TXN_SET_TS 1424
+#define WT_STAT_CONN_TXN_SET_TS 1426
/*! transaction: set timestamp durable calls */
-#define WT_STAT_CONN_TXN_SET_TS_DURABLE 1425
+#define WT_STAT_CONN_TXN_SET_TS_DURABLE 1427
/*! transaction: set timestamp durable updates */
-#define WT_STAT_CONN_TXN_SET_TS_DURABLE_UPD 1426
+#define WT_STAT_CONN_TXN_SET_TS_DURABLE_UPD 1428
/*! transaction: set timestamp oldest calls */
-#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1427
+#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1429
/*! transaction: set timestamp oldest updates */
-#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1428
+#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1430
/*! transaction: set timestamp stable calls */
-#define WT_STAT_CONN_TXN_SET_TS_STABLE 1429
+#define WT_STAT_CONN_TXN_SET_TS_STABLE 1431
/*! transaction: set timestamp stable updates */
-#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1430
+#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1432
/*! transaction: transaction begins */
-#define WT_STAT_CONN_TXN_BEGIN 1431
+#define WT_STAT_CONN_TXN_BEGIN 1433
/*! transaction: transaction checkpoint currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1432
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1434
/*! transaction: transaction checkpoint generation */
-#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1433
+#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1435
/*!
* transaction: transaction checkpoint history store file duration
* (usecs)
*/
-#define WT_STAT_CONN_TXN_HS_CKPT_DURATION 1434
+#define WT_STAT_CONN_TXN_HS_CKPT_DURATION 1436
/*! transaction: transaction checkpoint max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1435
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1437
/*! transaction: transaction checkpoint min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1436
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1438
/*! transaction: transaction checkpoint most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1437
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1439
/*! transaction: transaction checkpoint prepare currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RUNNING 1438
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RUNNING 1440
/*! transaction: transaction checkpoint prepare max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MAX 1439
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MAX 1441
/*! transaction: transaction checkpoint prepare min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MIN 1440
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MIN 1442
/*! transaction: transaction checkpoint prepare most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RECENT 1441
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RECENT 1443
/*! transaction: transaction checkpoint prepare total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_TOTAL 1442
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_TOTAL 1444
/*! transaction: transaction checkpoint scrub dirty target */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1443
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1445
/*! transaction: transaction checkpoint scrub time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1444
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1446
/*! transaction: transaction checkpoint total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1445
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1447
/*! transaction: transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1446
+#define WT_STAT_CONN_TXN_CHECKPOINT 1448
/*!
* transaction: transaction checkpoints skipped because database was
* clean
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1447
+#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1449
/*! transaction: transaction failures due to history store */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1448
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1450
/*!
* transaction: transaction fsync calls for checkpoint after allocating
* the transaction ID
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1449
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1451
/*!
* transaction: transaction fsync duration for checkpoint after
* allocating the transaction ID (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1450
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1452
/*! transaction: transaction range of IDs currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_RANGE 1451
+#define WT_STAT_CONN_TXN_PINNED_RANGE 1453
/*! transaction: transaction range of IDs currently pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1452
+#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1454
/*! transaction: transaction range of timestamps currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1453
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1455
/*! transaction: transaction range of timestamps pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1454
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1456
/*!
* transaction: transaction range of timestamps pinned by the oldest
* active read timestamp
*/
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_READER 1455
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_READER 1457
/*!
* transaction: transaction range of timestamps pinned by the oldest
* timestamp
*/
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1456
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1458
/*! transaction: transaction read timestamp of the oldest active reader */
-#define WT_STAT_CONN_TXN_TIMESTAMP_OLDEST_ACTIVE_READ 1457
+#define WT_STAT_CONN_TXN_TIMESTAMP_OLDEST_ACTIVE_READ 1459
/*! transaction: transaction sync calls */
-#define WT_STAT_CONN_TXN_SYNC 1458
+#define WT_STAT_CONN_TXN_SYNC 1460
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1459
+#define WT_STAT_CONN_TXN_COMMIT 1461
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1460
+#define WT_STAT_CONN_TXN_ROLLBACK 1462
/*! transaction: update conflicts */
-#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1461
+#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1463
/*!
* @}
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index f94d20c7875..92f0e13f268 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -1051,8 +1051,11 @@ static const char *const __stats_connection_desc[] = {
"transaction: read timestamp queue insert to empty",
"transaction: read timestamp queue inserts to head",
"transaction: read timestamp queue inserts total", "transaction: read timestamp queue length",
- "transaction: rollback to stable calls", "transaction: rollback to stable keys removed",
- "transaction: rollback to stable keys restored", "transaction: rollback to stable pages visited",
+ "transaction: rollback to stable calls",
+ "transaction: rollback to stable hs records with stop timestamps older than newer records",
+ "transaction: rollback to stable keys removed", "transaction: rollback to stable keys restored",
+ "transaction: rollback to stable pages visited",
+ "transaction: rollback to stable restored tombstones from history store",
"transaction: rollback to stable skipping internal pages tree walk",
"transaction: rollback to stable sweeping history store keys",
"transaction: rollback to stable updates aborted",
@@ -1546,9 +1549,11 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->txn_read_queue_inserts = 0;
stats->txn_read_queue_len = 0;
stats->txn_rts = 0;
+ stats->txn_rts_hs_stop_older_than_newer_start = 0;
stats->txn_rts_keys_removed = 0;
stats->txn_rts_keys_restored = 0;
stats->txn_rts_pages_visited = 0;
+ stats->txn_rts_hs_restore_tombstones = 0;
stats->txn_rts_skip_interal_pages_walk = 0;
stats->txn_rts_sweep_hs_keys = 0;
stats->txn_rts_upd_aborted = 0;
@@ -2051,9 +2056,12 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *
to->txn_read_queue_inserts += WT_STAT_READ(from, txn_read_queue_inserts);
to->txn_read_queue_len += WT_STAT_READ(from, txn_read_queue_len);
to->txn_rts += WT_STAT_READ(from, txn_rts);
+ to->txn_rts_hs_stop_older_than_newer_start +=
+ WT_STAT_READ(from, txn_rts_hs_stop_older_than_newer_start);
to->txn_rts_keys_removed += WT_STAT_READ(from, txn_rts_keys_removed);
to->txn_rts_keys_restored += WT_STAT_READ(from, txn_rts_keys_restored);
to->txn_rts_pages_visited += WT_STAT_READ(from, txn_rts_pages_visited);
+ to->txn_rts_hs_restore_tombstones += WT_STAT_READ(from, txn_rts_hs_restore_tombstones);
to->txn_rts_skip_interal_pages_walk += WT_STAT_READ(from, txn_rts_skip_interal_pages_walk);
to->txn_rts_sweep_hs_keys += WT_STAT_READ(from, txn_rts_sweep_hs_keys);
to->txn_rts_upd_aborted += WT_STAT_READ(from, txn_rts_upd_aborted);
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index 13c3725659d..7b89d4f21d3 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -152,27 +152,27 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
WT_DECL_ITEM(key);
WT_DECL_RET;
WT_ITEM full_value;
- WT_UPDATE *hs_upd, *upd;
- wt_timestamp_t durable_ts, hs_start_ts, hs_stop_ts;
-#ifdef HAVE_DIAGNOSTIC
- wt_timestamp_t newer_hs_ts;
-#endif
+ WT_UPDATE *hs_upd, *tombstone, *upd;
+ wt_timestamp_t hs_durable_ts, hs_start_ts, hs_stop_durable_ts, newer_hs_durable_ts;
uint64_t hs_counter, type_full;
uint32_t hs_btree_id, session_flags;
uint8_t type;
int cmp;
char ts_string[4][WT_TS_INT_STRING_SIZE];
bool is_owner, valid_update_found;
-
- hs_cursor = NULL;
- hs_upd = upd = NULL;
- durable_ts = hs_start_ts = WT_TS_NONE;
#ifdef HAVE_DIAGNOSTIC
- newer_hs_ts = WT_TS_NONE;
+ bool first_record;
#endif
+
+ hs_cursor = NULL;
+ hs_upd = tombstone = upd = NULL;
+ hs_durable_ts = hs_start_ts = hs_stop_durable_ts = WT_TS_NONE;
hs_btree_id = S2BT(session)->id;
session_flags = 0;
is_owner = valid_update_found = false;
+#ifdef HAVE_DIAGNOSTIC
+ first_record = true;
+#endif
/* Allocate buffers for the data store and history store key. */
WT_RET(__wt_scr_alloc(session, 0, &key));
@@ -183,12 +183,11 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
/* Get the full update value from the data store. */
WT_CLEAR(full_value);
- if (!__wt_row_leaf_value(page, rip, &full_value)) {
- unpack = &_unpack;
- __wt_row_leaf_value_cell(session, page, rip, NULL, unpack);
- WT_ERR(__wt_page_cell_data_ref(session, page, unpack, &full_value));
- }
+ unpack = &_unpack;
+ __wt_row_leaf_value_cell(session, page, rip, NULL, unpack);
+ WT_ERR(__wt_page_cell_data_ref(session, page, unpack, &full_value));
WT_ERR(__wt_buf_set(session, &full_value, full_value.data, full_value.size));
+ newer_hs_durable_ts = unpack->tw.durable_start_ts;
/* Open a history store table cursor. */
WT_ERR(__wt_hs_cursor(session, &session_flags, &is_owner));
@@ -228,7 +227,8 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
cbt->compare = 0;
/* Get current value and convert to full update if it is a modify. */
- WT_ERR(hs_cursor->get_value(hs_cursor, &hs_stop_ts, &durable_ts, &type_full, hs_value));
+ WT_ERR(hs_cursor->get_value(
+ hs_cursor, &hs_stop_durable_ts, &hs_durable_ts, &type_full, hs_value));
type = (uint8_t)type_full;
if (type == WT_UPDATE_MODIFY)
WT_ERR(__wt_modify_apply_item(
@@ -240,10 +240,15 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
/*
* Verify the history store timestamps are in order. The start timestamp may be equal to the
- * stop timestamp if the original update's commit timestamp is out of order.
+ * stop timestamp if the original update's commit timestamp is out of order. We may see
+ * records newer than or equal to the onpage value if eviction runs concurrently with
+ * checkpoint. In that case, don't verify the first record.
*/
- WT_ASSERT(session,
- (newer_hs_ts == WT_TS_NONE || hs_stop_ts <= newer_hs_ts || hs_start_ts == hs_stop_ts));
+ WT_ASSERT(session, hs_stop_durable_ts <= newer_hs_durable_ts ||
+ hs_start_ts == hs_stop_durable_ts || first_record);
+
+ if (hs_stop_durable_ts < newer_hs_durable_ts)
+ WT_STAT_CONN_INCR(session, txn_rts_hs_stop_older_than_newer_start);
/*
* Stop processing when we find the newer version value of this key is stable according to
@@ -251,22 +256,22 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
* update chain. Also it confirms that history store doesn't contains any newer version than
* the current version for the key.
*/
- if (!replace && hs_stop_ts <= rollback_timestamp) {
+ if (!replace && hs_stop_durable_ts <= rollback_timestamp) {
__wt_verbose(session, WT_VERB_RTS,
"history store update valid with stop timestamp: %s and stable timestamp: %s",
- __wt_timestamp_to_string(hs_stop_ts, ts_string[0]),
+ __wt_timestamp_to_string(hs_stop_durable_ts, ts_string[0]),
__wt_timestamp_to_string(rollback_timestamp, ts_string[1]));
break;
}
/* Stop processing when we find a stable update according to the given timestamp. */
- if (durable_ts <= rollback_timestamp) {
+ if (hs_durable_ts <= rollback_timestamp) {
__wt_verbose(session, WT_VERB_RTS,
"history store update valid with start timestamp: %s, durable timestamp: %s, "
"stop timestamp: %s and stable timestamp: %s",
__wt_timestamp_to_string(hs_start_ts, ts_string[0]),
- __wt_timestamp_to_string(durable_ts, ts_string[1]),
- __wt_timestamp_to_string(hs_stop_ts, ts_string[2]),
+ __wt_timestamp_to_string(hs_durable_ts, ts_string[1]),
+ __wt_timestamp_to_string(hs_stop_durable_ts, ts_string[2]),
__wt_timestamp_to_string(rollback_timestamp, ts_string[3]));
valid_update_found = true;
break;
@@ -276,21 +281,23 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
"history store update aborted with start timestamp: %s, durable timestamp: %s, stop "
"timestamp: %s and stable timestamp: %s",
__wt_timestamp_to_string(hs_start_ts, ts_string[0]),
- __wt_timestamp_to_string(durable_ts, ts_string[1]),
- __wt_timestamp_to_string(hs_stop_ts, ts_string[2]),
+ __wt_timestamp_to_string(hs_durable_ts, ts_string[1]),
+ __wt_timestamp_to_string(hs_stop_durable_ts, ts_string[2]),
__wt_timestamp_to_string(rollback_timestamp, ts_string[3]));
-#ifdef HAVE_DIAGNOSTIC
/*
- * Durable timestamp of the current record is used as stop timestamp of previous record.
- * Save it to verify against previous record.
+ * Start time point of the current record may be used as stop time point of the previous
+ * record. Save it to verify against the previous record and check if we need to append the
+ * stop time point as a tombstone when we rollback the history store record.
*/
- newer_hs_ts = durable_ts;
+ newer_hs_durable_ts = hs_durable_ts;
+#ifdef HAVE_DIAGNOSTIC
+ first_record = false;
#endif
+
WT_ERR(__wt_upd_alloc_tombstone(session, &hs_upd, NULL));
WT_ERR(__wt_hs_modify(cbt, hs_upd));
WT_STAT_CONN_INCR(session, txn_rts_hs_removed);
- hs_upd = NULL;
}
if (replace) {
@@ -301,9 +308,9 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
if (valid_update_found) {
WT_ERR(__wt_upd_alloc(session, &full_value, WT_UPDATE_STANDARD, &upd, NULL));
- upd->txnid = WT_TXN_NONE;
- upd->durable_ts = durable_ts;
- upd->start_ts = hs_start_ts;
+ upd->txnid = cbt->upd_value->tw.start_txn;
+ upd->durable_ts = cbt->upd_value->tw.durable_start_ts;
+ upd->start_ts = cbt->upd_value->tw.start_ts;
__wt_verbose(session, WT_VERB_RTS, "update restored from history store (txnid: %" PRIu64
", start_ts: %s, durable_ts: %s",
upd->txnid, __wt_timestamp_to_string(upd->start_ts, ts_string[0]),
@@ -314,6 +321,28 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
* the rollback to stable operation.
*/
F_SET(upd, WT_UPDATE_RESTORED_FROM_HS);
+
+ /*
+ * We have a tombstone on the original update chain and it is behind the stable
+ * timestamp, we need to restore that as well.
+ */
+ if (hs_stop_durable_ts <= rollback_timestamp &&
+ hs_stop_durable_ts < newer_hs_durable_ts) {
+ WT_ERR(__wt_upd_alloc_tombstone(session, &tombstone, NULL));
+ tombstone->txnid = cbt->upd_value->tw.stop_txn;
+ tombstone->durable_ts = cbt->upd_value->tw.durable_stop_ts;
+ tombstone->start_ts = cbt->upd_value->tw.stop_ts;
+
+ /*
+ * Set the flag to indicate that this update has been restored from history store
+ * for the rollback to stable operation.
+ */
+ F_SET(tombstone, WT_UPDATE_RESTORED_FROM_HS);
+
+ tombstone->next = upd;
+ upd = tombstone;
+ WT_STAT_CONN_INCR(session, txn_rts_hs_restore_tombstones);
+ }
} else {
WT_ERR(__wt_upd_alloc_tombstone(session, &upd, NULL));
WT_STAT_CONN_INCR(session, txn_rts_keys_removed);
@@ -321,7 +350,6 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
}
WT_ERR(__rollback_row_add_update(session, page, rip, upd));
- upd = NULL;
}
/* Finally remove that update from history store. */
@@ -329,18 +357,19 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
WT_ERR(__wt_upd_alloc_tombstone(session, &hs_upd, NULL));
WT_ERR(__wt_hs_modify(cbt, hs_upd));
WT_STAT_CONN_INCR(session, txn_rts_hs_removed);
- hs_upd = NULL;
}
+ if (0) {
err:
+ WT_ASSERT(session, tombstone == NULL || upd == tombstone);
+ __wt_free_update_list(session, &upd);
+ __wt_free_update_list(session, &hs_upd);
+ }
__wt_scr_free(session, &key);
__wt_scr_free(session, &hs_key);
__wt_scr_free(session, &hs_value);
__wt_buf_free(session, &full_value);
- __wt_free(session, hs_upd);
- __wt_free(session, upd);
WT_TRET(__wt_hs_cursor_close(session, session_flags, is_owner));
-
return (ret);
}
diff --git a/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c b/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
index 89c141290a9..b565282dad1 100644
--- a/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
+++ b/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
@@ -82,15 +82,15 @@ static bool compat, inmem, use_ts;
static volatile uint64_t global_ts = 1;
#define ENV_CONFIG_COMPAT ",compatibility=(release=\"2.9\")"
-#define ENV_CONFIG_DEF \
- "cache_size=20M,create,log=(archive=true,file_max=10M,enabled)," \
- "debug_mode=(table_logging=true,checkpoint_retention=5)," \
- "statistics=(fast),statistics_log=(wait=1,json=true),session_max=%d"
-#define ENV_CONFIG_TXNSYNC \
- "cache_size=20M,create,log=(archive=true,file_max=10M,enabled)," \
- "debug_mode=(table_logging=true,checkpoint_retention=5)," \
- "statistics=(fast),statistics_log=(wait=1,json=true)," \
- "transaction_sync=(enabled,method=none),session_max=%d"
+#define ENV_CONFIG_DEF \
+ "cache_size=20M,create," \
+ "debug_mode=(table_logging=true,checkpoint_retention=5)," \
+ "eviction_dirty_trigger=100," \
+ "log=(archive=true,file_max=10M,enabled),session_max=%d," \
+ "statistics=(fast),statistics_log=(wait=1,json=true),"
+#define ENV_CONFIG_TXNSYNC \
+ ENV_CONFIG_DEF \
+ "transaction_sync=(enabled,method=none)"
#define ENV_CONFIG_REC "log=(archive=false,recover=on)"
typedef struct {
diff --git a/src/third_party/wiredtiger/test/suite/test_hs05.py b/src/third_party/wiredtiger/test/suite/test_hs05.py
index 17c87109efd..f2d93a40547 100644
--- a/src/third_party/wiredtiger/test/suite/test_hs05.py
+++ b/src/third_party/wiredtiger/test/suite/test_hs05.py
@@ -38,8 +38,10 @@ def timestamp_str(t):
# Verify hs_score reflects cache pressure due to history
# even if we're not yet actively pushing into the history store file.
class test_hs05(wttest.WiredTigerTestCase):
- # Force a small cache.
- conn_config = 'cache_size=50MB,statistics=(fast)'
+ # Force a small cache, but disable eviction of dirty pages until the cache is full.
+ conn_config = 'cache_size=50MB,statistics=(fast),'
+ conn_config += 'eviction_dirty_target=100,eviction_dirty_trigger=100,'
+ conn_config += 'eviction_updates_target=100,eviction_updates_trigger=100'
session_config = 'isolation=snapshot'
stable = 1
diff --git a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable13.py b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable13.py
new file mode 100644
index 00000000000..b5c22889f6a
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable13.py
@@ -0,0 +1,127 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2020 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import fnmatch, os, shutil, time
+from helper import copy_wiredtiger_home
+from test_rollback_to_stable01 import test_rollback_to_stable_base
+from wiredtiger import stat
+from wtdataset import SimpleDataSet
+from wtscenario import make_scenarios
+
+def timestamp_str(t):
+ return '%x' % t
+
+# test_rollback_to_stable13.py
+# Test the rollback to stable should roll back the tombstone in the history store.
+class test_rollback_to_stable13(test_rollback_to_stable_base):
+ session_config = 'isolation=snapshot'
+
+ prepare_values = [
+ ('no_prepare', dict(prepare=False)),
+ ('prepare', dict(prepare=True))
+ ]
+
+ scenarios = make_scenarios(prepare_values)
+
+ def conn_config(self):
+ config = 'cache_size=500MB,statistics=(all),log=(enabled=true)'
+ return config
+
+ def simulate_crash_restart(self, olddir, newdir):
+ ''' Simulate a crash from olddir and restart in newdir. '''
+ # with the connection still open, copy files to new directory
+ shutil.rmtree(newdir, ignore_errors=True)
+ os.mkdir(newdir)
+ for fname in os.listdir(olddir):
+ fullname = os.path.join(olddir, fname)
+ # Skip lock file on Windows since it is locked
+ if os.path.isfile(fullname) and \
+ "WiredTiger.lock" not in fullname and \
+ "Tmplog" not in fullname and \
+ "Preplog" not in fullname:
+ shutil.copy(fullname, newdir)
+ #
+ # close the original connection and open to new directory
+ # NOTE: This really cannot test the difference between the
+ # write-no-sync (off) version of log_flush and the sync
+ # version since we're not crashing the system itself.
+ #
+ self.close_conn()
+ self.conn = self.setUpConnectionOpen(newdir)
+ self.session = self.setUpSessionOpen(self.conn)
+
+ def test_rollback_to_stable(self):
+ nrows = 1000
+
+ # Create a table without logging.
+ uri = "table:rollback_to_stable13"
+ ds = SimpleDataSet(
+ self, uri, 0, key_format="i", value_format="S", config='split_pct=50,log=(enabled=false)')
+ ds.populate()
+
+ # Pin oldest and stable to timestamp 10.
+ self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(10) +
+ ',stable_timestamp=' + timestamp_str(10))
+
+ value_a = "aaaaa" * 100
+ value_b = "bbbbb" * 100
+
+ # Perform several updates.
+ self.large_updates(uri, value_a, ds, nrows, 20)
+
+ # Perform several removes.
+ self.large_removes(uri, ds, nrows, 30)
+
+ # Perform several updates.
+ self.large_updates(uri, value_b, ds, nrows, 60)
+
+ # Verify data is visible and correct.
+ self.check(value_a, uri, nrows, 20)
+ self.check(None, uri, 0, 30)
+ self.check(value_b, uri, nrows, 60)
+
+ # Pin stable to timestamp 50 if prepare otherwise 40.
+ if self.prepare:
+ self.conn.set_timestamp('stable_timestamp=' + timestamp_str(50))
+ else:
+ self.conn.set_timestamp('stable_timestamp=' + timestamp_str(40))
+
+ self.session.checkpoint()
+
+ # Simulate a server crash and restart.
+ self.simulate_crash_restart(".", "RESTART")
+
+ # Check that the correct data is seen at and after the stable timestamp.
+ self.check(None, uri, 0, 50)
+
+ # Check that we restore the correct value from the history store.
+ self.check(value_a, uri, nrows, 20)
+
+ stat_cursor = self.session.open_cursor('statistics:', None, None)
+ restored_tombstones = stat_cursor[stat.conn.txn_rts_hs_restore_tombstones][2]
+ self.assertEqual(restored_tombstones, nrows)
diff --git a/src/third_party/wiredtiger/test/suite/test_stat05.py b/src/third_party/wiredtiger/test/suite/test_stat05.py
index dd1b94b543a..235f6236a1d 100644
--- a/src/third_party/wiredtiger/test/suite/test_stat05.py
+++ b/src/third_party/wiredtiger/test/suite/test_stat05.py
@@ -45,12 +45,12 @@ class test_stat_cursor_config(wttest.WiredTigerTestCase):
conn_config = 'in_memory,statistics=(fast)')),
('table-lsm', dict(uri='table:' + pfx, dataset=SimpleDataSet,
cfg='lsm=(chunk_size=1MB,merge_min=2)',
- conn_config = 'statistics=(fast),eviction_dirty_target=99,eviction_dirty_trigger=99')),
+ conn_config = 'statistics=(fast)')),
('complex', dict(uri='table:' + pfx, dataset=ComplexDataSet, cfg='')),
('complex-lsm',
dict(uri='table:' + pfx, dataset=ComplexLSMDataSet,
cfg='lsm=(chunk_size=1MB,merge_min=2)',
- conn_config = 'statistics=(fast),eviction_dirty_target=99,eviction_dirty_trigger=99')),
+ conn_config = 'statistics=(fast)')),
]
scenarios = make_scenarios(uri)
diff --git a/src/third_party/wiredtiger/test/suite/test_txn13.py b/src/third_party/wiredtiger/test/suite/test_txn13.py
index f2b1849333a..541017804c9 100644
--- a/src/third_party/wiredtiger/test/suite/test_txn13.py
+++ b/src/third_party/wiredtiger/test/suite/test_txn13.py
@@ -52,7 +52,7 @@ class test_txn13(wttest.WiredTigerTestCase, suite_subprocess):
# Turn on logging for this test.
def conn_config(self):
return 'log=(archive=false,enabled,file_max=%s)' % self.logmax + \
- ',cache_size=20G'
+ ',cache_size=20G,eviction_dirty_trigger=100'
@wttest.longtest('txn tests with huge values')
def test_large_values(self):