summaryrefslogtreecommitdiff
path: root/src/third_party/wiredtiger/src/txn
diff options
context:
space:
mode:
authorLuke Chen <luke.chen@mongodb.com>2020-09-09 11:37:44 +1000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-09-09 01:52:32 +0000
commit0f0eb111c9dbd5dd4ca86529aee81bad59c6579b (patch)
tree94f3efa909755fcc8b381bf68214ebb9bb326964 /src/third_party/wiredtiger/src/txn
parent103b545528e7cf05537bd68bda4bc8235bf6fefa (diff)
downloadmongo-0f0eb111c9dbd5dd4ca86529aee81bad59c6579b.tar.gz
Import wiredtiger: 6a7db4f96fe828fdb1b9a31d83460c0573a2c3b1 from branch mongodb-4.4
ref: 579966149b..6a7db4f96f for: 4.4.2 WT-5144 Use wt_clock instead of wt_epoch in perf programs WT-5585 Remove cache_overflow config option WT-5693 Enable test_wt4105_large_doc_small_upd WT-5940 Migrate to Clang Format 10 WT-6000 Enhance incremental backup testing in format to support restart WT-6006 Revert test binaries to 10 in checkpoint-stress-test WT-6027 Fix docs spelling errors and warnings WT-6322 Split full compatibility tests into smaller groups WT-6390 Extend compact02 timeout from 8 => 10 minutes WT-6404 Add timing stress that delays checkpoint after it calls __wt_txn_begin WT-6451 Do not evict clean metadata pages if needed for historic reads WT-6458 read row-store leaf pages with prepared updates in a single pass WT-6463 History store operations should honor cache size WT-6471 Avoid the error message for non-existent clang-format binary WT-6472 Update timestamp_abort test cache configuration WT-6478 Cursor cache statistics not getting incremented WT-6505 Add debugging for missing file failure WT-6507 Exit cache eviction worker after our operation has timed out WT-6526 Fix assertion failure when opening DB in readonly mode after unclean shutdown WT-6532 Consider update structure overhead in split length calculation WT-6542 Add an assert to ensure we are not unintentionally returning empty values WT-6544 Onpage value not appended to the tombstone restored from the data or history store WT-6556 Fix internal sessions to use internal session close function than public API to avoid memory leak WT-6559 Use the session id from the new session to determine statistics bucket WT-6560 Fix usage of global salvage in WT utility WT-6561 Provide MongoDB configuration in the wt utility usage output WT-6569 Squash the prepared updates into a single update before writing it to data store WT-6570 RTS to remove the left over updates in the history store without stop timestamp WT-6571 Lseek cannot use error_sys_check because it does not return an int WT-6577 History store 
dump outputs confusing time window WT-6578 Prevent reconciliation from looking past the on-disk value WT-6581 Fix class name in test_hs15 WT-6585 Panic if updates that are older than the updates in history store are inserted to history store WT-6586 Tombstone inserted to history store should also be flagged as WT_UPDATE_HS WT-6589 Fix disabled cursor cache python tests WT-6591 Stop checkpoint thread before closing connection in Python tests WT-6593 Retry conflicting operations in test_rollback_to_stable10 WT-6596 Increase cache for timestamp abort test and separate key spaces for all abort tests WT-6598 Add new API allowing changing dhandle hash bucket size WT-6602 Allow operation timeout ms to be passed to commit and rollback WT-6604 Fix typo in the comment descibing WT_CELL structure WT-6610 Fix incremental backup checkpoint parsing to handle upgrades WT-6611 Revert enhancement allowing rename and incremental backup WT-6613 Add python test for early_load flag WT-6615 Initialize last_upd where it is actually used WT-6619 Eliminate possibility of infinite loop in test_cursor13.py WT-6623 Set the connection level file id in recovery file scan WT-6625 Remove outdated TODO WT-6635 Disable mix and column filetype test WT-6640 Coverity: Failure to restore saved dhandle WT-6641 Coverity: Unused value
Diffstat (limited to 'src/third_party/wiredtiger/src/txn')
-rw-r--r--src/third_party/wiredtiger/src/txn/txn.c182
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_ckpt.c62
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_log.c2
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_recover.c15
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c109
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_timestamp.c62
6 files changed, 239 insertions, 193 deletions
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index 0c3ff78fa74..014b449c6f7 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -124,8 +124,8 @@ __wt_txn_release_snapshot(WT_SESSION_IMPL *session)
txn_global = &S2C(session)->txn_global;
txn_shared = WT_SESSION_TXN_SHARED(session);
- WT_ASSERT(session, txn_shared->pinned_id == WT_TXN_NONE ||
- session->txn->isolation == WT_ISO_READ_UNCOMMITTED ||
+ WT_ASSERT(session,
+ txn_shared->pinned_id == WT_TXN_NONE || session->txn->isolation == WT_ISO_READ_UNCOMMITTED ||
!__wt_txn_visible_all(session, txn_shared->pinned_id, WT_TS_NONE));
txn_shared->metadata_pinned = txn_shared->pinned_id = WT_TXN_NONE;
@@ -406,7 +406,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags)
if ((oldest_id == prev_oldest_id ||
(!strict && WT_TXNID_LT(oldest_id, prev_oldest_id + 100))) &&
((last_running == prev_last_running) ||
- (!strict && WT_TXNID_LT(last_running, prev_last_running + 100))) &&
+ (!strict && WT_TXNID_LT(last_running, prev_last_running + 100))) &&
metadata_pinned == prev_metadata_pinned)
return (0);
@@ -444,9 +444,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags)
if (WT_VERBOSE_ISSET(session, WT_VERB_TRANSACTION) && current_id - oldest_id > 10000 &&
oldest_session != NULL) {
__wt_verbose(session, WT_VERB_TRANSACTION,
- "old snapshot %" PRIu64 " pinned in session %" PRIu32
- " [%s]"
- " with snap_min %" PRIu64,
+ "old snapshot %" PRIu64 " pinned in session %" PRIu32 " [%s] with snap_min %" PRIu64,
oldest_id, oldest_session->id, oldest_session->lastop, oldest_session->txn->snap_min);
}
}
@@ -457,6 +455,33 @@ done:
}
/*
+ * __txn_config_operation_timeout --
+ * Configure a transaction's operation timeout duration from the "operation_timeout_ms"
+ * configuration value. A NULL configuration array, or a configured value of 0, leaves the
+ * transaction's existing timeout untouched; any other value is multiplied by WT_THOUSAND and
+ * stored in the transaction's operation_timeout_us field. Returns 0 unless the configuration
+ * lookup fails (WT_RET presumably propagates that error to the caller -- confirm against the
+ * macro definition).
+ */
+static int
+__txn_config_operation_timeout(WT_SESSION_IMPL *session, const char *cfg[])
+{
+ WT_CONFIG_ITEM cval;
+ WT_TXN *txn;
+
+ txn = session->txn;
+
+ if (cfg == NULL)
+ return (0);
+
+ /* Retrieve the maximum operation time, defaulting to the database-wide configuration. */
+ WT_RET(__wt_config_gets(session, cfg, "operation_timeout_ms", &cval));
+
+ /*
+ * The default configuration value is 0, so we can't tell whether the caller is explicitly
+ * setting the timeout back to 0 or the default was automatically passed in. Only a non-zero
+ * value therefore overrides the transaction's current timeout.
+ */
+ if (cval.val != 0)
+ txn->operation_timeout_us = (uint64_t)(cval.val * WT_THOUSAND);
+ return (0);
+}
+
+/*
* __wt_txn_config --
* Configure a transaction.
*/
@@ -469,6 +494,9 @@ __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[])
txn = session->txn;
+ if (cfg == NULL)
+ return (0);
+
WT_RET(__wt_config_gets_def(session, cfg, "isolation", 0, &cval));
if (cval.len != 0)
txn->isolation = WT_STRING_MATCH("snapshot", cval.str, cval.len) ?
@@ -476,9 +504,7 @@ __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[])
WT_STRING_MATCH("read-committed", cval.str, cval.len) ? WT_ISO_READ_COMMITTED :
WT_ISO_READ_UNCOMMITTED;
- /* Retrieve the maximum operation time, defaulting to the database-wide configuration. */
- WT_RET(__wt_config_gets(session, cfg, "operation_timeout_ms", &cval));
- txn->operation_timeout_us = (uint64_t)(cval.val * WT_THOUSAND);
+ WT_RET(__txn_config_operation_timeout(session, cfg));
/*
* The default sync setting is inherited from the connection, but can be overridden by an
@@ -723,8 +749,18 @@ __txn_append_hs_record(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, WT_ITEM *
if (commit)
goto done;
+ /*
+ * Set the flag to indicate that this update has been restored from history store for the
+ * rollback of a prepared transaction.
+ */
+ F_SET(upd, WT_UPDATE_RESTORED_FROM_HS);
total_size += size;
+ __wt_verbose(session, WT_VERB_TRANSACTION,
+ "update restored from history store (txnid: %" PRIu64 ", start_ts: %s, durable_ts: %s",
+ upd->txnid, __wt_timestamp_to_string(upd->start_ts, ts_string[0]),
+ __wt_timestamp_to_string(upd->durable_ts, ts_string[1]));
+
/* If the history store record has a valid stop time point, append it. */
if (hs_stop_durable_ts != WT_TS_MAX) {
WT_ASSERT(session, hs_cbt->upd_value->tw.stop_ts != WT_TS_MAX);
@@ -733,20 +769,20 @@ __txn_append_hs_record(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, WT_ITEM *
tombstone->start_ts = hs_cbt->upd_value->tw.stop_ts;
tombstone->txnid = hs_cbt->upd_value->tw.stop_txn;
tombstone->next = upd;
+ /*
+ * Set the flag to indicate that this update has been restored from history store for the
+ * rollback of a prepared transaction.
+ */
+ F_SET(tombstone, WT_UPDATE_RESTORED_FROM_HS);
total_size += size;
- } else
- tombstone = upd;
- __wt_verbose(session, WT_VERB_TRANSACTION,
- "update restored from history store (txnid: %" PRIu64 ", start_ts: %s, durable_ts: %s",
- upd->txnid, __wt_timestamp_to_string(upd->start_ts, ts_string[0]),
- __wt_timestamp_to_string(upd->durable_ts, ts_string[1]));
+ __wt_verbose(session, WT_VERB_TRANSACTION,
+ "tombstone restored from history store (txnid: %" PRIu64 ", start_ts: %s, durable_ts: %s",
+ tombstone->txnid, __wt_timestamp_to_string(tombstone->start_ts, ts_string[0]),
+ __wt_timestamp_to_string(tombstone->durable_ts, ts_string[1]));
- /*
- * Set the flag to indicate that this update has been restored from history store for the
- * rollback of a prepared transaction.
- */
- F_SET(upd, WT_UPDATE_RESTORED_FROM_HS);
+ upd = tombstone;
+ }
/* Walk to the end of the chain and we can only have prepared updates on the update chain. */
for (;; chain = chain->next) {
@@ -758,15 +794,15 @@ __txn_append_hs_record(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, WT_ITEM *
}
/* Append the update to the end of the chain. */
- WT_PUBLISH(chain->next, tombstone);
+ WT_PUBLISH(chain->next, upd);
*upd_appended = true;
__wt_cache_page_inmem_incr(session, page, total_size);
if (0) {
err:
- __wt_free(session, upd);
- __wt_free(session, tombstone);
+ WT_ASSERT(session, tombstone == NULL || upd == tombstone);
+ __wt_free_update_list(session, &upd);
}
done:
__wt_scr_free(session, &hs_key);
@@ -914,13 +950,12 @@ __txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit,
WT_TXN *txn;
WT_UPDATE *fix_upd, *tombstone, *upd;
size_t not_used;
- uint32_t hs_btree_id, session_flags;
+ uint32_t hs_btree_id;
bool upd_appended;
hs_cursor = NULL;
txn = session->txn;
fix_upd = tombstone = NULL;
- session_flags = 0;
upd_appended = false;
WT_RET(__txn_search_prepared_op(session, op, cursorp, &upd));
@@ -953,7 +988,7 @@ __txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit,
cbt = (WT_CURSOR_BTREE *)(*cursorp);
hs_btree_id = S2BT(session)->id;
/* Open a history store table cursor. */
- WT_ERR(__wt_hs_cursor_open(session, &session_flags));
+ WT_ERR(__wt_hs_cursor_open(session));
hs_cursor = session->hs_cursor;
/*
@@ -976,8 +1011,9 @@ __txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit,
* and instead write nothing.
*/
WT_ERR(__wt_upd_alloc_tombstone(session, &tombstone, &not_used));
- WT_WITH_BTREE(session, op->btree, ret = __wt_row_modify(cbt, &cbt->iface.key, NULL,
- tombstone, WT_UPDATE_INVALID, false));
+ WT_WITH_BTREE(session, op->btree,
+ ret =
+ __wt_row_modify(cbt, &cbt->iface.key, NULL, tombstone, WT_UPDATE_INVALID, false));
WT_ERR(ret);
tombstone = NULL;
} else
@@ -1044,7 +1080,7 @@ __txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit,
err:
if (hs_cursor != NULL)
- WT_TRET(__wt_hs_cursor_close(session, session_flags));
+ WT_TRET(__wt_hs_cursor_close(session));
if (!upd_appended)
__wt_free(session, fix_upd);
__wt_free(session, tombstone);
@@ -1243,6 +1279,9 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING));
WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) || txn->mod_count == 0);
+ /* Configure the timeout for this commit operation. */
+ WT_ERR(__txn_config_operation_timeout(session, cfg));
+
/*
* Clear the prepared round up flag if the transaction is not prepared. There is no rounding up
* to do in that case.
@@ -1513,8 +1552,8 @@ __wt_txn_prepare(WT_SESSION_IMPL *session, const char *cfg[])
*/
if (!FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_DEBUG_MODE))
WT_RET_ASSERT(session, txn->logrec == NULL, EINVAL,
- "A transaction should not have been assigned a log"
- " record if WT_CONN_LOG_DEBUG mode is not enabled");
+ "A transaction should not have been assigned a log record if WT_CONN_LOG_DEBUG mode is "
+ "not enabled");
/* Set the prepare timestamp. */
WT_RET(__wt_txn_set_timestamp(session, cfg));
@@ -1639,8 +1678,6 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[])
u_int i;
bool prepare, readonly;
- WT_UNUSED(cfg);
-
cursor = NULL;
txn = session->txn;
prepare = F_ISSET(txn, WT_TXN_PREPARE);
@@ -1652,6 +1689,9 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[])
if (txn->notify != NULL)
WT_TRET(txn->notify->notify(txn->notify, (WT_SESSION *)session, txn->id, 0));
+ /* Configure the timeout for this rollback operation. */
+ WT_RET(__txn_config_operation_timeout(session, cfg));
+
/*
* Resolving prepared updates is expensive. Sort prepared modifications so all updates for each
* page within each file are done at the same time.
@@ -1757,7 +1797,8 @@ __wt_txn_init(WT_SESSION_IMPL *session, WT_SESSION_IMPL *session_ret)
txn->snapshot = txn->__snapshot;
txn->id = WT_TXN_NONE;
- WT_ASSERT(session, S2C(session_ret)->txn_global.txn_shared_list == NULL ||
+ WT_ASSERT(session,
+ S2C(session_ret)->txn_global.txn_shared_list == NULL ||
WT_SESSION_TXN_SHARED(session_ret)->pinned_id == WT_TXN_NONE);
/*
@@ -1950,12 +1991,11 @@ __wt_txn_activity_drain(WT_SESSION_IMPL *session)
* Shut down the global transaction state.
*/
int
-__wt_txn_global_shutdown(WT_SESSION_IMPL *session, const char *config, const char **cfg)
+__wt_txn_global_shutdown(WT_SESSION_IMPL *session, const char **cfg)
{
WT_CONFIG_ITEM cval;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
- WT_SESSION *wt_session;
WT_SESSION_IMPL *s;
char ts_string[WT_TS_INT_STRING_SIZE];
const char *ckpt_cfg;
@@ -1975,7 +2015,7 @@ __wt_txn_global_shutdown(WT_SESSION_IMPL *session, const char *config, const cha
if (conn->txn_global.has_stable_timestamp)
F_SET(conn, WT_CONN_CLOSING_TIMESTAMP);
}
- if (!F_ISSET(conn, WT_CONN_IN_MEMORY | WT_CONN_READONLY)) {
+ if (!F_ISSET(conn, WT_CONN_IN_MEMORY | WT_CONN_READONLY | WT_CONN_PANIC)) {
/*
* Perform rollback to stable to ensure that the stable version is written to disk on a
* clean shutdown.
@@ -1992,7 +2032,6 @@ __wt_txn_global_shutdown(WT_SESSION_IMPL *session, const char *config, const cha
if (s != NULL) {
const char *checkpoint_cfg[] = {
WT_CONFIG_BASE(session, WT_SESSION_checkpoint), ckpt_cfg, NULL};
- wt_session = &s->iface;
WT_TRET(__wt_txn_checkpoint(s, checkpoint_cfg, true));
/*
@@ -2000,7 +2039,7 @@ __wt_txn_global_shutdown(WT_SESSION_IMPL *session, const char *config, const cha
*/
WT_WITH_DHANDLE(s, WT_SESSION_META_DHANDLE(s), __wt_tree_modify_set(s));
- WT_TRET(wt_session->close(wt_session, config));
+ WT_TRET(__wt_session_close_internal(s));
}
}
@@ -2051,8 +2090,9 @@ __wt_txn_is_blocking(WT_SESSION_IMPL *session, bool conservative)
* a transaction, we need to have considered splitting the page in the case that its updates are
* on a single page.
*/
- if (conservative && (txn->mod_count < (10 + WT_REC_SPLIT_MIN_ITEMS_USE_MEM) ||
- F_ISSET(session, WT_SESSION_RESOLVING_TXN)))
+ if (conservative &&
+ (txn->mod_count < (10 + WT_REC_SPLIT_MIN_ITEMS_USE_MEM) ||
+ F_ISSET(session, WT_SESSION_RESOLVING_TXN)))
return (0);
/*
@@ -2098,28 +2138,29 @@ __wt_verbose_dump_txn_one(
* Dump the information of the passed transaction into a buffer, to be logged with an optional
* error message.
*/
- WT_RET(__wt_snprintf(buf,
- sizeof(buf), "transaction id: %" PRIu64 ", mod count: %u"
- ", snap min: %" PRIu64 ", snap max: %" PRIu64 ", snapshot count: %u"
- ", commit_timestamp: %s"
- ", durable_timestamp: %s"
- ", first_commit_timestamp: %s"
- ", prepare_timestamp: %s"
- ", pinned_durable_timestamp: %s"
- ", read_timestamp: %s"
- ", checkpoint LSN: [%" PRIu32 "][%" PRIu32 "]"
- ", full checkpoint: %s"
- ", rollback reason: %s"
- ", flags: 0x%08" PRIx32 ", isolation: %s",
- txn->id, txn->mod_count, txn->snap_min, txn->snap_max, txn->snapshot_count,
- __wt_timestamp_to_string(txn->commit_timestamp, ts_string[0]),
- __wt_timestamp_to_string(txn->durable_timestamp, ts_string[1]),
- __wt_timestamp_to_string(txn->first_commit_timestamp, ts_string[2]),
- __wt_timestamp_to_string(txn->prepare_timestamp, ts_string[3]),
- __wt_timestamp_to_string(txn_shared->pinned_durable_timestamp, ts_string[4]),
- __wt_timestamp_to_string(txn_shared->read_timestamp, ts_string[5]), txn->ckpt_lsn.l.file,
- txn->ckpt_lsn.l.offset, txn->full_ckpt ? "true" : "false",
- txn->rollback_reason == NULL ? "" : txn->rollback_reason, txn->flags, iso_tag));
+ WT_RET(
+ __wt_snprintf(buf, sizeof(buf),
+ "transaction id: %" PRIu64 ", mod count: %u"
+ ", snap min: %" PRIu64 ", snap max: %" PRIu64 ", snapshot count: %u"
+ ", commit_timestamp: %s"
+ ", durable_timestamp: %s"
+ ", first_commit_timestamp: %s"
+ ", prepare_timestamp: %s"
+ ", pinned_durable_timestamp: %s"
+ ", read_timestamp: %s"
+ ", checkpoint LSN: [%" PRIu32 "][%" PRIu32 "]"
+ ", full checkpoint: %s"
+ ", rollback reason: %s"
+ ", flags: 0x%08" PRIx32 ", isolation: %s",
+ txn->id, txn->mod_count, txn->snap_min, txn->snap_max, txn->snapshot_count,
+ __wt_timestamp_to_string(txn->commit_timestamp, ts_string[0]),
+ __wt_timestamp_to_string(txn->durable_timestamp, ts_string[1]),
+ __wt_timestamp_to_string(txn->first_commit_timestamp, ts_string[2]),
+ __wt_timestamp_to_string(txn->prepare_timestamp, ts_string[3]),
+ __wt_timestamp_to_string(txn_shared->pinned_durable_timestamp, ts_string[4]),
+ __wt_timestamp_to_string(txn_shared->read_timestamp, ts_string[5]), txn->ckpt_lsn.l.file,
+ txn->ckpt_lsn.l.offset, txn->full_ckpt ? "true" : "false",
+ txn->rollback_reason == NULL ? "" : txn->rollback_reason, txn->flags, iso_tag));
/*
* Log a message and return an error if error code and an optional error string has been passed.
@@ -2258,13 +2299,14 @@ __wt_verbose_dump_update(WT_SESSION_IMPL *session, WT_UPDATE *upd)
break;
}
- __wt_errx(session, "transaction id: %" PRIu64
- ", commit timestamp: %s"
- ", durable timestamp: %s"
- ", has next: %s"
- ", size: %" PRIu32
- ", type: %s"
- ", prepare state: %s",
+ __wt_errx(session,
+ "transaction id: %" PRIu64
+ ", commit timestamp: %s"
+ ", durable timestamp: %s"
+ ", has next: %s"
+ ", size: %" PRIu32
+ ", type: %s"
+ ", prepare state: %s",
upd->txnid, __wt_timestamp_to_string(upd->start_ts, ts_string[0]),
__wt_timestamp_to_string(upd->durable_ts, ts_string[1]), upd->next == NULL ? "no" : "yes",
upd->size, upd_type, prepare_state);
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index 9b3d1bb51fb..59a10c36733 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -8,7 +8,7 @@
#include "wt_internal.h"
-static void __checkpoint_timing_stress(WT_SESSION_IMPL *, bool);
+static void __checkpoint_timing_stress(WT_SESSION_IMPL *, uint64_t, struct timespec *);
static int __checkpoint_lock_dirty_tree(WT_SESSION_IMPL *, bool, bool, bool, const char *[]);
static int __checkpoint_mark_skip(WT_SESSION_IMPL *, WT_CKPT *, bool);
static int __checkpoint_presync(WT_SESSION_IMPL *, const char *[]);
@@ -137,9 +137,7 @@ __checkpoint_apply_operation(
}
if (v.len != 0)
- WT_ERR_MSG(session, EINVAL,
- "invalid checkpoint target %.*s: URIs may require "
- "quoting",
+ WT_ERR_MSG(session, EINVAL, "invalid checkpoint target %.*s: URIs may require quoting",
(int)cval.len, (char *)cval.str);
/* Some objects don't support named checkpoints. */
@@ -517,6 +515,7 @@ __checkpoint_fail_reset(WT_SESSION_IMPL *session)
static int
__checkpoint_prepare(WT_SESSION_IMPL *session, bool *trackingp, const char *cfg[])
{
+ struct timespec tsp;
WT_CONFIG_ITEM cval;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
@@ -546,6 +545,10 @@ __checkpoint_prepare(WT_SESSION_IMPL *session, bool *trackingp, const char *cfg[
__wt_epoch(session, &conn->ckpt_prep_start);
WT_RET(__wt_txn_begin(session, txn_cfg));
+ /* Wait 1000 microseconds to simulate slowdown in checkpoint prepare. */
+ tsp.tv_sec = 0;
+ tsp.tv_nsec = WT_MILLION;
+ __checkpoint_timing_stress(session, WT_TIMING_STRESS_PREPARE_CHECKPOINT_DELAY, &tsp);
original_snap_min = session->txn->snap_min;
WT_DIAGNOSTIC_YIELD;
@@ -585,7 +588,8 @@ __checkpoint_prepare(WT_SESSION_IMPL *session, bool *trackingp, const char *cfg[
/*
* Sanity check that the oldest ID hasn't moved on before we have cleared our entry.
*/
- WT_ASSERT(session, WT_TXNID_LE(txn_global->oldest_id, txn_shared->id) &&
+ WT_ASSERT(session,
+ WT_TXNID_LE(txn_global->oldest_id, txn_shared->id) &&
WT_TXNID_LE(txn_global->oldest_id, txn_shared->pinned_id));
/*
@@ -748,6 +752,7 @@ __txn_checkpoint_can_skip(
static int
__txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
{
+ struct timespec tsp;
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_DATA_HANDLE *hs_dhandle;
@@ -870,11 +875,14 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
if (full && logging)
WT_ERR(__wt_txn_checkpoint_log(session, full, WT_TXN_LOG_CKPT_START, NULL));
- __checkpoint_timing_stress(session, false);
+ /* Add a ten second wait to simulate checkpoint slowness. */
+ tsp.tv_sec = 10;
+ tsp.tv_nsec = 0;
+ __checkpoint_timing_stress(session, WT_TIMING_STRESS_CHECKPOINT_SLOW, &tsp);
WT_ERR(__checkpoint_apply_to_dhandles(session, cfg, __checkpoint_tree_helper));
/* Wait prior to checkpointing the history store to simulate checkpoint slowness. */
- __checkpoint_timing_stress(session, true);
+ __checkpoint_timing_stress(session, WT_TIMING_STRESS_HS_CHECKPOINT_DELAY, &tsp);
/*
* Get a history store dhandle. If the history store file is opened for a special operation this
@@ -1129,11 +1137,8 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[], bool waiting)
*/
#undef WT_CHECKPOINT_SESSION_FLAGS
#define WT_CHECKPOINT_SESSION_FLAGS (WT_SESSION_CAN_WAIT | WT_SESSION_IGNORE_CACHE_SIZE)
-#undef WT_CHECKPOINT_SESSION_FLAGS_OFF
-#define WT_CHECKPOINT_SESSION_FLAGS_OFF (WT_SESSION_HS_CURSOR)
- orig_flags = F_MASK(session, WT_CHECKPOINT_SESSION_FLAGS | WT_CHECKPOINT_SESSION_FLAGS_OFF);
+ orig_flags = F_MASK(session, WT_CHECKPOINT_SESSION_FLAGS);
F_SET(session, WT_CHECKPOINT_SESSION_FLAGS);
- F_CLR(session, WT_CHECKPOINT_SESSION_FLAGS_OFF);
/*
* Only one checkpoint can be active at a time, and checkpoints must run in the same order as
@@ -1274,9 +1279,8 @@ __checkpoint_lock_dirty_tree_int(WT_SESSION_IMPL *session, bool is_checkpoint, b
continue;
}
WT_RET_MSG(session, EBUSY,
- "checkpoint %s blocked by hot backup: it would "
- "delete an existing named checkpoint, and such "
- "checkpoints cannot be deleted during a hot backup",
+ "checkpoint %s blocked by hot backup: it would delete an existing named checkpoint, "
+ "and such checkpoints cannot be deleted during a hot backup",
ckpt->name);
}
/*
@@ -1307,8 +1311,9 @@ __checkpoint_lock_dirty_tree_int(WT_SESSION_IMPL *session, bool is_checkpoint, b
WT_CKPT_FOREACH (ckptbase, ckpt) {
if (!F_ISSET(ckpt, WT_CKPT_DELETE))
continue;
- WT_ASSERT(session, !WT_PREFIX_MATCH(ckpt->name, WT_CHECKPOINT) ||
- conn->hot_backup_start == 0 || ckpt->sec > conn->hot_backup_start);
+ WT_ASSERT(session,
+ !WT_PREFIX_MATCH(ckpt->name, WT_CHECKPOINT) || conn->hot_backup_start == 0 ||
+ ckpt->sec > conn->hot_backup_start);
/*
* We can't delete checkpoints referenced by a cursor. WiredTiger checkpoints are
* uniquely named and it's OK to have multiple in the system: clear the delete flag for
@@ -1445,9 +1450,7 @@ __checkpoint_lock_dirty_tree(
else if (WT_STRING_MATCH("to", k.str, k.len))
__drop_to(ckptbase, v.str, v.len);
else
- WT_ERR_MSG(session, EINVAL,
- "unexpected value for checkpoint "
- "key: %.*s",
+ WT_ERR_MSG(session, EINVAL, "unexpected value for checkpoint key: %.*s",
(int)k.len, k.str);
}
WT_ERR_NOTFOUND_OK(ret, false);
@@ -1536,8 +1539,8 @@ __checkpoint_mark_skip(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, bool force)
name = (ckpt - 1)->name;
if (ckpt > ckptbase + 1 && deleted < 2 &&
(strcmp(name, (ckpt - 2)->name) == 0 ||
- (WT_PREFIX_MATCH(name, WT_CHECKPOINT) &&
- WT_PREFIX_MATCH((ckpt - 2)->name, WT_CHECKPOINT)))) {
+ (WT_PREFIX_MATCH(name, WT_CHECKPOINT) &&
+ WT_PREFIX_MATCH((ckpt - 2)->name, WT_CHECKPOINT)))) {
F_SET(btree, WT_BTREE_SKIP_CKPT);
/*
* If there are potentially extra checkpoints to delete, we set the timer to recheck
@@ -1887,7 +1890,7 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final)
*/
if (btree->modified && !bulk && !__wt_btree_immediately_durable(session) &&
(S2C(session)->txn_global.has_stable_timestamp ||
- (!F_ISSET(S2C(session), WT_CONN_FILE_CLOSE_SYNC) && !metadata)))
+ (!F_ISSET(S2C(session), WT_CONN_FILE_CLOSE_SYNC) && !metadata)))
return (__wt_set_return(session, EBUSY));
/*
@@ -1915,12 +1918,12 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final)
/*
* __checkpoint_timing_stress --
- * Optionally add a 10 second delay to a checkpoint to simulate a long running checkpoint for
- * debug purposes. The reason for this option is finding operations that can block while waiting
- * for a checkpoint to complete.
+ * Optionally add a delay to a checkpoint to simulate a long running checkpoint for debug
+ * purposes. The reason for this option is finding operations that can block while waiting for a
+ * checkpoint to complete.
*/
static void
-__checkpoint_timing_stress(WT_SESSION_IMPL *session, bool history_store_stress)
+__checkpoint_timing_stress(WT_SESSION_IMPL *session, uint64_t flag, struct timespec *tsp)
{
WT_CONNECTION_IMPL *conn;
@@ -1931,9 +1934,6 @@ __checkpoint_timing_stress(WT_SESSION_IMPL *session, bool history_store_stress)
* the session used is either of the two sessions set aside for internal checkpoints.
*/
if (conn->ckpt_session != session && conn->meta_ckpt_session != session &&
- ((FLD_ISSET(conn->timing_stress_flags, WT_TIMING_STRESS_CHECKPOINT_SLOW) &&
- !history_store_stress) ||
- (FLD_ISSET(conn->timing_stress_flags, WT_TIMING_STRESS_HS_CHECKPOINT_DELAY) &&
- history_store_stress)))
- __wt_sleep(10, 0);
+ FLD_ISSET(conn->timing_stress_flags, flag))
+ __wt_sleep((uint64_t)tsp->tv_sec, (uint64_t)tsp->tv_nsec / WT_THOUSAND);
}
diff --git a/src/third_party/wiredtiger/src/txn/txn_log.c b/src/third_party/wiredtiger/src/txn/txn_log.c
index d541d8f48d5..b335d4ca3cb 100644
--- a/src/third_party/wiredtiger/src/txn/txn_log.c
+++ b/src/third_party/wiredtiger/src/txn/txn_log.c
@@ -554,7 +554,7 @@ __wt_txn_checkpoint_log(WT_SESSION_IMPL *session, bool full, uint32_t flags, WT_
*/
if (conn->hot_backup_start == 0 &&
(!FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_DIRTY) ||
- FLD_ISSET(conn->log_flags, WT_CONN_LOG_FORCE_DOWNGRADE)) &&
+ FLD_ISSET(conn->log_flags, WT_CONN_LOG_FORCE_DOWNGRADE)) &&
txn->full_ckpt)
__wt_log_ckpt(session, ckpt_lsn);
diff --git a/src/third_party/wiredtiger/src/txn/txn_recover.c b/src/third_party/wiredtiger/src/txn/txn_recover.c
index bc60efa5d51..fd230dab529 100644
--- a/src/third_party/wiredtiger/src/txn/txn_recover.c
+++ b/src/third_party/wiredtiger/src/txn/txn_recover.c
@@ -282,8 +282,9 @@ done:
return (0);
err:
- __wt_err(session, ret, "operation apply failed during recovery: operation type %" PRIu32
- " at LSN %" PRIu32 "/%" PRIu32,
+ __wt_err(session, ret,
+ "operation apply failed during recovery: operation type %" PRIu32 " at LSN %" PRIu32
+ "/%" PRIu32,
optype, lsnp->l.file, lsnp->l.offset);
return (ret);
}
@@ -477,9 +478,8 @@ __recovery_setup_file(WT_RECOVERY *r, const char *uri, const char *config)
if (r->files[fileid].uri != NULL)
WT_RET_PANIC(r->session, WT_PANIC,
- "metadata corruption: files %s and %s have the same "
- "file ID %u",
- uri, r->files[fileid].uri, fileid);
+ "metadata corruption: files %s and %s have the same file ID %u", uri,
+ r->files[fileid].uri, fileid);
WT_RET(__wt_strdup(r->session, uri, &r->files[fileid].uri));
WT_RET(__wt_config_getones(r->session, config, "checkpoint_lsn", &cval));
/* If there is no checkpoint logged for the file, apply everything. */
@@ -851,7 +851,8 @@ done:
* written. The rollback to stable operation should only rollback the latest page changes
* solely based on the write generation numbers.
*/
- WT_ASSERT(session, conn->txn_global.has_stable_timestamp == false &&
+ WT_ASSERT(session,
+ conn->txn_global.has_stable_timestamp == false &&
conn->txn_global.stable_timestamp == WT_TS_NONE);
/*
@@ -900,7 +901,7 @@ err:
if (eviction_started)
WT_TRET(__wt_evict_destroy(session));
- WT_TRET(session->iface.close(&session->iface, NULL));
+ WT_TRET(__wt_session_close_internal(session));
F_CLR(conn, WT_CONN_RECOVERING);
return (ret);
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index 2c97ddf48c7..65ae870b8fe 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -35,7 +35,8 @@ __rollback_abort_newer_update(WT_SESSION_IMPL *session, WT_UPDATE *first_upd,
* is not configured for key consistency check, the timestamps could be out of order
* here.
*/
- WT_ASSERT(session, !FLD_ISSET(S2BT(session)->assert_flags, WT_ASSERT_COMMIT_TS_KEYS) ||
+ WT_ASSERT(session,
+ !FLD_ISSET(S2BT(session)->assert_flags, WT_ASSERT_COMMIT_TS_KEYS) ||
upd == first_upd);
first_upd = upd->next;
@@ -165,7 +166,7 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
WT_UPDATE *hs_upd, *tombstone, *upd;
wt_timestamp_t hs_durable_ts, hs_start_ts, hs_stop_durable_ts, newer_hs_durable_ts;
uint64_t hs_counter, type_full;
- uint32_t hs_btree_id, session_flags;
+ uint32_t hs_btree_id;
uint8_t type;
int cmp;
char ts_string[4][WT_TS_INT_STRING_SIZE];
@@ -178,7 +179,6 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
hs_upd = tombstone = upd = NULL;
hs_durable_ts = hs_start_ts = hs_stop_durable_ts = WT_TS_NONE;
hs_btree_id = S2BT(session)->id;
- session_flags = 0;
WT_CLEAR(full_value);
valid_update_found = false;
#ifdef HAVE_DIAGNOSTIC
@@ -200,7 +200,7 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
newer_hs_durable_ts = unpack->tw.durable_start_ts;
/* Open a history store table cursor. */
- WT_ERR(__wt_hs_cursor_open(session, &session_flags));
+ WT_ERR(__wt_hs_cursor_open(session));
hs_cursor = session->hs_cursor;
cbt = (WT_CURSOR_BTREE *)hs_cursor;
@@ -263,8 +263,9 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
* records newer than or equal to the onpage value if eviction runs concurrently with
* checkpoint. In that case, don't verify the first record.
*/
- WT_ASSERT(session, hs_stop_durable_ts <= newer_hs_durable_ts ||
- hs_start_ts == hs_stop_durable_ts || first_record);
+ WT_ASSERT(session,
+ hs_stop_durable_ts <= newer_hs_durable_ts || hs_start_ts == hs_stop_durable_ts ||
+ first_record);
if (hs_stop_durable_ts < newer_hs_durable_ts)
WT_STAT_CONN_INCR(session, txn_rts_hs_stop_older_than_newer_start);
@@ -286,8 +287,8 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
/* Stop processing when we find a stable update according to the given timestamp. */
if (hs_durable_ts <= rollback_timestamp) {
__wt_verbose(session, WT_VERB_RTS,
- "history store update valid with start timestamp: %s, durable timestamp: %s, "
- "stop timestamp: %s and stable timestamp: %s",
+ "history store update valid with start timestamp: %s, durable timestamp: %s, stop "
+ "timestamp: %s and stable timestamp: %s",
__wt_timestamp_to_string(hs_start_ts, ts_string[0]),
__wt_timestamp_to_string(hs_durable_ts, ts_string[1]),
__wt_timestamp_to_string(hs_stop_durable_ts, ts_string[2]),
@@ -331,8 +332,9 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
upd->txnid = cbt->upd_value->tw.start_txn;
upd->durable_ts = cbt->upd_value->tw.durable_start_ts;
upd->start_ts = cbt->upd_value->tw.start_ts;
- __wt_verbose(session, WT_VERB_RTS, "update restored from history store (txnid: %" PRIu64
- ", start_ts: %s, durable_ts: %s",
+ __wt_verbose(session, WT_VERB_RTS,
+ "update restored from history store (txnid: %" PRIu64
+ ", start_ts: %s, durable_ts: %s",
upd->txnid, __wt_timestamp_to_string(upd->start_ts, ts_string[0]),
__wt_timestamp_to_string(upd->durable_ts, ts_string[1]));
@@ -395,7 +397,7 @@ err:
__wt_scr_free(session, &hs_value);
__wt_scr_free(session, &key);
__wt_buf_free(session, &full_value);
- WT_TRET(__wt_hs_cursor_close(session, session_flags));
+ WT_TRET(__wt_hs_cursor_close(session));
return (ret);
}
@@ -421,10 +423,15 @@ __rollback_abort_row_ondisk_kv(
__wt_row_leaf_value_cell(session, page, rip, NULL, vpack);
prepared = vpack->tw.prepare;
if (WT_IS_HS(S2BT(session))) {
- if (vpack->tw.durable_stop_ts > rollback_timestamp) {
+ /*
+ * Abort history store updates whose stop durable timestamp is greater than the stable
+ * timestamp, as well as updates with the maximum stop timestamp, which implies that they are
+ * associated with prepared transactions.
+ */
+ if (vpack->tw.durable_stop_ts > rollback_timestamp || vpack->tw.stop_ts == WT_TS_MAX) {
__wt_verbose(session, WT_VERB_RTS,
- "hs update aborted with start durable/commit timestamp: %s, %s, "
- "stop durable/commit timestamp: %s, %s and stable timestamp: %s",
+ "hs update aborted with start durable/commit timestamp: %s, %s, stop durable/commit "
+ "timestamp: %s, %s and stable timestamp: %s",
__wt_timestamp_to_string(vpack->tw.durable_start_ts, ts_string[0]),
__wt_timestamp_to_string(vpack->tw.start_ts, ts_string[1]),
__wt_timestamp_to_string(vpack->tw.durable_stop_ts, ts_string[2]),
@@ -595,11 +602,11 @@ __rollback_abort_row_reconciled_page(
if (mod->rec_result == WT_PM_REC_REPLACE &&
(mod->mod_replace.ta.newest_start_durable_ts > rollback_timestamp ||
- mod->mod_replace.ta.newest_stop_durable_ts > rollback_timestamp ||
- mod->mod_replace.ta.prepare)) {
+ mod->mod_replace.ta.newest_stop_durable_ts > rollback_timestamp ||
+ mod->mod_replace.ta.prepare)) {
__wt_verbose(session, WT_VERB_RTS,
- "reconciled replace block page history store update removal on-disk with start "
- "durable timestamp: %s, stop durable timestamp: %s and stable timestamp: %s",
+ "reconciled replace block page history store update removal on-disk with start durable "
+ "timestamp: %s, stop durable timestamp: %s and stable timestamp: %s",
__wt_timestamp_to_string(mod->mod_replace.ta.newest_start_durable_ts, ts_string[0]),
__wt_timestamp_to_string(mod->mod_replace.ta.newest_stop_durable_ts, ts_string[1]),
__wt_timestamp_to_string(rollback_timestamp, ts_string[2]));
@@ -622,9 +629,8 @@ __rollback_abort_row_reconciled_page(
multi->addr.ta.newest_stop_durable_ts > rollback_timestamp ||
multi->addr.ta.prepare) {
__wt_verbose(session, WT_VERB_RTS,
- "reconciled multi block page history store update removal on-disk with "
- "start durable timestamp: %s, stop durable timestamp: %s and stable "
- "timestamp: %s",
+ "reconciled multi block page history store update removal on-disk with start "
+ "durable timestamp: %s, stop durable timestamp: %s and stable timestamp: %s",
__wt_timestamp_to_string(multi->addr.ta.newest_start_durable_ts, ts_string[0]),
__wt_timestamp_to_string(multi->addr.ta.newest_stop_durable_ts, ts_string[1]),
__wt_timestamp_to_string(rollback_timestamp, ts_string[2]));
@@ -696,6 +702,24 @@ __rollback_abort_newer_row_leaf(
}
/*
+ * __rollback_get_ref_max_durable_timestamp --
+ * Return the maximum durable timestamp aggregated for a ref. For every tree other than the
+ * history store this is the larger of the newest start and newest stop durable timestamps. The
+ * history store differs: most of its updates have a stop timestamp greater than or equal to
+ * their start timestamp, except for the updates written on behalf of prepared updates in the
+ * data store, which have no stop timestamp. To make sure those updates are aborted too, the
+ * history store maximum is the larger of the newest stop durable and newest stop timestamps.
+ */
+static wt_timestamp_t
+__rollback_get_ref_max_durable_timestamp(WT_SESSION_IMPL *session, WT_TIME_AGGREGATE *ta)
+{
+ if (WT_IS_HS(S2BT(session)))
+ return WT_MAX(ta->newest_stop_durable_ts, ta->newest_stop_ts);
+ else
+ return WT_MAX(ta->newest_start_durable_ts, ta->newest_stop_durable_ts);
+}
+
+/*
* __rollback_page_needs_abort --
* Check whether the page needs rollback. Return true if the page has modifications newer than
* the given timestamp. Otherwise return false.
@@ -730,16 +754,15 @@ __rollback_page_needs_abort(
*/
if (mod != NULL && mod->rec_result == WT_PM_REC_REPLACE) {
tag = "reconciled replace block";
- durable_ts = WT_MAX(
- mod->mod_replace.ta.newest_start_durable_ts, mod->mod_replace.ta.newest_stop_durable_ts);
+ durable_ts = __rollback_get_ref_max_durable_timestamp(session, &mod->mod_replace.ta);
prepared = mod->mod_replace.ta.prepare;
result = (durable_ts > rollback_timestamp) || prepared;
} else if (mod != NULL && mod->rec_result == WT_PM_REC_MULTIBLOCK) {
tag = "reconciled multi block";
/* Calculate the max durable timestamp by traversing all multi addresses. */
for (multi = mod->mod_multi, i = 0; i < mod->mod_multi_entries; ++multi, ++i) {
- durable_ts = WT_MAX(durable_ts, multi->addr.ta.newest_start_durable_ts);
- durable_ts = WT_MAX(durable_ts, multi->addr.ta.newest_stop_durable_ts);
+ durable_ts = WT_MAX(
+ durable_ts, __rollback_get_ref_max_durable_timestamp(session, &multi->addr.ta));
if (multi->addr.ta.prepare)
prepared = true;
}
@@ -748,12 +771,12 @@ __rollback_page_needs_abort(
tag = "on page cell";
/* Check if the page is obsolete using the page disk address. */
__wt_cell_unpack_addr(session, ref->home->dsk, (WT_CELL *)addr, &vpack);
- durable_ts = WT_MAX(vpack.ta.newest_start_durable_ts, vpack.ta.newest_stop_durable_ts);
+ durable_ts = __rollback_get_ref_max_durable_timestamp(session, &vpack.ta);
prepared = vpack.ta.prepare;
result = (durable_ts > rollback_timestamp) || prepared;
} else if (addr != NULL) {
tag = "address";
- durable_ts = WT_MAX(addr->ta.newest_start_durable_ts, addr->ta.newest_stop_durable_ts);
+ durable_ts = __rollback_get_ref_max_durable_timestamp(session, &addr->ta);
prepared = addr->ta.prepare;
result = (durable_ts > rollback_timestamp) || prepared;
}
@@ -987,19 +1010,18 @@ __rollback_to_stable_btree_hs_truncate(WT_SESSION_IMPL *session, uint32_t btree_
WT_UPDATE *hs_upd;
wt_timestamp_t hs_start_ts;
uint64_t hs_counter;
- uint32_t hs_btree_id, session_flags;
+ uint32_t hs_btree_id;
int exact;
char ts_string[WT_TS_INT_STRING_SIZE];
hs_cursor = NULL;
WT_CLEAR(key);
hs_upd = NULL;
- session_flags = 0;
WT_RET(__wt_scr_alloc(session, 0, &hs_key));
/* Open a history store table cursor. */
- WT_ERR(__wt_hs_cursor_open(session, &session_flags));
+ WT_ERR(__wt_hs_cursor_open(session));
hs_cursor = session->hs_cursor;
cbt = (WT_CURSOR_BTREE *)hs_cursor;
@@ -1048,7 +1070,7 @@ __rollback_to_stable_btree_hs_truncate(WT_SESSION_IMPL *session, uint32_t btree_
err:
__wt_scr_free(session, &hs_key);
__wt_free(session, hs_upd);
- WT_TRET(__wt_hs_cursor_close(session, session_flags));
+ WT_TRET(__wt_hs_cursor_close(session));
return (ret);
}
@@ -1064,7 +1086,7 @@ __rollback_to_stable_hs_final_pass(WT_SESSION_IMPL *session, wt_timestamp_t roll
WT_CONFIG ckptconf;
WT_CONFIG_ITEM cval, durableval, key;
WT_DECL_RET;
- wt_timestamp_t max_durable_ts, newest_start_durable_ts, newest_stop_durable_ts;
+ wt_timestamp_t max_durable_ts, newest_stop_durable_ts, newest_stop_ts;
char *config;
char ts_string[2][WT_TS_INT_STRING_SIZE];
@@ -1072,22 +1094,27 @@ __rollback_to_stable_hs_final_pass(WT_SESSION_IMPL *session, wt_timestamp_t roll
WT_RET(__wt_metadata_search(session, WT_HS_URI, &config));
- /* Find out the max durable timestamp of the object from checkpoint. */
- newest_start_durable_ts = newest_stop_durable_ts = WT_TS_NONE;
+ /*
+ * Find the maximum durable timestamp of the history store from its checkpoint metadata. Most
+ * history store updates have a stop timestamp greater than or equal to their start timestamp;
+ * the exception is updates written on behalf of prepared updates in the data store, which have
+ * no stop timestamp. To make sure those updates are aborted as well, the newest stop timestamp
+ * is included in the calculation of the history store's maximum timestamp.
+ */
+ newest_stop_durable_ts = newest_stop_ts = WT_TS_NONE;
WT_ERR(__wt_config_getones(session, config, "checkpoint", &cval));
__wt_config_subinit(session, &ckptconf, &cval);
for (; __wt_config_next(&ckptconf, &key, &cval) == 0;) {
- ret = __wt_config_subgets(session, &cval, "newest_start_durable_ts", &durableval);
- if (ret == 0)
- newest_start_durable_ts =
- WT_MAX(newest_start_durable_ts, (wt_timestamp_t)durableval.val);
- WT_ERR_NOTFOUND_OK(ret, false);
ret = __wt_config_subgets(session, &cval, "newest_stop_durable_ts", &durableval);
if (ret == 0)
newest_stop_durable_ts = WT_MAX(newest_stop_durable_ts, (wt_timestamp_t)durableval.val);
WT_ERR_NOTFOUND_OK(ret, false);
+ ret = __wt_config_subgets(session, &cval, "newest_stop_ts", &durableval);
+ if (ret == 0)
+ newest_stop_ts = WT_MAX(newest_stop_ts, (wt_timestamp_t)durableval.val);
+ WT_ERR_NOTFOUND_OK(ret, false);
}
- max_durable_ts = WT_MAX(newest_start_durable_ts, newest_stop_durable_ts);
+ max_durable_ts = WT_MAX(newest_stop_ts, newest_stop_durable_ts);
WT_ERR(__wt_session_get_dhandle(session, WT_HS_URI, NULL, NULL, 0));
/*
@@ -1299,7 +1326,7 @@ __wt_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[], bool no_ckp
*/
if (!F_ISSET(S2C(session), WT_CONN_IN_MEMORY) && !no_ckpt)
WT_TRET(session->iface.checkpoint(&session->iface, "force=1"));
- WT_TRET(session->iface.close(&session->iface, NULL));
+ WT_TRET(__wt_session_close_internal(session));
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
index ba311cc93f0..c631b597f4d 100644
--- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c
+++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
@@ -422,8 +422,7 @@ __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
if (has_durable && (has_oldest || txn_global->has_oldest_timestamp) && oldest_ts > durable_ts) {
__wt_readunlock(session, &txn_global->rwlock);
WT_RET_MSG(session, EINVAL,
- "set_timestamp: oldest timestamp %s must not be later than "
- "durable timestamp %s",
+ "set_timestamp: oldest timestamp %s must not be later than durable timestamp %s",
__wt_timestamp_to_string(oldest_ts, ts_string[0]),
__wt_timestamp_to_string(durable_ts, ts_string[1]));
}
@@ -431,8 +430,7 @@ __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
if (has_durable && (has_stable || txn_global->has_stable_timestamp) && stable_ts > durable_ts) {
__wt_readunlock(session, &txn_global->rwlock);
WT_RET_MSG(session, EINVAL,
- "set_timestamp: stable timestamp %s must not be later than "
- "durable timestamp %s",
+ "set_timestamp: stable timestamp %s must not be later than durable timestamp %s",
__wt_timestamp_to_string(stable_ts, ts_string[0]),
__wt_timestamp_to_string(durable_ts, ts_string[1]));
}
@@ -444,8 +442,7 @@ __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
(has_stable || txn_global->has_stable_timestamp) && oldest_ts > stable_ts) {
__wt_readunlock(session, &txn_global->rwlock);
WT_RET_MSG(session, EINVAL,
- "set_timestamp: oldest timestamp %s must not be later than "
- "stable timestamp %s",
+ "set_timestamp: oldest timestamp %s must not be later than stable timestamp %s",
__wt_timestamp_to_string(oldest_ts, ts_string[0]),
__wt_timestamp_to_string(stable_ts, ts_string[1]));
}
@@ -537,9 +534,8 @@ __txn_assert_after_reads(
if (tmp_timestamp >= ts) {
__wt_readunlock(session, &txn_global->read_timestamp_rwlock);
WT_RET_MSG(session, EINVAL,
- "%s timestamp %s must be greater than the "
- "latest active read timestamp %s ",
- op, __wt_timestamp_to_string(ts, ts_string[0]),
+ "%s timestamp %s must be greater than the latest active read timestamp %s ", op,
+ __wt_timestamp_to_string(ts, ts_string[0]),
__wt_timestamp_to_string(tmp_timestamp, ts_string[1]));
}
break;
@@ -583,9 +579,7 @@ __wt_txn_set_commit_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t commit_ts
if (txn->isolation != WT_ISO_SNAPSHOT)
WT_RET_MSG(session, EINVAL,
- "setting a commit_timestamp"
- " requires a transaction running at snapshot"
- " isolation");
+ "setting a commit_timestamp requires a transaction running at snapshot isolation");
/*
* Compare against the oldest and the stable timestamp. Return an error if the given timestamp
@@ -604,16 +598,12 @@ __wt_txn_set_commit_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t commit_ts
* timestamp.
*/
if (has_oldest_ts && commit_ts < oldest_ts)
- WT_RET_MSG(session, EINVAL,
- "commit timestamp %s is less than the oldest "
- "timestamp %s",
+ WT_RET_MSG(session, EINVAL, "commit timestamp %s is less than the oldest timestamp %s",
__wt_timestamp_to_string(commit_ts, ts_string[0]),
__wt_timestamp_to_string(oldest_ts, ts_string[1]));
if (has_stable_ts && commit_ts < stable_ts)
- WT_RET_MSG(session, EINVAL,
- "commit timestamp %s is less than the stable "
- "timestamp %s",
+ WT_RET_MSG(session, EINVAL, "commit timestamp %s is less than the stable timestamp %s",
__wt_timestamp_to_string(commit_ts, ts_string[0]),
__wt_timestamp_to_string(stable_ts, ts_string[1]));
@@ -623,8 +613,7 @@ __wt_txn_set_commit_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t commit_ts
*/
if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) && commit_ts < txn->first_commit_timestamp)
WT_RET_MSG(session, EINVAL,
- "commit timestamp %s older than the first "
- "commit timestamp %s for this transaction",
+ "commit timestamp %s older than the first commit timestamp %s for this transaction",
__wt_timestamp_to_string(commit_ts, ts_string[0]),
__wt_timestamp_to_string(txn->first_commit_timestamp, ts_string[1]));
@@ -641,8 +630,7 @@ __wt_txn_set_commit_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t commit_ts
if (txn->prepare_timestamp > commit_ts) {
if (!F_ISSET(txn, WT_TXN_TS_ROUND_PREPARED))
WT_RET_MSG(session, EINVAL,
- "commit timestamp %s is less than the "
- "prepare timestamp %s for this transaction",
+ "commit timestamp %s is less than the prepare timestamp %s for this transaction",
__wt_timestamp_to_string(commit_ts, ts_string[0]),
__wt_timestamp_to_string(txn->prepare_timestamp, ts_string[1]));
commit_ts = txn->prepare_timestamp;
@@ -691,8 +679,7 @@ __wt_txn_set_durable_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t durable_
if (!F_ISSET(txn, WT_TXN_PREPARE))
WT_RET_MSG(session, EINVAL,
- "durable timestamp should not be specified for "
- "non-prepared transaction");
+ "durable timestamp should not be specified for non-prepared transaction");
if (!F_ISSET(txn, WT_TXN_HAS_TS_COMMIT))
WT_RET_MSG(session, EINVAL, "commit timestamp is needed before the durable timestamp");
@@ -725,8 +712,7 @@ __wt_txn_set_durable_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t durable_
/* Check if the durable timestamp is less than the commit timestamp. */
if (durable_ts < txn->commit_timestamp)
WT_RET_MSG(session, EINVAL,
- "durable timestamp %s is less than the commit timestamp %s "
- "for this transaction",
+ "durable timestamp %s is less than the commit timestamp %s for this transaction",
__wt_timestamp_to_string(durable_ts, ts_string[0]),
__wt_timestamp_to_string(txn->commit_timestamp, ts_string[1]));
@@ -760,8 +746,7 @@ __wt_txn_set_prepare_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t prepare_
if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT))
WT_RET_MSG(session, EINVAL,
- "commit timestamp "
- "should not have been set before the prepare timestamp");
+ "commit timestamp should not have been set before the prepare timestamp");
WT_RET(__txn_assert_after_reads(session, "prepare", prepare_ts, &prev_shared));
@@ -781,16 +766,14 @@ __wt_txn_set_prepare_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t prepare_
WT_ASSERT(session, prev_shared == NULL);
__wt_verbose(session, WT_VERB_TIMESTAMP,
- "prepare timestamp %s rounded to oldest "
- "timestamp %s",
+ "prepare timestamp %s rounded to oldest timestamp %s",
__wt_timestamp_to_string(prepare_ts, ts_string[0]),
__wt_timestamp_to_string(oldest_ts, ts_string[1]));
prepare_ts = oldest_ts;
} else
WT_RET_MSG(session, EINVAL,
- "prepare timestamp %s is older than the oldest "
- "timestamp %s",
+ "prepare timestamp %s is older than the oldest timestamp %s",
__wt_timestamp_to_string(prepare_ts, ts_string[0]),
__wt_timestamp_to_string(oldest_ts, ts_string[1]));
}
@@ -825,15 +808,11 @@ __wt_txn_set_read_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t read_ts)
txn->isolation = WT_ISO_SNAPSHOT;
else if (txn->isolation != WT_ISO_SNAPSHOT)
WT_RET_MSG(session, EINVAL,
- "setting a read_timestamp"
- " requires a transaction running at snapshot"
- " isolation");
+ "setting a read_timestamp requires a transaction running at snapshot isolation");
/* Read timestamps can't change once set. */
if (F_ISSET(txn, WT_TXN_SHARED_TS_READ))
- WT_RET_MSG(session, EINVAL,
- "a read_timestamp"
- " may only be set once per transaction");
+ WT_RET_MSG(session, EINVAL, "a read_timestamp may only be set once per transaction");
/*
* This code is not using the timestamp validate function to avoid a race between checking and
@@ -860,9 +839,7 @@ __wt_txn_set_read_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t read_ts)
* error message because that logs a MongoDB error, use an informational message to
* provide the context instead.
*/
- WT_RET(__wt_msg(session,
- "read timestamp "
- "%s less than the oldest timestamp %s",
+ WT_RET(__wt_msg(session, "read timestamp %s less than the oldest timestamp %s",
__wt_timestamp_to_string(read_ts, ts_string[0]),
__wt_timestamp_to_string(ts_oldest, ts_string[1])));
return (EINVAL);
@@ -878,8 +855,7 @@ __wt_txn_set_read_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t read_ts)
*/
if (did_roundup_to_oldest)
__wt_verbose(session, WT_VERB_TIMESTAMP,
- "read "
- "timestamp %s : rounded to oldest timestamp %s",
+ "read timestamp %s : rounded to oldest timestamp %s",
__wt_timestamp_to_string(read_ts, ts_string[0]),
__wt_timestamp_to_string(ts_oldest, ts_string[1]));