diff options
author | Luke Chen <luke.chen@mongodb.com> | 2020-09-09 11:37:44 +1000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-09-09 01:52:32 +0000 |
commit | 0f0eb111c9dbd5dd4ca86529aee81bad59c6579b (patch) | |
tree | 94f3efa909755fcc8b381bf68214ebb9bb326964 /src/third_party/wiredtiger/src/txn | |
parent | 103b545528e7cf05537bd68bda4bc8235bf6fefa (diff) | |
download | mongo-0f0eb111c9dbd5dd4ca86529aee81bad59c6579b.tar.gz |
Import wiredtiger: 6a7db4f96fe828fdb1b9a31d83460c0573a2c3b1 from branch mongodb-4.4
ref: 579966149b..6a7db4f96f
for: 4.4.2
WT-5144 Use wt_clock instead of wt_epoch in perf programs
WT-5585 Remove cache_overflow config option
WT-5693 Enable test_wt4105_large_doc_small_upd
WT-5940 Migrate to Clang Format 10
WT-6000 Enhance incremental backup testing in format to support restart
WT-6006 Revert test binaries to 10 in checkpoint-stress-test
WT-6027 Fix docs spelling errors and warnings
WT-6322 Split full compatibility tests into smaller groups
WT-6390 Extend compact02 timeout from 8 => 10 minutes
WT-6404 Add timing stress that delays checkpoint after it calls __wt_txn_begin
WT-6451 Do not evict clean metadata pages if needed for historic reads
WT-6458 read row-store leaf pages with prepared updates in a single pass
WT-6463 History store operations should honor cache size
WT-6471 Avoid the error message for non-existent clang-format binary
WT-6472 Update timestamp_abort test cache configuration
WT-6478 Cursor cache statistics not getting incremented
WT-6505 Add debugging for missing file failure
WT-6507 Exit cache eviction worker after our operation has timed out
WT-6526 Fix assertion failure when opening DB in readonly mode after unclean shutdown
WT-6532 Consider update structure overhead in split length calculation
WT-6542 Add an assert to ensure we are not unintentionally returning empty values
WT-6544 Onpage value not appended to the tombstone restored from the data or history store
WT-6556 Fix internal sessions to use internal session close function rather than public API to avoid memory leak
WT-6559 Use the session id from the new session to determine statistics bucket
WT-6560 Fix usage of global salvage in WT utility
WT-6561 Provide MongoDB configuration in the wt utility usage output
WT-6569 Squash the prepared updates into a single update before writing it to data store
WT-6570 RTS to remove the left over updates in the history store without stop timestamp
WT-6571 Lseek cannot use error_sys_check because it does not return an int
WT-6577 History store dump outputs confusing time window
WT-6578 Prevent reconciliation from looking past the on-disk value
WT-6581 Fix class name in test_hs15
WT-6585 Panic if updates that are older than the updates in history store are inserted to history store
WT-6586 Tombstone inserted to history store should also be flagged as WT_UPDATE_HS
WT-6589 Fix disabled cursor cache python tests
WT-6591 Stop checkpoint thread before closing connection in Python tests
WT-6593 Retry conflicting operations in test_rollback_to_stable10
WT-6596 Increase cache for timestamp abort test and separate key spaces for all abort tests
WT-6598 Add new API allowing changing dhandle hash bucket size
WT-6602 Allow operation timeout ms to be passed to commit and rollback
WT-6604 Fix typo in the comment describing WT_CELL structure
WT-6610 Fix incremental backup checkpoint parsing to handle upgrades
WT-6611 Revert enhancement allowing rename and incremental backup
WT-6613 Add python test for early_load flag
WT-6615 Initialize last_upd where it is actually used
WT-6619 Eliminate possibility of infinite loop in test_cursor13.py
WT-6623 Set the connection level file id in recovery file scan
WT-6625 Remove outdated TODO
WT-6635 Disable mix and column filetype test
WT-6640 Coverity: Failure to restore saved dhandle
WT-6641 Coverity: Unused value
Diffstat (limited to 'src/third_party/wiredtiger/src/txn')
-rw-r--r-- | src/third_party/wiredtiger/src/txn/txn.c | 182 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/txn/txn_ckpt.c | 62 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/txn/txn_log.c | 2 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/txn/txn_recover.c | 15 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c | 109 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/txn/txn_timestamp.c | 62 |
6 files changed, 239 insertions, 193 deletions
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index 0c3ff78fa74..014b449c6f7 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ b/src/third_party/wiredtiger/src/txn/txn.c @@ -124,8 +124,8 @@ __wt_txn_release_snapshot(WT_SESSION_IMPL *session) txn_global = &S2C(session)->txn_global; txn_shared = WT_SESSION_TXN_SHARED(session); - WT_ASSERT(session, txn_shared->pinned_id == WT_TXN_NONE || - session->txn->isolation == WT_ISO_READ_UNCOMMITTED || + WT_ASSERT(session, + txn_shared->pinned_id == WT_TXN_NONE || session->txn->isolation == WT_ISO_READ_UNCOMMITTED || !__wt_txn_visible_all(session, txn_shared->pinned_id, WT_TS_NONE)); txn_shared->metadata_pinned = txn_shared->pinned_id = WT_TXN_NONE; @@ -406,7 +406,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags) if ((oldest_id == prev_oldest_id || (!strict && WT_TXNID_LT(oldest_id, prev_oldest_id + 100))) && ((last_running == prev_last_running) || - (!strict && WT_TXNID_LT(last_running, prev_last_running + 100))) && + (!strict && WT_TXNID_LT(last_running, prev_last_running + 100))) && metadata_pinned == prev_metadata_pinned) return (0); @@ -444,9 +444,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags) if (WT_VERBOSE_ISSET(session, WT_VERB_TRANSACTION) && current_id - oldest_id > 10000 && oldest_session != NULL) { __wt_verbose(session, WT_VERB_TRANSACTION, - "old snapshot %" PRIu64 " pinned in session %" PRIu32 - " [%s]" - " with snap_min %" PRIu64, + "old snapshot %" PRIu64 " pinned in session %" PRIu32 " [%s] with snap_min %" PRIu64, oldest_id, oldest_session->id, oldest_session->lastop, oldest_session->txn->snap_min); } } @@ -457,6 +455,33 @@ done: } /* + * __txn_config_operation_timeout -- + * Configure a transactions operation timeout duration. 
+ */ +static int +__txn_config_operation_timeout(WT_SESSION_IMPL *session, const char *cfg[]) +{ + WT_CONFIG_ITEM cval; + WT_TXN *txn; + + txn = session->txn; + + if (cfg == NULL) + return (0); + + /* Retrieve the maximum operation time, defaulting to the database-wide configuration. */ + WT_RET(__wt_config_gets(session, cfg, "operation_timeout_ms", &cval)); + + /* + * The default configuration value is 0, we can't tell if they're setting it back to 0 or, if + * the default was automatically passed in. + */ + if (cval.val != 0) + txn->operation_timeout_us = (uint64_t)(cval.val * WT_THOUSAND); + return (0); +} + +/* * __wt_txn_config -- * Configure a transaction. */ @@ -469,6 +494,9 @@ __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[]) txn = session->txn; + if (cfg == NULL) + return (0); + WT_RET(__wt_config_gets_def(session, cfg, "isolation", 0, &cval)); if (cval.len != 0) txn->isolation = WT_STRING_MATCH("snapshot", cval.str, cval.len) ? @@ -476,9 +504,7 @@ __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_STRING_MATCH("read-committed", cval.str, cval.len) ? WT_ISO_READ_COMMITTED : WT_ISO_READ_UNCOMMITTED; - /* Retrieve the maximum operation time, defaulting to the database-wide configuration. */ - WT_RET(__wt_config_gets(session, cfg, "operation_timeout_ms", &cval)); - txn->operation_timeout_us = (uint64_t)(cval.val * WT_THOUSAND); + WT_RET(__txn_config_operation_timeout(session, cfg)); /* * The default sync setting is inherited from the connection, but can be overridden by an @@ -723,8 +749,18 @@ __txn_append_hs_record(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, WT_ITEM * if (commit) goto done; + /* + * Set the flag to indicate that this update has been restored from history store for the + * rollback of a prepared transaction. 
+ */ + F_SET(upd, WT_UPDATE_RESTORED_FROM_HS); total_size += size; + __wt_verbose(session, WT_VERB_TRANSACTION, + "update restored from history store (txnid: %" PRIu64 ", start_ts: %s, durable_ts: %s", + upd->txnid, __wt_timestamp_to_string(upd->start_ts, ts_string[0]), + __wt_timestamp_to_string(upd->durable_ts, ts_string[1])); + /* If the history store record has a valid stop time point, append it. */ if (hs_stop_durable_ts != WT_TS_MAX) { WT_ASSERT(session, hs_cbt->upd_value->tw.stop_ts != WT_TS_MAX); @@ -733,20 +769,20 @@ __txn_append_hs_record(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, WT_ITEM * tombstone->start_ts = hs_cbt->upd_value->tw.stop_ts; tombstone->txnid = hs_cbt->upd_value->tw.stop_txn; tombstone->next = upd; + /* + * Set the flag to indicate that this update has been restored from history store for the + * rollback of a prepared transaction. + */ + F_SET(tombstone, WT_UPDATE_RESTORED_FROM_HS); total_size += size; - } else - tombstone = upd; - __wt_verbose(session, WT_VERB_TRANSACTION, - "update restored from history store (txnid: %" PRIu64 ", start_ts: %s, durable_ts: %s", - upd->txnid, __wt_timestamp_to_string(upd->start_ts, ts_string[0]), - __wt_timestamp_to_string(upd->durable_ts, ts_string[1])); + __wt_verbose(session, WT_VERB_TRANSACTION, + "tombstone restored from history store (txnid: %" PRIu64 ", start_ts: %s, durable_ts: %s", + tombstone->txnid, __wt_timestamp_to_string(tombstone->start_ts, ts_string[0]), + __wt_timestamp_to_string(tombstone->durable_ts, ts_string[1])); - /* - * Set the flag to indicate that this update has been restored from history store for the - * rollback of a prepared transaction. - */ - F_SET(upd, WT_UPDATE_RESTORED_FROM_HS); + upd = tombstone; + } /* Walk to the end of the chain and we can only have prepared updates on the update chain. 
*/ for (;; chain = chain->next) { @@ -758,15 +794,15 @@ __txn_append_hs_record(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, WT_ITEM * } /* Append the update to the end of the chain. */ - WT_PUBLISH(chain->next, tombstone); + WT_PUBLISH(chain->next, upd); *upd_appended = true; __wt_cache_page_inmem_incr(session, page, total_size); if (0) { err: - __wt_free(session, upd); - __wt_free(session, tombstone); + WT_ASSERT(session, tombstone == NULL || upd == tombstone); + __wt_free_update_list(session, &upd); } done: __wt_scr_free(session, &hs_key); @@ -914,13 +950,12 @@ __txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit, WT_TXN *txn; WT_UPDATE *fix_upd, *tombstone, *upd; size_t not_used; - uint32_t hs_btree_id, session_flags; + uint32_t hs_btree_id; bool upd_appended; hs_cursor = NULL; txn = session->txn; fix_upd = tombstone = NULL; - session_flags = 0; upd_appended = false; WT_RET(__txn_search_prepared_op(session, op, cursorp, &upd)); @@ -953,7 +988,7 @@ __txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit, cbt = (WT_CURSOR_BTREE *)(*cursorp); hs_btree_id = S2BT(session)->id; /* Open a history store table cursor. */ - WT_ERR(__wt_hs_cursor_open(session, &session_flags)); + WT_ERR(__wt_hs_cursor_open(session)); hs_cursor = session->hs_cursor; /* @@ -976,8 +1011,9 @@ __txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit, * and instead write nothing. 
*/ WT_ERR(__wt_upd_alloc_tombstone(session, &tombstone, &not_used)); - WT_WITH_BTREE(session, op->btree, ret = __wt_row_modify(cbt, &cbt->iface.key, NULL, - tombstone, WT_UPDATE_INVALID, false)); + WT_WITH_BTREE(session, op->btree, + ret = + __wt_row_modify(cbt, &cbt->iface.key, NULL, tombstone, WT_UPDATE_INVALID, false)); WT_ERR(ret); tombstone = NULL; } else @@ -1044,7 +1080,7 @@ __txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit, err: if (hs_cursor != NULL) - WT_TRET(__wt_hs_cursor_close(session, session_flags)); + WT_TRET(__wt_hs_cursor_close(session)); if (!upd_appended) __wt_free(session, fix_upd); __wt_free(session, tombstone); @@ -1243,6 +1279,9 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING)); WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) || txn->mod_count == 0); + /* Configure the timeout for this commit operation. */ + WT_ERR(__txn_config_operation_timeout(session, cfg)); + /* * Clear the prepared round up flag if the transaction is not prepared. There is no rounding up * to do in that case. @@ -1513,8 +1552,8 @@ __wt_txn_prepare(WT_SESSION_IMPL *session, const char *cfg[]) */ if (!FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_DEBUG_MODE)) WT_RET_ASSERT(session, txn->logrec == NULL, EINVAL, - "A transaction should not have been assigned a log" - " record if WT_CONN_LOG_DEBUG mode is not enabled"); + "A transaction should not have been assigned a log record if WT_CONN_LOG_DEBUG mode is " + "not enabled"); /* Set the prepare timestamp. 
*/ WT_RET(__wt_txn_set_timestamp(session, cfg)); @@ -1639,8 +1678,6 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]) u_int i; bool prepare, readonly; - WT_UNUSED(cfg); - cursor = NULL; txn = session->txn; prepare = F_ISSET(txn, WT_TXN_PREPARE); @@ -1652,6 +1689,9 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]) if (txn->notify != NULL) WT_TRET(txn->notify->notify(txn->notify, (WT_SESSION *)session, txn->id, 0)); + /* Configure the timeout for this rollback operation. */ + WT_RET(__txn_config_operation_timeout(session, cfg)); + /* * Resolving prepared updates is expensive. Sort prepared modifications so all updates for each * page within each file are done at the same time. @@ -1757,7 +1797,8 @@ __wt_txn_init(WT_SESSION_IMPL *session, WT_SESSION_IMPL *session_ret) txn->snapshot = txn->__snapshot; txn->id = WT_TXN_NONE; - WT_ASSERT(session, S2C(session_ret)->txn_global.txn_shared_list == NULL || + WT_ASSERT(session, + S2C(session_ret)->txn_global.txn_shared_list == NULL || WT_SESSION_TXN_SHARED(session_ret)->pinned_id == WT_TXN_NONE); /* @@ -1950,12 +1991,11 @@ __wt_txn_activity_drain(WT_SESSION_IMPL *session) * Shut down the global transaction state. */ int -__wt_txn_global_shutdown(WT_SESSION_IMPL *session, const char *config, const char **cfg) +__wt_txn_global_shutdown(WT_SESSION_IMPL *session, const char **cfg) { WT_CONFIG_ITEM cval; WT_CONNECTION_IMPL *conn; WT_DECL_RET; - WT_SESSION *wt_session; WT_SESSION_IMPL *s; char ts_string[WT_TS_INT_STRING_SIZE]; const char *ckpt_cfg; @@ -1975,7 +2015,7 @@ __wt_txn_global_shutdown(WT_SESSION_IMPL *session, const char *config, const cha if (conn->txn_global.has_stable_timestamp) F_SET(conn, WT_CONN_CLOSING_TIMESTAMP); } - if (!F_ISSET(conn, WT_CONN_IN_MEMORY | WT_CONN_READONLY)) { + if (!F_ISSET(conn, WT_CONN_IN_MEMORY | WT_CONN_READONLY | WT_CONN_PANIC)) { /* * Perform rollback to stable to ensure that the stable version is written to disk on a * clean shutdown. 
@@ -1992,7 +2032,6 @@ __wt_txn_global_shutdown(WT_SESSION_IMPL *session, const char *config, const cha if (s != NULL) { const char *checkpoint_cfg[] = { WT_CONFIG_BASE(session, WT_SESSION_checkpoint), ckpt_cfg, NULL}; - wt_session = &s->iface; WT_TRET(__wt_txn_checkpoint(s, checkpoint_cfg, true)); /* @@ -2000,7 +2039,7 @@ __wt_txn_global_shutdown(WT_SESSION_IMPL *session, const char *config, const cha */ WT_WITH_DHANDLE(s, WT_SESSION_META_DHANDLE(s), __wt_tree_modify_set(s)); - WT_TRET(wt_session->close(wt_session, config)); + WT_TRET(__wt_session_close_internal(s)); } } @@ -2051,8 +2090,9 @@ __wt_txn_is_blocking(WT_SESSION_IMPL *session, bool conservative) * a transaction, we need to have considered splitting the page in the case that its updates are * on a single page. */ - if (conservative && (txn->mod_count < (10 + WT_REC_SPLIT_MIN_ITEMS_USE_MEM) || - F_ISSET(session, WT_SESSION_RESOLVING_TXN))) + if (conservative && + (txn->mod_count < (10 + WT_REC_SPLIT_MIN_ITEMS_USE_MEM) || + F_ISSET(session, WT_SESSION_RESOLVING_TXN))) return (0); /* @@ -2098,28 +2138,29 @@ __wt_verbose_dump_txn_one( * Dump the information of the passed transaction into a buffer, to be logged with an optional * error message. 
*/ - WT_RET(__wt_snprintf(buf, - sizeof(buf), "transaction id: %" PRIu64 ", mod count: %u" - ", snap min: %" PRIu64 ", snap max: %" PRIu64 ", snapshot count: %u" - ", commit_timestamp: %s" - ", durable_timestamp: %s" - ", first_commit_timestamp: %s" - ", prepare_timestamp: %s" - ", pinned_durable_timestamp: %s" - ", read_timestamp: %s" - ", checkpoint LSN: [%" PRIu32 "][%" PRIu32 "]" - ", full checkpoint: %s" - ", rollback reason: %s" - ", flags: 0x%08" PRIx32 ", isolation: %s", - txn->id, txn->mod_count, txn->snap_min, txn->snap_max, txn->snapshot_count, - __wt_timestamp_to_string(txn->commit_timestamp, ts_string[0]), - __wt_timestamp_to_string(txn->durable_timestamp, ts_string[1]), - __wt_timestamp_to_string(txn->first_commit_timestamp, ts_string[2]), - __wt_timestamp_to_string(txn->prepare_timestamp, ts_string[3]), - __wt_timestamp_to_string(txn_shared->pinned_durable_timestamp, ts_string[4]), - __wt_timestamp_to_string(txn_shared->read_timestamp, ts_string[5]), txn->ckpt_lsn.l.file, - txn->ckpt_lsn.l.offset, txn->full_ckpt ? "true" : "false", - txn->rollback_reason == NULL ? 
"" : txn->rollback_reason, txn->flags, iso_tag)); + WT_RET( + __wt_snprintf(buf, sizeof(buf), + "transaction id: %" PRIu64 ", mod count: %u" + ", snap min: %" PRIu64 ", snap max: %" PRIu64 ", snapshot count: %u" + ", commit_timestamp: %s" + ", durable_timestamp: %s" + ", first_commit_timestamp: %s" + ", prepare_timestamp: %s" + ", pinned_durable_timestamp: %s" + ", read_timestamp: %s" + ", checkpoint LSN: [%" PRIu32 "][%" PRIu32 "]" + ", full checkpoint: %s" + ", rollback reason: %s" + ", flags: 0x%08" PRIx32 ", isolation: %s", + txn->id, txn->mod_count, txn->snap_min, txn->snap_max, txn->snapshot_count, + __wt_timestamp_to_string(txn->commit_timestamp, ts_string[0]), + __wt_timestamp_to_string(txn->durable_timestamp, ts_string[1]), + __wt_timestamp_to_string(txn->first_commit_timestamp, ts_string[2]), + __wt_timestamp_to_string(txn->prepare_timestamp, ts_string[3]), + __wt_timestamp_to_string(txn_shared->pinned_durable_timestamp, ts_string[4]), + __wt_timestamp_to_string(txn_shared->read_timestamp, ts_string[5]), txn->ckpt_lsn.l.file, + txn->ckpt_lsn.l.offset, txn->full_ckpt ? "true" : "false", + txn->rollback_reason == NULL ? "" : txn->rollback_reason, txn->flags, iso_tag)); /* * Log a message and return an error if error code and an optional error string has been passed. @@ -2258,13 +2299,14 @@ __wt_verbose_dump_update(WT_SESSION_IMPL *session, WT_UPDATE *upd) break; } - __wt_errx(session, "transaction id: %" PRIu64 - ", commit timestamp: %s" - ", durable timestamp: %s" - ", has next: %s" - ", size: %" PRIu32 - ", type: %s" - ", prepare state: %s", + __wt_errx(session, + "transaction id: %" PRIu64 + ", commit timestamp: %s" + ", durable timestamp: %s" + ", has next: %s" + ", size: %" PRIu32 + ", type: %s" + ", prepare state: %s", upd->txnid, __wt_timestamp_to_string(upd->start_ts, ts_string[0]), __wt_timestamp_to_string(upd->durable_ts, ts_string[1]), upd->next == NULL ? 
"no" : "yes", upd->size, upd_type, prepare_state); diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c index 9b3d1bb51fb..59a10c36733 100644 --- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c +++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c @@ -8,7 +8,7 @@ #include "wt_internal.h" -static void __checkpoint_timing_stress(WT_SESSION_IMPL *, bool); +static void __checkpoint_timing_stress(WT_SESSION_IMPL *, uint64_t, struct timespec *); static int __checkpoint_lock_dirty_tree(WT_SESSION_IMPL *, bool, bool, bool, const char *[]); static int __checkpoint_mark_skip(WT_SESSION_IMPL *, WT_CKPT *, bool); static int __checkpoint_presync(WT_SESSION_IMPL *, const char *[]); @@ -137,9 +137,7 @@ __checkpoint_apply_operation( } if (v.len != 0) - WT_ERR_MSG(session, EINVAL, - "invalid checkpoint target %.*s: URIs may require " - "quoting", + WT_ERR_MSG(session, EINVAL, "invalid checkpoint target %.*s: URIs may require quoting", (int)cval.len, (char *)cval.str); /* Some objects don't support named checkpoints. */ @@ -517,6 +515,7 @@ __checkpoint_fail_reset(WT_SESSION_IMPL *session) static int __checkpoint_prepare(WT_SESSION_IMPL *session, bool *trackingp, const char *cfg[]) { + struct timespec tsp; WT_CONFIG_ITEM cval; WT_CONNECTION_IMPL *conn; WT_DECL_RET; @@ -546,6 +545,10 @@ __checkpoint_prepare(WT_SESSION_IMPL *session, bool *trackingp, const char *cfg[ __wt_epoch(session, &conn->ckpt_prep_start); WT_RET(__wt_txn_begin(session, txn_cfg)); + /* Wait 1000 microseconds to simulate slowdown in checkpoint prepare. */ + tsp.tv_sec = 0; + tsp.tv_nsec = WT_MILLION; + __checkpoint_timing_stress(session, WT_TIMING_STRESS_PREPARE_CHECKPOINT_DELAY, &tsp); original_snap_min = session->txn->snap_min; WT_DIAGNOSTIC_YIELD; @@ -585,7 +588,8 @@ __checkpoint_prepare(WT_SESSION_IMPL *session, bool *trackingp, const char *cfg[ /* * Sanity check that the oldest ID hasn't moved on before we have cleared our entry. 
*/ - WT_ASSERT(session, WT_TXNID_LE(txn_global->oldest_id, txn_shared->id) && + WT_ASSERT(session, + WT_TXNID_LE(txn_global->oldest_id, txn_shared->id) && WT_TXNID_LE(txn_global->oldest_id, txn_shared->pinned_id)); /* @@ -748,6 +752,7 @@ __txn_checkpoint_can_skip( static int __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) { + struct timespec tsp; WT_CACHE *cache; WT_CONNECTION_IMPL *conn; WT_DATA_HANDLE *hs_dhandle; @@ -870,11 +875,14 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) if (full && logging) WT_ERR(__wt_txn_checkpoint_log(session, full, WT_TXN_LOG_CKPT_START, NULL)); - __checkpoint_timing_stress(session, false); + /* Add a ten second wait to simulate checkpoint slowness. */ + tsp.tv_sec = 10; + tsp.tv_nsec = 0; + __checkpoint_timing_stress(session, WT_TIMING_STRESS_CHECKPOINT_SLOW, &tsp); WT_ERR(__checkpoint_apply_to_dhandles(session, cfg, __checkpoint_tree_helper)); /* Wait prior to checkpointing the history store to simulate checkpoint slowness. */ - __checkpoint_timing_stress(session, true); + __checkpoint_timing_stress(session, WT_TIMING_STRESS_HS_CHECKPOINT_DELAY, &tsp); /* * Get a history store dhandle. 
If the history store file is opened for a special operation this @@ -1129,11 +1137,8 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[], bool waiting) */ #undef WT_CHECKPOINT_SESSION_FLAGS #define WT_CHECKPOINT_SESSION_FLAGS (WT_SESSION_CAN_WAIT | WT_SESSION_IGNORE_CACHE_SIZE) -#undef WT_CHECKPOINT_SESSION_FLAGS_OFF -#define WT_CHECKPOINT_SESSION_FLAGS_OFF (WT_SESSION_HS_CURSOR) - orig_flags = F_MASK(session, WT_CHECKPOINT_SESSION_FLAGS | WT_CHECKPOINT_SESSION_FLAGS_OFF); + orig_flags = F_MASK(session, WT_CHECKPOINT_SESSION_FLAGS); F_SET(session, WT_CHECKPOINT_SESSION_FLAGS); - F_CLR(session, WT_CHECKPOINT_SESSION_FLAGS_OFF); /* * Only one checkpoint can be active at a time, and checkpoints must run in the same order as @@ -1274,9 +1279,8 @@ __checkpoint_lock_dirty_tree_int(WT_SESSION_IMPL *session, bool is_checkpoint, b continue; } WT_RET_MSG(session, EBUSY, - "checkpoint %s blocked by hot backup: it would " - "delete an existing named checkpoint, and such " - "checkpoints cannot be deleted during a hot backup", + "checkpoint %s blocked by hot backup: it would delete an existing named checkpoint, " + "and such checkpoints cannot be deleted during a hot backup", ckpt->name); } /* @@ -1307,8 +1311,9 @@ __checkpoint_lock_dirty_tree_int(WT_SESSION_IMPL *session, bool is_checkpoint, b WT_CKPT_FOREACH (ckptbase, ckpt) { if (!F_ISSET(ckpt, WT_CKPT_DELETE)) continue; - WT_ASSERT(session, !WT_PREFIX_MATCH(ckpt->name, WT_CHECKPOINT) || - conn->hot_backup_start == 0 || ckpt->sec > conn->hot_backup_start); + WT_ASSERT(session, + !WT_PREFIX_MATCH(ckpt->name, WT_CHECKPOINT) || conn->hot_backup_start == 0 || + ckpt->sec > conn->hot_backup_start); /* * We can't delete checkpoints referenced by a cursor. 
WiredTiger checkpoints are * uniquely named and it's OK to have multiple in the system: clear the delete flag for @@ -1445,9 +1450,7 @@ __checkpoint_lock_dirty_tree( else if (WT_STRING_MATCH("to", k.str, k.len)) __drop_to(ckptbase, v.str, v.len); else - WT_ERR_MSG(session, EINVAL, - "unexpected value for checkpoint " - "key: %.*s", + WT_ERR_MSG(session, EINVAL, "unexpected value for checkpoint key: %.*s", (int)k.len, k.str); } WT_ERR_NOTFOUND_OK(ret, false); @@ -1536,8 +1539,8 @@ __checkpoint_mark_skip(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, bool force) name = (ckpt - 1)->name; if (ckpt > ckptbase + 1 && deleted < 2 && (strcmp(name, (ckpt - 2)->name) == 0 || - (WT_PREFIX_MATCH(name, WT_CHECKPOINT) && - WT_PREFIX_MATCH((ckpt - 2)->name, WT_CHECKPOINT)))) { + (WT_PREFIX_MATCH(name, WT_CHECKPOINT) && + WT_PREFIX_MATCH((ckpt - 2)->name, WT_CHECKPOINT)))) { F_SET(btree, WT_BTREE_SKIP_CKPT); /* * If there are potentially extra checkpoints to delete, we set the timer to recheck @@ -1887,7 +1890,7 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final) */ if (btree->modified && !bulk && !__wt_btree_immediately_durable(session) && (S2C(session)->txn_global.has_stable_timestamp || - (!F_ISSET(S2C(session), WT_CONN_FILE_CLOSE_SYNC) && !metadata))) + (!F_ISSET(S2C(session), WT_CONN_FILE_CLOSE_SYNC) && !metadata))) return (__wt_set_return(session, EBUSY)); /* @@ -1915,12 +1918,12 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final) /* * __checkpoint_timing_stress -- - * Optionally add a 10 second delay to a checkpoint to simulate a long running checkpoint for - * debug purposes. The reason for this option is finding operations that can block while waiting - * for a checkpoint to complete. + * Optionally add a delay to a checkpoint to simulate a long running checkpoint for debug + * purposes. The reason for this option is finding operations that can block while waiting for a + * checkpoint to complete. 
*/ static void -__checkpoint_timing_stress(WT_SESSION_IMPL *session, bool history_store_stress) +__checkpoint_timing_stress(WT_SESSION_IMPL *session, uint64_t flag, struct timespec *tsp) { WT_CONNECTION_IMPL *conn; @@ -1931,9 +1934,6 @@ __checkpoint_timing_stress(WT_SESSION_IMPL *session, bool history_store_stress) * the session used is either of the two sessions set aside for internal checkpoints. */ if (conn->ckpt_session != session && conn->meta_ckpt_session != session && - ((FLD_ISSET(conn->timing_stress_flags, WT_TIMING_STRESS_CHECKPOINT_SLOW) && - !history_store_stress) || - (FLD_ISSET(conn->timing_stress_flags, WT_TIMING_STRESS_HS_CHECKPOINT_DELAY) && - history_store_stress))) - __wt_sleep(10, 0); + FLD_ISSET(conn->timing_stress_flags, flag)) + __wt_sleep((uint64_t)tsp->tv_sec, (uint64_t)tsp->tv_nsec / WT_THOUSAND); } diff --git a/src/third_party/wiredtiger/src/txn/txn_log.c b/src/third_party/wiredtiger/src/txn/txn_log.c index d541d8f48d5..b335d4ca3cb 100644 --- a/src/third_party/wiredtiger/src/txn/txn_log.c +++ b/src/third_party/wiredtiger/src/txn/txn_log.c @@ -554,7 +554,7 @@ __wt_txn_checkpoint_log(WT_SESSION_IMPL *session, bool full, uint32_t flags, WT_ */ if (conn->hot_backup_start == 0 && (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_DIRTY) || - FLD_ISSET(conn->log_flags, WT_CONN_LOG_FORCE_DOWNGRADE)) && + FLD_ISSET(conn->log_flags, WT_CONN_LOG_FORCE_DOWNGRADE)) && txn->full_ckpt) __wt_log_ckpt(session, ckpt_lsn); diff --git a/src/third_party/wiredtiger/src/txn/txn_recover.c b/src/third_party/wiredtiger/src/txn/txn_recover.c index bc60efa5d51..fd230dab529 100644 --- a/src/third_party/wiredtiger/src/txn/txn_recover.c +++ b/src/third_party/wiredtiger/src/txn/txn_recover.c @@ -282,8 +282,9 @@ done: return (0); err: - __wt_err(session, ret, "operation apply failed during recovery: operation type %" PRIu32 - " at LSN %" PRIu32 "/%" PRIu32, + __wt_err(session, ret, + "operation apply failed during recovery: operation type %" PRIu32 " at LSN %" PRIu32 + 
"/%" PRIu32, optype, lsnp->l.file, lsnp->l.offset); return (ret); } @@ -477,9 +478,8 @@ __recovery_setup_file(WT_RECOVERY *r, const char *uri, const char *config) if (r->files[fileid].uri != NULL) WT_RET_PANIC(r->session, WT_PANIC, - "metadata corruption: files %s and %s have the same " - "file ID %u", - uri, r->files[fileid].uri, fileid); + "metadata corruption: files %s and %s have the same file ID %u", uri, + r->files[fileid].uri, fileid); WT_RET(__wt_strdup(r->session, uri, &r->files[fileid].uri)); WT_RET(__wt_config_getones(r->session, config, "checkpoint_lsn", &cval)); /* If there is no checkpoint logged for the file, apply everything. */ @@ -851,7 +851,8 @@ done: * written. The rollback to stable operation should only rollback the latest page changes * solely based on the write generation numbers. */ - WT_ASSERT(session, conn->txn_global.has_stable_timestamp == false && + WT_ASSERT(session, + conn->txn_global.has_stable_timestamp == false && conn->txn_global.stable_timestamp == WT_TS_NONE); /* @@ -900,7 +901,7 @@ err: if (eviction_started) WT_TRET(__wt_evict_destroy(session)); - WT_TRET(session->iface.close(&session->iface, NULL)); + WT_TRET(__wt_session_close_internal(session)); F_CLR(conn, WT_CONN_RECOVERING); return (ret); diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c index 2c97ddf48c7..65ae870b8fe 100644 --- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c +++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c @@ -35,7 +35,8 @@ __rollback_abort_newer_update(WT_SESSION_IMPL *session, WT_UPDATE *first_upd, * is not configured for key consistency check, the timestamps could be out of order * here. 
*/ - WT_ASSERT(session, !FLD_ISSET(S2BT(session)->assert_flags, WT_ASSERT_COMMIT_TS_KEYS) || + WT_ASSERT(session, + !FLD_ISSET(S2BT(session)->assert_flags, WT_ASSERT_COMMIT_TS_KEYS) || upd == first_upd); first_upd = upd->next; @@ -165,7 +166,7 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW WT_UPDATE *hs_upd, *tombstone, *upd; wt_timestamp_t hs_durable_ts, hs_start_ts, hs_stop_durable_ts, newer_hs_durable_ts; uint64_t hs_counter, type_full; - uint32_t hs_btree_id, session_flags; + uint32_t hs_btree_id; uint8_t type; int cmp; char ts_string[4][WT_TS_INT_STRING_SIZE]; @@ -178,7 +179,6 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW hs_upd = tombstone = upd = NULL; hs_durable_ts = hs_start_ts = hs_stop_durable_ts = WT_TS_NONE; hs_btree_id = S2BT(session)->id; - session_flags = 0; WT_CLEAR(full_value); valid_update_found = false; #ifdef HAVE_DIAGNOSTIC @@ -200,7 +200,7 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW newer_hs_durable_ts = unpack->tw.durable_start_ts; /* Open a history store table cursor. */ - WT_ERR(__wt_hs_cursor_open(session, &session_flags)); + WT_ERR(__wt_hs_cursor_open(session)); hs_cursor = session->hs_cursor; cbt = (WT_CURSOR_BTREE *)hs_cursor; @@ -263,8 +263,9 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW * records newer than or equal to the onpage value if eviction runs concurrently with * checkpoint. In that case, don't verify the first record. 
*/ - WT_ASSERT(session, hs_stop_durable_ts <= newer_hs_durable_ts || - hs_start_ts == hs_stop_durable_ts || first_record); + WT_ASSERT(session, + hs_stop_durable_ts <= newer_hs_durable_ts || hs_start_ts == hs_stop_durable_ts || + first_record); if (hs_stop_durable_ts < newer_hs_durable_ts) WT_STAT_CONN_INCR(session, txn_rts_hs_stop_older_than_newer_start); @@ -286,8 +287,8 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW /* Stop processing when we find a stable update according to the given timestamp. */ if (hs_durable_ts <= rollback_timestamp) { __wt_verbose(session, WT_VERB_RTS, - "history store update valid with start timestamp: %s, durable timestamp: %s, " - "stop timestamp: %s and stable timestamp: %s", + "history store update valid with start timestamp: %s, durable timestamp: %s, stop " + "timestamp: %s and stable timestamp: %s", __wt_timestamp_to_string(hs_start_ts, ts_string[0]), __wt_timestamp_to_string(hs_durable_ts, ts_string[1]), __wt_timestamp_to_string(hs_stop_durable_ts, ts_string[2]), @@ -331,8 +332,9 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW upd->txnid = cbt->upd_value->tw.start_txn; upd->durable_ts = cbt->upd_value->tw.durable_start_ts; upd->start_ts = cbt->upd_value->tw.start_ts; - __wt_verbose(session, WT_VERB_RTS, "update restored from history store (txnid: %" PRIu64 - ", start_ts: %s, durable_ts: %s", + __wt_verbose(session, WT_VERB_RTS, + "update restored from history store (txnid: %" PRIu64 + ", start_ts: %s, durable_ts: %s", upd->txnid, __wt_timestamp_to_string(upd->start_ts, ts_string[0]), __wt_timestamp_to_string(upd->durable_ts, ts_string[1])); @@ -395,7 +397,7 @@ err: __wt_scr_free(session, &hs_value); __wt_scr_free(session, &key); __wt_buf_free(session, &full_value); - WT_TRET(__wt_hs_cursor_close(session, session_flags)); + WT_TRET(__wt_hs_cursor_close(session)); return (ret); } @@ -421,10 +423,15 @@ __rollback_abort_row_ondisk_kv( 
__wt_row_leaf_value_cell(session, page, rip, NULL, vpack); prepared = vpack->tw.prepare; if (WT_IS_HS(S2BT(session))) { - if (vpack->tw.durable_stop_ts > rollback_timestamp) { + /* + * Abort the history store update with stop durable timestamp greater than the stable + * timestamp or the updates with max stop timestamp which implies that they are associated + * with prepared transactions. + */ + if (vpack->tw.durable_stop_ts > rollback_timestamp || vpack->tw.stop_ts == WT_TS_MAX) { __wt_verbose(session, WT_VERB_RTS, - "hs update aborted with start durable/commit timestamp: %s, %s, " - "stop durable/commit timestamp: %s, %s and stable timestamp: %s", + "hs update aborted with start durable/commit timestamp: %s, %s, stop durable/commit " + "timestamp: %s, %s and stable timestamp: %s", __wt_timestamp_to_string(vpack->tw.durable_start_ts, ts_string[0]), __wt_timestamp_to_string(vpack->tw.start_ts, ts_string[1]), __wt_timestamp_to_string(vpack->tw.durable_stop_ts, ts_string[2]), @@ -595,11 +602,11 @@ __rollback_abort_row_reconciled_page( if (mod->rec_result == WT_PM_REC_REPLACE && (mod->mod_replace.ta.newest_start_durable_ts > rollback_timestamp || - mod->mod_replace.ta.newest_stop_durable_ts > rollback_timestamp || - mod->mod_replace.ta.prepare)) { + mod->mod_replace.ta.newest_stop_durable_ts > rollback_timestamp || + mod->mod_replace.ta.prepare)) { __wt_verbose(session, WT_VERB_RTS, - "reconciled replace block page history store update removal on-disk with start " - "durable timestamp: %s, stop durable timestamp: %s and stable timestamp: %s", + "reconciled replace block page history store update removal on-disk with start durable " + "timestamp: %s, stop durable timestamp: %s and stable timestamp: %s", __wt_timestamp_to_string(mod->mod_replace.ta.newest_start_durable_ts, ts_string[0]), __wt_timestamp_to_string(mod->mod_replace.ta.newest_stop_durable_ts, ts_string[1]), __wt_timestamp_to_string(rollback_timestamp, ts_string[2])); @@ -622,9 +629,8 @@ 
__rollback_abort_row_reconciled_page( multi->addr.ta.newest_stop_durable_ts > rollback_timestamp || multi->addr.ta.prepare) { __wt_verbose(session, WT_VERB_RTS, - "reconciled multi block page history store update removal on-disk with " - "start durable timestamp: %s, stop durable timestamp: %s and stable " - "timestamp: %s", + "reconciled multi block page history store update removal on-disk with start " + "durable timestamp: %s, stop durable timestamp: %s and stable timestamp: %s", __wt_timestamp_to_string(multi->addr.ta.newest_start_durable_ts, ts_string[0]), __wt_timestamp_to_string(multi->addr.ta.newest_stop_durable_ts, ts_string[1]), __wt_timestamp_to_string(rollback_timestamp, ts_string[2])); @@ -696,6 +702,24 @@ __rollback_abort_newer_row_leaf( } /* + * __rollback_get_ref_max_durable_timestamp -- + * Returns the ref aggregated max durable timestamp. The max durable timestamp is calculated + * between both start and stop durable timestamps except for history store, because most of the + * history store updates have stop timestamp either greater or equal to the start timestamp + * except for the updates written for the prepared updates on the data store. To abort the + * updates with no stop timestamp, we must include the newest stop timestamp also into the + * calculation of maximum durable timestamp of the history store. + */ +static wt_timestamp_t +__rollback_get_ref_max_durable_timestamp(WT_SESSION_IMPL *session, WT_TIME_AGGREGATE *ta) +{ + if (WT_IS_HS(S2BT(session))) + return WT_MAX(ta->newest_stop_durable_ts, ta->newest_stop_ts); + else + return WT_MAX(ta->newest_start_durable_ts, ta->newest_stop_durable_ts); +} + +/* * __rollback_page_needs_abort -- * Check whether the page needs rollback. Return true if the page has modifications newer than * the given timestamp Otherwise return false. 
@@ -730,16 +754,15 @@ __rollback_page_needs_abort( */ if (mod != NULL && mod->rec_result == WT_PM_REC_REPLACE) { tag = "reconciled replace block"; - durable_ts = WT_MAX( - mod->mod_replace.ta.newest_start_durable_ts, mod->mod_replace.ta.newest_stop_durable_ts); + durable_ts = __rollback_get_ref_max_durable_timestamp(session, &mod->mod_replace.ta); prepared = mod->mod_replace.ta.prepare; result = (durable_ts > rollback_timestamp) || prepared; } else if (mod != NULL && mod->rec_result == WT_PM_REC_MULTIBLOCK) { tag = "reconciled multi block"; /* Calculate the max durable timestamp by traversing all multi addresses. */ for (multi = mod->mod_multi, i = 0; i < mod->mod_multi_entries; ++multi, ++i) { - durable_ts = WT_MAX(durable_ts, multi->addr.ta.newest_start_durable_ts); - durable_ts = WT_MAX(durable_ts, multi->addr.ta.newest_stop_durable_ts); + durable_ts = WT_MAX( + durable_ts, __rollback_get_ref_max_durable_timestamp(session, &multi->addr.ta)); if (multi->addr.ta.prepare) prepared = true; } @@ -748,12 +771,12 @@ __rollback_page_needs_abort( tag = "on page cell"; /* Check if the page is obsolete using the page disk address. 
*/ __wt_cell_unpack_addr(session, ref->home->dsk, (WT_CELL *)addr, &vpack); - durable_ts = WT_MAX(vpack.ta.newest_start_durable_ts, vpack.ta.newest_stop_durable_ts); + durable_ts = __rollback_get_ref_max_durable_timestamp(session, &vpack.ta); prepared = vpack.ta.prepare; result = (durable_ts > rollback_timestamp) || prepared; } else if (addr != NULL) { tag = "address"; - durable_ts = WT_MAX(addr->ta.newest_start_durable_ts, addr->ta.newest_stop_durable_ts); + durable_ts = __rollback_get_ref_max_durable_timestamp(session, &addr->ta); prepared = addr->ta.prepare; result = (durable_ts > rollback_timestamp) || prepared; } @@ -987,19 +1010,18 @@ __rollback_to_stable_btree_hs_truncate(WT_SESSION_IMPL *session, uint32_t btree_ WT_UPDATE *hs_upd; wt_timestamp_t hs_start_ts; uint64_t hs_counter; - uint32_t hs_btree_id, session_flags; + uint32_t hs_btree_id; int exact; char ts_string[WT_TS_INT_STRING_SIZE]; hs_cursor = NULL; WT_CLEAR(key); hs_upd = NULL; - session_flags = 0; WT_RET(__wt_scr_alloc(session, 0, &hs_key)); /* Open a history store table cursor. 
*/ - WT_ERR(__wt_hs_cursor_open(session, &session_flags)); + WT_ERR(__wt_hs_cursor_open(session)); hs_cursor = session->hs_cursor; cbt = (WT_CURSOR_BTREE *)hs_cursor; @@ -1048,7 +1070,7 @@ __rollback_to_stable_btree_hs_truncate(WT_SESSION_IMPL *session, uint32_t btree_ err: __wt_scr_free(session, &hs_key); __wt_free(session, hs_upd); - WT_TRET(__wt_hs_cursor_close(session, session_flags)); + WT_TRET(__wt_hs_cursor_close(session)); return (ret); } @@ -1064,7 +1086,7 @@ __rollback_to_stable_hs_final_pass(WT_SESSION_IMPL *session, wt_timestamp_t roll WT_CONFIG ckptconf; WT_CONFIG_ITEM cval, durableval, key; WT_DECL_RET; - wt_timestamp_t max_durable_ts, newest_start_durable_ts, newest_stop_durable_ts; + wt_timestamp_t max_durable_ts, newest_stop_durable_ts, newest_stop_ts; char *config; char ts_string[2][WT_TS_INT_STRING_SIZE]; @@ -1072,22 +1094,27 @@ __rollback_to_stable_hs_final_pass(WT_SESSION_IMPL *session, wt_timestamp_t roll WT_RET(__wt_metadata_search(session, WT_HS_URI, &config)); - /* Find out the max durable timestamp of the object from checkpoint. */ - newest_start_durable_ts = newest_stop_durable_ts = WT_TS_NONE; + /* + * Find out the max durable timestamp of the history store from checkpoint. Most of the history + * store updates have stop timestamp either greater or equal to the start timestamp except for + * the updates written for the prepared updates on the data store. To abort the updates with no + * stop timestamp, we must include the newest stop timestamp also into the calculation of + * maximum timestamp of the history store. 
+ */ + newest_stop_durable_ts = newest_stop_ts = WT_TS_NONE; WT_ERR(__wt_config_getones(session, config, "checkpoint", &cval)); __wt_config_subinit(session, &ckptconf, &cval); for (; __wt_config_next(&ckptconf, &key, &cval) == 0;) { - ret = __wt_config_subgets(session, &cval, "newest_start_durable_ts", &durableval); - if (ret == 0) - newest_start_durable_ts = - WT_MAX(newest_start_durable_ts, (wt_timestamp_t)durableval.val); - WT_ERR_NOTFOUND_OK(ret, false); ret = __wt_config_subgets(session, &cval, "newest_stop_durable_ts", &durableval); if (ret == 0) newest_stop_durable_ts = WT_MAX(newest_stop_durable_ts, (wt_timestamp_t)durableval.val); WT_ERR_NOTFOUND_OK(ret, false); + ret = __wt_config_subgets(session, &cval, "newest_stop_ts", &durableval); + if (ret == 0) + newest_stop_ts = WT_MAX(newest_stop_ts, (wt_timestamp_t)durableval.val); + WT_ERR_NOTFOUND_OK(ret, false); } - max_durable_ts = WT_MAX(newest_start_durable_ts, newest_stop_durable_ts); + max_durable_ts = WT_MAX(newest_stop_ts, newest_stop_durable_ts); WT_ERR(__wt_session_get_dhandle(session, WT_HS_URI, NULL, NULL, 0)); /* @@ -1299,7 +1326,7 @@ __wt_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[], bool no_ckp */ if (!F_ISSET(S2C(session), WT_CONN_IN_MEMORY) && !no_ckpt) WT_TRET(session->iface.checkpoint(&session->iface, "force=1")); - WT_TRET(session->iface.close(&session->iface, NULL)); + WT_TRET(__wt_session_close_internal(session)); return (ret); } diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c index ba311cc93f0..c631b597f4d 100644 --- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c +++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c @@ -422,8 +422,7 @@ __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) if (has_durable && (has_oldest || txn_global->has_oldest_timestamp) && oldest_ts > durable_ts) { __wt_readunlock(session, &txn_global->rwlock); WT_RET_MSG(session, EINVAL, - 
"set_timestamp: oldest timestamp %s must not be later than " - "durable timestamp %s", + "set_timestamp: oldest timestamp %s must not be later than durable timestamp %s", __wt_timestamp_to_string(oldest_ts, ts_string[0]), __wt_timestamp_to_string(durable_ts, ts_string[1])); } @@ -431,8 +430,7 @@ __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) if (has_durable && (has_stable || txn_global->has_stable_timestamp) && stable_ts > durable_ts) { __wt_readunlock(session, &txn_global->rwlock); WT_RET_MSG(session, EINVAL, - "set_timestamp: stable timestamp %s must not be later than " - "durable timestamp %s", + "set_timestamp: stable timestamp %s must not be later than durable timestamp %s", __wt_timestamp_to_string(stable_ts, ts_string[0]), __wt_timestamp_to_string(durable_ts, ts_string[1])); } @@ -444,8 +442,7 @@ __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) (has_stable || txn_global->has_stable_timestamp) && oldest_ts > stable_ts) { __wt_readunlock(session, &txn_global->rwlock); WT_RET_MSG(session, EINVAL, - "set_timestamp: oldest timestamp %s must not be later than " - "stable timestamp %s", + "set_timestamp: oldest timestamp %s must not be later than stable timestamp %s", __wt_timestamp_to_string(oldest_ts, ts_string[0]), __wt_timestamp_to_string(stable_ts, ts_string[1])); } @@ -537,9 +534,8 @@ __txn_assert_after_reads( if (tmp_timestamp >= ts) { __wt_readunlock(session, &txn_global->read_timestamp_rwlock); WT_RET_MSG(session, EINVAL, - "%s timestamp %s must be greater than the " - "latest active read timestamp %s ", - op, __wt_timestamp_to_string(ts, ts_string[0]), + "%s timestamp %s must be greater than the latest active read timestamp %s ", op, + __wt_timestamp_to_string(ts, ts_string[0]), __wt_timestamp_to_string(tmp_timestamp, ts_string[1])); } break; @@ -583,9 +579,7 @@ __wt_txn_set_commit_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t commit_ts if (txn->isolation != WT_ISO_SNAPSHOT) WT_RET_MSG(session, 
EINVAL, - "setting a commit_timestamp" - " requires a transaction running at snapshot" - " isolation"); + "setting a commit_timestamp requires a transaction running at snapshot isolation"); /* * Compare against the oldest and the stable timestamp. Return an error if the given timestamp @@ -604,16 +598,12 @@ __wt_txn_set_commit_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t commit_ts * timestamp. */ if (has_oldest_ts && commit_ts < oldest_ts) - WT_RET_MSG(session, EINVAL, - "commit timestamp %s is less than the oldest " - "timestamp %s", + WT_RET_MSG(session, EINVAL, "commit timestamp %s is less than the oldest timestamp %s", __wt_timestamp_to_string(commit_ts, ts_string[0]), __wt_timestamp_to_string(oldest_ts, ts_string[1])); if (has_stable_ts && commit_ts < stable_ts) - WT_RET_MSG(session, EINVAL, - "commit timestamp %s is less than the stable " - "timestamp %s", + WT_RET_MSG(session, EINVAL, "commit timestamp %s is less than the stable timestamp %s", __wt_timestamp_to_string(commit_ts, ts_string[0]), __wt_timestamp_to_string(stable_ts, ts_string[1])); @@ -623,8 +613,7 @@ __wt_txn_set_commit_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t commit_ts */ if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) && commit_ts < txn->first_commit_timestamp) WT_RET_MSG(session, EINVAL, - "commit timestamp %s older than the first " - "commit timestamp %s for this transaction", + "commit timestamp %s older than the first commit timestamp %s for this transaction", __wt_timestamp_to_string(commit_ts, ts_string[0]), __wt_timestamp_to_string(txn->first_commit_timestamp, ts_string[1])); @@ -641,8 +630,7 @@ __wt_txn_set_commit_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t commit_ts if (txn->prepare_timestamp > commit_ts) { if (!F_ISSET(txn, WT_TXN_TS_ROUND_PREPARED)) WT_RET_MSG(session, EINVAL, - "commit timestamp %s is less than the " - "prepare timestamp %s for this transaction", + "commit timestamp %s is less than the prepare timestamp %s for this transaction", 
__wt_timestamp_to_string(commit_ts, ts_string[0]), __wt_timestamp_to_string(txn->prepare_timestamp, ts_string[1])); commit_ts = txn->prepare_timestamp; @@ -691,8 +679,7 @@ __wt_txn_set_durable_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t durable_ if (!F_ISSET(txn, WT_TXN_PREPARE)) WT_RET_MSG(session, EINVAL, - "durable timestamp should not be specified for " - "non-prepared transaction"); + "durable timestamp should not be specified for non-prepared transaction"); if (!F_ISSET(txn, WT_TXN_HAS_TS_COMMIT)) WT_RET_MSG(session, EINVAL, "commit timestamp is needed before the durable timestamp"); @@ -725,8 +712,7 @@ __wt_txn_set_durable_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t durable_ /* Check if the durable timestamp is less than the commit timestamp. */ if (durable_ts < txn->commit_timestamp) WT_RET_MSG(session, EINVAL, - "durable timestamp %s is less than the commit timestamp %s " - "for this transaction", + "durable timestamp %s is less than the commit timestamp %s for this transaction", __wt_timestamp_to_string(durable_ts, ts_string[0]), __wt_timestamp_to_string(txn->commit_timestamp, ts_string[1])); @@ -760,8 +746,7 @@ __wt_txn_set_prepare_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t prepare_ if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT)) WT_RET_MSG(session, EINVAL, - "commit timestamp " - "should not have been set before the prepare timestamp"); + "commit timestamp should not have been set before the prepare timestamp"); WT_RET(__txn_assert_after_reads(session, "prepare", prepare_ts, &prev_shared)); @@ -781,16 +766,14 @@ __wt_txn_set_prepare_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t prepare_ WT_ASSERT(session, prev_shared == NULL); __wt_verbose(session, WT_VERB_TIMESTAMP, - "prepare timestamp %s rounded to oldest " - "timestamp %s", + "prepare timestamp %s rounded to oldest timestamp %s", __wt_timestamp_to_string(prepare_ts, ts_string[0]), __wt_timestamp_to_string(oldest_ts, ts_string[1])); prepare_ts = oldest_ts; } else 
WT_RET_MSG(session, EINVAL, - "prepare timestamp %s is older than the oldest " - "timestamp %s", + "prepare timestamp %s is older than the oldest timestamp %s", __wt_timestamp_to_string(prepare_ts, ts_string[0]), __wt_timestamp_to_string(oldest_ts, ts_string[1])); } @@ -825,15 +808,11 @@ __wt_txn_set_read_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t read_ts) txn->isolation = WT_ISO_SNAPSHOT; else if (txn->isolation != WT_ISO_SNAPSHOT) WT_RET_MSG(session, EINVAL, - "setting a read_timestamp" - " requires a transaction running at snapshot" - " isolation"); + "setting a read_timestamp requires a transaction running at snapshot isolation"); /* Read timestamps can't change once set. */ if (F_ISSET(txn, WT_TXN_SHARED_TS_READ)) - WT_RET_MSG(session, EINVAL, - "a read_timestamp" - " may only be set once per transaction"); + WT_RET_MSG(session, EINVAL, "a read_timestamp may only be set once per transaction"); /* * This code is not using the timestamp validate function to avoid a race between checking and @@ -860,9 +839,7 @@ __wt_txn_set_read_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t read_ts) * error message because that logs a MongoDB error, use an informational message to * provide the context instead. */ - WT_RET(__wt_msg(session, - "read timestamp " - "%s less than the oldest timestamp %s", + WT_RET(__wt_msg(session, "read timestamp %s less than the oldest timestamp %s", __wt_timestamp_to_string(read_ts, ts_string[0]), __wt_timestamp_to_string(ts_oldest, ts_string[1]))); return (EINVAL); @@ -878,8 +855,7 @@ __wt_txn_set_read_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t read_ts) */ if (did_roundup_to_oldest) __wt_verbose(session, WT_VERB_TIMESTAMP, - "read " - "timestamp %s : rounded to oldest timestamp %s", + "read timestamp %s : rounded to oldest timestamp %s", __wt_timestamp_to_string(read_ts, ts_string[0]), __wt_timestamp_to_string(ts_oldest, ts_string[1])); |