diff options
author | Louis Williams <louis.williams@mongodb.com> | 2019-06-11 13:23:46 -0400 |
---|---|---|
committer | Louis Williams <louis.williams@mongodb.com> | 2019-06-11 13:23:46 -0400 |
commit | c23da6ac95bf5f687b6d21f2cc86f4775be88cd5 (patch) | |
tree | 01ddfa412a5cddf83a46eae66d387370fe4053b6 /src | |
parent | 01e68d5ac26a64b5b2d3a836428c07e3874488bf (diff) | |
download | mongo-c23da6ac95bf5f687b6d21f2cc86f4775be88cd5.tar.gz |
SERVER-41525 Use ignore_prepare=force for transactions that correctly do writes while ignoring prepare conflicts
Diffstat (limited to 'src')
21 files changed, 187 insertions, 103 deletions
diff --git a/src/mongo/db/commands/create_indexes.cpp b/src/mongo/db/commands/create_indexes.cpp index 5ae98cbddf8..01c7ccdce92 100644 --- a/src/mongo/db/commands/create_indexes.cpp +++ b/src/mongo/db/commands/create_indexes.cpp @@ -440,9 +440,10 @@ bool runCreateIndexes(OperationContext* opCtx, boost::optional<Lock::CollectionLock> exclusiveCollectionLock( boost::in_place_init, opCtx, ns, MODE_X); - // Index builds can safely ignore prepare conflicts. On primaries, an exclusive lock in the - // final drain phase conflicts with prepared transactions. - opCtx->recoveryUnit()->setIgnorePrepared(true); + // Index builds can safely ignore prepare conflicts and perform writes. On primaries, an + // exclusive lock in the final drain phase conflicts with prepared transactions. + opCtx->recoveryUnit()->setPrepareConflictBehavior( + PrepareConflictBehavior::kIgnoreConflictsAllowWrites); auto collection = getOrCreateCollection(opCtx, db, ns, cmdObj, &errmsg, &result); diff --git a/src/mongo/db/commands/dbhash.cpp b/src/mongo/db/commands/dbhash.cpp index 7e861b43abb..7dc15854506 100644 --- a/src/mongo/db/commands/dbhash.cpp +++ b/src/mongo/db/commands/dbhash.cpp @@ -184,8 +184,9 @@ public: targetClusterTime); // The $_internalReadAtClusterTime option also causes any storage-layer cursors created - // during plan execution to block on prepared transactions. - opCtx->recoveryUnit()->setIgnorePrepared(false); + // during plan execution to block on prepared transactions. Since the dbhash command + // ignores prepare conflicts by default, change the behavior. + opCtx->recoveryUnit()->setPrepareConflictBehavior(PrepareConflictBehavior::kEnforce); } // We lock the entire database in S-mode in order to ensure that the contents will not diff --git a/src/mongo/db/commands/find_cmd.cpp b/src/mongo/db/commands/find_cmd.cpp index a3d0b8ccff1..47e08334bf9 100644 --- a/src/mongo/db/commands/find_cmd.cpp +++ b/src/mongo/db/commands/find_cmd.cpp @@ -358,8 +358,10 @@ public: targetClusterTime); // The $_internalReadAtClusterTime option also causes any storage-layer cursors - // created during plan execution to block on prepared transactions. - opCtx->recoveryUnit()->setIgnorePrepared(false); + // created during plan execution to block on prepared transactions. Since the find + // command ignores prepare conflicts by default, change the behavior. + opCtx->recoveryUnit()->setPrepareConflictBehavior( + PrepareConflictBehavior::kEnforce); } // Acquire locks. If the query is on a view, we release our locks and convert the query diff --git a/src/mongo/db/commands/getmore_cmd.cpp b/src/mongo/db/commands/getmore_cmd.cpp index 3e0fadf592e..83d2c3c5dcc 100644 --- a/src/mongo/db/commands/getmore_cmd.cpp +++ b/src/mongo/db/commands/getmore_cmd.cpp @@ -373,8 +373,10 @@ public: RecoveryUnit::ReadSource::kProvided, clusterTime); // The $_internalReadAtClusterTime option also causes any storage-layer cursors - // created during plan execution to block on prepared transactions. - opCtx->recoveryUnit()->setIgnorePrepared(false); + // created during plan execution to block on prepared transactions. Since the + // getMore command ignores prepare conflicts by default, change the behavior. + opCtx->recoveryUnit()->setPrepareConflictBehavior( + PrepareConflictBehavior::kEnforce); } } if (cursorPin->lockPolicy() == ClientCursorParams::LockPolicy::kLocksInternally) { diff --git a/src/mongo/db/commands/run_aggregate.cpp b/src/mongo/db/commands/run_aggregate.cpp index b866688ce21..c5c5f3a8ef6 100644 --- a/src/mongo/db/commands/run_aggregate.cpp +++ b/src/mongo/db/commands/run_aggregate.cpp @@ -420,8 +420,8 @@ void _adjustChangeStreamReadConcern(OperationContext* opCtx) { } // Wait for read concern again since we changed the original read concern. - uassertStatusOK( - waitForReadConcern(opCtx, readConcernArgs, true, PrepareConflictBehavior::kIgnore)); + uassertStatusOK(waitForReadConcern( + opCtx, readConcernArgs, true, PrepareConflictBehavior::kIgnoreConflicts)); } /** diff --git a/src/mongo/db/db_raii.h b/src/mongo/db/db_raii.h index c8ff0b6bef1..f5497c6f3a1 100644 --- a/src/mongo/db/db_raii.h +++ b/src/mongo/db/db_raii.h @@ -230,10 +230,11 @@ LockMode getLockModeForQuery(OperationContext* opCtx, const boost::optional<Name class EnforcePrepareConflictsBlock { public: explicit EnforcePrepareConflictsBlock(OperationContext* opCtx) - : _opCtx(opCtx), _originalValue(opCtx->recoveryUnit()->getIgnorePrepared()) { - // It is illegal to call setIgnorePrepared() while any storage transaction is active. - // setIgnorePrepared() invariants that there is no active storage transaction. - _opCtx->recoveryUnit()->setIgnorePrepared(false); + : _opCtx(opCtx), _originalValue(opCtx->recoveryUnit()->getPrepareConflictBehavior()) { + // It is illegal to call setPrepareConflictBehavior() while any storage transaction is + // active. setPrepareConflictBehavior() invariants that there is no active storage + // transaction. + _opCtx->recoveryUnit()->setPrepareConflictBehavior(PrepareConflictBehavior::kEnforce); } ~EnforcePrepareConflictsBlock() { @@ -246,16 +247,16 @@ public: if (_opCtx->lockState()->isLocked()) { _opCtx->recoveryUnit()->abandonSnapshot(); } - // It is illegal to call setIgnorePrepared() while any storage transaction is active. There - // should not be any active transaction if we are not holding locks. If locks are still - // being held, the above abandonSnapshot() call should have already closed all storage - // transactions. - _opCtx->recoveryUnit()->setIgnorePrepared(_originalValue); + // It is illegal to call setPrepareConflictBehavior() while any storage transaction is + // active. There should not be any active transaction if we are not holding locks. If locks + // are still being held, the above abandonSnapshot() call should have already closed all + // storage transactions. + _opCtx->recoveryUnit()->setPrepareConflictBehavior(_originalValue); } private: OperationContext* _opCtx; - bool _originalValue; + PrepareConflictBehavior _originalValue; }; } // namespace mongo diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp index 51d67a2dbbf..6991f274277 100644 --- a/src/mongo/db/index_builds_coordinator.cpp +++ b/src/mongo/db/index_builds_coordinator.cpp @@ -853,10 +853,11 @@ void IndexBuildsCoordinator::_buildIndex(OperationContext* opCtx, invariant(_indexBuildsManager.isBackgroundBuilding(replState->buildUUID) || storageGlobalParams.engine == "mobile"); - // Index builds can safely ignore prepare conflicts. On secondaries, prepare operations wait for - // index builds to complete. + // Index builds can safely ignore prepare conflicts and perform writes. On secondaries, prepare + // operations wait for index builds to complete. opCtx->recoveryUnit()->abandonSnapshot(); - opCtx->recoveryUnit()->setIgnorePrepared(true); + opCtx->recoveryUnit()->setPrepareConflictBehavior( + PrepareConflictBehavior::kIgnoreConflictsAllowWrites); // Collection scan and insert into index, followed by a drain of writes received in the // background. diff --git a/src/mongo/db/read_concern.h b/src/mongo/db/read_concern.h index 8e8ca0b3575..7bd7594e143 100644 --- a/src/mongo/db/read_concern.h +++ b/src/mongo/db/read_concern.h @@ -38,19 +38,12 @@ class OperationContext; class Status; template <typename T> class StatusWith; +enum class PrepareConflictBehavior; namespace repl { class ReadConcernArgs; class SpeculativeMajorityReadInfo; } -enum class PrepareConflictBehavior { - /* When prepare conflicts are encountered, block until the conflict is resolved. */ - kEnforce, - /* Ignore prepare conflicts when they are encountered. This should only be enabled for - * operations than only perform reads. */ - kIgnore -}; - /** * Given the specified read concern arguments, performs checks that the read concern can actually be * satisfied given the current state of the server and if so calls into the replication subsystem to diff --git a/src/mongo/db/read_concern_mongod.cpp b/src/mongo/db/read_concern_mongod.cpp index e362702fb2e..7844f28ebea 100644 --- a/src/mongo/db/read_concern_mongod.cpp +++ b/src/mongo/db/read_concern_mongod.cpp @@ -43,6 +43,7 @@ #include "mongo/db/repl/speculative_majority_read_info.h" #include "mongo/db/s/sharding_state.h" #include "mongo/db/server_options.h" +#include "mongo/db/storage/recovery_unit.h" #include "mongo/s/grid.h" #include "mongo/util/concurrency/notification.h" #include "mongo/util/log.h" @@ -205,25 +206,27 @@ Status makeNoopWriteIfNeeded(OperationContext* opCtx, LogicalTime clusterTime) { } /** - * Returns whether the command should ignore prepare conflicts or not. + * Returns the PrepareConflictBehavior that a command should use given the requested behavior and + * readConcern options. */ -bool shouldIgnorePrepared(PrepareConflictBehavior prepareConflictBehavior, - repl::ReadConcernLevel readConcernLevel, - boost::optional<LogicalTime> afterClusterTime, - boost::optional<LogicalTime> atClusterTime) { +PrepareConflictBehavior getPrepareBehaviorForReadConcern( + PrepareConflictBehavior requestedBehavior, + repl::ReadConcernLevel readConcernLevel, + boost::optional<LogicalTime> afterClusterTime, + boost::optional<LogicalTime> atClusterTime) { // Only these read concern levels are eligible for ignoring prepare conflicts. if (readConcernLevel != repl::ReadConcernLevel::kLocalReadConcern && readConcernLevel != repl::ReadConcernLevel::kAvailableReadConcern && readConcernLevel != repl::ReadConcernLevel::kMajorityReadConcern) { - return false; + return PrepareConflictBehavior::kEnforce; } if (afterClusterTime || atClusterTime) { - return false; + return PrepareConflictBehavior::kEnforce; } - return prepareConflictBehavior == PrepareConflictBehavior::kIgnore; + return requestedBehavior; } } // namespace @@ -367,8 +370,9 @@ MONGO_REGISTER_SHIM(waitForReadConcern) // DBDirectClient should inherit whether or not to ignore prepare conflicts from its parent. if (!opCtx->getClient()->isInDirectClient()) { // Set whether this command should ignore prepare conflicts or not. - opCtx->recoveryUnit()->setIgnorePrepared(shouldIgnorePrepared( - prepareConflictBehavior, readConcernArgs.getLevel(), afterClusterTime, atClusterTime)); + const auto behavior = getPrepareBehaviorForReadConcern( + prepareConflictBehavior, readConcernArgs.getLevel(), afterClusterTime, atClusterTime); + opCtx->recoveryUnit()->setPrepareConflictBehavior(behavior); } return Status::OK(); @@ -394,6 +398,16 @@ MONGO_REGISTER_SHIM(waitForLinearizableReadConcern) "No longer primary when waiting for linearizable read concern"}; } + // With linearizable readConcern, read commands may write to the oplog, which is an + // exception to the rule that writes are not allowed while ignoring prepare conflicts. If we + // are ignoring prepare conflicts (during a read command), force the prepare conflict + // behavior to permit writes. + auto originalBehavior = opCtx->recoveryUnit()->getPrepareConflictBehavior(); + if (originalBehavior == PrepareConflictBehavior::kIgnoreConflicts) { + opCtx->recoveryUnit()->setPrepareConflictBehavior( + PrepareConflictBehavior::kIgnoreConflictsAllowWrites); + } + writeConflictRetry( opCtx, "waitForLinearizableReadConcern", diff --git a/src/mongo/db/service_entry_point_mongod.cpp b/src/mongo/db/service_entry_point_mongod.cpp index ab04138036f..09c60294c18 100644 --- a/src/mongo/db/service_entry_point_mongod.cpp +++ b/src/mongo/db/service_entry_point_mongod.cpp @@ -68,7 +68,7 @@ public: const CommandInvocation* invocation, const OpMsgRequest& request) const override { const auto prepareConflictBehavior = invocation->canIgnorePrepareConflicts() - ? PrepareConflictBehavior::kIgnore + ? PrepareConflictBehavior::kIgnoreConflicts : PrepareConflictBehavior::kEnforce; Status rcStatus = mongo::waitForReadConcern(opCtx, diff --git a/src/mongo/db/storage/recovery_unit.h b/src/mongo/db/storage/recovery_unit.h index bbdac76d109..33c202aeb12 100644 --- a/src/mongo/db/storage/recovery_unit.h +++ b/src/mongo/db/storage/recovery_unit.h @@ -44,6 +44,37 @@ class BSONObjBuilder; class OperationContext; /** + * The PrepareConflictBehavior specifies how operations should behave when encountering prepare + * conflicts. + */ +enum class PrepareConflictBehavior { + /** + * When prepare conflicts are encountered, block until the conflict is resolved. + */ + kEnforce, + + /** + * Ignore prepare conflicts when they are encountered. + * + * When a prepared update is encountered, the previous version of a record will be returned. + * This behavior can result in reading different versions of a record within the same snapshot + * if the prepared update is committed during that snapshot. For this reason, operations that + * ignore prepared updates may only perform reads. This is to prevent updating a record based on + * an older version of itself, because a write conflict will not be generated in this scenario. + */ + kIgnoreConflicts, + + /** + * Ignore prepare conflicts when they are encountered, and allow operations to perform writes, + * an exception to the rule of kIgnoreConflicts. + * + * This should only be used in cases where this is known to be impossible to perform writes + * based on other prepared updates. + */ + kIgnoreConflictsAllowWrites +}; + +/** * Storage statistics management class, with interfaces to provide the statistics in the BSON format * and an operator to add the statistics values. */ @@ -125,18 +156,17 @@ public: } /** - * Sets whether or not to ignore prepared transactions if supported by this storage engine. When - * 'ignore' is true, allows reading data from before prepared transactions, but will not show - * prepared data. This may not be called while a transaction is already open. + * Sets the behavior of handling conflicts that are encountered due to prepared transactions, if + * supported by this storage engine. See PrepareConflictBehavior. */ - virtual void setIgnorePrepared(bool ignore) {} + virtual void setPrepareConflictBehavior(PrepareConflictBehavior behavior) {} /** - * Returns whether or not we are ignoring prepared conflicts. Defaults to false if prepared - * transactions are not supported by this storage engine. + * Returns the behavior of handling conflicts that are encountered due to prepared transactions. + * Defaults to kEnforce if prepared transactions are not supported by this storage engine. */ - virtual bool getIgnorePrepared() const { - return false; + virtual PrepareConflictBehavior getPrepareConflictBehavior() const { + return PrepareConflictBehavior::kEnforce; } /** diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_begin_transaction_block.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_begin_transaction_block.cpp index 2e51a57db4c..dd3396770bf 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_begin_transaction_block.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_begin_transaction_block.cpp @@ -35,6 +35,7 @@ #include <fmt/format.h> #include "mongo/db/storage/wiredtiger/wiredtiger_begin_transaction_block.h" + #include "mongo/db/storage/wiredtiger/wiredtiger_util.h" #include "mongo/util/errno_util.h" #include "mongo/util/log.h" @@ -44,15 +45,17 @@ using namespace fmt::literals; WiredTigerBeginTxnBlock::WiredTigerBeginTxnBlock( WT_SESSION* session, - IgnorePrepared ignorePrepare, + PrepareConflictBehavior prepareConflictBehavior, RoundUpPreparedTimestamps roundUpPreparedTimestamps, RoundUpReadTimestamp roundUpReadTimestamp) : _session(session) { invariant(!_rollback); str::stream builder; - if (ignorePrepare == IgnorePrepared::kIgnore) { + if (prepareConflictBehavior == PrepareConflictBehavior::kIgnoreConflicts) { builder << "ignore_prepare=true,"; + } else if (prepareConflictBehavior == PrepareConflictBehavior::kIgnoreConflictsAllowWrites) { + builder << "ignore_prepare=force,"; } if (roundUpPreparedTimestamps == RoundUpPreparedTimestamps::kRound || roundUpReadTimestamp == RoundUpReadTimestamp::kRound) { diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_begin_transaction_block.h b/src/mongo/db/storage/wiredtiger/wiredtiger_begin_transaction_block.h index 89fac89b1b0..0ff0ff10897 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_begin_transaction_block.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_begin_transaction_block.h @@ -35,6 +35,7 @@ #include "mongo/base/status.h" #include "mongo/bson/timestamp.h" +#include "mongo/db/storage/recovery_unit.h" namespace mongo { @@ -44,13 +45,6 @@ namespace mongo { */ class WiredTigerBeginTxnBlock { public: - // Whether or not to ignore prepared transactions. - enum class IgnorePrepared { - kNoIgnore, // Enforce prepare conflicts when encountering updates from prepared - // transactions. - kIgnore // Ignore prepare conflicts, but don't show prepared data. - }; - // Whether or not to round up to the oldest timestamp when the read timestamp is behind it. enum class RoundUpReadTimestamp { kNoRound, // Do not round to the oldest timestamp. BadValue error may be returned. @@ -66,7 +60,7 @@ public: WiredTigerBeginTxnBlock( WT_SESSION* session, - IgnorePrepared ignorePrepared, + PrepareConflictBehavior prepareConflictBehavior, RoundUpPreparedTimestamps roundUpPreparedTimestamps, RoundUpReadTimestamp roundUpReadTimestamp = RoundUpReadTimestamp::kNoRound); WiredTigerBeginTxnBlock(WT_SESSION* session, const char* config); diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_begin_transaction_block_bm.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_begin_transaction_block_bm.cpp index 23504054675..2a33a5ca8f4 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_begin_transaction_block_bm.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_begin_transaction_block_bm.cpp @@ -124,11 +124,11 @@ void BM_WiredTigerBeginTxnBlock(benchmark::State& state) { using mongo::WiredTigerBeginTxnBlock; -template <IgnorePrepared ignore, RoundUpPreparedTimestamps round> +template <PrepareConflictBehavior behavior, RoundUpPreparedTimestamps round> void BM_WiredTigerBeginTxnBlockWithArgs(benchmark::State& state) { WiredTigerTestHelper helper; for (auto _ : state) { - WiredTigerBeginTxnBlock beginTxn(helper.wtSession(), ignore, round); + WiredTigerBeginTxnBlock beginTxn(helper.wtSession(), behavior, round); } } @@ -143,16 +143,22 @@ void BM_setTimestamp(benchmark::State& state) { BENCHMARK(BM_WiredTigerBeginTxnBlock); BENCHMARK_TEMPLATE(BM_WiredTigerBeginTxnBlockWithArgs, - IgnorePrepared::kNoIgnore, + PrepareConflictBehavior::kEnforce, RoundUpPreparedTimestamps::kNoRound); BENCHMARK_TEMPLATE(BM_WiredTigerBeginTxnBlockWithArgs, - IgnorePrepared::kNoIgnore, + PrepareConflictBehavior::kEnforce, RoundUpPreparedTimestamps::kRound); BENCHMARK_TEMPLATE(BM_WiredTigerBeginTxnBlockWithArgs, - IgnorePrepared::kIgnore, + PrepareConflictBehavior::kIgnoreConflicts, RoundUpPreparedTimestamps::kNoRound); BENCHMARK_TEMPLATE(BM_WiredTigerBeginTxnBlockWithArgs, - IgnorePrepared::kIgnore, + PrepareConflictBehavior::kIgnoreConflicts, + RoundUpPreparedTimestamps::kRound); +BENCHMARK_TEMPLATE(BM_WiredTigerBeginTxnBlockWithArgs, + PrepareConflictBehavior::kIgnoreConflictsAllowWrites, + RoundUpPreparedTimestamps::kNoRound); +BENCHMARK_TEMPLATE(BM_WiredTigerBeginTxnBlockWithArgs, + PrepareConflictBehavior::kIgnoreConflictsAllowWrites, RoundUpPreparedTimestamps::kRound); BENCHMARK(BM_setTimestamp); diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp index 9d681eb45f3..0bfe56fa5d9 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp @@ -524,7 +524,8 @@ void WiredTigerRecoveryUnit::_txnOpen() { if (_isOplogReader) { _oplogVisibleTs = static_cast<std::int64_t>(_oplogManager->getOplogReadTimestamp()); } - WiredTigerBeginTxnBlock(session, _ignorePrepared, _roundUpPreparedTimestamps).done(); + WiredTigerBeginTxnBlock(session, _prepareConflictBehavior, _roundUpPreparedTimestamps) + .done(); break; } case ReadSource::kMajorityCommitted: { @@ -532,15 +533,16 @@ void WiredTigerRecoveryUnit::_txnOpen() { // transaction was started. _majorityCommittedSnapshot = _sessionCache->snapshotManager().beginTransactionOnCommittedSnapshot( - session, _ignorePrepared, _roundUpPreparedTimestamps); + session, _prepareConflictBehavior, _roundUpPreparedTimestamps); break; } case ReadSource::kLastApplied: { if (_sessionCache->snapshotManager().getLocalSnapshot()) { _readAtTimestamp = _sessionCache->snapshotManager().beginTransactionOnLocalSnapshot( - session, _ignorePrepared, _roundUpPreparedTimestamps); + session, _prepareConflictBehavior, _roundUpPreparedTimestamps); } else { - WiredTigerBeginTxnBlock(session, _ignorePrepared, _roundUpPreparedTimestamps) + WiredTigerBeginTxnBlock( + session, _prepareConflictBehavior, _roundUpPreparedTimestamps) .done(); } break; @@ -557,7 +559,8 @@ void WiredTigerRecoveryUnit::_txnOpen() { // Intentionally continue to the next case to read at the _readAtTimestamp. } case ReadSource::kProvided: { - WiredTigerBeginTxnBlock txnOpen(session, _ignorePrepared, _roundUpPreparedTimestamps); + WiredTigerBeginTxnBlock txnOpen( + session, _prepareConflictBehavior, _roundUpPreparedTimestamps); auto status = txnOpen.setReadSnapshot(_readAtTimestamp); if (!status.isOK() && status.code() == ErrorCodes::BadValue) { @@ -575,8 +578,10 @@ void WiredTigerRecoveryUnit::_txnOpen() { } Timestamp WiredTigerRecoveryUnit::_beginTransactionAtAllCommittedTimestamp(WT_SESSION* session) { - WiredTigerBeginTxnBlock txnOpen( - session, _ignorePrepared, _roundUpPreparedTimestamps, RoundUpReadTimestamp::kRound); + WiredTigerBeginTxnBlock txnOpen(session, + _prepareConflictBehavior, + _roundUpPreparedTimestamps, + RoundUpReadTimestamp::kRound); Timestamp txnTimestamp = Timestamp(_oplogManager->fetchAllCommittedValue(session->connection)); auto status = txnOpen.setReadSnapshot(txnTimestamp); fassert(50948, status); @@ -620,8 +625,10 @@ Timestamp WiredTigerRecoveryUnit::_beginTransactionAtNoOverlapTimestamp(WT_SESSI // should read afterward. Timestamp readTimestamp = (lastApplied) ? std::min(*lastApplied, allCommitted) : allCommitted; - WiredTigerBeginTxnBlock txnOpen( - session, _ignorePrepared, _roundUpPreparedTimestamps, RoundUpReadTimestamp::kRound); + WiredTigerBeginTxnBlock txnOpen(session, + _prepareConflictBehavior, + _roundUpPreparedTimestamps, + RoundUpReadTimestamp::kRound); auto status = txnOpen.setReadSnapshot(readTimestamp); fassert(51066, status); @@ -751,21 +758,20 @@ Timestamp WiredTigerRecoveryUnit::getPrepareTimestamp() const { return _prepareTimestamp; } -void WiredTigerRecoveryUnit::setIgnorePrepared(bool value) { - auto newValue = (value) ? IgnorePrepared::kIgnore : IgnorePrepared::kNoIgnore; - +void WiredTigerRecoveryUnit::setPrepareConflictBehavior(PrepareConflictBehavior behavior) { // If there is an open storage transaction, it is not valid to try to change the behavior of // ignoring prepare conflicts, since that behavior is applied when the transaction is opened. - invariant(!_isActive(), - str::stream() << "Current state: " << toString(_state) - << ". Invalid internal state while setting ignore_prepare to: " - << value); + invariant( + !_isActive(), + str::stream() << "Current state: " << toString(_state) + << ". Invalid internal state while setting prepare conflict behavior to: " + << static_cast<int>(behavior)); - _ignorePrepared = newValue; + _prepareConflictBehavior = behavior; } -bool WiredTigerRecoveryUnit::getIgnorePrepared() const { - return _ignorePrepared == IgnorePrepared::kIgnore; +PrepareConflictBehavior WiredTigerRecoveryUnit::getPrepareConflictBehavior() const { + return _prepareConflictBehavior; } void WiredTigerRecoveryUnit::setRoundUpPreparedTimestamps(bool value) { diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h index aecbd30f69e..f3e6b31162b 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h @@ -48,7 +48,6 @@ namespace mongo { -using IgnorePrepared = WiredTigerBeginTxnBlock::IgnorePrepared; using RoundUpPreparedTimestamps = WiredTigerBeginTxnBlock::RoundUpPreparedTimestamps; using RoundUpReadTimestamp = WiredTigerBeginTxnBlock::RoundUpReadTimestamp; @@ -139,9 +138,9 @@ public: Timestamp getPrepareTimestamp() const override; - void setIgnorePrepared(bool ignore) override; + void setPrepareConflictBehavior(PrepareConflictBehavior behavior) override; - bool getIgnorePrepared() const override; + PrepareConflictBehavior getPrepareConflictBehavior() const override; void setRoundUpPreparedTimestamps(bool value) override; @@ -310,9 +309,8 @@ private: // When 'true', data read from disk should not be kept in the storage engine cache. bool _readOnce = false; - // If set to kIgnore, updates from prepared transactions will not return prepare conflicts and - // will not allow seeing prepared data. - IgnorePrepared _ignorePrepared{IgnorePrepared::kNoIgnore}; + // The behavior of handling prepare conflicts. + PrepareConflictBehavior _prepareConflictBehavior{PrepareConflictBehavior::kEnforce}; // Dictates whether to round up prepare and commit timestamp of a prepared transaction. RoundUpPreparedTimestamps _roundUpPreparedTimestamps{RoundUpPreparedTimestamps::kNoRound}; Timestamp _commitTimestamp; diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp index 6d4a5468b4b..5769add2cb1 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp @@ -253,7 +253,7 @@ TEST_F(WiredTigerRecoveryUnitTestFixture, // A transaction that chooses to ignore prepare conflicts does not see the record instead of // returning a prepare conflict. ru2->beginUnitOfWork(clientAndCtx2.second.get()); - ru2->setIgnorePrepared(true); + ru2->setPrepareConflictBehavior(PrepareConflictBehavior::kIgnoreConflicts); getCursor(ru2, &cursor); cursor->set_key(cursor, "key"); int ret = cursor->search(cursor); @@ -263,6 +263,38 @@ TEST_F(WiredTigerRecoveryUnitTestFixture, ru2->abortUnitOfWork(); } +TEST_F(WiredTigerRecoveryUnitTestFixture, WriteAllowedWhileIgnorePrepareFalse) { + // Prepare but don't commit a transaction + ru1->beginUnitOfWork(clientAndCtx1.second.get()); + WT_CURSOR* cursor; + getCursor(ru1, &cursor); + cursor->set_key(cursor, "key1"); + cursor->set_value(cursor, "value1"); + invariantWTOK(cursor->insert(cursor)); + ru1->setPrepareTimestamp({1, 1}); + ru1->prepareUnitOfWork(); + + // A transaction that chooses to ignore prepare conflicts with kIgnoreConflictsAllowWrites does + // not see the record + ru2->beginUnitOfWork(clientAndCtx2.second.get()); + ru2->setPrepareConflictBehavior(PrepareConflictBehavior::kIgnoreConflictsAllowWrites); + + // The prepared write is not visible. + getCursor(ru2, &cursor); + cursor->set_key(cursor, "key1"); + ASSERT_EQ(WT_NOTFOUND, cursor->search(cursor)); + + getCursor(ru2, &cursor); + cursor->set_key(cursor, "key2"); + cursor->set_value(cursor, "value2"); + + // The write is allowed. + invariantWTOK(cursor->insert(cursor)); + + ru1->abortUnitOfWork(); + ru2->abortUnitOfWork(); +} + TEST_F(WiredTigerRecoveryUnitTestFixture, WriteOnADocumentBeingPreparedTriggersWTRollback) { // Prepare but don't commit a transaction ru1->beginUnitOfWork(clientAndCtx1.second.get()); diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.cpp index ad653b84095..32299a71648 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.cpp @@ -74,9 +74,9 @@ boost::optional<Timestamp> WiredTigerSnapshotManager::getMinSnapshotForNextCommi Timestamp WiredTigerSnapshotManager::beginTransactionOnCommittedSnapshot( WT_SESSION* session, - IgnorePrepared ignorePrepared, + PrepareConflictBehavior prepareConflictBehavior, RoundUpPreparedTimestamps roundUpPreparedTimestamps) const { - WiredTigerBeginTxnBlock txnOpen(session, ignorePrepared, roundUpPreparedTimestamps); + WiredTigerBeginTxnBlock txnOpen(session, prepareConflictBehavior, roundUpPreparedTimestamps); stdx::lock_guard<stdx::mutex> lock(_committedSnapshotMutex); uassert(ErrorCodes::ReadConcernMajorityNotAvailableYet, @@ -92,9 +92,9 @@ Timestamp WiredTigerSnapshotManager::beginTransactionOnCommittedSnapshot( Timestamp WiredTigerSnapshotManager::beginTransactionOnLocalSnapshot( WT_SESSION* session, - IgnorePrepared ignorePrepared, + PrepareConflictBehavior prepareConflictBehavior, RoundUpPreparedTimestamps roundUpPreparedTimestamps) const { - WiredTigerBeginTxnBlock txnOpen(session, ignorePrepared, roundUpPreparedTimestamps); + WiredTigerBeginTxnBlock txnOpen(session, prepareConflictBehavior, roundUpPreparedTimestamps); stdx::lock_guard<stdx::mutex> lock(_localSnapshotMutex); invariant(_localSnapshot); diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.h b/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.h index c75a3085df6..c5f2fc17651 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.h @@ -39,7 +39,6 @@ namespace mongo { -using IgnorePrepared = WiredTigerBeginTxnBlock::IgnorePrepared; using RoundUpPreparedTimestamps = WiredTigerBeginTxnBlock::RoundUpPreparedTimestamps; class WiredTigerOplogManager; @@ -67,7 +66,7 @@ public: */ Timestamp beginTransactionOnCommittedSnapshot( WT_SESSION* session, - IgnorePrepared ignorePrepared, + PrepareConflictBehavior prepareConflictBehavior, RoundUpPreparedTimestamps roundUpPreparedTimestamps) const; /** @@ -77,7 +76,7 @@ public: */ Timestamp beginTransactionOnLocalSnapshot( WT_SESSION* session, - IgnorePrepared ignorePrepared, + PrepareConflictBehavior prepareConflictBehavior, RoundUpPreparedTimestamps roundUpPreparedTimestamps) const; /** diff --git a/src/mongo/dbtests/storage_timestamp_tests.cpp b/src/mongo/dbtests/storage_timestamp_tests.cpp index b232f1c4d59..12571d08513 100644 --- a/src/mongo/dbtests/storage_timestamp_tests.cpp +++ b/src/mongo/dbtests/storage_timestamp_tests.cpp @@ -112,12 +112,13 @@ class IgnorePrepareBlock { public: IgnorePrepareBlock(OperationContext* opCtx) : _opCtx(opCtx) { _opCtx->recoveryUnit()->abandonSnapshot(); - _opCtx->recoveryUnit()->setIgnorePrepared(true); + _opCtx->recoveryUnit()->setPrepareConflictBehavior( + PrepareConflictBehavior::kIgnoreConflicts); } ~IgnorePrepareBlock() { _opCtx->recoveryUnit()->abandonSnapshot(); - _opCtx->recoveryUnit()->setIgnorePrepared(false); + _opCtx->recoveryUnit()->setPrepareConflictBehavior(PrepareConflictBehavior::kEnforce); } private: diff --git a/src/mongo/embedded/service_entry_point_embedded.cpp b/src/mongo/embedded/service_entry_point_embedded.cpp index dbf8d03ef4a..d8d3f55c236 100644 --- a/src/mongo/embedded/service_entry_point_embedded.cpp +++ b/src/mongo/embedded/service_entry_point_embedded.cpp @@ -51,7 +51,7 @@ public: const CommandInvocation* invocation, const OpMsgRequest& request) const override { const auto prepareConflictBehavior = invocation->canIgnorePrepareConflicts() - ? PrepareConflictBehavior::kIgnore + ? PrepareConflictBehavior::kIgnoreConflicts : PrepareConflictBehavior::kEnforce; auto rcStatus = mongo::waitForReadConcern(opCtx, repl::ReadConcernArgs::get(opCtx), |