summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Louis Williams <louis.williams@mongodb.com> 2019-03-14 12:34:23 -0400
committer: Louis Williams <louis.williams@mongodb.com> 2019-03-19 17:03:40 -0400
commit: 197233a97c2a8859b82ba1ffeac97ba2719f6470 (patch)
tree: 56d7bfb37a6957c27bc7ee13c062086742a59fa9 /src
parent: 81d045a17ddf08e9f0eb7f30f96b45a352fab5cc (diff)
download: mongo-197233a97c2a8859b82ba1ffeac97ba2719f6470.tar.gz
SERVER-39074 All operations enforce prepare conflicts by default
Prepare conflicts may only be safely ignored when a command can guarantee it does not perform writes. Prepare conflicts are ignored when the read concern is local, available, or majority and the command is aggregate, count, distinct, find, getMore, or group. Aggregate is a special case because it may perform writes to an output collection, but it enables prepare conflict enforcement before doing so. Additionally, connections from a DBDirectClient inherit the ignore_prepare state from their parent operation.
Diffstat (limited to 'src')
-rw-r--r-- src/mongo/db/commands/dbhash.cpp 4
-rw-r--r-- src/mongo/db/commands/find_cmd.cpp 4
-rw-r--r-- src/mongo/db/commands/getmore_cmd.cpp 4
-rw-r--r-- src/mongo/db/commands/run_aggregate.cpp 3
-rw-r--r-- src/mongo/db/pipeline/document_source_out.cpp 1
-rw-r--r-- src/mongo/db/pipeline/document_source_out.h 17
-rw-r--r-- src/mongo/db/pipeline/document_source_out_in_place.h 2
-rw-r--r-- src/mongo/db/pipeline/document_source_out_replace_coll.cpp 6
-rw-r--r-- src/mongo/db/read_concern.h 7
-rw-r--r-- src/mongo/db/read_concern_mongod.cpp 50
-rw-r--r-- src/mongo/db/repl/replication_recovery.cpp 4
-rw-r--r-- src/mongo/db/repl/sync_tail.cpp 4
-rw-r--r-- src/mongo/db/s/migration_source_manager.cpp 2
-rw-r--r-- src/mongo/db/service_entry_point_mongod.cpp 6
-rw-r--r-- src/mongo/db/storage/recovery_unit.h 12
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_begin_transaction_block.h 6
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp 17
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h 8
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp 8
-rw-r--r-- src/mongo/dbtests/storage_timestamp_tests.cpp 28
-rw-r--r-- src/mongo/embedded/read_concern_embedded.cpp 5
-rw-r--r-- src/mongo/embedded/service_entry_point_embedded.cpp 6
22 files changed, 145 insertions, 59 deletions
diff --git a/src/mongo/db/commands/dbhash.cpp b/src/mongo/db/commands/dbhash.cpp
index 6a2241b478c..9c4e0905694 100644
--- a/src/mongo/db/commands/dbhash.cpp
+++ b/src/mongo/db/commands/dbhash.cpp
@@ -168,10 +168,6 @@ public:
// clusterTime, even across yields.
opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kProvided,
targetClusterTime);
-
- // The $_internalReadAtClusterTime option also causes any storage-layer cursors created
- // during plan execution to block on prepared transactions.
- opCtx->recoveryUnit()->setIgnorePrepared(false);
}
// We lock the entire database in S-mode in order to ensure that the contents will not
diff --git a/src/mongo/db/commands/find_cmd.cpp b/src/mongo/db/commands/find_cmd.cpp
index 03f8b605a6f..ddde7ac8972 100644
--- a/src/mongo/db/commands/find_cmd.cpp
+++ b/src/mongo/db/commands/find_cmd.cpp
@@ -325,10 +325,6 @@ public:
// clusterTime, even across yields.
opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kProvided,
targetClusterTime);
-
- // The $_internalReadAtClusterTime option also causes any storage-layer cursors
- // created during plan execution to block on prepared transactions.
- opCtx->recoveryUnit()->setIgnorePrepared(false);
}
// Acquire locks. If the query is on a view, we release our locks and convert the query
diff --git a/src/mongo/db/commands/getmore_cmd.cpp b/src/mongo/db/commands/getmore_cmd.cpp
index dda5bbcc29e..3fbad104b55 100644
--- a/src/mongo/db/commands/getmore_cmd.cpp
+++ b/src/mongo/db/commands/getmore_cmd.cpp
@@ -329,10 +329,6 @@ public:
// the supplied clusterTime, even across yields.
opCtx->recoveryUnit()->setTimestampReadSource(
RecoveryUnit::ReadSource::kProvided, clusterTime);
-
- // The $_internalReadAtClusterTime option also causes any storage-layer cursors
- // created during plan execution to block on prepared transactions.
- opCtx->recoveryUnit()->setIgnorePrepared(false);
}
}
if (cursorPin->lockPolicy() == ClientCursorParams::LockPolicy::kLocksInternally) {
diff --git a/src/mongo/db/commands/run_aggregate.cpp b/src/mongo/db/commands/run_aggregate.cpp
index 8b7d7fd7863..3bd009146ad 100644
--- a/src/mongo/db/commands/run_aggregate.cpp
+++ b/src/mongo/db/commands/run_aggregate.cpp
@@ -381,8 +381,9 @@ void _adjustChangeStreamReadConcern(OperationContext* opCtx) {
repl::ReadConcernArgs::MajorityReadMechanism::kSpeculative);
}
}
+
// Wait for read concern again since we changed the original read concern.
- uassertStatusOK(waitForReadConcern(opCtx, readConcernArgs, true));
+ uassertStatusOK(waitForReadConcern(opCtx, readConcernArgs, true, "aggregate"));
}
} // namespace
diff --git a/src/mongo/db/pipeline/document_source_out.cpp b/src/mongo/db/pipeline/document_source_out.cpp
index 5557790a86b..e9b2d15df5e 100644
--- a/src/mongo/db/pipeline/document_source_out.cpp
+++ b/src/mongo/db/pipeline/document_source_out.cpp
@@ -305,7 +305,6 @@ intrusive_ptr<DocumentSourceOut> DocumentSourceOut::create(
// during lite parsing, we need to do it here as well in case mongos is stale or the command is
// sent directly to the shard.
if (mode == WriteModeEnum::kModeReplaceCollection) {
- LocalReadConcernBlock readLocal(expCtx->opCtx);
uassert(17017,
str::stream() << "$out with mode " << WriteMode_serializer(mode)
<< " is not supported to an existing *sharded* output collection.",
diff --git a/src/mongo/db/pipeline/document_source_out.h b/src/mongo/db/pipeline/document_source_out.h
index d4694a681e2..8f313213983 100644
--- a/src/mongo/db/pipeline/document_source_out.h
+++ b/src/mongo/db/pipeline/document_source_out.h
@@ -38,26 +38,31 @@ namespace mongo {
/**
* Manipulates the state of the OperationContext so that while this object is in scope, reads and
- * writes will use a local read concern and see the latest version of the data. Resets the
- * OperationContext back to its original state upon destruction.
+ * writes will use a local read concern and see the latest version of the data. It will also reset
+ * ignore_prepared on the recovery unit so that any reads or writes will block on a conflict with a
+ * prepared transaction. Resets the OperationContext back to its original state upon destruction.
*/
-class LocalReadConcernBlock {
+class OutStageWriteBlock {
OperationContext* _opCtx;
repl::ReadConcernArgs _originalArgs;
RecoveryUnit::ReadSource _originalSource;
+ bool _originalIgnorePrepared;
public:
- LocalReadConcernBlock(OperationContext* opCtx) : _opCtx(opCtx) {
+ OutStageWriteBlock(OperationContext* opCtx) : _opCtx(opCtx) {
_originalArgs = repl::ReadConcernArgs::get(_opCtx);
_originalSource = _opCtx->recoveryUnit()->getTimestampReadSource();
+ _originalIgnorePrepared = _opCtx->recoveryUnit()->getIgnorePrepared();
repl::ReadConcernArgs::get(_opCtx) = repl::ReadConcernArgs();
_opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::kUnset);
+ _opCtx->recoveryUnit()->setIgnorePrepared(false);
}
- ~LocalReadConcernBlock() {
+ ~OutStageWriteBlock() {
repl::ReadConcernArgs::get(_opCtx) = _originalArgs;
_opCtx->recoveryUnit()->setTimestampReadSource(_originalSource);
+ _opCtx->recoveryUnit()->setIgnorePrepared(_originalIgnorePrepared);
}
};
@@ -217,7 +222,7 @@ public:
* Writes the documents in 'batch' to the write namespace.
*/
virtual void spill(BatchedObjects&& batch) {
- LocalReadConcernBlock readLocal(pExpCtx->opCtx);
+ OutStageWriteBlock writeBlock(pExpCtx->opCtx);
pExpCtx->mongoProcessInterface->insert(
pExpCtx, getWriteNs(), std::move(batch.objects), _writeConcern, _targetEpoch());
diff --git a/src/mongo/db/pipeline/document_source_out_in_place.h b/src/mongo/db/pipeline/document_source_out_in_place.h
index 0bcdab19b4e..f791ad99e3d 100644
--- a/src/mongo/db/pipeline/document_source_out_in_place.h
+++ b/src/mongo/db/pipeline/document_source_out_in_place.h
@@ -68,7 +68,7 @@ public:
constexpr auto upsert = true;
constexpr auto multi = false;
try {
- LocalReadConcernBlock readLocal(pExpCtx->opCtx);
+ OutStageWriteBlock writeBlock(pExpCtx->opCtx);
pExpCtx->mongoProcessInterface->update(pExpCtx,
getWriteNs(),
diff --git a/src/mongo/db/pipeline/document_source_out_replace_coll.cpp b/src/mongo/db/pipeline/document_source_out_replace_coll.cpp
index 700c4de6b44..64d75beac37 100644
--- a/src/mongo/db/pipeline/document_source_out_replace_coll.cpp
+++ b/src/mongo/db/pipeline/document_source_out_replace_coll.cpp
@@ -50,7 +50,7 @@ DocumentSourceOutReplaceColl::~DocumentSourceOutReplaceColl() {
// not affect the dropCollection operation below.
auto cleanupOpCtx = cc().makeOperationContext();
- LocalReadConcernBlock readLocal(cleanupOpCtx.get());
+ OutStageWriteBlock writeBlock(cleanupOpCtx.get());
// Reset the operation context back to original once dropCollection is done.
ON_BLOCK_EXIT(
@@ -62,7 +62,7 @@ DocumentSourceOutReplaceColl::~DocumentSourceOutReplaceColl() {
}
void DocumentSourceOutReplaceColl::initializeWriteNs() {
- LocalReadConcernBlock readLocal(pExpCtx->opCtx);
+ OutStageWriteBlock writeBlock(pExpCtx->opCtx);
DBClientBase* conn = pExpCtx->mongoProcessInterface->directClient();
@@ -123,7 +123,7 @@ void DocumentSourceOutReplaceColl::initializeWriteNs() {
};
void DocumentSourceOutReplaceColl::finalize() {
- LocalReadConcernBlock readLocal(pExpCtx->opCtx);
+ OutStageWriteBlock writeBlock(pExpCtx->opCtx);
const auto& outputNs = getOutputNs();
auto renameCommandObj =
diff --git a/src/mongo/db/read_concern.h b/src/mongo/db/read_concern.h
index 2fd42beb71f..5f66a46f26c 100644
--- a/src/mongo/db/read_concern.h
+++ b/src/mongo/db/read_concern.h
@@ -43,16 +43,17 @@ class ReadConcernArgs;
class SpeculativeMajorityReadInfo;
}
-
/**
* Given the specified read concern arguments, performs checks that the read concern can actually be
* satisfied given the current state of the server and if so calls into the replication subsystem to
* perform the wait. If allowAfterClusterTime is false returns an error if afterClusterTime is
- * set on the readConcernArgs.
+ * set on the readConcernArgs. Both cmdName and readConcernArgs are used to determine whether or not
+ * prepare conflicts can be ignored.
*/
extern MONGO_DECLARE_SHIM((OperationContext * opCtx,
const repl::ReadConcernArgs& readConcernArgs,
- bool allowAfterClusterTime)
+ bool allowAfterClusterTime,
+ StringData cmdName)
->Status) waitForReadConcern;
/*
diff --git a/src/mongo/db/read_concern_mongod.cpp b/src/mongo/db/read_concern_mongod.cpp
index c961956ec48..97a93de9788 100644
--- a/src/mongo/db/read_concern_mongod.cpp
+++ b/src/mongo/db/read_concern_mongod.cpp
@@ -197,10 +197,45 @@ Status makeNoopWriteIfNeeded(OperationContext* opCtx, LogicalTime clusterTime) {
}
return Status::OK();
}
+
+// These commands are known to only perform reads, and therefore may be able to safely ignore
+// prepare conflicts. The exception is aggregate, which may do writes to an output collection, but
+// it enables enforcement of prepare conflicts before performing writes.
+static const stdx::unordered_set<std::string> ignorePrepareCommandWhitelist = {
+ "aggregate", "count", "distinct", "find", "getMore", "group"};
+
+/**
+ * Returns whether the command should ignore prepare conflicts or not.
+ */
+bool shouldIgnorePrepared(StringData cmdName,
+ repl::ReadConcernLevel readConcernLevel,
+ boost::optional<LogicalTime> afterClusterTime,
+ boost::optional<LogicalTime> atClusterTime) {
+
+ // Only these read concern levels are eligible for ignoring prepare conflicts.
+ if (readConcernLevel != repl::ReadConcernLevel::kLocalReadConcern &&
+ readConcernLevel != repl::ReadConcernLevel::kAvailableReadConcern &&
+ readConcernLevel != repl::ReadConcernLevel::kMajorityReadConcern) {
+ return false;
+ }
+
+ if (afterClusterTime || atClusterTime) {
+ return false;
+ }
+
+ if (ignorePrepareCommandWhitelist.count(cmdName.toString())) {
+ return true;
+ }
+
+ return false;
+}
} // namespace
MONGO_REGISTER_SHIM(waitForReadConcern)
-(OperationContext* opCtx, const repl::ReadConcernArgs& readConcernArgs, bool allowAfterClusterTime)
+(OperationContext* opCtx,
+ const repl::ReadConcernArgs& readConcernArgs,
+ bool allowAfterClusterTime,
+ StringData cmdName)
->Status {
// If we are in a direct client within a transaction, then we may be holding locks, so it is
// illegal to wait for read concern. This is fine, since the outer operation should have handled
@@ -208,7 +243,6 @@ MONGO_REGISTER_SHIM(waitForReadConcern)
// should block on prepared transactions.
if (opCtx->getClient()->isInDirectClient() &&
readConcernArgs.getLevel() == repl::ReadConcernLevel::kSnapshotReadConcern) {
- opCtx->recoveryUnit()->setIgnorePrepared(false);
return Status::OK();
}
@@ -334,13 +368,11 @@ MONGO_REGISTER_SHIM(waitForReadConcern)
<< " with readTs: " << opCtx->recoveryUnit()->getPointInTimeReadTimestamp();
}
- // Only snapshot, linearizable and afterClusterTime reads should block on prepared transactions.
- if (readConcernArgs.getLevel() != repl::ReadConcernLevel::kSnapshotReadConcern &&
- readConcernArgs.getLevel() != repl::ReadConcernLevel::kLinearizableReadConcern &&
- !afterClusterTime && !atClusterTime) {
- opCtx->recoveryUnit()->setIgnorePrepared(true);
- } else {
- opCtx->recoveryUnit()->setIgnorePrepared(false);
+ // DBDirectClient should inherit whether or not to ignore prepare conflicts from its parent.
+ if (!opCtx->getClient()->isInDirectClient()) {
+ // Set whether this command should ignore prepare conflicts or not.
+ opCtx->recoveryUnit()->setIgnorePrepared(shouldIgnorePrepared(
+ cmdName, readConcernArgs.getLevel(), afterClusterTime, atClusterTime));
}
return Status::OK();
diff --git a/src/mongo/db/repl/replication_recovery.cpp b/src/mongo/db/repl/replication_recovery.cpp
index 0b8f5186de7..2f797d7be53 100644
--- a/src/mongo/db/repl/replication_recovery.cpp
+++ b/src/mongo/db/repl/replication_recovery.cpp
@@ -310,6 +310,10 @@ void ReplicationRecoveryImpl::_reconstructPreparedTransactions(OperationContext*
// Snapshot transaction can never conflict with the PBWM lock.
newOpCtx->lockState()->setShouldConflictWithSecondaryBatchApplication(false);
+ // TODO: SERVER-40177 This should be removed once it is guaranteed operations applied on
+ // recovering nodes cannot encounter unnecessary prepare conflicts.
+ newOpCtx->recoveryUnit()->setIgnorePrepared(true);
+
// Checks out the session, applies the operations and prepares the transactions.
uassertStatusOK(applyRecoveredPrepareTransaction(newOpCtx.get(), prepareOplogEntry));
}
diff --git a/src/mongo/db/repl/sync_tail.cpp b/src/mongo/db/repl/sync_tail.cpp
index b528223341d..20632cf225d 100644
--- a/src/mongo/db/repl/sync_tail.cpp
+++ b/src/mongo/db/repl/sync_tail.cpp
@@ -1108,6 +1108,10 @@ Status multiSyncApply(OperationContext* opCtx,
// Explicitly start future read transactions without a timestamp.
opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
+ // TODO: SERVER-40177 This should be removed once it is guaranteed operations applied on
+ // secondaries cannot encounter unnecessary prepare conflicts.
+ opCtx->recoveryUnit()->setIgnorePrepared(true);
+
ApplierHelpers::stableSortByNamespace(ops);
// Assume we are recovering if oplog writes are disabled in the options.
diff --git a/src/mongo/db/s/migration_source_manager.cpp b/src/mongo/db/s/migration_source_manager.cpp
index a084c80f662..a4138943f9e 100644
--- a/src/mongo/db/s/migration_source_manager.cpp
+++ b/src/mongo/db/s/migration_source_manager.cpp
@@ -279,7 +279,7 @@ Status MigrationSourceManager::startClone(OperationContext* opCtx) {
auto const readConcernArgs = repl::ReadConcernArgs(
replCoord->getMyLastAppliedOpTime(), repl::ReadConcernLevel::kLocalReadConcern);
- uassertStatusOK(waitForReadConcern(opCtx, readConcernArgs, false));
+ uassertStatusOK(waitForReadConcern(opCtx, readConcernArgs, false, "moveChunk"));
}
Status startCloneStatus = _cloneDriver->startClone(opCtx);
diff --git a/src/mongo/db/service_entry_point_mongod.cpp b/src/mongo/db/service_entry_point_mongod.cpp
index 0e870bf80a6..3d7a8097beb 100644
--- a/src/mongo/db/service_entry_point_mongod.cpp
+++ b/src/mongo/db/service_entry_point_mongod.cpp
@@ -67,8 +67,10 @@ public:
void waitForReadConcern(OperationContext* opCtx,
const CommandInvocation* invocation,
const OpMsgRequest& request) const override {
- Status rcStatus = mongo::waitForReadConcern(
- opCtx, repl::ReadConcernArgs::get(opCtx), invocation->allowsAfterClusterTime());
+ Status rcStatus = mongo::waitForReadConcern(opCtx,
+ repl::ReadConcernArgs::get(opCtx),
+ invocation->allowsAfterClusterTime(),
+ request.getCommandName());
if (!rcStatus.isOK()) {
if (ErrorCodes::isExceededTimeLimitError(rcStatus.code())) {
diff --git a/src/mongo/db/storage/recovery_unit.h b/src/mongo/db/storage/recovery_unit.h
index 0bd3d6c080e..ce62e0ba29a 100644
--- a/src/mongo/db/storage/recovery_unit.h
+++ b/src/mongo/db/storage/recovery_unit.h
@@ -124,14 +124,22 @@ public:
"This storage engine does not support prepared transactions");
}
-
/**
* Sets whether or not to ignore prepared transactions if supported by this storage engine. When
- * 'ignore' is true, allows reading data in prepared, but uncommitted transactions.
+ * 'ignore' is true, allows reading data from before prepared transactions, but will not show
+ * prepared data. This may not be called while a transaction is already open.
*/
virtual void setIgnorePrepared(bool ignore) {}
/**
+ * Returns whether or not we are ignoring prepared conflicts. Defaults to false if prepared
+ * transactions are not supported by this storage engine.
+ */
+ virtual bool getIgnorePrepared() const {
+ return false;
+ }
+
+ /**
* Waits until all commits that happened before this call are durable in the journal. Returns
* true, unless the storage engine cannot guarantee durability, which should never happen when
* isDurable() returned true. This cannot be called from inside a unit of work, and should
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_begin_transaction_block.h b/src/mongo/db/storage/wiredtiger/wiredtiger_begin_transaction_block.h
index 5a8ba96e08a..f027b9ba693 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_begin_transaction_block.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_begin_transaction_block.h
@@ -46,7 +46,8 @@ class WiredTigerBeginTxnBlock {
public:
// Whether or not to ignore prepared transactions.
enum class IgnorePrepared {
- kNoIgnore, // Do not ignore prepared transactions and return prepare conflicts.
+ kNoIgnore, // Enforce prepare conflicts when encountering updates from prepared
+ // transactions.
kIgnore // Ignore prepare conflicts, but don't show prepared data.
};
@@ -56,8 +57,7 @@ public:
kRound // Round the read timestamp up to the oldest timestamp when it is behind.
};
- WiredTigerBeginTxnBlock(WT_SESSION* session,
- IgnorePrepared ignorePrepared = IgnorePrepared::kIgnore);
+ WiredTigerBeginTxnBlock(WT_SESSION* session, IgnorePrepared ignorePrepared);
WiredTigerBeginTxnBlock(WT_SESSION* session, const char* config);
~WiredTigerBeginTxnBlock();
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp
index e232d1b22ca..9548b660af1 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp
@@ -750,8 +750,21 @@ Timestamp WiredTigerRecoveryUnit::getPrepareTimestamp() const {
}
void WiredTigerRecoveryUnit::setIgnorePrepared(bool value) {
- _ignorePrepared = (value) ? WiredTigerBeginTxnBlock::IgnorePrepared::kIgnore
- : WiredTigerBeginTxnBlock::IgnorePrepared::kNoIgnore;
+ auto newValue = (value) ? WiredTigerBeginTxnBlock::IgnorePrepared::kIgnore
+ : WiredTigerBeginTxnBlock::IgnorePrepared::kNoIgnore;
+
+ // If there is an open storage transaction, it is not valid to try to change the behavior of
+ // ignoring prepare conflicts, since that behavior is applied when the transaction is opened.
+ invariant(!_isActive(),
+ str::stream() << "Current state: " << toString(_state)
+ << ". Invalid internal state while setting ignore_prepare to: "
+ << value);
+
+ _ignorePrepared = newValue;
+}
+
+bool WiredTigerRecoveryUnit::getIgnorePrepared() const {
+ return _ignorePrepared == WiredTigerBeginTxnBlock::IgnorePrepared::kIgnore;
}
void WiredTigerRecoveryUnit::setTimestampReadSource(ReadSource readSource,
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h
index 2440a79dec3..bd4262cf815 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h
@@ -137,6 +137,8 @@ public:
void setIgnorePrepared(bool ignore) override;
+ bool getIgnorePrepared() const override;
+
void setTimestampReadSource(ReadSource source,
boost::optional<Timestamp> provided = boost::none) override;
@@ -302,10 +304,10 @@ private:
// When 'true', data read from disk should not be kept in the storage engine cache.
bool _readOnce = false;
- // Ignoring prepared transactions will not return prepare conflicts and will not allow seeing
- // prepared data.
+ // If set to kIgnore, updates from prepared transactions will not return prepare conflicts and
+ // will not allow seeing prepared data.
WiredTigerBeginTxnBlock::IgnorePrepared _ignorePrepared{
- WiredTigerBeginTxnBlock::IgnorePrepared::kIgnore};
+ WiredTigerBeginTxnBlock::IgnorePrepared::kNoIgnore};
Timestamp _commitTimestamp;
Timestamp _durableTimestamp;
Timestamp _prepareTimestamp;
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp
index 3ceec57fd9f..99fc4f51477 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp
@@ -225,9 +225,8 @@ TEST_F(WiredTigerRecoveryUnitTestFixture,
ru1->setPrepareTimestamp({1, 1});
ru1->prepareUnitOfWork();
- // Transaction read that does not ignore prepare conflicts triggers WT_PREPARE_CONFLICT
+ // The transaction read default enforces prepare conflicts and triggers a WT_PREPARE_CONFLICT.
ru2->beginUnitOfWork(clientAndCtx2.second.get());
- ru2->setIgnorePrepared(false);
getCursor(ru2, &cursor);
cursor->set_key(cursor, "key");
int ret = cursor->search(cursor);
@@ -249,9 +248,10 @@ TEST_F(WiredTigerRecoveryUnitTestFixture,
ru1->setPrepareTimestamp({1, 1});
ru1->prepareUnitOfWork();
- // Transaction read default ignores prepare conflicts but should not be able to read
- // data from the prepared transaction.
+ // A transaction that chooses to ignore prepare conflicts does not see the record instead of
+ // returning a prepare conflict.
ru2->beginUnitOfWork(clientAndCtx2.second.get());
+ ru2->setIgnorePrepared(true);
getCursor(ru2, &cursor);
cursor->set_key(cursor, "key");
int ret = cursor->search(cursor);
diff --git a/src/mongo/dbtests/storage_timestamp_tests.cpp b/src/mongo/dbtests/storage_timestamp_tests.cpp
index 54a823d9b79..3f8495713b6 100644
--- a/src/mongo/dbtests/storage_timestamp_tests.cpp
+++ b/src/mongo/dbtests/storage_timestamp_tests.cpp
@@ -107,6 +107,22 @@ public:
private:
OperationContext* _opCtx;
};
+
+class IgnorePrepareBlock {
+public:
+ IgnorePrepareBlock(OperationContext* opCtx) : _opCtx(opCtx) {
+ _opCtx->recoveryUnit()->abandonSnapshot();
+ _opCtx->recoveryUnit()->setIgnorePrepared(true);
+ }
+
+ ~IgnorePrepareBlock() {
+ _opCtx->recoveryUnit()->abandonSnapshot();
+ _opCtx->recoveryUnit()->setIgnorePrepared(false);
+ }
+
+private:
+ OperationContext* _opCtx;
+};
}
const auto kIndexVersion = IndexDescriptor::IndexVersion::kV2;
@@ -2981,9 +2997,15 @@ public:
assertDocumentAtTimestamp(coll, beforeTxnTs, BSONObj());
assertDocumentAtTimestamp(coll, firstOplogEntryTs, BSONObj());
assertDocumentAtTimestamp(coll, secondOplogEntryTs, BSONObj());
- assertDocumentAtTimestamp(coll, prepareEntryTs, BSONObj());
- assertDocumentAtTimestamp(coll, commitEntryTs, BSONObj());
- assertDocumentAtTimestamp(coll, nullTs, BSONObj());
+
+ {
+ IgnorePrepareBlock ignorePrepare(_opCtx);
+ // Perform the following while ignoring prepare conflicts. These calls would
+ // otherwise wait forever until the prepared transaction committed or aborted.
+ assertDocumentAtTimestamp(coll, prepareEntryTs, BSONObj());
+ assertDocumentAtTimestamp(coll, commitEntryTs, BSONObj());
+ assertDocumentAtTimestamp(coll, nullTs, BSONObj());
+ }
assertOplogDocumentExistsAtTimestamp(prepareFilter, presentTs, false);
assertOplogDocumentExistsAtTimestamp(prepareFilter, beforeTxnTs, false);
diff --git a/src/mongo/embedded/read_concern_embedded.cpp b/src/mongo/embedded/read_concern_embedded.cpp
index db4dbe3ef83..baa683c7dfa 100644
--- a/src/mongo/embedded/read_concern_embedded.cpp
+++ b/src/mongo/embedded/read_concern_embedded.cpp
@@ -34,7 +34,10 @@
namespace mongo {
MONGO_REGISTER_SHIM(waitForReadConcern)
-(OperationContext* opCtx, const repl::ReadConcernArgs& readConcernArgs, bool allowAfterClusterTime)
+(OperationContext* opCtx,
+ const repl::ReadConcernArgs& readConcernArgs,
+ bool allowAfterClusterTime,
+ StringData cmdName)
->Status {
if (readConcernArgs.getLevel() == repl::ReadConcernLevel::kLinearizableReadConcern) {
return {ErrorCodes::NotImplemented, "linearizable read concern not supported on embedded"};
diff --git a/src/mongo/embedded/service_entry_point_embedded.cpp b/src/mongo/embedded/service_entry_point_embedded.cpp
index bacdad22ffd..6a7c17e5b64 100644
--- a/src/mongo/embedded/service_entry_point_embedded.cpp
+++ b/src/mongo/embedded/service_entry_point_embedded.cpp
@@ -50,8 +50,10 @@ public:
void waitForReadConcern(OperationContext* opCtx,
const CommandInvocation* invocation,
const OpMsgRequest& request) const override {
- auto rcStatus = mongo::waitForReadConcern(
- opCtx, repl::ReadConcernArgs::get(opCtx), invocation->allowsAfterClusterTime());
+ auto rcStatus = mongo::waitForReadConcern(opCtx,
+ repl::ReadConcernArgs::get(opCtx),
+ invocation->allowsAfterClusterTime(),
+ request.getCommandName());
uassertStatusOK(rcStatus);
}