summaryrefslogtreecommitdiff
path: root/src/mongo/db/storage/snapshot_helper.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/db/storage/snapshot_helper.cpp')
-rw-r--r--src/mongo/db/storage/snapshot_helper.cpp80
1 files changed, 54 insertions, 26 deletions
diff --git a/src/mongo/db/storage/snapshot_helper.cpp b/src/mongo/db/storage/snapshot_helper.cpp
index 5acbcd3a513..84af208d391 100644
--- a/src/mongo/db/storage/snapshot_helper.cpp
+++ b/src/mongo/db/storage/snapshot_helper.cpp
@@ -38,29 +38,37 @@
#include "mongo/logv2/log.h"
namespace mongo {
-namespace SnapshotHelper {
-bool canSwitchReadSource(OperationContext* opCtx) {
-
- // Most readConcerns have behavior controlled at higher levels. Local and available are the only
- // ReadConcerns that should consider changing, since they read without a timestamp by default.
+namespace {
+bool canReadAtLastApplied(OperationContext* opCtx) {
+ // Local and available are the only ReadConcern levels that allow their ReadSource to be
+ // overridden to read at lastApplied. They read without a timestamp by default, but this check
+ // allows user secondary reads from conflicting with oplog batch application by reading at a
+ // consistent point in time.
+ // Internal operations use DBDirectClient as a loopback to perform local operations, and they
+ // expect the same level of consistency guarantees as any user operation. For that reason,
+ // DBDirectClient should be able to change the owning operation's ReadSource in order to serve
+ // consistent data.
const auto readConcernLevel = repl::ReadConcernArgs::get(opCtx).getLevel();
- if (readConcernLevel == repl::ReadConcernLevel::kLocalReadConcern ||
- readConcernLevel == repl::ReadConcernLevel::kAvailableReadConcern) {
+ if ((opCtx->getClient()->isFromUserConnection() || opCtx->getClient()->isInDirectClient()) &&
+ (readConcernLevel == repl::ReadConcernLevel::kLocalReadConcern ||
+ readConcernLevel == repl::ReadConcernLevel::kAvailableReadConcern)) {
return true;
}
-
return false;
}
+} // namespace
+namespace SnapshotHelper {
bool shouldReadAtLastApplied(OperationContext* opCtx,
const NamespaceString& nss,
std::string* reason) {
-
// If this is true, then the operation opted-in to the PBWM lock, implying that it cannot change
// its ReadSource. It's important to note that it is possible for this to be false, but still be
// holding the PBWM lock, explained below.
if (opCtx->lockState()->shouldConflictWithSecondaryBatchApplication()) {
- *reason = "conflicts with batch application";
+ if (reason) {
+ *reason = "conflicts with batch application";
+ }
return false;
}
@@ -71,16 +79,32 @@ bool shouldReadAtLastApplied(OperationContext* opCtx,
// guaranteed to observe all previous writes. This may occur when multiple collection locks are
// held concurrently, which is often the case when DBDirectClient is used.
if (opCtx->lockState()->isLockHeldForMode(resourceIdParallelBatchWriterMode, MODE_IS)) {
- *reason = "PBWM lock is held";
+ if (reason) {
+ *reason = "PBWM lock is held";
+ }
LOGV2_DEBUG(20577, 1, "not reading at lastApplied because the PBWM lock is held");
return false;
}
- // If we are in a replication state (like secondary or primary catch-up) where we are not
- // accepting writes, we should read at lastApplied. If this node can accept writes, then no
- // conflicting replication batches are being applied and we can read from the default snapshot.
+ // If this node can accept writes (i.e. primary), then no conflicting replication batches are
+ // being applied and we can read from the default snapshot. If we are in a replication state
+ // (like secondary or primary catch-up) where we are not accepting writes, we should read at
+ // lastApplied.
if (repl::ReplicationCoordinator::get(opCtx)->canAcceptWritesForDatabase(opCtx, "admin")) {
- *reason = "primary";
+ if (reason) {
+ *reason = "primary";
+ }
+ return false;
+ }
+
+ // If we are not secondary, then we should not attempt to read at lastApplied because it may not
+ // be available or valid. Any operations reading outside of the primary or secondary states must
+ // be internal. We give these operations the benefit of the doubt rather than attempting to read
+ // at a lastApplied timestamp that is not valid.
+ if (!repl::ReplicationCoordinator::get(opCtx)->isInPrimaryOrSecondaryState(opCtx)) {
+ if (reason) {
+ *reason = "not primary or secondary";
+ }
return false;
}
@@ -88,7 +112,9 @@ bool shouldReadAtLastApplied(OperationContext* opCtx,
// written by the replication system. However, the oplog is special, as it *is* written by the
// replication system.
if (!nss.isReplicated() && !nss.isOplog()) {
- *reason = "unreplicated collection";
+ if (reason) {
+ *reason = "unreplicated collection";
+ }
return false;
}
@@ -96,15 +122,14 @@ bool shouldReadAtLastApplied(OperationContext* opCtx,
}
boost::optional<RecoveryUnit::ReadSource> getNewReadSource(OperationContext* opCtx,
const NamespaceString& nss) {
- const bool canSwitch = canSwitchReadSource(opCtx);
- if (!canSwitch) {
+ if (!canReadAtLastApplied(opCtx)) {
return boost::none;
}
const auto existing = opCtx->recoveryUnit()->getTimestampReadSource();
std::string reason;
const bool readAtLastApplied = shouldReadAtLastApplied(opCtx, nss, &reason);
- if (existing == RecoveryUnit::ReadSource::kUnset) {
+ if (existing == RecoveryUnit::ReadSource::kNoTimestamp) {
// Shifting from reading without a timestamp to reading with a timestamp can be dangerous
// because writes will appear to vanish. This case is intended for new reads on secondaries
// and query yield recovery after state transitions from primary to secondary.
@@ -122,13 +147,16 @@ boost::optional<RecoveryUnit::ReadSource> getNewReadSource(OperationContext* opC
// Given readers do not survive rollbacks, it's okay to go from reading with a timestamp to
// reading without one. More writes will become visible.
if (!readAtLastApplied) {
- LOGV2_DEBUG(
- 4452902, 2, "Changing ReadSource to kUnset", logAttrs(nss), "reason"_attr = reason);
- // This shift to kUnset assumes that callers will not make future attempts to manipulate
- // their ReadSources after performing reads at an un-timetamped snapshot. The only
- // exception is callers of this function that may need to change from kUnset to
- // kLastApplied in the event of a catalog conflict or query yield.
- return RecoveryUnit::ReadSource::kUnset;
+ LOGV2_DEBUG(4452902,
+ 2,
+ "Changing ReadSource to kNoTimestamp",
+ logAttrs(nss),
+ "reason"_attr = reason);
+ // This shift to kNoTimestamp assumes that callers will not make future attempts to
+ // manipulate their ReadSources after performing reads at an un-timetamped snapshot. The
+ // only exception is callers of this function that may need to change from kNoTimestamp
+ // to kLastApplied in the event of a catalog conflict or query yield.
+ return RecoveryUnit::ReadSource::kNoTimestamp;
}
}
return boost::none;