diff options
Diffstat (limited to 'src/mongo/db/storage/snapshot_helper.cpp')
-rw-r--r-- | src/mongo/db/storage/snapshot_helper.cpp | 80 |
1 files changed, 54 insertions, 26 deletions
diff --git a/src/mongo/db/storage/snapshot_helper.cpp b/src/mongo/db/storage/snapshot_helper.cpp index 5acbcd3a513..84af208d391 100644 --- a/src/mongo/db/storage/snapshot_helper.cpp +++ b/src/mongo/db/storage/snapshot_helper.cpp @@ -38,29 +38,37 @@ #include "mongo/logv2/log.h" namespace mongo { -namespace SnapshotHelper { -bool canSwitchReadSource(OperationContext* opCtx) { - - // Most readConcerns have behavior controlled at higher levels. Local and available are the only - // ReadConcerns that should consider changing, since they read without a timestamp by default. +namespace { +bool canReadAtLastApplied(OperationContext* opCtx) { + // Local and available are the only ReadConcern levels that allow their ReadSource to be + // overridden to read at lastApplied. They read without a timestamp by default, but this check + // prevents user secondary reads from conflicting with oplog batch application by reading at a + // consistent point in time. + // Internal operations use DBDirectClient as a loopback to perform local operations, and they + // expect the same level of consistency guarantees as any user operation. For that reason, + // DBDirectClient should be able to change the owning operation's ReadSource in order to serve + // consistent data. 
const auto readConcernLevel = repl::ReadConcernArgs::get(opCtx).getLevel(); - if (readConcernLevel == repl::ReadConcernLevel::kLocalReadConcern || - readConcernLevel == repl::ReadConcernLevel::kAvailableReadConcern) { + if ((opCtx->getClient()->isFromUserConnection() || opCtx->getClient()->isInDirectClient()) && + (readConcernLevel == repl::ReadConcernLevel::kLocalReadConcern || + readConcernLevel == repl::ReadConcernLevel::kAvailableReadConcern)) { return true; } - return false; } +} // namespace +namespace SnapshotHelper { bool shouldReadAtLastApplied(OperationContext* opCtx, const NamespaceString& nss, std::string* reason) { - // If this is true, then the operation opted-in to the PBWM lock, implying that it cannot change // its ReadSource. It's important to note that it is possible for this to be false, but still be // holding the PBWM lock, explained below. if (opCtx->lockState()->shouldConflictWithSecondaryBatchApplication()) { - *reason = "conflicts with batch application"; + if (reason) { + *reason = "conflicts with batch application"; + } return false; } @@ -71,16 +79,32 @@ bool shouldReadAtLastApplied(OperationContext* opCtx, // guaranteed to observe all previous writes. This may occur when multiple collection locks are // held concurrently, which is often the case when DBDirectClient is used. if (opCtx->lockState()->isLockHeldForMode(resourceIdParallelBatchWriterMode, MODE_IS)) { - *reason = "PBWM lock is held"; + if (reason) { + *reason = "PBWM lock is held"; + } LOGV2_DEBUG(20577, 1, "not reading at lastApplied because the PBWM lock is held"); return false; } - // If we are in a replication state (like secondary or primary catch-up) where we are not - // accepting writes, we should read at lastApplied. If this node can accept writes, then no - // conflicting replication batches are being applied and we can read from the default snapshot. + // If this node can accept writes (i.e. 
primary), then no conflicting replication batches are + // being applied and we can read from the default snapshot. If we are in a replication state + // (like secondary or primary catch-up) where we are not accepting writes, we should read at + // lastApplied. if (repl::ReplicationCoordinator::get(opCtx)->canAcceptWritesForDatabase(opCtx, "admin")) { - *reason = "primary"; + if (reason) { + *reason = "primary"; + } + return false; + } + + // If we are not secondary, then we should not attempt to read at lastApplied because it may not + // be available or valid. Any operations reading outside of the primary or secondary states must + // be internal. We give these operations the benefit of the doubt rather than attempting to read + // at a lastApplied timestamp that is not valid. + if (!repl::ReplicationCoordinator::get(opCtx)->isInPrimaryOrSecondaryState(opCtx)) { + if (reason) { + *reason = "not primary or secondary"; + } return false; } @@ -88,7 +112,9 @@ bool shouldReadAtLastApplied(OperationContext* opCtx, // written by the replication system. However, the oplog is special, as it *is* written by the // replication system. 
if (!nss.isReplicated() && !nss.isOplog()) { - *reason = "unreplicated collection"; + if (reason) { + *reason = "unreplicated collection"; + } return false; } @@ -96,15 +122,14 @@ bool shouldReadAtLastApplied(OperationContext* opCtx, } boost::optional<RecoveryUnit::ReadSource> getNewReadSource(OperationContext* opCtx, const NamespaceString& nss) { - const bool canSwitch = canSwitchReadSource(opCtx); - if (!canSwitch) { + if (!canReadAtLastApplied(opCtx)) { return boost::none; } const auto existing = opCtx->recoveryUnit()->getTimestampReadSource(); std::string reason; const bool readAtLastApplied = shouldReadAtLastApplied(opCtx, nss, &reason); - if (existing == RecoveryUnit::ReadSource::kUnset) { + if (existing == RecoveryUnit::ReadSource::kNoTimestamp) { // Shifting from reading without a timestamp to reading with a timestamp can be dangerous // because writes will appear to vanish. This case is intended for new reads on secondaries // and query yield recovery after state transitions from primary to secondary. @@ -122,13 +147,16 @@ boost::optional<RecoveryUnit::ReadSource> getNewReadSource(OperationContext* opC // Given readers do not survive rollbacks, it's okay to go from reading with a timestamp to // reading without one. More writes will become visible. if (!readAtLastApplied) { - LOGV2_DEBUG( - 4452902, 2, "Changing ReadSource to kUnset", logAttrs(nss), "reason"_attr = reason); - // This shift to kUnset assumes that callers will not make future attempts to manipulate - // their ReadSources after performing reads at an un-timetamped snapshot. The only - // exception is callers of this function that may need to change from kUnset to - // kLastApplied in the event of a catalog conflict or query yield. 
- return RecoveryUnit::ReadSource::kUnset; + LOGV2_DEBUG(4452902, + 2, + "Changing ReadSource to kNoTimestamp", + logAttrs(nss), + "reason"_attr = reason); + // This shift to kNoTimestamp assumes that callers will not make future attempts to + // manipulate their ReadSources after performing reads at an un-timestamped snapshot. The + // only exception is callers of this function that may need to change from kNoTimestamp + // to kLastApplied in the event of a catalog conflict or query yield. + return RecoveryUnit::ReadSource::kNoTimestamp; } } return boost::none; |