diff options
author | Lingzhi Deng <lingzhi.deng@mongodb.com> | 2020-05-01 16:09:04 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-05-05 00:08:42 +0000 |
commit | da18d99f15a19713af4035a5d2a6ef4e7caab46d (patch) | |
tree | d7bcb99de9600ca774c81689fce44b09c8697c5d /src/mongo/db/repl | |
parent | ebf32d3a3e3f297d981053337b104fca4a32ac9e (diff) | |
download | mongo-da18d99f15a19713af4035a5d2a6ef4e7caab46d.tar.gz |
SERVER-47881: Make getLatestWriteOpTime return StatusWith<OpTime>
(cherry picked from commit 892bce4528b2ec97d9f264b5a982d54da0e4971d)
Diffstat (limited to 'src/mongo/db/repl')
-rw-r--r-- | src/mongo/db/repl/repl_client_info.cpp | 50 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator.h | 4 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.cpp | 23 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.h | 3 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_mock.cpp | 3 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_mock.h | 3 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_noop.cpp | 3 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_noop.h | 2 |
8 files changed, 51 insertions, 40 deletions
diff --git a/src/mongo/db/repl/repl_client_info.cpp b/src/mongo/db/repl/repl_client_info.cpp index 43f193b4c4b..2538e7d16c5 100644 --- a/src/mongo/db/repl/repl_client_info.cpp +++ b/src/mongo/db/repl/repl_client_info.cpp @@ -69,33 +69,33 @@ void ReplClientInfo::setLastOp(OperationContext* opCtx, const OpTime& ot) { void ReplClientInfo::setLastOpToSystemLastOpTime(OperationContext* opCtx) { auto replCoord = repl::ReplicationCoordinator::get(opCtx->getServiceContext()); if (replCoord->isReplEnabled() && opCtx->writesAreReplicated()) { + auto latestWriteOpTimeSW = replCoord->getLatestWriteOpTime(opCtx); + auto status = latestWriteOpTimeSW.getStatus(); OpTime systemOpTime; - auto status = [&] { - try { - // Get the latest OpTime from oplog. - systemOpTime = replCoord->getLatestWriteOpTime(opCtx); - return Status::OK(); - } catch (const DBException& e) { - // Fall back to use my lastAppliedOpTime if we failed to get the latest OpTime from - // storage. In most cases, it is safe to ignore errors because if - // getLatestWriteOpTime throws, we cannot use the same opCtx to wait for - // writeConcern anyways. But getLastError from the same client could use a different - // opCtx to wait for the lastOp. So this is a best effort attempt to set the lastOp - // to the in-memory lastAppliedOpTime (which could be lagged). And this is a known - // bug in getLastError. - systemOpTime = replCoord->getMyLastAppliedOpTime(); - if (e.toStatus() == ErrorCodes::OplogOperationUnsupported || - e.toStatus() == ErrorCodes::NamespaceNotFound || - e.toStatus() == ErrorCodes::CollectionIsEmpty || - ErrorCodes::isNotMasterError(e.toStatus())) { - // It is ok if the storage engine does not support getLatestOplogTimestamp() or - // if the oplog is empty. If the node stepped down in between, it is correct to - // use lastAppliedOpTime as last OpTime. - return Status::OK(); - } - return e.toStatus(); + if (status.isOK()) { + systemOpTime = latestWriteOpTimeSW.getValue(); + } else { + // Fall back to use my lastAppliedOpTime if we failed to get the latest OpTime from + // storage. In most cases, it is safe to ignore errors because if + // getLatestWriteOpTime returns an error, we cannot use the same opCtx to wait for + // writeConcern anyways. But getLastError from the same client could use a different + // opCtx to wait for the lastOp. So this is a best effort attempt to set the lastOp + // to the in-memory lastAppliedOpTime (which could be lagged). And this is a known + // bug in getLastError. + systemOpTime = replCoord->getMyLastAppliedOpTime(); + if (status == ErrorCodes::OplogOperationUnsupported || + status == ErrorCodes::NamespaceNotFound || + status == ErrorCodes::CollectionIsEmpty || ErrorCodes::isNotMasterError(status)) { + // It is ok if the storage engine does not support getLatestOplogTimestamp() or + // if the oplog is empty. If the node stepped down in between, it is correct to + // use lastAppliedOpTime as last OpTime. + status = Status::OK(); } - }(); + // We will continue trying to set client's lastOp to lastAppliedOpTime as a best-effort + // alternative to getLatestWriteOpTime. And we will then throw after setting client's + // lastOp if getLatestWriteOpTime has failed with a error code other than the ones + // above. + } // If the system optime has gone backwards, that must mean that there was a rollback. // This is safe, but the last op for a Client should never go backwards, so just leave diff --git a/src/mongo/db/repl/replication_coordinator.h b/src/mongo/db/repl/replication_coordinator.h index afa0fbadff0..314ae8ccd4a 100644 --- a/src/mongo/db/repl/replication_coordinator.h +++ b/src/mongo/db/repl/replication_coordinator.h @@ -998,14 +998,14 @@ public: /** * Returns the OpTime that consists of the timestamp of the latest oplog entry and the current * term. - * This function throws if: + * This function returns a non-ok status if: * 1. It is called on secondaries. * 2. OperationContext times out or is interrupted. * 3. Oplog collection does not exist. * 4. Oplog collection is empty. * 5. Getting latest oplog timestamp is not supported by the storage engine. */ - virtual OpTime getLatestWriteOpTime(OperationContext* opCtx) const = 0; + virtual StatusWith<OpTime> getLatestWriteOpTime(OperationContext* opCtx) const noexcept = 0; /** * Returns the HostAndPort of the current primary, or an empty HostAndPort if there is no diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index a46aeccd9ef..1279437998d 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -2270,18 +2270,25 @@ std::shared_ptr<const IsMasterResponse> ReplicationCoordinatorImpl::awaitIsMaste return statusWithIsMaster.getValue(); } -OpTime ReplicationCoordinatorImpl::getLatestWriteOpTime(OperationContext* opCtx) const { +StatusWith<OpTime> ReplicationCoordinatorImpl::getLatestWriteOpTime(OperationContext* opCtx) const + noexcept try { ShouldNotConflictWithSecondaryBatchApplicationBlock noPBWMBlock(opCtx->lockState()); Lock::GlobalLock globalLock(opCtx, MODE_IS); // Check if the node is primary after acquiring global IS lock. - uassert(ErrorCodes::NotMaster, - "Not primary so can't get latest write optime", - canAcceptNonLocalWrites()); + if (!canAcceptNonLocalWrites()) { + return {ErrorCodes::NotMaster, "Not primary so can't get latest write optime"}; + } auto oplog = LocalOplogInfo::get(opCtx)->getCollection(); - uassert(ErrorCodes::NamespaceNotFound, "oplog collection does not exist.", oplog); - auto latestOplogTimestamp = - uassertStatusOK(oplog->getRecordStore()->getLatestOplogTimestamp(opCtx)); - return OpTime(latestOplogTimestamp, getTerm()); + if (!oplog) { + return {ErrorCodes::NamespaceNotFound, "oplog collection does not exist"}; + } + auto latestOplogTimestampSW = oplog->getRecordStore()->getLatestOplogTimestamp(opCtx); + if (!latestOplogTimestampSW.isOK()) { + return latestOplogTimestampSW.getStatus(); + } + return OpTime(latestOplogTimestampSW.getValue(), getTerm()); +} catch (const DBException& e) { + return e.toStatus(); } HostAndPort ReplicationCoordinatorImpl::getCurrentPrimaryHostAndPort() const { diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h index 82cf33ae8df..ac5d76919bd 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.h +++ b/src/mongo/db/repl/replication_coordinator_impl.h @@ -363,7 +363,8 @@ public: boost::optional<TopologyVersion> clientTopologyVersion, boost::optional<Date_t> deadline) override; - virtual OpTime getLatestWriteOpTime(OperationContext* opCtx) const override; + virtual StatusWith<OpTime> getLatestWriteOpTime(OperationContext* opCtx) const + noexcept override; virtual HostAndPort getCurrentPrimaryHostAndPort() const override; diff --git a/src/mongo/db/repl/replication_coordinator_mock.cpp b/src/mongo/db/repl/replication_coordinator_mock.cpp index e30c8f0b417..07a15b346ad 100644 --- a/src/mongo/db/repl/replication_coordinator_mock.cpp +++ b/src/mongo/db/repl/replication_coordinator_mock.cpp @@ -599,7 +599,8 @@ std::shared_ptr<const IsMasterResponse> ReplicationCoordinatorMock::awaitIsMaste return response; } -OpTime ReplicationCoordinatorMock::getLatestWriteOpTime(OperationContext* opCtx) const { +StatusWith<OpTime> ReplicationCoordinatorMock::getLatestWriteOpTime(OperationContext* opCtx) const + noexcept { return getMyLastAppliedOpTime(); } diff --git a/src/mongo/db/repl/replication_coordinator_mock.h b/src/mongo/db/repl/replication_coordinator_mock.h index 9b9aab0017f..1336fba406e 100644 --- a/src/mongo/db/repl/replication_coordinator_mock.h +++ b/src/mongo/db/repl/replication_coordinator_mock.h @@ -342,7 +342,8 @@ public: const SplitHorizon::Parameters& horizonParams, boost::optional<TopologyVersion> clientTopologyVersion) override; - virtual OpTime getLatestWriteOpTime(OperationContext* opCtx) const override; + virtual StatusWith<OpTime> getLatestWriteOpTime(OperationContext* opCtx) const + noexcept override; virtual HostAndPort getCurrentPrimaryHostAndPort() const override; diff --git a/src/mongo/db/repl/replication_coordinator_noop.cpp b/src/mongo/db/repl/replication_coordinator_noop.cpp index 902dd065fb8..4c5fc88a601 100644 --- a/src/mongo/db/repl/replication_coordinator_noop.cpp +++ b/src/mongo/db/repl/replication_coordinator_noop.cpp @@ -500,7 +500,8 @@ ReplicationCoordinatorNoOp::getIsMasterResponseFuture( MONGO_UNREACHABLE; } -OpTime ReplicationCoordinatorNoOp::getLatestWriteOpTime(OperationContext* opCtx) const { +StatusWith<OpTime> ReplicationCoordinatorNoOp::getLatestWriteOpTime(OperationContext* opCtx) const + noexcept { return getMyLastAppliedOpTime(); } diff --git a/src/mongo/db/repl/replication_coordinator_noop.h b/src/mongo/db/repl/replication_coordinator_noop.h index e672a820757..48af2f87a9f 100644 --- a/src/mongo/db/repl/replication_coordinator_noop.h +++ b/src/mongo/db/repl/replication_coordinator_noop.h @@ -277,7 +277,7 @@ public: const SplitHorizon::Parameters& horizonParams, boost::optional<TopologyVersion> clientTopologyVersion) final; - OpTime getLatestWriteOpTime(OperationContext* opCtx) const override; + StatusWith<OpTime> getLatestWriteOpTime(OperationContext* opCtx) const noexcept override; HostAndPort getCurrentPrimaryHostAndPort() const override; |