diff options
author | Huayu Ouyang <huayu.ouyang@mongodb.com> | 2021-10-29 21:52:30 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-10-29 22:50:51 +0000 |
commit | b08e22b646d2ba2893bf890bf20d25ce5d4ff1b6 (patch) | |
tree | b8f54e253081f615b32ad0a3b13d9e60aa85c56f | |
parent | 89c2892cb4dd0fe2c3d6d6e62c1e7cda63196471 (diff) | |
download | mongo-b08e22b646d2ba2893bf890bf20d25ce5d4ff1b6.tar.gz |
SERVER-61007 ReplSetGetStatus calls storage with no lock
13 files changed, 85 insertions, 19 deletions
diff --git a/src/mongo/db/repl/repl_set_commands.cpp b/src/mongo/db/repl/repl_set_commands.cpp index de1b9a458db..41ae248bf07 100644 --- a/src/mongo/db/repl/repl_set_commands.cpp +++ b/src/mongo/db/repl/repl_set_commands.cpp @@ -134,11 +134,34 @@ public: uassertStatusOK(status); return true; } else if (cmdObj.hasElement("getLastStableRecoveryTimestamp")) { - boost::optional<Timestamp> ts = - StorageInterface::get(getGlobalServiceContext()) - ->getLastStableRecoveryTimestamp(getGlobalServiceContext()); - if (ts) { - result.append("lastStableRecoveryTimestamp", ts.get()); + try { + // Retrieving last stable recovery timestamp should not be blocked by oplog + // application. + ShouldNotConflictWithSecondaryBatchApplicationBlock shouldNotConflictBlock( + opCtx->lockState()); + opCtx->lockState()->skipAcquireTicket(); + // We need to hold the lock so that we don't run when storage is being shutdown. + Lock::GlobalLock lk(opCtx, + MODE_IS, + Date_t::now() + Milliseconds(5), + Lock::InterruptBehavior::kLeaveUnlocked, + true /* skipRSTLLock */); + if (lk.isLocked()) { + boost::optional<Timestamp> ts = + StorageInterface::get(getGlobalServiceContext()) + ->getLastStableRecoveryTimestamp(getGlobalServiceContext()); + if (ts) { + result.append("lastStableRecoveryTimestamp", ts.get()); + } + } else { + LOGV2_WARNING(6100700, + "Failed to get last stable recovery timestamp due to {error}", + "error"_attr = "lock acquire timeout"_sd); + } + } catch (const ExceptionForCat<ErrorCategory::Interruption>& ex) { + LOGV2_WARNING(6100701, + "Failed to get last stable recovery timestamp due to {error}", + "error"_attr = redact(ex)); } return true; } else if (cmdObj.hasElement("restartHeartbeats")) { diff --git a/src/mongo/db/repl/repl_set_get_status_cmd.cpp b/src/mongo/db/repl/repl_set_get_status_cmd.cpp index 0860f2f97f7..ad4fe674ccd 100644 --- a/src/mongo/db/repl/repl_set_get_status_cmd.cpp +++ b/src/mongo/db/repl/repl_set_get_status_cmd.cpp @@ -64,8 +64,8 @@ public: if (includeInitialSync) { responseStyle = ReplicationCoordinator::ReplSetGetStatusResponseStyle::kInitialSync; } - status = - ReplicationCoordinator::get(opCtx)->processReplSetGetStatus(&result, responseStyle); + status = ReplicationCoordinator::get(opCtx)->processReplSetGetStatus( + opCtx, &result, responseStyle); uassertStatusOK(status); return true; } diff --git a/src/mongo/db/repl/replication_coordinator.h b/src/mongo/db/repl/replication_coordinator.h index b3eba2dab7e..06810324584 100644 --- a/src/mongo/db/repl/replication_coordinator.h +++ b/src/mongo/db/repl/replication_coordinator.h @@ -612,7 +612,8 @@ public: * Handles an incoming replSetGetStatus command. Adds BSON to 'result'. If kInitialSync is * requested but initial sync is not running, kBasic will be used. */ - virtual Status processReplSetGetStatus(BSONObjBuilder* result, + virtual Status processReplSetGetStatus(OperationContext* opCtx, + BSONObjBuilder* result, ReplSetGetStatusResponseStyle responseStyle) = 0; /** diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index c3c67f39cb2..4e4d1fa1aac 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -3043,7 +3043,9 @@ StatusWith<BSONObj> ReplicationCoordinatorImpl::prepareReplSetUpdatePositionComm } Status ReplicationCoordinatorImpl::processReplSetGetStatus( - BSONObjBuilder* response, ReplSetGetStatusResponseStyle responseStyle) { + OperationContext* opCtx, + BSONObjBuilder* response, + ReplSetGetStatusResponseStyle responseStyle) { BSONObj initialSyncProgress; if (responseStyle == ReplSetGetStatusResponseStyle::kInitialSync) { @@ -3066,6 +3068,32 @@ Status ReplicationCoordinatorImpl::processReplSetGetStatus( BSONObj electionParticipantMetrics = ReplicationMetrics::get(getServiceContext()).getElectionParticipantMetricsBSON(); + boost::optional<Timestamp> lastStableRecoveryTimestamp = boost::none; + try { + // Retrieving last stable recovery timestamp should not be blocked by oplog + // application. + ShouldNotConflictWithSecondaryBatchApplicationBlock shouldNotConflictBlock( + opCtx->lockState()); + opCtx->lockState()->skipAcquireTicket(); + // We need to hold the lock so that we don't run when storage is being shutdown. + Lock::GlobalLock lk(opCtx, + MODE_IS, + Date_t::now() + Milliseconds(5), + Lock::InterruptBehavior::kLeaveUnlocked, + true /* skipRSTLLock */); + if (lk.isLocked()) { + lastStableRecoveryTimestamp = _storage->getLastStableRecoveryTimestamp(_service); + } else { + LOGV2_WARNING(6100702, + "Failed to get last stable recovery timestamp due to {error}", + "error"_attr = "lock acquire timeout"_sd); + } + } catch (const ExceptionForCat<ErrorCategory::Interruption>& ex) { + LOGV2_WARNING(6100703, + "Failed to get last stable recovery timestamp due to {error}", + "error"_attr = redact(ex)); + } + stdx::lock_guard<Latch> lk(_mutex); if (_inShutdown) { return Status(ErrorCodes::ShutdownInProgress, "shutdown in progress"); @@ -3080,7 +3108,7 @@ Status ReplicationCoordinatorImpl::processReplSetGetStatus( initialSyncProgress, electionCandidateMetrics, electionParticipantMetrics, - _storage->getLastStableRecoveryTimestamp(_service), + lastStableRecoveryTimestamp, _externalState->tooStale()}, response, &result); diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h index b0e9fb0cba9..dae323acb51 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.h +++ b/src/mongo/db/repl/replication_coordinator_impl.h @@ -224,7 +224,8 @@ public: virtual StatusWith<BSONObj> prepareReplSetUpdatePositionCommand() const override; - virtual Status processReplSetGetStatus(BSONObjBuilder* result, + virtual Status processReplSetGetStatus(OperationContext* opCtx, + BSONObjBuilder* result, ReplSetGetStatusResponseStyle responseStyle) override; virtual void appendSecondaryInfoData(BSONObjBuilder* result) override; diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp index 1922af89a9a..2e08f3890d0 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp @@ -1109,8 +1109,11 @@ TEST_F(ReplCoordHBV1Test, IgnoreTheContentsOfMetadataWhenItsReplicaSetIdDoesNotM ASSERT_NOT_EQUALS(opTime.getTerm(), getTopoCoord().getTerm()); BSONObjBuilder statusBuilder; + auto opCtx = makeOperationContext(); ASSERT_OK(getReplCoord()->processReplSetGetStatus( - &statusBuilder, ReplicationCoordinator::ReplSetGetStatusResponseStyle::kBasic)); + opCtx.get(), + &statusBuilder, + ReplicationCoordinator::ReplSetGetStatusResponseStyle::kBasic)); auto statusObj = statusBuilder.obj(); LOGV2(21495, "replica set status = {statusObj}", "statusObj"_attr = statusObj); diff --git a/src/mongo/db/repl/replication_coordinator_mock.cpp b/src/mongo/db/repl/replication_coordinator_mock.cpp index f478e858094..788f2991a70 100644 --- a/src/mongo/db/repl/replication_coordinator_mock.cpp +++ b/src/mongo/db/repl/replication_coordinator_mock.cpp @@ -456,7 +456,8 @@ void ReplicationCoordinatorMock::advanceCommitPoint( void ReplicationCoordinatorMock::cancelAndRescheduleElectionTimeout() {} -Status ReplicationCoordinatorMock::processReplSetGetStatus(BSONObjBuilder*, +Status ReplicationCoordinatorMock::processReplSetGetStatus(OperationContext* opCtx, + BSONObjBuilder*, ReplSetGetStatusResponseStyle) { return Status::OK(); } diff --git a/src/mongo/db/repl/replication_coordinator_mock.h b/src/mongo/db/repl/replication_coordinator_mock.h index 54255c8398c..f20e2cadadc 100644 --- a/src/mongo/db/repl/replication_coordinator_mock.h +++ b/src/mongo/db/repl/replication_coordinator_mock.h @@ -189,7 +189,9 @@ public: virtual StatusWith<BSONObj> prepareReplSetUpdatePositionCommand() const override; - virtual Status processReplSetGetStatus(BSONObjBuilder*, ReplSetGetStatusResponseStyle); + virtual Status processReplSetGetStatus(OperationContext* opCtx, + BSONObjBuilder*, + ReplSetGetStatusResponseStyle); virtual void appendSecondaryInfoData(BSONObjBuilder* result); diff --git a/src/mongo/db/repl/replication_coordinator_noop.cpp b/src/mongo/db/repl/replication_coordinator_noop.cpp index f691df78494..655a5f48904 100644 --- a/src/mongo/db/repl/replication_coordinator_noop.cpp +++ b/src/mongo/db/repl/replication_coordinator_noop.cpp @@ -285,7 +285,8 @@ StatusWith<BSONObj> ReplicationCoordinatorNoOp::prepareReplSetUpdatePositionComm MONGO_UNREACHABLE; } -Status ReplicationCoordinatorNoOp::processReplSetGetStatus(BSONObjBuilder*, +Status ReplicationCoordinatorNoOp::processReplSetGetStatus(OperationContext* opCtx, + BSONObjBuilder*, ReplSetGetStatusResponseStyle) { MONGO_UNREACHABLE; } diff --git a/src/mongo/db/repl/replication_coordinator_noop.h b/src/mongo/db/repl/replication_coordinator_noop.h index b6d3f6af039..7183fdaac4a 100644 --- a/src/mongo/db/repl/replication_coordinator_noop.h +++ b/src/mongo/db/repl/replication_coordinator_noop.h @@ -167,7 +167,9 @@ public: StatusWith<BSONObj> prepareReplSetUpdatePositionCommand() const final; - Status processReplSetGetStatus(BSONObjBuilder*, ReplSetGetStatusResponseStyle) final; + Status processReplSetGetStatus(OperationContext* opCtx, + BSONObjBuilder*, + ReplSetGetStatusResponseStyle) final; void appendSecondaryInfoData(BSONObjBuilder*) final; diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h index 6c162d5ebd5..2da14343ef9 100644 --- a/src/mongo/db/repl/topology_coordinator.h +++ b/src/mongo/db/repl/topology_coordinator.h @@ -372,7 +372,8 @@ public: const BSONObj electionCandidateMetrics; const BSONObj electionParticipantMetrics; - // boost::none if the storage engine does not support recovery to a timestamp. + // boost::none if the storage engine does not support recovery to a timestamp, or if the + // storage engine is not available. // Timestamp::min() if a stable recovery timestamp is yet to be taken. // // On the replication layer, a non-min() timestamp ensures recoverable rollback is possible, diff --git a/src/mongo/embedded/replication_coordinator_embedded.cpp b/src/mongo/embedded/replication_coordinator_embedded.cpp index 3a9eb124d59..5a117927635 100644 --- a/src/mongo/embedded/replication_coordinator_embedded.cpp +++ b/src/mongo/embedded/replication_coordinator_embedded.cpp @@ -309,7 +309,8 @@ StatusWith<BSONObj> ReplicationCoordinatorEmbedded::prepareReplSetUpdatePosition UASSERT_NOT_IMPLEMENTED; } -Status ReplicationCoordinatorEmbedded::processReplSetGetStatus(BSONObjBuilder*, +Status ReplicationCoordinatorEmbedded::processReplSetGetStatus(OperationContext* opCtx, + BSONObjBuilder*, ReplSetGetStatusResponseStyle) { UASSERT_NOT_IMPLEMENTED; } diff --git a/src/mongo/embedded/replication_coordinator_embedded.h b/src/mongo/embedded/replication_coordinator_embedded.h index bcbf9efa036..9a408d645fd 100644 --- a/src/mongo/embedded/replication_coordinator_embedded.h +++ b/src/mongo/embedded/replication_coordinator_embedded.h @@ -176,7 +176,9 @@ public: StatusWith<BSONObj> prepareReplSetUpdatePositionCommand() const override; - Status processReplSetGetStatus(BSONObjBuilder*, ReplSetGetStatusResponseStyle) override; + Status processReplSetGetStatus(OperationContext* opCtx, + BSONObjBuilder*, + ReplSetGetStatusResponseStyle) override; void appendSecondaryInfoData(BSONObjBuilder*) override; |