summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHuayu Ouyang <huayu.ouyang@mongodb.com>2021-10-29 21:52:30 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-10-29 22:50:51 +0000
commitb08e22b646d2ba2893bf890bf20d25ce5d4ff1b6 (patch)
treeb8f54e253081f615b32ad0a3b13d9e60aa85c56f
parent89c2892cb4dd0fe2c3d6d6e62c1e7cda63196471 (diff)
downloadmongo-b08e22b646d2ba2893bf890bf20d25ce5d4ff1b6.tar.gz
SERVER-61007 ReplSetGetStatus calls storage with no lock
-rw-r--r--src/mongo/db/repl/repl_set_commands.cpp33
-rw-r--r--src/mongo/db/repl/repl_set_get_status_cmd.cpp4
-rw-r--r--src/mongo/db/repl/replication_coordinator.h3
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.cpp32
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.h3
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp5
-rw-r--r--src/mongo/db/repl/replication_coordinator_mock.cpp3
-rw-r--r--src/mongo/db/repl/replication_coordinator_mock.h4
-rw-r--r--src/mongo/db/repl/replication_coordinator_noop.cpp3
-rw-r--r--src/mongo/db/repl/replication_coordinator_noop.h4
-rw-r--r--src/mongo/db/repl/topology_coordinator.h3
-rw-r--r--src/mongo/embedded/replication_coordinator_embedded.cpp3
-rw-r--r--src/mongo/embedded/replication_coordinator_embedded.h4
13 files changed, 85 insertions, 19 deletions
diff --git a/src/mongo/db/repl/repl_set_commands.cpp b/src/mongo/db/repl/repl_set_commands.cpp
index de1b9a458db..41ae248bf07 100644
--- a/src/mongo/db/repl/repl_set_commands.cpp
+++ b/src/mongo/db/repl/repl_set_commands.cpp
@@ -134,11 +134,34 @@ public:
uassertStatusOK(status);
return true;
} else if (cmdObj.hasElement("getLastStableRecoveryTimestamp")) {
- boost::optional<Timestamp> ts =
- StorageInterface::get(getGlobalServiceContext())
- ->getLastStableRecoveryTimestamp(getGlobalServiceContext());
- if (ts) {
- result.append("lastStableRecoveryTimestamp", ts.get());
+ try {
+ // Retrieving last stable recovery timestamp should not be blocked by oplog
+ // application.
+ ShouldNotConflictWithSecondaryBatchApplicationBlock shouldNotConflictBlock(
+ opCtx->lockState());
+ opCtx->lockState()->skipAcquireTicket();
+ // We need to hold the lock so that we don't run when storage is being shutdown.
+ Lock::GlobalLock lk(opCtx,
+ MODE_IS,
+ Date_t::now() + Milliseconds(5),
+ Lock::InterruptBehavior::kLeaveUnlocked,
+ true /* skipRSTLLock */);
+ if (lk.isLocked()) {
+ boost::optional<Timestamp> ts =
+ StorageInterface::get(getGlobalServiceContext())
+ ->getLastStableRecoveryTimestamp(getGlobalServiceContext());
+ if (ts) {
+ result.append("lastStableRecoveryTimestamp", ts.get());
+ }
+ } else {
+ LOGV2_WARNING(6100700,
+ "Failed to get last stable recovery timestamp due to {error}",
+ "error"_attr = "lock acquire timeout"_sd);
+ }
+ } catch (const ExceptionForCat<ErrorCategory::Interruption>& ex) {
+ LOGV2_WARNING(6100701,
+ "Failed to get last stable recovery timestamp due to {error}",
+ "error"_attr = redact(ex));
}
return true;
} else if (cmdObj.hasElement("restartHeartbeats")) {
diff --git a/src/mongo/db/repl/repl_set_get_status_cmd.cpp b/src/mongo/db/repl/repl_set_get_status_cmd.cpp
index 0860f2f97f7..ad4fe674ccd 100644
--- a/src/mongo/db/repl/repl_set_get_status_cmd.cpp
+++ b/src/mongo/db/repl/repl_set_get_status_cmd.cpp
@@ -64,8 +64,8 @@ public:
if (includeInitialSync) {
responseStyle = ReplicationCoordinator::ReplSetGetStatusResponseStyle::kInitialSync;
}
- status =
- ReplicationCoordinator::get(opCtx)->processReplSetGetStatus(&result, responseStyle);
+ status = ReplicationCoordinator::get(opCtx)->processReplSetGetStatus(
+ opCtx, &result, responseStyle);
uassertStatusOK(status);
return true;
}
diff --git a/src/mongo/db/repl/replication_coordinator.h b/src/mongo/db/repl/replication_coordinator.h
index b3eba2dab7e..06810324584 100644
--- a/src/mongo/db/repl/replication_coordinator.h
+++ b/src/mongo/db/repl/replication_coordinator.h
@@ -612,7 +612,8 @@ public:
* Handles an incoming replSetGetStatus command. Adds BSON to 'result'. If kInitialSync is
* requested but initial sync is not running, kBasic will be used.
*/
- virtual Status processReplSetGetStatus(BSONObjBuilder* result,
+ virtual Status processReplSetGetStatus(OperationContext* opCtx,
+ BSONObjBuilder* result,
ReplSetGetStatusResponseStyle responseStyle) = 0;
/**
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index c3c67f39cb2..4e4d1fa1aac 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -3043,7 +3043,9 @@ StatusWith<BSONObj> ReplicationCoordinatorImpl::prepareReplSetUpdatePositionComm
}
Status ReplicationCoordinatorImpl::processReplSetGetStatus(
- BSONObjBuilder* response, ReplSetGetStatusResponseStyle responseStyle) {
+ OperationContext* opCtx,
+ BSONObjBuilder* response,
+ ReplSetGetStatusResponseStyle responseStyle) {
BSONObj initialSyncProgress;
if (responseStyle == ReplSetGetStatusResponseStyle::kInitialSync) {
@@ -3066,6 +3068,32 @@ Status ReplicationCoordinatorImpl::processReplSetGetStatus(
BSONObj electionParticipantMetrics =
ReplicationMetrics::get(getServiceContext()).getElectionParticipantMetricsBSON();
+ boost::optional<Timestamp> lastStableRecoveryTimestamp = boost::none;
+ try {
+ // Retrieving last stable recovery timestamp should not be blocked by oplog
+ // application.
+ ShouldNotConflictWithSecondaryBatchApplicationBlock shouldNotConflictBlock(
+ opCtx->lockState());
+ opCtx->lockState()->skipAcquireTicket();
+ // We need to hold the lock so that we don't run when storage is being shutdown.
+ Lock::GlobalLock lk(opCtx,
+ MODE_IS,
+ Date_t::now() + Milliseconds(5),
+ Lock::InterruptBehavior::kLeaveUnlocked,
+ true /* skipRSTLLock */);
+ if (lk.isLocked()) {
+ lastStableRecoveryTimestamp = _storage->getLastStableRecoveryTimestamp(_service);
+ } else {
+ LOGV2_WARNING(6100702,
+ "Failed to get last stable recovery timestamp due to {error}",
+ "error"_attr = "lock acquire timeout"_sd);
+ }
+ } catch (const ExceptionForCat<ErrorCategory::Interruption>& ex) {
+ LOGV2_WARNING(6100703,
+ "Failed to get last stable recovery timestamp due to {error}",
+ "error"_attr = redact(ex));
+ }
+
stdx::lock_guard<Latch> lk(_mutex);
if (_inShutdown) {
return Status(ErrorCodes::ShutdownInProgress, "shutdown in progress");
@@ -3080,7 +3108,7 @@ Status ReplicationCoordinatorImpl::processReplSetGetStatus(
initialSyncProgress,
electionCandidateMetrics,
electionParticipantMetrics,
- _storage->getLastStableRecoveryTimestamp(_service),
+ lastStableRecoveryTimestamp,
_externalState->tooStale()},
response,
&result);
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index b0e9fb0cba9..dae323acb51 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -224,7 +224,8 @@ public:
virtual StatusWith<BSONObj> prepareReplSetUpdatePositionCommand() const override;
- virtual Status processReplSetGetStatus(BSONObjBuilder* result,
+ virtual Status processReplSetGetStatus(OperationContext* opCtx,
+ BSONObjBuilder* result,
ReplSetGetStatusResponseStyle responseStyle) override;
virtual void appendSecondaryInfoData(BSONObjBuilder* result) override;
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp
index 1922af89a9a..2e08f3890d0 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp
@@ -1109,8 +1109,11 @@ TEST_F(ReplCoordHBV1Test, IgnoreTheContentsOfMetadataWhenItsReplicaSetIdDoesNotM
ASSERT_NOT_EQUALS(opTime.getTerm(), getTopoCoord().getTerm());
BSONObjBuilder statusBuilder;
+ auto opCtx = makeOperationContext();
ASSERT_OK(getReplCoord()->processReplSetGetStatus(
- &statusBuilder, ReplicationCoordinator::ReplSetGetStatusResponseStyle::kBasic));
+ opCtx.get(),
+ &statusBuilder,
+ ReplicationCoordinator::ReplSetGetStatusResponseStyle::kBasic));
auto statusObj = statusBuilder.obj();
LOGV2(21495, "replica set status = {statusObj}", "statusObj"_attr = statusObj);
diff --git a/src/mongo/db/repl/replication_coordinator_mock.cpp b/src/mongo/db/repl/replication_coordinator_mock.cpp
index f478e858094..788f2991a70 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.cpp
+++ b/src/mongo/db/repl/replication_coordinator_mock.cpp
@@ -456,7 +456,8 @@ void ReplicationCoordinatorMock::advanceCommitPoint(
void ReplicationCoordinatorMock::cancelAndRescheduleElectionTimeout() {}
-Status ReplicationCoordinatorMock::processReplSetGetStatus(BSONObjBuilder*,
+Status ReplicationCoordinatorMock::processReplSetGetStatus(OperationContext* opCtx,
+ BSONObjBuilder*,
ReplSetGetStatusResponseStyle) {
return Status::OK();
}
diff --git a/src/mongo/db/repl/replication_coordinator_mock.h b/src/mongo/db/repl/replication_coordinator_mock.h
index 54255c8398c..f20e2cadadc 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.h
+++ b/src/mongo/db/repl/replication_coordinator_mock.h
@@ -189,7 +189,9 @@ public:
virtual StatusWith<BSONObj> prepareReplSetUpdatePositionCommand() const override;
- virtual Status processReplSetGetStatus(BSONObjBuilder*, ReplSetGetStatusResponseStyle);
+ virtual Status processReplSetGetStatus(OperationContext* opCtx,
+ BSONObjBuilder*,
+ ReplSetGetStatusResponseStyle);
virtual void appendSecondaryInfoData(BSONObjBuilder* result);
diff --git a/src/mongo/db/repl/replication_coordinator_noop.cpp b/src/mongo/db/repl/replication_coordinator_noop.cpp
index f691df78494..655a5f48904 100644
--- a/src/mongo/db/repl/replication_coordinator_noop.cpp
+++ b/src/mongo/db/repl/replication_coordinator_noop.cpp
@@ -285,7 +285,8 @@ StatusWith<BSONObj> ReplicationCoordinatorNoOp::prepareReplSetUpdatePositionComm
MONGO_UNREACHABLE;
}
-Status ReplicationCoordinatorNoOp::processReplSetGetStatus(BSONObjBuilder*,
+Status ReplicationCoordinatorNoOp::processReplSetGetStatus(OperationContext* opCtx,
+ BSONObjBuilder*,
ReplSetGetStatusResponseStyle) {
MONGO_UNREACHABLE;
}
diff --git a/src/mongo/db/repl/replication_coordinator_noop.h b/src/mongo/db/repl/replication_coordinator_noop.h
index b6d3f6af039..7183fdaac4a 100644
--- a/src/mongo/db/repl/replication_coordinator_noop.h
+++ b/src/mongo/db/repl/replication_coordinator_noop.h
@@ -167,7 +167,9 @@ public:
StatusWith<BSONObj> prepareReplSetUpdatePositionCommand() const final;
- Status processReplSetGetStatus(BSONObjBuilder*, ReplSetGetStatusResponseStyle) final;
+ Status processReplSetGetStatus(OperationContext* opCtx,
+ BSONObjBuilder*,
+ ReplSetGetStatusResponseStyle) final;
void appendSecondaryInfoData(BSONObjBuilder*) final;
diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h
index 6c162d5ebd5..2da14343ef9 100644
--- a/src/mongo/db/repl/topology_coordinator.h
+++ b/src/mongo/db/repl/topology_coordinator.h
@@ -372,7 +372,8 @@ public:
const BSONObj electionCandidateMetrics;
const BSONObj electionParticipantMetrics;
- // boost::none if the storage engine does not support recovery to a timestamp.
+ // boost::none if the storage engine does not support recovery to a timestamp, or if the
+ // storage engine is not available.
// Timestamp::min() if a stable recovery timestamp is yet to be taken.
//
// On the replication layer, a non-min() timestamp ensures recoverable rollback is possible,
diff --git a/src/mongo/embedded/replication_coordinator_embedded.cpp b/src/mongo/embedded/replication_coordinator_embedded.cpp
index 3a9eb124d59..5a117927635 100644
--- a/src/mongo/embedded/replication_coordinator_embedded.cpp
+++ b/src/mongo/embedded/replication_coordinator_embedded.cpp
@@ -309,7 +309,8 @@ StatusWith<BSONObj> ReplicationCoordinatorEmbedded::prepareReplSetUpdatePosition
UASSERT_NOT_IMPLEMENTED;
}
-Status ReplicationCoordinatorEmbedded::processReplSetGetStatus(BSONObjBuilder*,
+Status ReplicationCoordinatorEmbedded::processReplSetGetStatus(OperationContext* opCtx,
+ BSONObjBuilder*,
ReplSetGetStatusResponseStyle) {
UASSERT_NOT_IMPLEMENTED;
}
diff --git a/src/mongo/embedded/replication_coordinator_embedded.h b/src/mongo/embedded/replication_coordinator_embedded.h
index bcbf9efa036..9a408d645fd 100644
--- a/src/mongo/embedded/replication_coordinator_embedded.h
+++ b/src/mongo/embedded/replication_coordinator_embedded.h
@@ -176,7 +176,9 @@ public:
StatusWith<BSONObj> prepareReplSetUpdatePositionCommand() const override;
- Status processReplSetGetStatus(BSONObjBuilder*, ReplSetGetStatusResponseStyle) override;
+ Status processReplSetGetStatus(OperationContext* opCtx,
+ BSONObjBuilder*,
+ ReplSetGetStatusResponseStyle) override;
void appendSecondaryInfoData(BSONObjBuilder*) override;