diff options
author | Matthew Russotto <matthew.russotto@mongodb.com> | 2022-09-27 11:41:11 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-10-28 19:13:33 +0000 |
commit | 6c1b9191fd8fc814aae17b0c99785983c190f5bf (patch) | |
tree | 569c18bf5ec633b73d275301701682b57ead47c5 | |
parent | ea921842b91b81bce3508c131576ac3d6a9d111f (diff) | |
download | mongo-6c1b9191fd8fc814aae17b0c99785983c190f5bf.tar.gz |
SERVER-69861 Uninterruptible lock guard in election causes FCBIS to hang
Support for reading last vote in data_replicator_external_state
(cherry picked from commit 810d5c1f2b0f8d3767df55812c3324d6171aa107)
8 files changed, 37 insertions, 1 deletion
diff --git a/src/mongo/db/repl/data_replicator_external_state.h b/src/mongo/db/repl/data_replicator_external_state.h index 87826b0f199..d5be160d5cf 100644 --- a/src/mongo/db/repl/data_replicator_external_state.h +++ b/src/mongo/db/repl/data_replicator_external_state.h @@ -30,6 +30,7 @@ #pragma once #include "mongo/base/status_with.h" +#include "mongo/db/repl/last_vote.h" #include "mongo/db/repl/multiapplier.h" #include "mongo/db/repl/oplog_applier.h" #include "mongo/db/repl/oplog_buffer.h" @@ -144,6 +145,12 @@ public: virtual Status storeLocalConfigDocument(OperationContext* opCtx, const BSONObj& config) = 0; /** + * Returns the current stored replica set "last vote" if there is one, or an error why there + * isn't. + */ + virtual StatusWith<LastVote> loadLocalLastVoteDocument(OperationContext* opCtx) const = 0; + + /** * Returns the replication journal listener. */ virtual JournalListener* getReplicationJournalListener() = 0; diff --git a/src/mongo/db/repl/data_replicator_external_state_impl.cpp b/src/mongo/db/repl/data_replicator_external_state_impl.cpp index 00c924ff1ea..9bd60084aed 100644 --- a/src/mongo/db/repl/data_replicator_external_state_impl.cpp +++ b/src/mongo/db/repl/data_replicator_external_state_impl.cpp @@ -175,6 +175,11 @@ Status DataReplicatorExternalStateImpl::storeLocalConfigDocument(OperationContex opCtx, config, false /* write oplog entry */); } +StatusWith<LastVote> DataReplicatorExternalStateImpl::loadLocalLastVoteDocument( + OperationContext* opCtx) const { + return _replicationCoordinatorExternalState->loadLocalLastVoteDocument(opCtx); +} + JournalListener* DataReplicatorExternalStateImpl::getReplicationJournalListener() { return _replicationCoordinatorExternalState->getReplicationJournalListener(); } diff --git a/src/mongo/db/repl/data_replicator_external_state_impl.h b/src/mongo/db/repl/data_replicator_external_state_impl.h index c408c484dc9..9cd2364927e 100644 --- a/src/mongo/db/repl/data_replicator_external_state_impl.h +++ 
b/src/mongo/db/repl/data_replicator_external_state_impl.h @@ -87,6 +87,8 @@ public: Status storeLocalConfigDocument(OperationContext* opCtx, const BSONObj& config) override; + StatusWith<LastVote> loadLocalLastVoteDocument(OperationContext* opCtx) const override; + JournalListener* getReplicationJournalListener() override; protected: diff --git a/src/mongo/db/repl/data_replicator_external_state_mock.cpp b/src/mongo/db/repl/data_replicator_external_state_mock.cpp index ddcfc701ca6..617f4f24098 100644 --- a/src/mongo/db/repl/data_replicator_external_state_mock.cpp +++ b/src/mongo/db/repl/data_replicator_external_state_mock.cpp @@ -147,5 +147,10 @@ JournalListener* DataReplicatorExternalStateMock::getReplicationJournalListener( return nullptr; } +StatusWith<LastVote> DataReplicatorExternalStateMock::loadLocalLastVoteDocument( + OperationContext* opCtx) const { + return StatusWith<LastVote>(ErrorCodes::NoMatchingDocument, "mock"); +} + } // namespace repl } // namespace mongo diff --git a/src/mongo/db/repl/data_replicator_external_state_mock.h b/src/mongo/db/repl/data_replicator_external_state_mock.h index 535ee513102..beb7ecdc28d 100644 --- a/src/mongo/db/repl/data_replicator_external_state_mock.h +++ b/src/mongo/db/repl/data_replicator_external_state_mock.h @@ -77,6 +77,8 @@ public: Status storeLocalConfigDocument(OperationContext* opCtx, const BSONObj& config) override; + StatusWith<LastVote> loadLocalLastVoteDocument(OperationContext* opCtx) const override; + JournalListener* getReplicationJournalListener() override; // Task executor. 
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp index 27b6786fe61..f77c5f88bdc 100644 --- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp @@ -701,7 +701,16 @@ Status ReplicationCoordinatorExternalStateImpl::storeLocalLastVoteDocument( // don't want to have this process interrupted due to us stepping down, since we // want to be able to cast our vote for a new primary right away. Both the write's lock // acquisition and the "waitUntilDurable" lock acquisition must be uninterruptible. - UninterruptibleLockGuard noInterrupt(opCtx->lockState()); + // + // It is not safe to take an uninterruptible lock during STARTUP2, so we only take this lock + // if we are primary or secondary. We do not have the RSTL but that is OK because we never + // move in to STARTUP2 from PRIMARY or SECONDARY, so the consequence of a stale state is + // only that we don't take an uninterruptible lock when we should. 
+ auto* replCoord = ReplicationCoordinator::get(opCtx); + + boost::optional<UninterruptibleLockGuard> noInterrupt; + if (replCoord->isInPrimaryOrSecondaryState_UNSAFE()) + noInterrupt.emplace(opCtx->lockState()); Status status = writeConflictRetry( opCtx, diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index f3b5ee9ad01..c047382f891 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -5654,6 +5654,8 @@ Status ReplicationCoordinatorImpl::processReplSetRequestVotes( LastVote lastVote{args.getTerm(), args.getCandidateIndex()}; Status status = _externalState->storeLocalLastVoteDocument(opCtx, lastVote); if (!status.isOK()) { + // Note the topology coordinator has already advanced its last vote at this point, + // so this node will not be able to vote in this election; this is a "spoiled" vote. LOGV2_ERROR(21428, "replSetRequestVotes failed to store LastVote document", "error"_attr = status); diff --git a/src/mongo/db/repl/tenant_migration_recipient_service.cpp b/src/mongo/db/repl/tenant_migration_recipient_service.cpp index 3f32411c555..de8cda27137 100644 --- a/src/mongo/db/repl/tenant_migration_recipient_service.cpp +++ b/src/mongo/db/repl/tenant_migration_recipient_service.cpp @@ -256,6 +256,10 @@ public: MONGO_UNREACHABLE; } + StatusWith<LastVote> loadLocalLastVoteDocument(OperationContext* opCtx) const final { + MONGO_UNREACHABLE; + } + JournalListener* getReplicationJournalListener() final { MONGO_UNREACHABLE; } |