summary refs log tree commit diff
diff options
context:
space:
mode:
author Matthew Russotto <matthew.russotto@mongodb.com> 2022-09-27 11:41:11 -0400
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-10-28 19:13:33 +0000
commit 6c1b9191fd8fc814aae17b0c99785983c190f5bf (patch)
tree 569c18bf5ec633b73d275301701682b57ead47c5
parent ea921842b91b81bce3508c131576ac3d6a9d111f (diff)
download mongo-6c1b9191fd8fc814aae17b0c99785983c190f5bf.tar.gz
SERVER-69861 Uninterruptible lock guard in election causes FCBIS to hang
Support for reading last vote in data_replicator_external_state (cherry picked from commit 810d5c1f2b0f8d3767df55812c3324d6171aa107)
-rw-r--r-- src/mongo/db/repl/data_replicator_external_state.h 7
-rw-r--r-- src/mongo/db/repl/data_replicator_external_state_impl.cpp 5
-rw-r--r-- src/mongo/db/repl/data_replicator_external_state_impl.h 2
-rw-r--r-- src/mongo/db/repl/data_replicator_external_state_mock.cpp 5
-rw-r--r-- src/mongo/db/repl/data_replicator_external_state_mock.h 2
-rw-r--r-- src/mongo/db/repl/replication_coordinator_external_state_impl.cpp 11
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl.cpp 2
-rw-r--r-- src/mongo/db/repl/tenant_migration_recipient_service.cpp 4
8 files changed, 37 insertions, 1 deletions
diff --git a/src/mongo/db/repl/data_replicator_external_state.h b/src/mongo/db/repl/data_replicator_external_state.h
index 87826b0f199..d5be160d5cf 100644
--- a/src/mongo/db/repl/data_replicator_external_state.h
+++ b/src/mongo/db/repl/data_replicator_external_state.h
@@ -30,6 +30,7 @@
#pragma once
#include "mongo/base/status_with.h"
+#include "mongo/db/repl/last_vote.h"
#include "mongo/db/repl/multiapplier.h"
#include "mongo/db/repl/oplog_applier.h"
#include "mongo/db/repl/oplog_buffer.h"
@@ -144,6 +145,12 @@ public:
virtual Status storeLocalConfigDocument(OperationContext* opCtx, const BSONObj& config) = 0;
/**
+ * Returns the current stored replica set "last vote" if there is one, or an error explaining
+ * why there isn't.
+ */
+ virtual StatusWith<LastVote> loadLocalLastVoteDocument(OperationContext* opCtx) const = 0;
+
+ /**
* Returns the replication journal listener.
*/
virtual JournalListener* getReplicationJournalListener() = 0;
diff --git a/src/mongo/db/repl/data_replicator_external_state_impl.cpp b/src/mongo/db/repl/data_replicator_external_state_impl.cpp
index 00c924ff1ea..9bd60084aed 100644
--- a/src/mongo/db/repl/data_replicator_external_state_impl.cpp
+++ b/src/mongo/db/repl/data_replicator_external_state_impl.cpp
@@ -175,6 +175,11 @@ Status DataReplicatorExternalStateImpl::storeLocalConfigDocument(OperationContex
opCtx, config, false /* write oplog entry */);
}
+StatusWith<LastVote> DataReplicatorExternalStateImpl::loadLocalLastVoteDocument(
+ OperationContext* opCtx) const {
+ return _replicationCoordinatorExternalState->loadLocalLastVoteDocument(opCtx);
+}
+
JournalListener* DataReplicatorExternalStateImpl::getReplicationJournalListener() {
return _replicationCoordinatorExternalState->getReplicationJournalListener();
}
diff --git a/src/mongo/db/repl/data_replicator_external_state_impl.h b/src/mongo/db/repl/data_replicator_external_state_impl.h
index c408c484dc9..9cd2364927e 100644
--- a/src/mongo/db/repl/data_replicator_external_state_impl.h
+++ b/src/mongo/db/repl/data_replicator_external_state_impl.h
@@ -87,6 +87,8 @@ public:
Status storeLocalConfigDocument(OperationContext* opCtx, const BSONObj& config) override;
+ StatusWith<LastVote> loadLocalLastVoteDocument(OperationContext* opCtx) const override;
+
JournalListener* getReplicationJournalListener() override;
protected:
diff --git a/src/mongo/db/repl/data_replicator_external_state_mock.cpp b/src/mongo/db/repl/data_replicator_external_state_mock.cpp
index ddcfc701ca6..617f4f24098 100644
--- a/src/mongo/db/repl/data_replicator_external_state_mock.cpp
+++ b/src/mongo/db/repl/data_replicator_external_state_mock.cpp
@@ -147,5 +147,10 @@ JournalListener* DataReplicatorExternalStateMock::getReplicationJournalListener(
return nullptr;
}
+StatusWith<LastVote> DataReplicatorExternalStateMock::loadLocalLastVoteDocument(
+ OperationContext* opCtx) const {
+ return StatusWith<LastVote>(ErrorCodes::NoMatchingDocument, "mock");
+}
+
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/data_replicator_external_state_mock.h b/src/mongo/db/repl/data_replicator_external_state_mock.h
index 535ee513102..beb7ecdc28d 100644
--- a/src/mongo/db/repl/data_replicator_external_state_mock.h
+++ b/src/mongo/db/repl/data_replicator_external_state_mock.h
@@ -77,6 +77,8 @@ public:
Status storeLocalConfigDocument(OperationContext* opCtx, const BSONObj& config) override;
+ StatusWith<LastVote> loadLocalLastVoteDocument(OperationContext* opCtx) const override;
+
JournalListener* getReplicationJournalListener() override;
// Task executor.
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
index 27b6786fe61..f77c5f88bdc 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
@@ -701,7 +701,16 @@ Status ReplicationCoordinatorExternalStateImpl::storeLocalLastVoteDocument(
// don't want to have this process interrupted due to us stepping down, since we
// want to be able to cast our vote for a new primary right away. Both the write's lock
// acquisition and the "waitUntilDurable" lock acquisition must be uninterruptible.
- UninterruptibleLockGuard noInterrupt(opCtx->lockState());
+ //
+ // It is not safe to take an uninterruptible lock during STARTUP2, so we only take this lock
+ // if we are primary or secondary. We do not have the RSTL but that is OK because we never
+ // move into STARTUP2 from PRIMARY or SECONDARY, so the consequence of a stale state is
+ // only that we don't take an uninterruptible lock when we should.
+ auto* replCoord = ReplicationCoordinator::get(opCtx);
+
+ boost::optional<UninterruptibleLockGuard> noInterrupt;
+ if (replCoord->isInPrimaryOrSecondaryState_UNSAFE())
+ noInterrupt.emplace(opCtx->lockState());
Status status = writeConflictRetry(
opCtx,
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index f3b5ee9ad01..c047382f891 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -5654,6 +5654,8 @@ Status ReplicationCoordinatorImpl::processReplSetRequestVotes(
LastVote lastVote{args.getTerm(), args.getCandidateIndex()};
Status status = _externalState->storeLocalLastVoteDocument(opCtx, lastVote);
if (!status.isOK()) {
+ // Note the topology coordinator has already advanced its last vote at this point,
+ // so this node will not be able to vote in this election; this is a "spoiled" vote.
LOGV2_ERROR(21428,
"replSetRequestVotes failed to store LastVote document",
"error"_attr = status);
diff --git a/src/mongo/db/repl/tenant_migration_recipient_service.cpp b/src/mongo/db/repl/tenant_migration_recipient_service.cpp
index 3f32411c555..de8cda27137 100644
--- a/src/mongo/db/repl/tenant_migration_recipient_service.cpp
+++ b/src/mongo/db/repl/tenant_migration_recipient_service.cpp
@@ -256,6 +256,10 @@ public:
MONGO_UNREACHABLE;
}
+ StatusWith<LastVote> loadLocalLastVoteDocument(OperationContext* opCtx) const final {
+ MONGO_UNREACHABLE;
+ }
+
JournalListener* getReplicationJournalListener() final {
MONGO_UNREACHABLE;
}