summaryrefslogtreecommitdiff
path: root/src/mongo/db
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/db')
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.cpp8
-rw-r--r--src/mongo/db/repl/topology_coordinator.cpp13
-rw-r--r--src/mongo/db/repl/topology_coordinator_v1_test.cpp18
3 files changed, 24 insertions, 15 deletions
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index df3fc0c82fd..d37f370a72e 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -126,7 +126,9 @@ MONGO_FAIL_POINT_DEFINE(hangWhileWaitingForHelloResponse);
MONGO_FAIL_POINT_DEFINE(skipDurableTimestampUpdates);
// Will cause a reconfig to hang after completing the config quorum check.
MONGO_FAIL_POINT_DEFINE(omitConfigQuorumCheck);
-// Will cause signal drain complete to hang after reconfig
+// Will cause signal drain complete to hang before reconfig.
+MONGO_FAIL_POINT_DEFINE(hangBeforeReconfigOnDrainComplete);
+// Will cause signal drain complete to hang after reconfig.
MONGO_FAIL_POINT_DEFINE(hangAfterReconfigOnDrainComplete);
// Number of times we tried to go live as a secondary.
@@ -1135,6 +1137,10 @@ void ReplicationCoordinatorImpl::signalDrainComplete(OperationContext* opCtx,
lk.unlock();
if (needBumpConfigTerm) {
+ if (MONGO_unlikely(hangBeforeReconfigOnDrainComplete.shouldFail())) {
+ LOGV2(5726200, "Hanging due to hangBeforeReconfigOnDrainComplete failpoint");
+ hangBeforeReconfigOnDrainComplete.pauseWhileSet(opCtx);
+ }
// We re-write the term but keep version the same. This is conceptually a no-op
// in the config consensus group, analogous to writing a new oplog entry
// in Raft log state machine on step up.
diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp
index d46bf2b3c9a..869de681227 100644
--- a/src/mongo/db/repl/topology_coordinator.cpp
+++ b/src/mongo/db/repl/topology_coordinator.cpp
@@ -3101,9 +3101,9 @@ void TopologyCoordinator::processReplSetRequestVotes(const ReplSetRequestVotesAr
return;
}
- if (args.getConfigVersionAndTerm() != _rsConfig.getConfigVersionAndTerm()) {
+ if (args.getConfigVersionAndTerm() < _rsConfig.getConfigVersionAndTerm()) {
response->setVoteGranted(false);
- response->setReason("candidate's config with {} differs from mine with {}"_format(
+ response->setReason("candidate's config with {} is older than mine with {}"_format(
args.getConfigVersionAndTerm(), _rsConfig.getConfigVersionAndTerm()));
} else if (args.getTerm() < _term) {
response->setVoteGranted(false);
@@ -3125,8 +3125,15 @@ void TopologyCoordinator::processReplSetRequestVotes(const ReplSetRequestVotesAr
_rsConfig.getMemberAt(_lastVote.getCandidateIndex()).getHostAndPort(),
_lastVote.getTerm()));
} else {
+ bool isSameConfig = args.getConfigVersionAndTerm() == _rsConfig.getConfigVersionAndTerm();
int betterPrimary = _findHealthyPrimaryOfEqualOrGreaterPriority(args.getCandidateIndex());
- if (_selfConfig().isArbiter() && betterPrimary >= 0) {
+ // Do not grant vote if we are an arbiter and can see a healthy primary of greater or equal
+ // priority, to prevent primary flapping when there are two nodes that can't talk to each
+ // other but that we, as arbiter, can talk to. We only do this if the candidate's config
+ // is the same as ours, otherwise the primary information might be stale and we might not be
+ // an arbiter in the candidate's newer config. We might also hit an invariant described in
+ // SERVER-46387 without the check for same config.
+ if (isSameConfig && _selfConfig().isArbiter() && betterPrimary >= 0) {
response->setVoteGranted(false);
response->setReason(
"can see a healthy primary ({}) of equal or greater priority"_format(
diff --git a/src/mongo/db/repl/topology_coordinator_v1_test.cpp b/src/mongo/db/repl/topology_coordinator_v1_test.cpp
index 1be2b970576..476adf62f90 100644
--- a/src/mongo/db/repl/topology_coordinator_v1_test.cpp
+++ b/src/mongo/db/repl/topology_coordinator_v1_test.cpp
@@ -3491,7 +3491,7 @@ public:
TEST_F(ConfigTermAndVersionVoteTest, DataNodeDoesNotGrantVoteWhenConfigVersionIsLower) {
auto response = testWithArbiter(false, 1, 2);
ASSERT_EQUALS(
- "candidate's config with {version: 1, term: 2} differs from mine with"
+ "candidate's config with {version: 1, term: 2} is older than mine with"
" {version: 2, term: 2}",
response.getReason());
}
@@ -3499,7 +3499,7 @@ TEST_F(ConfigTermAndVersionVoteTest, DataNodeDoesNotGrantVoteWhenConfigVersionIs
TEST_F(ConfigTermAndVersionVoteTest, ArbiterDoesNotGrantVoteWhenConfigVersionIsLower) {
auto response = testWithArbiter(true, 1, 2);
ASSERT_EQUALS(
- "candidate's config with {version: 1, term: 2} differs from mine with"
+ "candidate's config with {version: 1, term: 2} is older than mine with"
" {version: 2, term: 2}",
response.getReason());
}
@@ -3507,7 +3507,7 @@ TEST_F(ConfigTermAndVersionVoteTest, ArbiterDoesNotGrantVoteWhenConfigVersionIsL
TEST_F(ConfigTermAndVersionVoteTest, DataNodeDoesNotGrantVoteWhenConfigTermIsLower) {
auto response = testWithArbiter(false, 2, 1);
ASSERT_EQUALS(
- "candidate's config with {version: 2, term: 1} differs from mine with"
+ "candidate's config with {version: 2, term: 1} is older than mine with"
" {version: 2, term: 2}",
response.getReason());
}
@@ -3515,7 +3515,7 @@ TEST_F(ConfigTermAndVersionVoteTest, DataNodeDoesNotGrantVoteWhenConfigTermIsLow
TEST_F(ConfigTermAndVersionVoteTest, ArbiterDoesNotGrantVoteWhenConfigTermIsLower) {
auto response = testWithArbiter(true, 2, 1);
ASSERT_EQUALS(
- "candidate's config with {version: 2, term: 1} differs from mine with"
+ "candidate's config with {version: 2, term: 1} is older than mine with"
" {version: 2, term: 2}",
response.getReason());
}
@@ -3683,7 +3683,7 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantDryRunVoteWhenConfigVersionIsLower) {
getTopoCoord().processReplSetRequestVotes(args, &response);
ASSERT_EQUALS(
- "candidate's config with {version: 0, term: 1} differs from mine with {version: 1, term: "
+ "candidate's config with {version: 0, term: 1} is older than mine with {version: 1, term: "
"1}",
response.getReason());
ASSERT_EQUALS(1, response.getTerm());
@@ -3738,7 +3738,7 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantDryRunVoteWhenTermIsStale) {
ASSERT_FALSE(response.getVoteGranted());
}
-TEST_F(TopoCoordTest, NodeDoesNotGrantVoteWhenTermIsHigherButConfigVersionIsLower) {
+TEST_F(TopoCoordTest, NodeGrantsVoteWhenTermIsHigherButConfigVersionIsLower) {
updateConfig(BSON("_id"
<< "rs0"
<< "version" << 2 << "term" << 1LL << "members"
@@ -3769,11 +3769,7 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantVoteWhenTermIsHigherButConfigVersionIsLowe
getTopoCoord().processReplSetRequestVotes(args, &response);
// Candidate's config (t, v) is (2, 1) and our config is (1, 2). Even though the candidate's
// config version is lower, we grant our vote because the candidate's config term is higher.
- ASSERT_FALSE(response.getVoteGranted());
- ASSERT_EQ(
- "candidate's config with {version: 1, term: 2} differs from mine with {version: 2, term: "
- "1}",
- response.getReason());
+ ASSERT_TRUE(response.getVoteGranted());
}
TEST_F(TopoCoordTest, GrantDryRunVoteEvenWhenTermHasBeenSeen) {