summary refs log tree commit diff
diff options
context:
space:
mode:
authorJack Mulrow <jack.mulrow@mongodb.com>2017-01-11 18:36:16 -0500
committerJack Mulrow <jack.mulrow@mongodb.com>2017-01-25 10:43:56 -0500
commitef3e1bc78e997f0d9f22f45aeb1d8e3b6ac14a14 (patch)
tree96e8d10d589c4c739fe95f487910426d105add22
parent190538da7580eee02ab36993c426bf9b94005247 (diff)
downloadmongo-ef3e1bc78e997f0d9f22f45aeb1d8e3b6ac14a14.tar.gz
SERVER-27125 Arbiters in PV1 vote no if they can see a healthy primary of equal or greater priority to the candidate (tags: r3.2.12-rc0, r3.2.12, r3.2.11)
(cherry picked from commit 97718a410d3d40f5c3f01dfae76e7ddd910897af)
-rw-r--r--buildscripts/resmokeconfig/suites/replica_sets_legacy.yml2
-rw-r--r--jstests/replsets/no_flapping_during_network_partition.js60
-rw-r--r--src/mongo/db/repl/topology_coordinator_impl.cpp21
-rw-r--r--src/mongo/db/repl/topology_coordinator_impl.h4
-rw-r--r--src/mongo/db/repl/topology_coordinator_impl_test.cpp37
5 files changed, 124 insertions, 0 deletions
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_legacy.yml b/buildscripts/resmokeconfig/suites/replica_sets_legacy.yml
index 0fcf3cdfe7a..70ef1cada5e 100644
--- a/buildscripts/resmokeconfig/suites/replica_sets_legacy.yml
+++ b/buildscripts/resmokeconfig/suites/replica_sets_legacy.yml
@@ -19,6 +19,8 @@ selector:
- jstests/replsets/last_vote.js
# PV0's w:majority guarantees aren't strong enough for this test to pass.
- jstests/replsets/write_concern_after_stepdown_and_stepup.js
+ # This test expects the server to log a PV1-only vote-not-granted reason
+ - jstests/replsets/no_flapping_during_network_partition.js
executor:
js_test:
diff --git a/jstests/replsets/no_flapping_during_network_partition.js b/jstests/replsets/no_flapping_during_network_partition.js
new file mode 100644
index 00000000000..1fb987aef99
--- /dev/null
+++ b/jstests/replsets/no_flapping_during_network_partition.js
@@ -0,0 +1,60 @@
+/*
+ * Test that arbiters vote no in elections if they can see a healthy primary of equal or greater
+ * priority to the candidate, preventing flapping during certain kinds of network partitions.
+ *
+ * 1. Initiate a 3-node replica set with one arbiter (PSA) and a higher priority primary.
+ * 2. Create a network partition between the primary and secondary.
+ * 3. Wait long enough for the secondary to call for an election.
+ * 4. Verify the primary and secondary did not change.
+ * 5. Heal the partition.
+ * 6. Verify the primary and secondary did not change and are in the initial term.
+ */
+
+(function() {
+ "use strict";
+
+ load("jstests/libs/check_log.js");
+
+ var name = "no_flapping_during_network_partition";
+
+ var replTest = new ReplSetTest({name: name, nodes: 3, useBridge: true});
+ var nodes = replTest.startSet();
+ var config = replTest.getReplSetConfig();
+ config.members[0].priority = 5;
+ config.members[2].arbiterOnly = true;
+ config.settings = {
+ electionTimeoutMillis: 2000
+ };
+ replTest.initiate(config);
+
+ function getTerm(node) {
+ return node.adminCommand({replSetGetStatus: 1}).term;
+ }
+
+ replTest.waitForState(nodes[0], ReplSetTest.State.PRIMARY);
+
+ var primary = replTest.getPrimary();
+ var secondary = replTest.getSecondary();
+ var initialTerm = getTerm(primary);
+
+ jsTestLog("Create a network partition between the primary and secondary.");
+ primary.disconnect(secondary);
+
+ jsTestLog("Wait long enough for the secondary to call for an election.");
+ checkLog.contains(secondary, "can see a healthy primary of equal or greater priority");
+
+ jsTestLog("Verify the primary and secondary do not change during the partition.");
+ assert.eq(primary, replTest.getPrimary());
+ assert.eq(secondary, replTest.getSecondary());
+
+ jsTestLog("Heal the partition.");
+ primary.reconnect(secondary);
+
+ jsTestLog("Verify the primary and secondary did not change and are in the initial term.");
+ assert.eq(primary, replTest.getPrimary());
+ assert.eq(secondary, replTest.getSecondary());
+ assert.eq(initialTerm, getTerm(primary));
+ assert.eq(initialTerm, getTerm(secondary));
+
+ replTest.stopSet();
+})();
diff --git a/src/mongo/db/repl/topology_coordinator_impl.cpp b/src/mongo/db/repl/topology_coordinator_impl.cpp
index b235cbf5482..56978bc18a7 100644
--- a/src/mongo/db/repl/topology_coordinator_impl.cpp
+++ b/src/mongo/db/repl/topology_coordinator_impl.cpp
@@ -1361,6 +1361,23 @@ bool TopologyCoordinatorImpl::_aMajoritySeemsToBeUp() const {
return vUp * 2 > _rsConfig.getTotalVotingMembers();
}
+bool TopologyCoordinatorImpl::_canSeeHealthyPrimaryOfEqualOrGreaterPriority(
+ const int candidateIndex) const {
+ const double candidatePriority = _rsConfig.getMemberAt(candidateIndex).getPriority();
+ for (auto it = _hbdata.begin(); it != _hbdata.end(); ++it) {
+ if (!it->up() || it->getState() != MemberState::RS_PRIMARY) {
+ continue;
+ }
+ const int itIndex = indexOfIterator(_hbdata, it);
+ const double priority = _rsConfig.getMemberAt(itIndex).getPriority();
+ if (itIndex != candidateIndex && priority >= candidatePriority) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
bool TopologyCoordinatorImpl::_isOpTimeCloseEnoughToLatestToElect(
const OpTime& otherOpTime, const OpTime& ourLastOpApplied) const {
const OpTime latestKnownOpTime = _latestKnownOpTime(ourLastOpApplied);
@@ -2461,6 +2478,10 @@ void TopologyCoordinatorImpl::processReplSetRequestVotes(const ReplSetRequestVot
} else if (!args.isADryRun() && _lastVote.getTerm() == args.getTerm()) {
response->setVoteGranted(false);
response->setReason("already voted for another candidate this term");
+ } else if (_selfConfig().isArbiter() &&
+ _canSeeHealthyPrimaryOfEqualOrGreaterPriority(args.getCandidateIndex())) {
+ response->setVoteGranted(false);
+ response->setReason("can see a healthy primary of equal or greater priority");
} else {
if (!args.isADryRun()) {
_lastVote.setTerm(args.getTerm());
diff --git a/src/mongo/db/repl/topology_coordinator_impl.h b/src/mongo/db/repl/topology_coordinator_impl.h
index a6c1504829f..4361f27f3bc 100644
--- a/src/mongo/db/repl/topology_coordinator_impl.h
+++ b/src/mongo/db/repl/topology_coordinator_impl.h
@@ -308,6 +308,10 @@ private:
// Sees if a majority number of votes are held by members who are currently "up"
bool _aMajoritySeemsToBeUp() const;
+ // Returns true if the node can see a healthy primary of equal or greater priority to the
+ // candidate.
+ bool _canSeeHealthyPrimaryOfEqualOrGreaterPriority(const int candidateIndex) const;
+
// Is otherOpTime close enough (within 10 seconds) to the latest known optime to qualify
// for an election
bool _isOpTimeCloseEnoughToLatestToElect(const OpTime& otherOpTime,
diff --git a/src/mongo/db/repl/topology_coordinator_impl_test.cpp b/src/mongo/db/repl/topology_coordinator_impl_test.cpp
index 8d3ae2b6516..e35499c7797 100644
--- a/src/mongo/db/repl/topology_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/topology_coordinator_impl_test.cpp
@@ -5513,6 +5513,43 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantVoteWhenConfigVersionDoesNotMatch) {
ASSERT_FALSE(response.getVoteGranted());
}
+TEST_F(TopoCoordTest, ArbiterDoesNotGrantVoteWhenItCanSeeAHealthyPrimaryOfEqualOrGreaterPriority) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself"
+ << "arbiterOnly" << true)
+ << BSON("_id" << 20 << "host"
+ << "h2"
+ << "priority" << 5) << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+ heartbeatFromMember(HostAndPort("h2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ OpTime(Timestamp(0, 0), 0),
+ Milliseconds(300));
+ heartbeatFromMember(HostAndPort("h3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(0, 0), 0),
+ Milliseconds(300));
+
+ ReplSetRequestVotesArgs args;
+ args.initialize(BSON("replSetRequestVotes" << 1 << "setName"
+ << "rs0"
+ << "term" << 1LL << "candidateIndex" << 2LL
+ << "configVersion" << 1LL << "lastCommittedOp"
+ << BSON("ts" << Timestamp(10, 0) << "term" << 0LL)));
+ ReplSetRequestVotesResponse response;
+ OpTime lastAppliedOpTime;
+
+ getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime);
+ ASSERT_EQUALS("can see a healthy primary of equal or greater priority", response.getReason());
+ ASSERT_FALSE(response.getVoteGranted());
+}
+
TEST_F(TopoCoordTest, NodeDoesNotGrantVoteWhenTermIsStale) {
updateConfig(BSON("_id"
<< "rs0"