author     Jack Mulrow <jack.mulrow@mongodb.com>   2017-01-11 18:36:16 -0500
committer  Jack Mulrow <jack.mulrow@mongodb.com>   2017-01-25 10:43:56 -0500
commit     ef3e1bc78e997f0d9f22f45aeb1d8e3b6ac14a14
tree       96e8d10d589c4c739fe95f487910426d105add22
parent     190538da7580eee02ab36993c426bf9b94005247
download   mongo-r3.2.12-rc0.tar.gz
SERVER-27125 Arbiters in PV1 vote no if they can see a healthy primary of equal or greater priority to the candidate
(cherry picked from commit 97718a410d3d40f5c3f01dfae76e7ddd910897af)
5 files changed, 124 insertions, 0 deletions
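Before reading the diff, the behavior change in one sentence: when an arbiter receives a vote request, it now refuses to vote if it can still see a healthy primary whose priority is greater than or equal to the candidate's, so a secondary cut off from the primary cannot drag the set through repeated elections. The sketch below is a minimal, self-contained illustration of that check under simplified assumptions; the Member struct and the free function are invented stand-ins for this sketch, not the real TopologyCoordinatorImpl types, which consult the replica set config and per-member heartbeat data instead.

#include <cassert>
#include <cstddef>
#include <vector>

// Simplified stand-ins for a replica set config entry and the heartbeat
// view of one member (names invented for this sketch).
struct Member {
    double priority;  // priority from the replica set config
    bool up;          // last heartbeat succeeded
    bool isPrimary;   // heartbeat reported the member in PRIMARY state
};

// Mirrors the idea of _canSeeHealthyPrimaryOfEqualOrGreaterPriority(): scan
// every other member and report whether some reachable primary has priority
// greater than or equal to the candidate's.
bool canSeeHealthyPrimaryOfEqualOrGreaterPriority(const std::vector<Member>& members,
                                                  std::size_t candidateIndex) {
    const double candidatePriority = members[candidateIndex].priority;
    for (std::size_t i = 0; i < members.size(); ++i) {
        if (i == candidateIndex) {
            continue;
        }
        if (members[i].up && members[i].isPrimary &&
            members[i].priority >= candidatePriority) {
            return true;
        }
    }
    return false;
}

int main() {
    // Index 0: priority-5 primary, index 1: priority-1 secondary (the
    // candidate), index 2: the arbiter itself.
    std::vector<Member> members = {
        {5.0, true, true},   // healthy, higher-priority primary
        {1.0, true, false},  // candidate asking for the arbiter's vote
        {0.0, true, false},  // arbiter
    };

    // The arbiter still sees the priority-5 primary, so it votes no.
    assert(canSeeHealthyPrimaryOfEqualOrGreaterPriority(members, 1));

    // Once the primary becomes unreachable, the check no longer blocks the vote.
    members[0].up = false;
    assert(!canSeeHealthyPrimaryOfEqualOrGreaterPriority(members, 1));
    return 0;
}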
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_legacy.yml b/buildscripts/resmokeconfig/suites/replica_sets_legacy.yml
index 0fcf3cdfe7a..70ef1cada5e 100644
--- a/buildscripts/resmokeconfig/suites/replica_sets_legacy.yml
+++ b/buildscripts/resmokeconfig/suites/replica_sets_legacy.yml
@@ -19,6 +19,8 @@ selector:
   - jstests/replsets/last_vote.js
   # PV0's w:majority guarantees aren't strong enough for this test to pass.
   - jstests/replsets/write_concern_after_stepdown_and_stepup.js
+  # This test expects the server to log a PV1-only vote-not-granted reason
+  - jstests/replsets/no_flapping_during_network_partition.js
 
 executor:
   js_test:
diff --git a/jstests/replsets/no_flapping_during_network_partition.js b/jstests/replsets/no_flapping_during_network_partition.js
new file mode 100644
index 00000000000..1fb987aef99
--- /dev/null
+++ b/jstests/replsets/no_flapping_during_network_partition.js
@@ -0,0 +1,60 @@
+/*
+ * Test that arbiters vote no in elections if they can see a healthy primary of equal or greater
+ * priority to the candidate, preventing flapping during certain kinds of network partitions.
+ *
+ * 1. Initiate a 3-node replica set with one arbiter (PSA) and a higher priority primary.
+ * 2. Create a network partition between the primary and secondary.
+ * 3. Wait long enough for the secondary to call for an election.
+ * 4. Verify the primary and secondary did not change.
+ * 5. Heal the partition.
+ * 6. Verify the primary and secondary did not change and are in the initial term.
+ */
+
+(function() {
+    "use strict";
+
+    load("jstests/libs/check_log.js");
+
+    var name = "no_flapping_during_network_partition";
+
+    var replTest = new ReplSetTest({name: name, nodes: 3, useBridge: true});
+    var nodes = replTest.startSet();
+    var config = replTest.getReplSetConfig();
+    config.members[0].priority = 5;
+    config.members[2].arbiterOnly = true;
+    config.settings = {
+        electionTimeoutMillis: 2000
+    };
+    replTest.initiate(config);
+
+    function getTerm(node) {
+        return node.adminCommand({replSetGetStatus: 1}).term;
+    }
+
+    replTest.waitForState(nodes[0], ReplSetTest.State.PRIMARY);
+
+    var primary = replTest.getPrimary();
+    var secondary = replTest.getSecondary();
+    var initialTerm = getTerm(primary);
+
+    jsTestLog("Create a network partition between the primary and secondary.");
+    primary.disconnect(secondary);
+
+    jsTestLog("Wait long enough for the secondary to call for an election.");
+    checkLog.contains(secondary, "can see a healthy primary of equal or greater priority");
+
+    jsTestLog("Verify the primary and secondary do not change during the partition.");
+    assert.eq(primary, replTest.getPrimary());
+    assert.eq(secondary, replTest.getSecondary());
+
+    jsTestLog("Heal the partition.");
+    primary.reconnect(secondary);
+
+    jsTestLog("Verify the primary and secondary did not change and are in the initial term.");
+    assert.eq(primary, replTest.getPrimary());
+    assert.eq(secondary, replTest.getSecondary());
+    assert.eq(initialTerm, getTerm(primary));
+    assert.eq(initialTerm, getTerm(secondary));
+
+    replTest.stopSet();
+})();
diff --git a/src/mongo/db/repl/topology_coordinator_impl.cpp b/src/mongo/db/repl/topology_coordinator_impl.cpp
index b235cbf5482..56978bc18a7 100644
--- a/src/mongo/db/repl/topology_coordinator_impl.cpp
+++ b/src/mongo/db/repl/topology_coordinator_impl.cpp
@@ -1361,6 +1361,23 @@ bool TopologyCoordinatorImpl::_aMajoritySeemsToBeUp() const {
     return vUp * 2 > _rsConfig.getTotalVotingMembers();
 }
 
+bool TopologyCoordinatorImpl::_canSeeHealthyPrimaryOfEqualOrGreaterPriority(
+    const int candidateIndex) const {
+    const double candidatePriority = _rsConfig.getMemberAt(candidateIndex).getPriority();
+    for (auto it = _hbdata.begin(); it != _hbdata.end(); ++it) {
+        if (!it->up() || it->getState() != MemberState::RS_PRIMARY) {
+            continue;
+        }
+        const int itIndex = indexOfIterator(_hbdata, it);
+        const double priority = _rsConfig.getMemberAt(itIndex).getPriority();
+        if (itIndex != candidateIndex && priority >= candidatePriority) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
 bool TopologyCoordinatorImpl::_isOpTimeCloseEnoughToLatestToElect(
     const OpTime& otherOpTime, const OpTime& ourLastOpApplied) const {
     const OpTime latestKnownOpTime = _latestKnownOpTime(ourLastOpApplied);
@@ -2461,6 +2478,10 @@ void TopologyCoordinatorImpl::processReplSetRequestVotes(const ReplSetRequestVot
     } else if (!args.isADryRun() && _lastVote.getTerm() == args.getTerm()) {
         response->setVoteGranted(false);
         response->setReason("already voted for another candidate this term");
+    } else if (_selfConfig().isArbiter() &&
+               _canSeeHealthyPrimaryOfEqualOrGreaterPriority(args.getCandidateIndex())) {
+        response->setVoteGranted(false);
+        response->setReason("can see a healthy primary of equal or greater priority");
     } else {
         if (!args.isADryRun()) {
             _lastVote.setTerm(args.getTerm());
diff --git a/src/mongo/db/repl/topology_coordinator_impl.h b/src/mongo/db/repl/topology_coordinator_impl.h
index a6c1504829f..4361f27f3bc 100644
--- a/src/mongo/db/repl/topology_coordinator_impl.h
+++ b/src/mongo/db/repl/topology_coordinator_impl.h
@@ -308,6 +308,10 @@ private:
     // Sees if a majority number of votes are held by members who are currently "up"
     bool _aMajoritySeemsToBeUp() const;
 
+    // Returns true if the node can see a healthy primary of equal or greater priority to the
+    // candidate.
+    bool _canSeeHealthyPrimaryOfEqualOrGreaterPriority(const int candidateIndex) const;
+
     // Is otherOpTime close enough (within 10 seconds) to the latest known optime to qualify
     // for an election
     bool _isOpTimeCloseEnoughToLatestToElect(const OpTime& otherOpTime,
diff --git a/src/mongo/db/repl/topology_coordinator_impl_test.cpp b/src/mongo/db/repl/topology_coordinator_impl_test.cpp
index 8d3ae2b6516..e35499c7797 100644
--- a/src/mongo/db/repl/topology_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/topology_coordinator_impl_test.cpp
@@ -5513,6 +5513,43 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantVoteWhenConfigVersionDoesNotMatch) {
     ASSERT_FALSE(response.getVoteGranted());
 }
 
+TEST_F(TopoCoordTest, ArbiterDoesNotGrantVoteWhenItCanSeeAHealthyPrimaryOfEqualOrGreaterPriority) {
+    updateConfig(BSON("_id"
+                      << "rs0"
+                      << "version" << 1 << "members"
+                      << BSON_ARRAY(BSON("_id" << 10 << "host"
+                                               << "hself"
+                                               << "arbiterOnly" << true)
+                                    << BSON("_id" << 20 << "host"
+                                                  << "h2"
+                                                  << "priority" << 5) << BSON("_id" << 30 << "host"
+                                                                                    << "h3"))),
+                 0);
+    heartbeatFromMember(HostAndPort("h2"),
+                        "rs0",
+                        MemberState::RS_PRIMARY,
+                        OpTime(Timestamp(0, 0), 0),
+                        Milliseconds(300));
+    heartbeatFromMember(HostAndPort("h3"),
+                        "rs0",
+                        MemberState::RS_SECONDARY,
+                        OpTime(Timestamp(0, 0), 0),
+                        Milliseconds(300));
+
+    ReplSetRequestVotesArgs args;
+    args.initialize(BSON("replSetRequestVotes" << 1 << "setName"
+                                               << "rs0"
+                                               << "term" << 1LL << "candidateIndex" << 2LL
+                                               << "configVersion" << 1LL << "lastCommittedOp"
+                                               << BSON("ts" << Timestamp(10, 0) << "term" << 0LL)));
+    ReplSetRequestVotesResponse response;
+    OpTime lastAppliedOpTime;
+
+    getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime);
+    ASSERT_EQUALS("can see a healthy primary of equal or greater priority", response.getReason());
+    ASSERT_FALSE(response.getVoteGranted());
+}
+
 TEST_F(TopoCoordTest, NodeDoesNotGrantVoteWhenTermIsStale) {
     updateConfig(BSON("_id"
                       << "rs0"
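As a follow-up to the handler change above, here is a reduced sketch of where the new refusal sits in the vote-request decision order: it is evaluated only after the existing refusals (paraphrased below as a stale term, a mismatched config version, and a vote already cast this term) and only for arbiters; otherwise the vote is granted. Apart from the two reason strings taken from the diff, every name and type in this sketch is invented for illustration; the real code works on ReplSetRequestVotesArgs and the topology coordinator's state.

#include <iostream>
#include <string>

// Invented, simplified inputs for this sketch only.
struct VoteRequest {
    long long term;
    long long configVersion;
    bool dryRun;
};

struct NodeState {
    long long term;
    long long configVersion;
    long long lastVoteTerm;
    bool isArbiter;
    bool seesHealthyPrimaryOfEqualOrGreaterPriority;  // result of the new check
};

// Returns the reason the vote is withheld, or an empty string if it would be granted.
std::string voteDecision(const VoteRequest& req, const NodeState& node) {
    if (req.term < node.term) {
        return "candidate's term is stale";  // paraphrased pre-existing check
    }
    if (req.configVersion != node.configVersion) {
        return "config versions do not match";  // paraphrased pre-existing check
    }
    if (!req.dryRun && node.lastVoteTerm == req.term) {
        return "already voted for another candidate this term";
    }
    // New in this patch: arbiters withhold their vote while a healthy primary
    // of equal or greater priority than the candidate is still visible.
    if (node.isArbiter && node.seesHealthyPrimaryOfEqualOrGreaterPriority) {
        return "can see a healthy primary of equal or greater priority";
    }
    return "";  // vote granted (and, outside a dry run, recorded as the last vote)
}

int main() {
    VoteRequest req{1, 1, false};
    NodeState arbiter{1, 1, 0, true, true};
    std::cout << voteDecision(req, arbiter) << std::endl;  // prints the new reason
    return 0;
}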