diff options
author | Judah Schvimer <judah@mongodb.com> | 2017-01-10 10:56:30 -0500 |
---|---|---|
committer | Judah Schvimer <judah@mongodb.com> | 2017-01-23 10:51:31 -0500 |
commit | 3dc538cc7d564a4894f6c958eb60c64f91998d0e (patch) | |
tree | 3836c0957399643f344ac232222af82db893c23d | |
parent | b17d2587904571cb837a2be625a2d9d2ce296d89 (diff) | |
download | mongo-3dc538cc7d564a4894f6c958eb60c64f91998d0e.tar.gz |
SERVER-27582 initialize lastVote properly
(cherry picked from commit e5b42cc86c8b97c84c6bec4e9e41ea5f3064e2b4)
-rw-r--r-- | buildscripts/resmokeconfig/suites/replica_sets_legacy.yml | 2 | ||||
-rw-r--r-- | jstests/replsets/last_vote.js | 208 | ||||
-rw-r--r-- | src/mongo/db/repl/last_vote.cpp | 20 | ||||
-rw-r--r-- | src/mongo/db/repl/last_vote.h | 11 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_external_state_impl.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.cpp | 16 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/repl/topology_coordinator_impl.h | 2 |
8 files changed, 241 insertions, 26 deletions
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_legacy.yml b/buildscripts/resmokeconfig/suites/replica_sets_legacy.yml index 44b1b7a572d..5a75af572a0 100644 --- a/buildscripts/resmokeconfig/suites/replica_sets_legacy.yml +++ b/buildscripts/resmokeconfig/suites/replica_sets_legacy.yml @@ -15,6 +15,8 @@ selector: - jstests/replsets/read_majority_two_arbs.js # The combination of new bridges and PV0 can lead to an improper spanning tree in sync2.js. - jstests/replsets/sync2.js + # PV0 does not persist the last vote + - jstests/replsets/last_vote.js executor: js_test: diff --git a/jstests/replsets/last_vote.js b/jstests/replsets/last_vote.js new file mode 100644 index 00000000000..bbb38bd4af3 --- /dev/null +++ b/jstests/replsets/last_vote.js @@ -0,0 +1,208 @@ +// Tests that the last vote document is stored during elections and that it is loaded and used on +// startup. +// +// The test first runs a few elections and checks that the lastVote document is set correctly +// after each one. +// +// The test then restarts one node as a standalone, changes its last vote doc, and stops the +// other node. It then restarts the first node as a replicaset and manually runs +// replSetRequestVotes commands against it and checks that its response is correct. +// +// @tags: [requires_persistence] + +(function() { + "use strict"; + load("jstests/replsets/rslib.js"); // For getLatestOp() + + var name = "last_vote"; + var rst = new ReplSetTest({ + name: name, + nodes: 2, + }); + rst.startSet(); + + // Lower the election timeout to make the test run faster since it waits for multiple elections. + var conf = rst.getReplSetConfig(); + conf.settings = { + electionTimeoutMillis: 3000, + }; + rst.initiate(conf); + + const lastVoteNS = 'local.replset.election'; + + function getLastVoteDoc(conn) { + assert.eq( + conn.getCollection(lastVoteNS).find().itcount(), 1, 'last vote should be singleton'); + return conn.getCollection(lastVoteNS).findOne(); + } + + function setLastVoteDoc(conn, term, candidate) { + var newLastVote = { + term: term, + candidateIndex: rst.getNodeId(candidate) + }; + return assert.writeOK(conn.getCollection(lastVoteNS).update({}, newLastVote)); + } + + function assertNodeHasLastVote(node, term, candidate) { + var lastVoteDoc = getLastVoteDoc(node); + assert.eq(lastVoteDoc.term, term, node.host + " had wrong last vote term."); + assert.eq(lastVoteDoc.candidateIndex, + rst.getNodeId(candidate), + node.host + " had wrong last vote candidate."); + } + + function assertCurrentTerm(node, term) { + var stat = assert.commandWorked(node.adminCommand({replSetGetStatus: 1})); + assert.eq(stat.term, term, "Term changed when it should not have"); + } + + jsTestLog("Test that last vote is set on successive elections"); + + for (var i = 0; i < 3; i++) { + var primary = rst.getPrimary(); + var term = getLatestOp(primary).t; + jsTestLog("Last vote should have term: " + term + " and candidate: " + primary.host + + ", index: " + rst.getNodeId(primary)); + rst.nodes.forEach(function(node) { + assertNodeHasLastVote(node, term, primary); + }); + assert.throws(function() { + primary.adminCommand({replSetStepDown: 5, force: true}); + }); + rst.waitForState(primary, ReplSetTest.State.SECONDARY); + } + + var term = getLatestOp(rst.getPrimary()).t + 100; + + jsTestLog("Test that last vote is loaded on startup"); + jsTestLog("Reconfiguring cluster to make node 0 unelectable so it stays SECONDARY on restart"); + conf = rst.getReplSetConfigFromNode(); + conf.version++; + conf.members[0].priority = 0; + reconfig(rst, conf); + + jsTestLog("Restarting node 0 as a standalone"); + var node0 = rst.restart(0, {noReplSet: true}); // Restart as a standalone node. + jsTestLog("Stopping node 1"); + rst.stop(1); // Stop node 1 so that node 0 controls the term by itself. + jsTestLog("Setting the lastVote on node 0 to term: " + term + " candidate: " + + rst.nodes[0].host + ", index: 0"); + setLastVoteDoc(node0, term, rst.nodes[0]); + + jsTestLog("Restarting node 0 in replica set mode"); + node0 = rst.restart(0); // Restart in replSet mode again. + assertCurrentTerm(node0, term); + + jsTestLog("Manually sending node 0 a dryRun replSetRequestVotes command, " + + "expecting failure in old term"); + var response = assert.commandWorked(node0.adminCommand({ + replSetRequestVotes: 1, + setName: name, + dryRun: true, + term: term - 1, + candidateIndex: 1, + configVersion: 2, + lastCommittedOp: getLatestOp(node0) + })); + assert.eq(response.term, + term, + "replSetRequestVotes response had the wrong term: " + tojson(response)); + assert(!response.voteGranted, + "node granted vote in term before last vote doc: " + tojson(response)); + assert.eq(response.reason, + "candidate's term is lower than mine", + "replSetRequestVotes response had the wrong reason: " + tojson(response)); + assertNodeHasLastVote(node0, term, rst.nodes[0]); + assertCurrentTerm(node0, term); + + jsTestLog("Manually sending node 0 a dryRun replSetRequestVotes command in same term, " + + "expecting success but no recording of lastVote"); + response = assert.commandWorked(node0.adminCommand({ + replSetRequestVotes: 1, + setName: name, + dryRun: true, + term: term, + candidateIndex: 1, + configVersion: 2, + lastCommittedOp: getLatestOp(node0) + })); + assert.eq(response.term, + term, + "replSetRequestVotes response had the wrong term: " + tojson(response)); + assert(response.voteGranted, + "node failed to grant dryRun vote in term equal to last vote doc: " + tojson(response)); + assert.eq(response.reason, + "", + "replSetRequestVotes response had the wrong reason: " + tojson(response)); + assertNodeHasLastVote(node0, term, rst.nodes[0]); + assertCurrentTerm(node0, term); + + jsTestLog( + "Manually sending node 0 a replSetRequestVotes command, expecting failure in same term"); + response = assert.commandWorked(node0.adminCommand({ + replSetRequestVotes: 1, + setName: name, + dryRun: false, + term: term, + candidateIndex: 1, + configVersion: 2, + lastCommittedOp: getLatestOp(node0) + })); + assert.eq(response.term, + term, + "replSetRequestVotes response had the wrong term: " + tojson(response)); + assert(!response.voteGranted, + "node granted vote in term of last vote doc: " + tojson(response)); + assert.eq(response.reason, + "already voted for another candidate this term", + "replSetRequestVotes response had the wrong reason: " + tojson(response)); + assertNodeHasLastVote(node0, term, rst.nodes[0]); + assertCurrentTerm(node0, term); + + jsTestLog("Manually sending node 0 a replSetRequestVotes command, " + + "expecting success with a recording of the new lastVote"); + response = assert.commandWorked(node0.adminCommand({ + replSetRequestVotes: 1, + setName: name, + dryRun: false, + term: term + 1, + candidateIndex: 1, + configVersion: 2, + lastCommittedOp: getLatestOp(node0) + })); + assert.eq(response.term, + term + 1, + "replSetRequestVotes response had the wrong term: " + tojson(response)); + assert(response.voteGranted, + "node failed to grant vote in term greater than last vote doc: " + tojson(response)); + assert.eq(response.reason, + "", + "replSetRequestVotes response had the wrong reason: " + tojson(response)); + assertNodeHasLastVote(node0, term + 1, rst.nodes[1]); + assertCurrentTerm(node0, term + 1); + + jsTestLog("Manually sending node 0 a dryRun replSetRequestVotes command in future term, " + + "expecting success but no recording of lastVote"); + response = assert.commandWorked(node0.adminCommand({ + replSetRequestVotes: 1, + setName: name, + dryRun: true, + term: term + 2, + candidateIndex: 1, + configVersion: 2, + lastCommittedOp: getLatestOp(node0) + })); + assert.eq(response.term, + term + 2, + "replSetRequestVotes response had the wrong term: " + tojson(response)); + assert(response.voteGranted, + "node failed to grant vote in term greater than last vote doc: " + tojson(response)); + assert.eq(response.reason, + "", + "replSetRequestVotes response had the wrong reason: " + tojson(response)); + assertNodeHasLastVote(node0, term + 1, rst.nodes[1]); + assertCurrentTerm(node0, term + 2); + + rst.stopSet(); +})(); diff --git a/src/mongo/db/repl/last_vote.cpp b/src/mongo/db/repl/last_vote.cpp index 0b125624019..e7cc78ace9f 100644 --- a/src/mongo/db/repl/last_vote.cpp +++ b/src/mongo/db/repl/last_vote.cpp @@ -38,27 +38,31 @@ namespace { const std::string kCandidateIndexFieldName = "candidateIndex"; const std::string kTermFieldName = "term"; +const std::string kIdFieldName = "_id"; -const std::string kLegalFieldNames[] = { - kCandidateIndexFieldName, kTermFieldName, -}; +const std::string kLegalFieldNames[] = {kCandidateIndexFieldName, kTermFieldName, kIdFieldName}; } // namespace -Status LastVote::initialize(const BSONObj& argsObj) { - Status status = bsonCheckOnlyHasFields("VotedFar", argsObj, kLegalFieldNames); +LastVote::LastVote(long long term, long long candidateIndex) + : _candidateIndex(candidateIndex), _term(term) {} + +StatusWith<LastVote> LastVote::readFromLastVote(const BSONObj& doc) { + Status status = bsonCheckOnlyHasFields("LastVote", doc, kLegalFieldNames); if (!status.isOK()) return status; - status = bsonExtractIntegerField(argsObj, kTermFieldName, &_term); + long long term; + status = bsonExtractIntegerField(doc, kTermFieldName, &term); if (!status.isOK()) return status; - status = bsonExtractIntegerField(argsObj, kCandidateIndexFieldName, &_candidateIndex); + long long candidateIndex; + status = bsonExtractIntegerField(doc, kCandidateIndexFieldName, &candidateIndex); if (!status.isOK()) return status; - return Status::OK(); + return LastVote{term, candidateIndex}; } void LastVote::setTerm(long long term) { diff --git a/src/mongo/db/repl/last_vote.h b/src/mongo/db/repl/last_vote.h index 0466ffccbec..db1a9ab75e0 100644 --- a/src/mongo/db/repl/last_vote.h +++ b/src/mongo/db/repl/last_vote.h @@ -28,17 +28,20 @@ #pragma once +#include "mongo/base/status_with.h" + namespace mongo { class BSONObj; class BSONObjBuilder; -class Status; namespace repl { class LastVote { public: - Status initialize(const BSONObj& argsObj); + LastVote(long long term, long long candidateIndex); + + static StatusWith<LastVote> readFromLastVote(const BSONObj& doc); long long getTerm() const; long long getCandidateIndex() const; @@ -48,8 +51,8 @@ public: BSONObj toBSON() const; private: - long long _candidateIndex = -1; - long long _term = -1; + long long _candidateIndex; + long long _term; }; } // namespace repl diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp index 0b951fbe41b..d8518b8d6a1 100644 --- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp @@ -297,9 +297,7 @@ StatusWith<LastVote> ReplicationCoordinatorExternalStateImpl::loadLocalLastVoteD << "Did not find replica set lastVote document in " << lastVoteCollectionName); } - LastVote lastVote; - lastVote.initialize(lastVoteObj); - return StatusWith<LastVote>(lastVote); + return LastVote::readFromLastVote(lastVoteObj); } MONGO_WRITE_CONFLICT_RETRY_LOOP_END( txn, "load replica set lastVote", lastVoteCollectionName); diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index ad00b7e86a8..7c30cd983dd 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -348,7 +348,13 @@ void ReplicationCoordinatorImpl::_updateLastVote(const LastVote& lastVote) { bool ReplicationCoordinatorImpl::_startLoadLocalConfig(OperationContext* txn) { StatusWith<LastVote> lastVote = _externalState->loadLocalLastVoteDocument(txn); if (!lastVote.isOK()) { - log() << "Did not find local voted for document at startup; " << lastVote.getStatus(); + if (lastVote.getStatus() == ErrorCodes::NoMatchingDocument) { + log() << "Did not find local voted for document at startup."; + } else { + severe() << "Error loading local voted for document at startup; " + << lastVote.getStatus(); + fassertFailedNoTrace(40367); + } } else { LastVote vote = lastVote.getValue(); _replExecutor.scheduleWork( @@ -3205,9 +3211,7 @@ Status ReplicationCoordinatorImpl::processReplSetRequestVotes( } _replExecutor.wait(cbh.getValue()); if (!args.isADryRun() && response->getVoteGranted()) { - LastVote lastVote; - lastVote.setTerm(args.getTerm()); - lastVote.setCandidateIndex(args.getCandidateIndex()); + LastVote lastVote{args.getTerm(), args.getCandidateIndex()}; Status status = _externalState->storeLocalLastVoteDocument(txn, lastVote); if (!status.isOK()) { @@ -3628,9 +3632,7 @@ void ReplicationCoordinatorImpl::_resetElectionInfoOnProtocolVersionUpgrade( } invariant(cbData.txn); - LastVote lastVote; - lastVote.setTerm(OpTime::kInitialTerm); - lastVote.setCandidateIndex(-1); + LastVote lastVote{OpTime::kInitialTerm, -1}; auto status = _externalState->storeLocalLastVoteDocument(cbData.txn, lastVote); invariant(status.isOK()); }); diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp index 97b545a1292..b52c5a124e9 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp @@ -188,9 +188,7 @@ void ReplicationCoordinatorImpl::_onDryRunComplete(long long originalTerm) { _topCoord->voteForMyselfV1(); // Store the vote in persistent storage. - LastVote lastVote; - lastVote.setTerm(originalTerm + 1); - lastVote.setCandidateIndex(_selfIndex); + LastVote lastVote{originalTerm + 1, _selfIndex}; auto cbStatus = _replExecutor.scheduleDBWork( [this, lastVote](const ReplicationExecutor::CallbackArgs& cbData) { diff --git a/src/mongo/db/repl/topology_coordinator_impl.h b/src/mongo/db/repl/topology_coordinator_impl.h index 3645ebe1aaf..a6c1504829f 100644 --- a/src/mongo/db/repl/topology_coordinator_impl.h +++ b/src/mongo/db/repl/topology_coordinator_impl.h @@ -460,7 +460,7 @@ private: } _voteLease; // V1 last vote info for elections - LastVote _lastVote; + LastVote _lastVote{OpTime::kInitialTerm, -1}; enum class ReadCommittedSupport { kUnknown, |