summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author William Schultz <william.schultz@mongodb.com> 2018-05-16 18:44:59 -0400
committer William Schultz <william.schultz@mongodb.com> 2018-05-16 18:44:59 -0400
commit 6ccdeffc5aa60822d352ac389cb9e8e5647c242d (patch)
tree eb482f26e94c6a6ccfdebe173d537aa6e1fc7dc2
parent 0a03faba456a3acbbaabd6ad5694f98acfdf50bd (diff)
download mongo-6ccdeffc5aa60822d352ac389cb9e8e5647c242d.tar.gz
SERVER-34682 Old primary should be able to store last vote when casting vote in new term
This patch allows a current primary to step down and cast its vote for a new primary in a higher term in one step. This lets a new candidate be elected faster if it relies on the vote of an old primary. Previously, when a primary learned of a higher term via a replSetRequestVotes request, it would step down, and that stepdown would interrupt the processing of the vote request with an InterruptedDueToReplStateChange error when we tried to acquire a lock to write down our LastVote document. This patch allows us to ignore these interrupts, so we can proceed with processing the vote request. Additionally, this patch asserts that the status returned from processing the vote request is OK, so that we don't erroneously send a voteGranted=yes response when we failed to persist our last vote document.
-rw-r--r-- jstests/replsets/primary_casts_vote_on_stepdown.js 35
-rw-r--r-- src/mongo/db/repl/repl_set_request_votes.cpp 4
-rw-r--r-- src/mongo/db/repl/replication_coordinator.h 5
-rw-r--r-- src/mongo/db/repl/replication_coordinator_external_state_impl.cpp 4
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl_test.cpp 61
5 files changed, 107 insertions, 2 deletions
diff --git a/jstests/replsets/primary_casts_vote_on_stepdown.js b/jstests/replsets/primary_casts_vote_on_stepdown.js
new file mode 100644
index 00000000000..6271a353ea9
--- /dev/null
+++ b/jstests/replsets/primary_casts_vote_on_stepdown.js
@@ -0,0 +1,35 @@
+/**
+ * In a 2 (or 3) node replica set, a new candidate should be able to overtake a current primary with
+ * a single round of election votes. This is enabled by the ability of a current primary to both
+ * step down *and* cast its vote for a new primary in a single step, in response to a vote request
+ * from a higher term than its own. This test verifies that an old primary is able to do this
+ * successfully, by checking that the new primary's term is exactly one greater than the old one.
+ */
+(function() {
+ "use strict";
+
+ let name = "primary_casts_vote_on_stepdown";
+ let replTest = new ReplSetTest({name: name, nodes: 2});
+
+ let nodes = replTest.startSet();
+ replTest.initiate();
+
+ // Make sure node 0 is initially primary, and then step up node 1 and make sure it is able to
+ // become primary in one election, gathering the vote of node 0, who will be forced to step
+ // down in the act of granting its vote to node 1.
+ jsTestLog("Make sure node 0 (" + nodes[0] + ") is primary.");
+ replTest.waitForState(nodes[0], ReplSetTest.State.PRIMARY);
+ let res = assert.commandWorked(nodes[0].adminCommand("replSetGetStatus"));
+ let firstPrimaryTerm = res.term;
+
+ jsTestLog("Stepping up node 1 (" + nodes[1] + ").");
+ replTest.stepUp(nodes[1]);
+ replTest.waitForState(nodes[1], ReplSetTest.State.PRIMARY);
+ // The election should have happened in a single attempt, so the term of the new primary should
+ // be exactly 1 greater than the term recorded from the old primary (firstPrimaryTerm).
+ res = assert.commandWorked(nodes[1].adminCommand("replSetGetStatus"));
+ assert.eq(firstPrimaryTerm + 1, res.term);
+
+ replTest.stopSet();
+
+})();
diff --git a/src/mongo/db/repl/repl_set_request_votes.cpp b/src/mongo/db/repl/repl_set_request_votes.cpp
index 6a62caad7ee..f0cd9a36b60 100644
--- a/src/mongo/db/repl/repl_set_request_votes.cpp
+++ b/src/mongo/db/repl/repl_set_request_votes.cpp
@@ -61,8 +61,10 @@ private:
ReplSetRequestVotesResponse response;
status = ReplicationCoordinator::get(opCtx)->processReplSetRequestVotes(
opCtx, parsedArgs, &response);
+ uassertStatusOK(status);
+
response.addToBSON(&result);
- return CommandHelpers::appendCommandStatusNoThrow(result, status);
+ return true;
}
} cmdReplSetRequestVotes;
diff --git a/src/mongo/db/repl/replication_coordinator.h b/src/mongo/db/repl/replication_coordinator.h
index c286c37f71e..35ce0c99470 100644
--- a/src/mongo/db/repl/replication_coordinator.h
+++ b/src/mongo/db/repl/replication_coordinator.h
@@ -715,7 +715,10 @@ public:
/*
* Handles an incoming replSetRequestVotes command.
- * Adds BSON to 'resultObj'; returns a Status with either OK or an error message.
+ *
+ * Populates the given 'response' object with the result of the request. If there is a failure
+ * processing the vote request, returns an error status. If an error is returned, the value of
+ * the populated 'response' object is invalid.
*/
virtual Status processReplSetRequestVotes(OperationContext* opCtx,
const ReplSetRequestVotesArgs& args,
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
index e67d7fb594f..557a9c67282 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
@@ -535,6 +535,10 @@ Status ReplicationCoordinatorExternalStateImpl::storeLocalLastVoteDocument(
try {
Status status =
writeConflictRetry(opCtx, "save replica set lastVote", lastVoteCollectionName, [&] {
+ // If we are casting a vote in a new election immediately after stepping down, we
+ // don't want to have this process interrupted due to us stepping down, since we
+ // want to be able to cast our vote for a new primary right away.
+ UninterruptibleLockGuard noInterrupt(opCtx->lockState());
Lock::DBLock dbWriteLock(opCtx, lastVoteDatabaseName, MODE_X);
// If there is no last vote document, we want to store one. Otherwise, we only want
diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
index 01cff031b59..2835a550339 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
@@ -5512,6 +5512,67 @@ TEST_F(ReplCoordTest, NodeDoesNotStoreDryRunVotes) {
ASSERT_EQUALS(lastVote.getValue().getCandidateIndex(), 0);
}
+TEST_F(ReplCoordTest, NodeFailsVoteRequestIfItFailsToStoreLastVote) {
+ // Set up a 2-node replica set config (node1 has _id 0, node2 has _id 1), config version 2.
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version"
+ << 2
+ << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id"
+ << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id"
+ << 1))),
+ HostAndPort("node1", 12345));
+ auto time = OpTimeWithTermOne(100, 1);
+ ASSERT_OK(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastAppliedOpTime(time);
+ getReplCoord()->setMyLastDurableOpTime(time);
+ simulateSuccessfulV1Election();
+
+ // Confirm that we are now primary, and record our current term for the checks below.
+ ASSERT(getReplCoord()->getMemberState().primary());
+ auto initTerm = getReplCoord()->getTerm();
+
+ auto opCtx = makeOperationContext();
+
+ ReplSetRequestVotesArgs args;
+ ASSERT_OK(args.initialize(BSON("replSetRequestVotes" << 1 << "setName"
+ << "mySet"
+ << "term"
+ << initTerm + 1 // term of new candidate, one higher than ours.
+ << "candidateIndex"
+ << 1LL
+ << "configVersion"
+ << 2LL
+ << "dryRun"
+ << false
+ << "lastCommittedOp"
+ << time.asOpTime().toBSON())));
+ ReplSetRequestVotesResponse response;
+
+ // Simulate a failure to write the 'last vote' document. The specific error code isn't
+ // important; the test only requires that the same code is returned from the vote request.
+ getExternalState()->setStoreLocalLastVoteDocumentStatus(
+ Status(ErrorCodes::OutOfDiskSpace, "failed to write last vote document"));
+
+ // Make sure the vote request fails with the simulated error. If an error is returned, the
+ // filled out response is invalid, so we do not check its contents.
+ auto status = getReplCoord()->processReplSetRequestVotes(opCtx.get(), args, &response);
+ ASSERT_EQ(ErrorCodes::OutOfDiskSpace, status.code());
+
+ auto lastVote = unittest::assertGet(getExternalState()->loadLocalLastVoteDocument(opCtx.get()));
+
+ // The last vote doc should still hold the vote from the first election (term 'initTerm',
+ // candidate index 0), not the one we failed to cast our vote in.
+ ASSERT_EQUALS(lastVote.getTerm(), initTerm);
+ ASSERT_EQUALS(lastVote.getCandidateIndex(), 0);
+}
+
// TODO(schwerin): Unit test election id updating
} // namespace
} // namespace repl