summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Siyuan Zhou <siyuan.zhou@mongodb.com> 2015-09-04 16:51:13 -0400
committer Siyuan Zhou <siyuan.zhou@mongodb.com> 2015-09-10 16:06:50 -0400
commit 5dbcd42903ceb92bdaa651d6ad60a5200188872e (patch)
tree c3a18920b7cd9ed7b78c03f678d13d716e486fdc
parent 1422edf755dba283ca300365977e379ddb75a4a7 (diff)
download mongo-5dbcd42903ceb92bdaa651d6ad60a5200188872e.tar.gz
SERVER-20271 Add election delay after seeing a new term
-rw-r--r-- jstests/replsets/restore_term.js | 1
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl.cpp | 3
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp | 52
-rw-r--r-- src/mongo/db/repl/topology_coordinator.h | 2
-rw-r--r-- src/mongo/db/repl/topology_coordinator_impl.cpp | 14
-rw-r--r-- src/mongo/db/repl/topology_coordinator_impl.h | 4
-rw-r--r-- src/mongo/db/repl/topology_coordinator_impl_test.cpp | 6
7 files changed, 73 insertions, 9 deletions
diff --git a/jstests/replsets/restore_term.js b/jstests/replsets/restore_term.js
index 0e1e05483e0..35c6d8965e4 100644
--- a/jstests/replsets/restore_term.js
+++ b/jstests/replsets/restore_term.js
@@ -16,6 +16,7 @@ rst.startSet();
// Initiate the replset in protocol version 1.
var conf = rst.getReplSetConfig();
conf.settings = conf.settings || { };
+conf.settings.electionTimeoutMillis = 2000;
conf.protocolVersion = 1;
rst.initiate(conf);
rst.awaitSecondaryNodes();
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 8bf591a18c3..a1cf1a49d25 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -2955,7 +2955,8 @@ Status ReplicationCoordinatorImpl::updateTerm(long long term) {
}
bool ReplicationCoordinatorImpl::_updateTerm_incallback(long long term) {
- bool updated = _topCoord->updateTerm(term);
+ auto now = _replExecutor.now();
+ bool updated = _topCoord->updateTerm(term, now);
{
stdx::lock_guard<stdx::mutex> lock(_mutex);
_cachedTerm = _topCoord->getTerm();
diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp
index 21795bc7651..531a94c0054 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp
@@ -691,6 +691,58 @@ TEST_F(ReplCoordElectV1Test, ElectTermChangeDuringActualElection) {
ASSERT_EQUALS(1,
countLogLinesContaining("not becoming primary, we have been superceded already"));
}
+
+TEST_F(ReplCoordElectV1Test, LearningAboutNewTermDelaysElection) {
+ startCapturingLogMessages();
+ BSONObj configObj = BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")
+ << BSON("_id" << 3 << "host"
+ << "node3:12345")) << "protocolVersion"
+ << 1);
+ assertStartSuccess(configObj, HostAndPort("node1", 12345));
+ ReplicaSetConfig config = assertMakeRSConfig(configObj);
+
+ OperationContextNoop txn;
+ OpTime time1(Timestamp(100, 1), 0);
+ getReplCoord()->setMyLastOptime(time1);
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(2));
+ // Learned about a new term. The following HB won't trigger election during a timeout interval.
+ getReplCoord()->updateTerm(10);
+ simulateEnoughHeartbeatsForElectability();
+ stopCapturingLogMessages();
+ ASSERT(getReplCoord()->getMemberState().secondary())
+ << getReplCoord()->getMemberState().toString();
+ ASSERT_EQ(
+ 2, countLogLinesContaining("because I stood up or learned about a new term too recently"));
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Log());
+
+ auto net = getNet();
+ auto startingTime = net->now();
+
+ // Wait until the node is able to run election again by replying received heartbeats.
+ // Updating the term will delay a new election for the duration of the election timeout,
+ // while the heartbeat interval is half of that, so we wait for two more rounds.
+ net->enterNetwork();
+ net->runUntil(startingTime + config.getElectionTimeoutPeriod() / 2);
+ net->exitNetwork();
+ simulateEnoughHeartbeatsForElectability();
+
+ net->enterNetwork();
+ net->runUntil(startingTime + config.getElectionTimeoutPeriod());
+ net->exitNetwork();
+ simulateEnoughHeartbeatsForElectability();
+
+ simulateSuccessfulV1Election();
+ ASSERT(getReplCoord()->getMemberState().primary())
+ << getReplCoord()->getMemberState().toString();
+}
}
}
}
diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h
index e75049e5758..b54275ddc08 100644
--- a/src/mongo/db/repl/topology_coordinator.h
+++ b/src/mongo/db/repl/topology_coordinator.h
@@ -117,7 +117,7 @@ public:
* the value passed in as "term".
* Returns true if the local term value is changed.
*/
- virtual bool updateTerm(long long term) = 0;
+ virtual bool updateTerm(long long term, Date_t now) = 0;
////////////////////////////////////////////////////////////
//
diff --git a/src/mongo/db/repl/topology_coordinator_impl.cpp b/src/mongo/db/repl/topology_coordinator_impl.cpp
index f4a27b4183d..aa5a94c8f74 100644
--- a/src/mongo/db/repl/topology_coordinator_impl.cpp
+++ b/src/mongo/db/repl/topology_coordinator_impl.cpp
@@ -1368,8 +1368,14 @@ bool TopologyCoordinatorImpl::checkShouldStandForElection(Date_t now, const OpTi
return false;
}
if (_electionSleepUntil > now) {
- LOG(2) << "Not standing for election before " << dateToISOStringLocal(_electionSleepUntil)
- << " because I stood too recently";
+ if (_rsConfig.getProtocolVersion() == 1) {
+ LOG(2) << "Not standing for election before "
+ << dateToISOStringLocal(_electionSleepUntil)
+ << " because I stood up or learned about a new term too recently";
+ } else {
+ LOG(2) << "Not standing for election before "
+ << dateToISOStringLocal(_electionSleepUntil) << " because I stood too recently";
+ }
return false;
}
// All checks passed, become a candidate and start election proceedings.
@@ -2289,10 +2295,12 @@ int TopologyCoordinatorImpl::getMaintenanceCount() const {
return _maintenanceModeCalls;
}
-bool TopologyCoordinatorImpl::updateTerm(long long term) {
+bool TopologyCoordinatorImpl::updateTerm(long long term, Date_t now) {
if (term <= _term) {
return false;
}
+ // Don't run election if we just stood up or learned about a new term.
+ _electionSleepUntil = now + _rsConfig.getElectionTimeoutPeriod();
_term = term;
return true;
}
diff --git a/src/mongo/db/repl/topology_coordinator_impl.h b/src/mongo/db/repl/topology_coordinator_impl.h
index cc1b20e4e04..8f589773b06 100644
--- a/src/mongo/db/repl/topology_coordinator_impl.h
+++ b/src/mongo/db/repl/topology_coordinator_impl.h
@@ -151,7 +151,7 @@ public:
virtual std::vector<HostAndPort> getMaybeUpHostAndPorts() const;
virtual int getMaintenanceCount() const;
virtual long long getTerm();
- virtual bool updateTerm(long long term);
+ virtual bool updateTerm(long long term, Date_t now);
virtual void setForceSyncSourceIndex(int index);
virtual HostAndPort chooseNewSyncSource(Date_t now, const Timestamp& lastTimestampApplied);
virtual void blacklistSyncSource(const HostAndPort& host, Date_t until);
@@ -419,6 +419,8 @@ private:
Date_t _stepDownUntil;
// A time before which this node will not stand for election.
+ // In protocol version 1, this is used to prevent running for election after seeing
+ // a new term.
Date_t _electionSleepUntil;
// The number of calls we have had to enter maintenance mode
diff --git a/src/mongo/db/repl/topology_coordinator_impl_test.cpp b/src/mongo/db/repl/topology_coordinator_impl_test.cpp
index 7ed93b286c6..d94c556e8d5 100644
--- a/src/mongo/db/repl/topology_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/topology_coordinator_impl_test.cpp
@@ -4913,7 +4913,7 @@ TEST_F(TopoCoordTest, ProcessRequestVotesBadCommands) {
<< "rs0"
<< "term" << 2 << "winnerId" << 30));
long long responseTerm;
- ASSERT(getTopoCoord().updateTerm(winnerArgs.getTerm()));
+ ASSERT(getTopoCoord().updateTerm(winnerArgs.getTerm(), now()));
ASSERT_OK(getTopoCoord().processReplSetDeclareElectionWinner(winnerArgs, &responseTerm));
ASSERT_EQUALS(2, responseTerm);
@@ -4958,7 +4958,7 @@ TEST_F(TopoCoordTest, ProcessRequestVotesBadCommandsDryRun) {
0);
setSelfMemberState(MemberState::RS_SECONDARY);
// set term to 1
- ASSERT(getTopoCoord().updateTerm(1));
+ ASSERT(getTopoCoord().updateTerm(1, now()));
// and make sure we voted in term 1
ReplSetRequestVotesArgs argsForRealVote;
argsForRealVote.initialize(BSON("replSetRequestVotes"
@@ -5071,7 +5071,7 @@ TEST_F(TopoCoordTest, ProcessDeclareElectionWinner) {
<< "rs0"
<< "term" << 2 << "winnerId" << 30));
long long responseTerm = -1;
- ASSERT(getTopoCoord().updateTerm(winnerArgs.getTerm()));
+ ASSERT(getTopoCoord().updateTerm(winnerArgs.getTerm(), now()));
ASSERT_OK(getTopoCoord().processReplSetDeclareElectionWinner(winnerArgs, &responseTerm));
ASSERT_EQUALS(2, responseTerm);