summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Daniel Alabi <alabidan@gmail.com> 2015-06-01 10:30:42 -0400
committer Daniel Alabi <alabidan@gmail.com> 2015-06-02 09:30:31 -0400
commit c5639bdc8ffbb2c7b3838a093f06060cc2cce568 (patch)
tree 6afdc920ee1201dba82a4c4731fb7ec806a58662
parent 0a7d47f9076bf6070b66462715ca225eca2cdb6b (diff)
download mongo-c5639bdc8ffbb2c7b3838a093f06060cc2cce568.tar.gz
SERVER-18280 ReplicaSetMonitor should use electionId to avoid talking to old primaries
(cherry picked from commit b8c1c49b36abfa2c49cb15a6f1ac4cadaf2a5648)
-rw-r--r-- src/mongo/client/replica_set_monitor.cpp 27
-rw-r--r-- src/mongo/client/replica_set_monitor.h 7
-rw-r--r-- src/mongo/client/replica_set_monitor_internal.h 2
-rw-r--r-- src/mongo/client/replica_set_monitor_test.cpp 182
4 files changed, 214 insertions, 4 deletions
diff --git a/src/mongo/client/replica_set_monitor.cpp b/src/mongo/client/replica_set_monitor.cpp
index 66af1596d32..da7f2465be0 100644
--- a/src/mongo/client/replica_set_monitor.cpp
+++ b/src/mongo/client/replica_set_monitor.cpp
@@ -572,8 +572,16 @@ namespace {
return;
}
- if (reply.isMaster)
- receivedIsMasterFromMaster(reply);
+ if (reply.isMaster) {
+ const bool stalePrimary = !receivedIsMasterFromMaster(reply);
+ if (stalePrimary) {
+ log() << "node " << from << " believes it is primary, but its election id of "
+ << reply.electionId << " is older than the most recent election id"
+ << " for this set, " << _set->maxElectionId;
+ failedHost(from);
+ return;
+ }
+ }
if (_scan->foundUpMaster) {
// We only update a Node if a master has confirmed it is in the set.
@@ -649,9 +657,16 @@ namespace {
return scan;
}
- void Refresher::receivedIsMasterFromMaster(const IsMasterReply& reply) {
+ bool Refresher::receivedIsMasterFromMaster(const IsMasterReply& reply) {
invariant(reply.isMaster);
+ if (reply.electionId.isSet()) {
+ if (_set->maxElectionId.isSet() && _set->maxElectionId.compare(reply.electionId) > 0) {
+ return false;
+ }
+ _set->maxElectionId = reply.electionId;
+ }
+
// Mark all nodes as not master. We will mark ourself as master before releasing the lock.
// NOTE: we use a "last-wins" policy if multiple hosts claim to be master.
for (size_t i = 0; i < _set->nodes.size(); i++) {
@@ -721,6 +736,8 @@ namespace {
_scan->foundUpMaster = true;
_set->lastSeenMaster = reply.host;
+
+ return true;
}
void Refresher::receivedIsMasterBeforeFoundMaster(const IsMasterReply& reply) {
@@ -814,6 +831,10 @@ namespace {
// hidden nodes can't be master, even if they claim to be.
isMaster = !hidden && raw["ismaster"].trueValue();
+ if (isMaster && raw.hasField("electionId")) {
+ electionId = raw["electionId"].OID();
+ }
+
const string primaryString = raw["primary"].str();
primary = primaryString.empty() ? HostAndPort() : HostAndPort(primaryString);
diff --git a/src/mongo/client/replica_set_monitor.h b/src/mongo/client/replica_set_monitor.h
index 3c768d60c04..43aa1b8a7b6 100644
--- a/src/mongo/client/replica_set_monitor.h
+++ b/src/mongo/client/replica_set_monitor.h
@@ -306,12 +306,17 @@ namespace mongo {
static ScanStatePtr startNewScan(const SetState* set);
private:
+
/**
+ * First, checks that the "reply" is not from a stale primary by
+ * comparing the electionId of "reply" to the maxElectionId recorded by the SetState.
+ * Returns true if "reply" belongs to a non-stale primary.
+ *
* Updates _set and _scan based on set-membership information from a master.
* Applies _scan->unconfirmedReplies to confirmed nodes.
* Does not update this host's node in _set->nodes.
*/
- void receivedIsMasterFromMaster(const IsMasterReply& reply);
+ bool receivedIsMasterFromMaster(const IsMasterReply& reply);
/**
* Adjusts the _scan work queue based on information from this host.
diff --git a/src/mongo/client/replica_set_monitor_internal.h b/src/mongo/client/replica_set_monitor_internal.h
index d69f90afc1f..5927c089bdf 100644
--- a/src/mongo/client/replica_set_monitor_internal.h
+++ b/src/mongo/client/replica_set_monitor_internal.h
@@ -67,6 +67,7 @@ namespace mongo {
bool isMaster;
bool secondary;
bool hidden;
+ OID electionId; // Set if this isMaster reply is from the primary
HostAndPort primary; // empty if not present
std::set<HostAndPort> normalHosts; // both "hosts" and "passives"
BSONObj tags;
@@ -174,6 +175,7 @@ namespace mongo {
const std::string name; // safe to read outside lock since it is const
int consecutiveFailedScans;
std::set<HostAndPort> seedNodes; // updated whenever a master reports set membership changes
+ OID maxElectionId; // largest election id observed by this ReplicaSetMonitor
HostAndPort lastSeenMaster; // empty if we have never seen a master. can be same as current
Nodes nodes; // maintained sorted and unique by host
ScanStatePtr currentScan; // NULL if no scan in progress
diff --git a/src/mongo/client/replica_set_monitor_test.cpp b/src/mongo/client/replica_set_monitor_test.cpp
index 426f49f6ecb..1a0fa3560a1 100644
--- a/src/mongo/client/replica_set_monitor_test.cpp
+++ b/src/mongo/client/replica_set_monitor_test.cpp
@@ -784,3 +784,185 @@ TEST(ReplicaSetMonitorTests, OutOfBandFailedHost) {
}
}
}
+
+// Newly elected primary with electionId >= maximum electionId seen by the Refresher
+TEST(ReplicaSetMonitorTests, NewPrimaryWithMaxElectionId) {
+ SetStatePtr state = boost::make_shared<SetState>("name", basicSeedsSet);
+ Refresher refresher(state);
+
+ set<HostAndPort> seen;
+
+ // get all hosts to contact first
+ for (size_t i = 0; i != basicSeeds.size(); ++i) {
+ NextStep ns = refresher.getNextStep();
+ ASSERT_EQUALS(ns.step, NextStep::CONTACT_HOST);
+ ASSERT(basicSeedsSet.count(ns.host));
+ ASSERT(!seen.count(ns.host));
+ seen.insert(ns.host);
+ }
+
+ const ReadPreferenceSetting primaryOnly(ReadPreference_PrimaryOnly, TagSet());
+
+ // mock all replies
+ for (size_t i = 0; i != basicSeeds.size(); ++i) {
+ // All hosts to talk to are already dispatched, but no reply has been received
+ NextStep ns = refresher.getNextStep();
+ ASSERT_EQUALS(ns.step, NextStep::WAIT);
+ ASSERT(ns.host.empty());
+
+ refresher.receivedIsMaster(basicSeeds[i],
+ -1,
+ BSON("setName" << "name"
+ << "ismaster" << true
+ << "secondary" << false
+ << "hosts" << BSON_ARRAY("a" << "b" << "c")
+ << "electionId" << OID::gen()
+ << "ok" << true));
+
+ // Ensure the set primary is the host we just got a reply from
+ HostAndPort currentPrimary = state->getMatchingHost(primaryOnly);
+ ASSERT_EQUALS(currentPrimary.host(), basicSeeds[i].host());
+ ASSERT_EQUALS(state->nodes.size(), basicSeeds.size());
+
+ // Check the state of each individual node
+ for (size_t j = 0; j != basicSeeds.size(); ++j) {
+ Node* node = state->findNode(basicSeeds[j]);
+ ASSERT(node);
+ ASSERT_EQUALS(node->host.toString(), basicSeeds[j].toString());
+ ASSERT_EQUALS(node->isUp, j <= i);
+ ASSERT_EQUALS(node->isMaster, j == i);
+ ASSERT(node->tags.isEmpty());
+ }
+ }
+
+ // Now all hosts have returned data
+ NextStep ns = refresher.getNextStep();
+ ASSERT_EQUALS(ns.step, NextStep::DONE);
+ ASSERT(ns.host.empty());
+}
+
+// Ignore electionId of secondaries
+TEST(ReplicaSetMonitorTests, IgnoreElectionIdFromSecondaries) {
+ SetStatePtr state = boost::make_shared<SetState>("name", basicSeedsSet);
+ Refresher refresher(state);
+
+ set<HostAndPort> seen;
+
+ const OID primaryElectionId = OID::gen();
+
+ // mock all replies
+ for (size_t i = 0; i != basicSeeds.size(); ++i) {
+ NextStep ns = refresher.getNextStep();
+ ASSERT_EQUALS(ns.step, NextStep::CONTACT_HOST);
+ ASSERT(basicSeedsSet.count(ns.host));
+ ASSERT(!seen.count(ns.host));
+ seen.insert(ns.host);
+
+ // mock a reply
+ const bool primary = ns.host.host() == "a";
+ refresher.receivedIsMaster(ns.host,
+ -1,
+ BSON("setName" << "name"
+ << "ismaster" << primary
+ << "secondary" << !primary
+ << "electionId" << (primary ?
+ primaryElectionId : OID::gen())
+ << "hosts" << BSON_ARRAY("a" << "b" << "c")
+ << "ok" << true));
+ }
+
+ // check that the SetState's maxElectionId == primary's electionId
+ ASSERT_EQUALS(state->maxElectionId, primaryElectionId);
+
+ // Now all hosts have returned data
+ NextStep ns = refresher.getNextStep();
+ ASSERT_EQUALS(ns.step, NextStep::DONE);
+ ASSERT(ns.host.empty());
+}
+
+// Stale Primary with obsolete electionId
+TEST(ReplicaSetMonitorTests, StalePrimaryWithObsoleteElectionId) {
+ SetStatePtr state = boost::make_shared<SetState>("name", basicSeedsSet);
+ Refresher refresher(state);
+
+ const OID firstElectionId = OID::gen();
+ const OID secondElectionId = OID::gen();
+
+ set<HostAndPort> seen;
+
+ // contact first host claiming to be primary with greater electionId
+ {
+ NextStep ns = refresher.getNextStep();
+ ASSERT_EQUALS(ns.step, NextStep::CONTACT_HOST);
+ ASSERT(basicSeedsSet.count(ns.host));
+ ASSERT(!seen.count(ns.host));
+ seen.insert(ns.host);
+
+ refresher.receivedIsMaster(ns.host,
+ -1,
+ BSON("setName" << "name"
+ << "ismaster" << true
+ << "secondary" << false
+ << "electionId" << secondElectionId
+ << "hosts" << BSON_ARRAY("a" << "b" << "c")
+ << "ok" << true));
+
+ Node* node = state->findNode(ns.host);
+ ASSERT(node);
+ ASSERT_TRUE(node->isMaster);
+ ASSERT_EQUALS(state->maxElectionId, secondElectionId);
+ }
+
+ // contact second host claiming to be primary with smaller electionId
+ {
+ NextStep ns = refresher.getNextStep();
+ ASSERT_EQUALS(ns.step, NextStep::CONTACT_HOST);
+ ASSERT(basicSeedsSet.count(ns.host));
+ ASSERT(!seen.count(ns.host));
+ seen.insert(ns.host);
+
+ refresher.receivedIsMaster(ns.host,
+ -1,
+ BSON("setName" << "name"
+ << "ismaster" << true
+ << "secondary" << false
+ << "electionId" << firstElectionId
+ << "hosts" << BSON_ARRAY("a" << "b" << "c")
+ << "ok" << true));
+
+ Node* node = state->findNode(ns.host);
+ ASSERT(node);
+ // The SetState shouldn't see this host as master
+ ASSERT_FALSE(node->isMaster);
+ // the max electionId should remain the same
+ ASSERT_EQUALS(state->maxElectionId, secondElectionId);
+ }
+
+ // third host is a secondary
+ {
+ NextStep ns = refresher.getNextStep();
+ ASSERT_EQUALS(ns.step, NextStep::CONTACT_HOST);
+ ASSERT(basicSeedsSet.count(ns.host));
+ ASSERT(!seen.count(ns.host));
+ seen.insert(ns.host);
+
+ refresher.receivedIsMaster(ns.host,
+ -1,
+ BSON("setName" << "name"
+ << "ismaster" << false
+ << "secondary" << true
+ << "hosts" << BSON_ARRAY("a" << "b" << "c")
+ << "ok" << true));
+
+ Node* node = state->findNode(ns.host);
+ ASSERT(node);
+ ASSERT_FALSE(node->isMaster);
+ // the max electionId should remain the same
+ ASSERT_EQUALS(state->maxElectionId, secondElectionId);
+ }
+
+ // Now all hosts have returned data
+ NextStep ns = refresher.getNextStep();
+ ASSERT_EQUALS(ns.step, NextStep::DONE);
+ ASSERT(ns.host.empty());
+}