diff options
author | Daniel Alabi <alabidan@gmail.com> | 2015-06-01 10:30:42 -0400 |
---|---|---|
committer | Daniel Alabi <alabidan@gmail.com> | 2015-06-02 09:30:31 -0400 |
commit | c5639bdc8ffbb2c7b3838a093f06060cc2cce568 (patch) | |
tree | 6afdc920ee1201dba82a4c4731fb7ec806a58662 | |
parent | 0a7d47f9076bf6070b66462715ca225eca2cdb6b (diff) | |
download | mongo-c5639bdc8ffbb2c7b3838a093f06060cc2cce568.tar.gz |
SERVER-18280 ReplicaSetMonitor should use electionId to avoid talking to old primaries
(cherry picked from commit b8c1c49b36abfa2c49cb15a6f1ac4cadaf2a5648)
-rw-r--r-- | src/mongo/client/replica_set_monitor.cpp | 27 |
-rw-r--r-- | src/mongo/client/replica_set_monitor.h | 7 |
-rw-r--r-- | src/mongo/client/replica_set_monitor_internal.h | 2 |
-rw-r--r-- | src/mongo/client/replica_set_monitor_test.cpp | 182 |
4 files changed, 214 insertions, 4 deletions
diff --git a/src/mongo/client/replica_set_monitor.cpp b/src/mongo/client/replica_set_monitor.cpp index 66af1596d32..da7f2465be0 100644 --- a/src/mongo/client/replica_set_monitor.cpp +++ b/src/mongo/client/replica_set_monitor.cpp @@ -572,8 +572,16 @@ namespace { return; } - if (reply.isMaster) - receivedIsMasterFromMaster(reply); + if (reply.isMaster) { + const bool stalePrimary = !receivedIsMasterFromMaster(reply); + if (stalePrimary) { + log() << "node " << from << " believes it is primary, but its election id of " + << reply.electionId << " is older than the most recent election id" + << " for this set, " << _set->maxElectionId; + failedHost(from); + return; + } + } if (_scan->foundUpMaster) { // We only update a Node if a master has confirmed it is in the set. @@ -649,9 +657,16 @@ namespace { return scan; } - void Refresher::receivedIsMasterFromMaster(const IsMasterReply& reply) { + bool Refresher::receivedIsMasterFromMaster(const IsMasterReply& reply) { invariant(reply.isMaster); + if (reply.electionId.isSet()) { + if (_set->maxElectionId.isSet() && _set->maxElectionId.compare(reply.electionId) > 0) { + return false; + } + _set->maxElectionId = reply.electionId; + } + // Mark all nodes as not master. We will mark ourself as master before releasing the lock. // NOTE: we use a "last-wins" policy if multiple hosts claim to be master. for (size_t i = 0; i < _set->nodes.size(); i++) { @@ -721,6 +736,8 @@ namespace { _scan->foundUpMaster = true; _set->lastSeenMaster = reply.host; + + return true; } void Refresher::receivedIsMasterBeforeFoundMaster(const IsMasterReply& reply) { @@ -814,6 +831,10 @@ namespace { // hidden nodes can't be master, even if they claim to be. isMaster = !hidden && raw["ismaster"].trueValue(); + if (isMaster && raw.hasField("electionId")) { + electionId = raw["electionId"].OID(); + } + const string primaryString = raw["primary"].str(); primary = primaryString.empty() ? 
HostAndPort() : HostAndPort(primaryString); diff --git a/src/mongo/client/replica_set_monitor.h b/src/mongo/client/replica_set_monitor.h index 3c768d60c04..43aa1b8a7b6 100644 --- a/src/mongo/client/replica_set_monitor.h +++ b/src/mongo/client/replica_set_monitor.h @@ -306,12 +306,17 @@ namespace mongo { static ScanStatePtr startNewScan(const SetState* set); private: + /** + * First, checks that the "reply" is not from a stale primary by + * comparing the electionId of "reply" to the maxElectionId recorded by the SetState. + * Returns true if "reply" belongs to a non-stale primary. + * * Updates _set and _scan based on set-membership information from a master. * Applies _scan->unconfirmedReplies to confirmed nodes. * Does not update this host's node in _set->nodes. */ - void receivedIsMasterFromMaster(const IsMasterReply& reply); + bool receivedIsMasterFromMaster(const IsMasterReply& reply); /** * Adjusts the _scan work queue based on information from this host. diff --git a/src/mongo/client/replica_set_monitor_internal.h b/src/mongo/client/replica_set_monitor_internal.h index d69f90afc1f..5927c089bdf 100644 --- a/src/mongo/client/replica_set_monitor_internal.h +++ b/src/mongo/client/replica_set_monitor_internal.h @@ -67,6 +67,7 @@ namespace mongo { bool isMaster; bool secondary; bool hidden; + OID electionId; // Set if this isMaster reply is from the primary HostAndPort primary; // empty if not present std::set<HostAndPort> normalHosts; // both "hosts" and "passives" BSONObj tags; @@ -174,6 +175,7 @@ namespace mongo { const std::string name; // safe to read outside lock since it is const int consecutiveFailedScans; std::set<HostAndPort> seedNodes; // updated whenever a master reports set membership changes + OID maxElectionId; // largest election id observed by this ReplicaSetMonitor HostAndPort lastSeenMaster; // empty if we have never seen a master. 
can be same as current Nodes nodes; // maintained sorted and unique by host ScanStatePtr currentScan; // NULL if no scan in progress diff --git a/src/mongo/client/replica_set_monitor_test.cpp b/src/mongo/client/replica_set_monitor_test.cpp index 426f49f6ecb..1a0fa3560a1 100644 --- a/src/mongo/client/replica_set_monitor_test.cpp +++ b/src/mongo/client/replica_set_monitor_test.cpp @@ -784,3 +784,185 @@ TEST(ReplicaSetMonitorTests, OutOfBandFailedHost) { } } } + +// Newly elected primary with electionId >= maximum electionId seen by the Refresher +TEST(ReplicaSetMonitorTests, NewPrimaryWithMaxElectionId) { + SetStatePtr state = boost::make_shared<SetState>("name", basicSeedsSet); + Refresher refresher(state); + + set<HostAndPort> seen; + + // get all hosts to contact first + for (size_t i = 0; i != basicSeeds.size(); ++i) { + NextStep ns = refresher.getNextStep(); + ASSERT_EQUALS(ns.step, NextStep::CONTACT_HOST); + ASSERT(basicSeedsSet.count(ns.host)); + ASSERT(!seen.count(ns.host)); + seen.insert(ns.host); + } + + const ReadPreferenceSetting primaryOnly(ReadPreference_PrimaryOnly, TagSet()); + + // mock all replies + for (size_t i = 0; i != basicSeeds.size(); ++i) { + // All hosts to talk to are already dispatched, but no reply has been received + NextStep ns = refresher.getNextStep(); + ASSERT_EQUALS(ns.step, NextStep::WAIT); + ASSERT(ns.host.empty()); + + refresher.receivedIsMaster(basicSeeds[i], + -1, + BSON("setName" << "name" + << "ismaster" << true + << "secondary" << false + << "hosts" << BSON_ARRAY("a" << "b" << "c") + << "electionId" << OID::gen() + << "ok" << true)); + + // Ensure the set primary is the host we just got a reply from + HostAndPort currentPrimary = state->getMatchingHost(primaryOnly); + ASSERT_EQUALS(currentPrimary.host(), basicSeeds[i].host()); + ASSERT_EQUALS(state->nodes.size(), basicSeeds.size()); + + // Check the state of each individual node + for (size_t j = 0; j != basicSeeds.size(); ++j) { + Node* node = 
state->findNode(basicSeeds[j]); + ASSERT(node); + ASSERT_EQUALS(node->host.toString(), basicSeeds[j].toString()); + ASSERT_EQUALS(node->isUp, j <= i); + ASSERT_EQUALS(node->isMaster, j == i); + ASSERT(node->tags.isEmpty()); + } + } + + // Now all hosts have returned data + NextStep ns = refresher.getNextStep(); + ASSERT_EQUALS(ns.step, NextStep::DONE); + ASSERT(ns.host.empty()); +} + +// Ignore electionId of secondaries +TEST(ReplicaSetMonitorTests, IgnoreElectionIdFromSecondaries) { + SetStatePtr state = boost::make_shared<SetState>("name", basicSeedsSet); + Refresher refresher(state); + + set<HostAndPort> seen; + + const OID primaryElectionId = OID::gen(); + + // mock all replies + for (size_t i = 0; i != basicSeeds.size(); ++i) { + NextStep ns = refresher.getNextStep(); + ASSERT_EQUALS(ns.step, NextStep::CONTACT_HOST); + ASSERT(basicSeedsSet.count(ns.host)); + ASSERT(!seen.count(ns.host)); + seen.insert(ns.host); + + // mock a reply + const bool primary = ns.host.host() == "a"; + refresher.receivedIsMaster(ns.host, + -1, + BSON("setName" << "name" + << "ismaster" << primary + << "secondary" << !primary + << "electionId" << (primary ? 
+ primaryElectionId : OID::gen()) + << "hosts" << BSON_ARRAY("a" << "b" << "c") + << "ok" << true)); + } + + // check that the SetState's maxElectionId == primary's electionId + ASSERT_EQUALS(state->maxElectionId, primaryElectionId); + + // Now all hosts have returned data + NextStep ns = refresher.getNextStep(); + ASSERT_EQUALS(ns.step, NextStep::DONE); + ASSERT(ns.host.empty()); +} + +// Stale Primary with obsolete electionId +TEST(ReplicaSetMonitorTests, StalePrimaryWithObsoleteElectionId) { + SetStatePtr state = boost::make_shared<SetState>("name", basicSeedsSet); + Refresher refresher(state); + + const OID firstElectionId = OID::gen(); + const OID secondElectionId = OID::gen(); + + set<HostAndPort> seen; + + // contact first host claiming to be primary with greater electionId + { + NextStep ns = refresher.getNextStep(); + ASSERT_EQUALS(ns.step, NextStep::CONTACT_HOST); + ASSERT(basicSeedsSet.count(ns.host)); + ASSERT(!seen.count(ns.host)); + seen.insert(ns.host); + + refresher.receivedIsMaster(ns.host, + -1, + BSON("setName" << "name" + << "ismaster" << true + << "secondary" << false + << "electionId" << secondElectionId + << "hosts" << BSON_ARRAY("a" << "b" << "c") + << "ok" << true)); + + Node* node = state->findNode(ns.host); + ASSERT(node); + ASSERT_TRUE(node->isMaster); + ASSERT_EQUALS(state->maxElectionId, secondElectionId); + } + + // contact second host claiming to be primary with smaller electionId + { + NextStep ns = refresher.getNextStep(); + ASSERT_EQUALS(ns.step, NextStep::CONTACT_HOST); + ASSERT(basicSeedsSet.count(ns.host)); + ASSERT(!seen.count(ns.host)); + seen.insert(ns.host); + + refresher.receivedIsMaster(ns.host, + -1, + BSON("setName" << "name" + << "ismaster" << true + << "secondary" << false + << "electionId" << firstElectionId + << "hosts" << BSON_ARRAY("a" << "b" << "c") + << "ok" << true)); + + Node* node = state->findNode(ns.host); + ASSERT(node); + // The SetState shouldn't see this host as master + ASSERT_FALSE(node->isMaster); + 
// the max electionId should remain the same + ASSERT_EQUALS(state->maxElectionId, secondElectionId); + } + + // third host is a secondary + { + NextStep ns = refresher.getNextStep(); + ASSERT_EQUALS(ns.step, NextStep::CONTACT_HOST); + ASSERT(basicSeedsSet.count(ns.host)); + ASSERT(!seen.count(ns.host)); + seen.insert(ns.host); + + refresher.receivedIsMaster(ns.host, + -1, + BSON("setName" << "name" + << "ismaster" << false + << "secondary" << true + << "hosts" << BSON_ARRAY("a" << "b" << "c") + << "ok" << true)); + + Node* node = state->findNode(ns.host); + ASSERT(node); + ASSERT_FALSE(node->isMaster); + // the max electionId should remain the same + ASSERT_EQUALS(state->maxElectionId, secondElectionId); + } + + // Now all hosts have returned data + NextStep ns = refresher.getNextStep(); + ASSERT_EQUALS(ns.step, NextStep::DONE); + ASSERT(ns.host.empty()); +} |