diff options
author | Daniel Alabi <alabidan@gmail.com> | 2015-06-01 10:30:42 -0400 |
---|---|---|
committer | Daniel Alabi <alabidan@gmail.com> | 2015-06-01 10:30:42 -0400 |
commit | b8c1c49b36abfa2c49cb15a6f1ac4cadaf2a5648 (patch) | |
tree | 15dbbad837db36942555adb5e26b647c770697f8 | |
parent | 3edfdf63b3870329a8e66614d25652a51b287660 (diff) | |
download | mongo-b8c1c49b36abfa2c49cb15a6f1ac4cadaf2a5648.tar.gz |
SERVER-18280 ReplicaSetMonitor should use electionId to avoid talking to old primaries
-rw-r--r-- | src/mongo/client/replica_set_monitor.cpp | 27 | ||||
-rw-r--r-- | src/mongo/client/replica_set_monitor.h | 7 | ||||
-rw-r--r-- | src/mongo/client/replica_set_monitor_internal.h | 2 | ||||
-rw-r--r-- | src/mongo/client/replica_set_monitor_test.cpp | 182 |
4 files changed, 214 insertions, 4 deletions
diff --git a/src/mongo/client/replica_set_monitor.cpp b/src/mongo/client/replica_set_monitor.cpp index a541f897958..73e39cebd0f 100644 --- a/src/mongo/client/replica_set_monitor.cpp +++ b/src/mongo/client/replica_set_monitor.cpp @@ -536,8 +536,16 @@ namespace { return; } - if (reply.isMaster) - receivedIsMasterFromMaster(reply); + if (reply.isMaster) { + const bool stalePrimary = !receivedIsMasterFromMaster(reply); + if (stalePrimary) { + log() << "node " << from << " believes it is primary, but its election id of " + << reply.electionId << " is older than the most recent election id" + << " for this set, " << _set->maxElectionId; + failedHost(from); + return; + } + } if (_scan->foundUpMaster) { // We only update a Node if a master has confirmed it is in the set. @@ -613,9 +621,16 @@ namespace { return scan; } - void Refresher::receivedIsMasterFromMaster(const IsMasterReply& reply) { + bool Refresher::receivedIsMasterFromMaster(const IsMasterReply& reply) { invariant(reply.isMaster); + if (reply.electionId.isSet()) { + if (_set->maxElectionId.isSet() && _set->maxElectionId.compare(reply.electionId) > 0) { + return false; + } + _set->maxElectionId = reply.electionId; + } + // Mark all nodes as not master. We will mark ourself as master before releasing the lock. // NOTE: we use a "last-wins" policy if multiple hosts claim to be master. for (size_t i = 0; i < _set->nodes.size(); i++) { @@ -685,6 +700,8 @@ namespace { _scan->foundUpMaster = true; _set->lastSeenMaster = reply.host; + + return true; } void Refresher::receivedIsMasterBeforeFoundMaster(const IsMasterReply& reply) { @@ -778,6 +795,10 @@ namespace { // hidden nodes can't be master, even if they claim to be. isMaster = !hidden && raw["ismaster"].trueValue(); + if (isMaster && raw.hasField("electionId")) { + electionId = raw["electionId"].OID(); + } + const string primaryString = raw["primary"].str(); primary = primaryString.empty() ? HostAndPort() : HostAndPort(primaryString); diff --git a/src/mongo/client/replica_set_monitor.h b/src/mongo/client/replica_set_monitor.h index 1fee9318f80..194a4710349 100644 --- a/src/mongo/client/replica_set_monitor.h +++ b/src/mongo/client/replica_set_monitor.h @@ -292,12 +292,17 @@ namespace mongo { static ScanStatePtr startNewScan(const SetState* set); private: + /** + * First, checks that the "reply" is not from a stale primary by + * comparing the electionId of "reply" to the maxElectionId recorded by the SetState. + * Returns true if "reply" belongs to a non-stale primary. + * * Updates _set and _scan based on set-membership information from a master. * Applies _scan->unconfirmedReplies to confirmed nodes. * Does not update this host's node in _set->nodes. */ - void receivedIsMasterFromMaster(const IsMasterReply& reply); + bool receivedIsMasterFromMaster(const IsMasterReply& reply); /** * Adjusts the _scan work queue based on information from this host. diff --git a/src/mongo/client/replica_set_monitor_internal.h b/src/mongo/client/replica_set_monitor_internal.h index 0d74e69b322..61f3101e47f 100644 --- a/src/mongo/client/replica_set_monitor_internal.h +++ b/src/mongo/client/replica_set_monitor_internal.h @@ -55,6 +55,7 @@ namespace mongo { bool isMaster; bool secondary; bool hidden; + OID electionId; // Set if this isMaster reply is from the primary HostAndPort primary; // empty if not present std::set<HostAndPort> normalHosts; // both "hosts" and "passives" BSONObj tags; @@ -162,6 +163,7 @@ namespace mongo { const std::string name; // safe to read outside lock since it is const int consecutiveFailedScans; std::set<HostAndPort> seedNodes; // updated whenever a master reports set membership changes + OID maxElectionId; // largest election id observed by this ReplicaSetMonitor HostAndPort lastSeenMaster; // empty if we have never seen a master. can be same as current Nodes nodes; // maintained sorted and unique by host ScanStatePtr currentScan; // NULL if no scan in progress diff --git a/src/mongo/client/replica_set_monitor_test.cpp b/src/mongo/client/replica_set_monitor_test.cpp index 7e0c833ee85..60775087314 100644 --- a/src/mongo/client/replica_set_monitor_test.cpp +++ b/src/mongo/client/replica_set_monitor_test.cpp @@ -780,3 +780,185 @@ TEST(ReplicaSetMonitorTests, OutOfBandFailedHost) { } } } + +// Newly elected primary with electionId >= maximum electionId seen by the Refresher +TEST(ReplicaSetMonitorTests, NewPrimaryWithMaxElectionId) { + SetStatePtr state = boost::make_shared<SetState>("name", basicSeedsSet); + Refresher refresher(state); + + set<HostAndPort> seen; + + // get all hosts to contact first + for (size_t i = 0; i != basicSeeds.size(); ++i) { + NextStep ns = refresher.getNextStep(); + ASSERT_EQUALS(ns.step, NextStep::CONTACT_HOST); + ASSERT(basicSeedsSet.count(ns.host)); + ASSERT(!seen.count(ns.host)); + seen.insert(ns.host); + } + + const ReadPreferenceSetting primaryOnly(ReadPreference_PrimaryOnly, TagSet()); + + // mock all replies + for (size_t i = 0; i != basicSeeds.size(); ++i) { + // All hosts to talk to are already dispatched, but no reply has been received + NextStep ns = refresher.getNextStep(); + ASSERT_EQUALS(ns.step, NextStep::WAIT); + ASSERT(ns.host.empty()); + + refresher.receivedIsMaster(basicSeeds[i], + -1, + BSON("setName" << "name" + << "ismaster" << true + << "secondary" << false + << "hosts" << BSON_ARRAY("a" << "b" << "c") + << "electionId" << OID::gen() + << "ok" << true)); + + // Ensure the set primary is the host we just got a reply from + HostAndPort currentPrimary = state->getMatchingHost(primaryOnly); + ASSERT_EQUALS(currentPrimary.host(), basicSeeds[i].host()); + ASSERT_EQUALS(state->nodes.size(), basicSeeds.size()); + + // Check the state of each individual node + for (size_t j = 0; j != basicSeeds.size(); ++j) { + Node* node = state->findNode(basicSeeds[j]); + ASSERT(node); + ASSERT_EQUALS(node->host.toString(), basicSeeds[j].toString()); + ASSERT_EQUALS(node->isUp, j <= i); + ASSERT_EQUALS(node->isMaster, j == i); + ASSERT(node->tags.isEmpty()); + } + } + + // Now all hosts have returned data + NextStep ns = refresher.getNextStep(); + ASSERT_EQUALS(ns.step, NextStep::DONE); + ASSERT(ns.host.empty()); +} + +// Ignore electionId of secondaries +TEST(ReplicaSetMonitorTests, IgnoreElectionIdFromSecondaries) { + SetStatePtr state = boost::make_shared<SetState>("name", basicSeedsSet); + Refresher refresher(state); + + set<HostAndPort> seen; + + const OID primaryElectionId = OID::gen(); + + // mock all replies + for (size_t i = 0; i != basicSeeds.size(); ++i) { + NextStep ns = refresher.getNextStep(); + ASSERT_EQUALS(ns.step, NextStep::CONTACT_HOST); + ASSERT(basicSeedsSet.count(ns.host)); + ASSERT(!seen.count(ns.host)); + seen.insert(ns.host); + + // mock a reply + const bool primary = ns.host.host() == "a"; + refresher.receivedIsMaster(ns.host, + -1, + BSON("setName" << "name" + << "ismaster" << primary + << "secondary" << !primary + << "electionId" << (primary ? + primaryElectionId : OID::gen()) + << "hosts" << BSON_ARRAY("a" << "b" << "c") + << "ok" << true)); + } + + // check that the SetState's maxElectionId == primary's electionId + ASSERT_EQUALS(state->maxElectionId, primaryElectionId); + + // Now all hosts have returned data + NextStep ns = refresher.getNextStep(); + ASSERT_EQUALS(ns.step, NextStep::DONE); + ASSERT(ns.host.empty()); +} + +// Stale Primary with obsolete electionId +TEST(ReplicaSetMonitorTests, StalePrimaryWithObsoleteElectionId) { + SetStatePtr state = boost::make_shared<SetState>("name", basicSeedsSet); + Refresher refresher(state); + + const OID firstElectionId = OID::gen(); + const OID secondElectionId = OID::gen(); + + set<HostAndPort> seen; + + // contact first host claiming to be primary with greater electionId + { + NextStep ns = refresher.getNextStep(); + ASSERT_EQUALS(ns.step, NextStep::CONTACT_HOST); + ASSERT(basicSeedsSet.count(ns.host)); + ASSERT(!seen.count(ns.host)); + seen.insert(ns.host); + + refresher.receivedIsMaster(ns.host, + -1, + BSON("setName" << "name" + << "ismaster" << true + << "secondary" << false + << "electionId" << secondElectionId + << "hosts" << BSON_ARRAY("a" << "b" << "c") + << "ok" << true)); + + Node* node = state->findNode(ns.host); + ASSERT(node); + ASSERT_TRUE(node->isMaster); + ASSERT_EQUALS(state->maxElectionId, secondElectionId); + } + + // contact second host claiming to be primary with smaller electionId + { + NextStep ns = refresher.getNextStep(); + ASSERT_EQUALS(ns.step, NextStep::CONTACT_HOST); + ASSERT(basicSeedsSet.count(ns.host)); + ASSERT(!seen.count(ns.host)); + seen.insert(ns.host); + + refresher.receivedIsMaster(ns.host, + -1, + BSON("setName" << "name" + << "ismaster" << true + << "secondary" << false + << "electionId" << firstElectionId + << "hosts" << BSON_ARRAY("a" << "b" << "c") + << "ok" << true)); + + Node* node = state->findNode(ns.host); + ASSERT(node); + // The SetState shouldn't see this host as master + ASSERT_FALSE(node->isMaster); + // the max electionId should remain the same + ASSERT_EQUALS(state->maxElectionId, secondElectionId); + } + + // third host is a secondary + { + NextStep ns = refresher.getNextStep(); + ASSERT_EQUALS(ns.step, NextStep::CONTACT_HOST); + ASSERT(basicSeedsSet.count(ns.host)); + ASSERT(!seen.count(ns.host)); + seen.insert(ns.host); + + refresher.receivedIsMaster(ns.host, + -1, + BSON("setName" << "name" + << "ismaster" << false + << "secondary" << true + << "hosts" << BSON_ARRAY("a" << "b" << "c") + << "ok" << true)); + + Node* node = state->findNode(ns.host); + ASSERT(node); + ASSERT_FALSE(node->isMaster); + // the max electionId should remain the same + ASSERT_EQUALS(state->maxElectionId, secondElectionId); + } + + // Now all hosts have returned data + NextStep ns = refresher.getNextStep(); + ASSERT_EQUALS(ns.step, NextStep::DONE); + ASSERT(ns.host.empty()); +} |