summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Randolph Tan <randolph@10gen.com> 2016-01-25 16:49:05 -0500
committer Ramon Fernandez <ramon@mongodb.com> 2016-02-01 13:00:57 -0500
commit 581abf60609caeadeb119393c86694df59e77dd0 (patch)
tree 56c1394e19e85b9232e94a9b1de2c950526c37dd
parent e6cf3f0a2350cfa09fc2557fdb138fb2c7b38b69 (diff)
download mongo-581abf60609caeadeb119393c86694df59e77dd0.tar.gz
SERVER-21789 mongos replica set monitor should choose primary based on (rs config version, electionId)
(cherry picked from commit d06ca003737005b22e4fec49984ce7c73a3993a7)
-rw-r--r-- src/mongo/client/replica_set_monitor.cpp 25
-rw-r--r-- src/mongo/client/replica_set_monitor_internal.h 2
-rw-r--r-- src/mongo/client/replica_set_monitor_test.cpp 122
3 files changed, 131 insertions, 18 deletions
diff --git a/src/mongo/client/replica_set_monitor.cpp b/src/mongo/client/replica_set_monitor.cpp
index 5f6d09b2702..1df2e9fcb61 100644
--- a/src/mongo/client/replica_set_monitor.cpp
+++ b/src/mongo/client/replica_set_monitor.cpp
@@ -593,11 +593,11 @@ void Refresher::receivedIsMaster(const HostAndPort& from,
}
if (reply.isMaster) {
- const bool stalePrimary = !receivedIsMasterFromMaster(reply);
- if (stalePrimary) {
+ if (!receivedIsMasterFromMaster(reply)) {
log() << "node " << from << " believes it is primary, but its election id of "
- << reply.electionId << " is older than the most recent election id"
- << " for this set, " << _set->maxElectionId;
+ << reply.electionId << " and config version of " << reply.configVersion
+ << " is older than the most recent election id " << _set->maxElectionId
+ << " and config version of " << _set->configVersion;
failedHost(from);
return;
}
@@ -672,13 +672,26 @@ ScanStatePtr Refresher::startNewScan(const SetState* set) {
bool Refresher::receivedIsMasterFromMaster(const IsMasterReply& reply) {
invariant(reply.isMaster);
+ // Reject if config version is older. This is for backwards compatibility with nodes in pv0,
+ // since their electionIds are not ordered the same way as pv1 electionIds.
+ if (reply.configVersion < _set->configVersion) {
+ return false;
+ }
+
if (reply.electionId.isSet()) {
- if (_set->maxElectionId.isSet() && _set->maxElectionId.compare(reply.electionId) > 0) {
+ // ElectionIds are only comparable if they are of the same protocol version. However, since
+ // isMaster has no protocol version field, we use the configVersion instead. This works
+ // because configVersion needs to be incremented whenever the protocol version is changed.
+ if (reply.configVersion == _set->configVersion && _set->maxElectionId.isSet() &&
+ _set->maxElectionId.compare(reply.electionId) > 0) {
return false;
}
+
_set->maxElectionId = reply.electionId;
}
+ _set->configVersion = reply.configVersion;
+
// Mark all nodes as not master. We will mark ourself as master before releasing the lock.
// NOTE: we use a "last-wins" policy if multiple hosts claim to be master.
for (size_t i = 0; i < _set->nodes.size(); i++) {
@@ -851,6 +864,8 @@ void IsMasterReply::parse(const BSONObj& obj) {
electionId = raw["electionId"].OID();
}
+ configVersion = raw["setVersion"].numberInt();
+
const string primaryString = raw["primary"].str();
primary = primaryString.empty() ? HostAndPort() : HostAndPort(primaryString);
diff --git a/src/mongo/client/replica_set_monitor_internal.h b/src/mongo/client/replica_set_monitor_internal.h
index 5408f483579..d4162be40d1 100644
--- a/src/mongo/client/replica_set_monitor_internal.h
+++ b/src/mongo/client/replica_set_monitor_internal.h
@@ -67,6 +67,7 @@ struct ReplicaSetMonitor::IsMasterReply {
bool isMaster;
bool secondary;
bool hidden;
+ int configVersion{0};
OID electionId; // Set if this isMaster reply is from the primary
HostAndPort primary; // empty if not present
std::set<HostAndPort> normalHosts; // both "hosts" and "passives"
@@ -183,6 +184,7 @@ public:
int consecutiveFailedScans;
std::set<HostAndPort> seedNodes; // updated whenever a master reports set membership changes
OID maxElectionId; // largest election id observed by this ReplicaSetMonitor
+ int configVersion{0}; // version number of the replica set config.
HostAndPort lastSeenMaster; // empty if we have never seen a master. can be same as current
Nodes nodes; // maintained sorted and unique by host
ScanStatePtr currentScan; // NULL if no scan in progress
diff --git a/src/mongo/client/replica_set_monitor_test.cpp b/src/mongo/client/replica_set_monitor_test.cpp
index 7f29de2666f..845b66e8366 100644
--- a/src/mongo/client/replica_set_monitor_test.cpp
+++ b/src/mongo/client/replica_set_monitor_test.cpp
@@ -100,6 +100,8 @@ TEST(ReplicaSetMonitor, IsMasterReplyRSNotInitiated) {
ASSERT_EQUALS(imr.hidden, false);
ASSERT_EQUALS(imr.secondary, false);
ASSERT_EQUALS(imr.isMaster, false);
+ ASSERT_EQUALS(imr.configVersion, 0);
+ ASSERT(!imr.electionId.isSet());
ASSERT(imr.primary.empty());
ASSERT(imr.normalHosts.empty());
ASSERT(imr.tags.isEmpty());
@@ -108,8 +110,9 @@ TEST(ReplicaSetMonitor, IsMasterReplyRSNotInitiated) {
TEST(ReplicaSetMonitor, IsMasterReplyRSPrimary) {
BSONObj ismaster = BSON("setName"
<< "test"
- << "setVersion" << 1 << "ismaster" << true << "secondary" << false
- << "hosts" << BSON_ARRAY("mongo.example:3000") << "primary"
+ << "setVersion" << 1 << "electionId" << OID("7fffffff0000000000000001")
+ << "ismaster" << true << "secondary" << false << "hosts"
+ << BSON_ARRAY("mongo.example:3000") << "primary"
<< "mongo.example:3000"
<< "me"
<< "mongo.example:3000"
@@ -122,6 +125,8 @@ TEST(ReplicaSetMonitor, IsMasterReplyRSPrimary) {
ASSERT_EQUALS(imr.ok, true);
ASSERT_EQUALS(imr.host.toString(), HostAndPort("mongo.example:3000").toString());
ASSERT_EQUALS(imr.setName, "test");
+ ASSERT_EQUALS(imr.configVersion, 1);
+ ASSERT_EQUALS(imr.electionId, OID("7fffffff0000000000000001"));
ASSERT_EQUALS(imr.hidden, false);
ASSERT_EQUALS(imr.secondary, false);
ASSERT_EQUALS(imr.isMaster, true);
@@ -133,8 +138,9 @@ TEST(ReplicaSetMonitor, IsMasterReplyRSPrimary) {
TEST(ReplicaSetMonitor, IsMasterReplyPassiveSecondary) {
BSONObj ismaster = BSON("setName"
<< "test"
- << "setVersion" << 1 << "ismaster" << false << "secondary" << true
- << "hosts" << BSON_ARRAY("mongo.example:3000") << "passives"
+ << "setVersion" << 2 << "electionId" << OID("7fffffff0000000000000001")
+ << "ismaster" << false << "secondary" << true << "hosts"
+ << BSON_ARRAY("mongo.example:3000") << "passives"
<< BSON_ARRAY("mongo.example:3001") << "primary"
<< "mongo.example:3000"
<< "passive" << true << "me"
@@ -148,6 +154,7 @@ TEST(ReplicaSetMonitor, IsMasterReplyPassiveSecondary) {
ASSERT_EQUALS(imr.ok, true);
ASSERT_EQUALS(imr.host.toString(), HostAndPort("mongo.example:3001").toString());
ASSERT_EQUALS(imr.setName, "test");
+ ASSERT_EQUALS(imr.configVersion, 2);
ASSERT_EQUALS(imr.hidden, false);
ASSERT_EQUALS(imr.secondary, true);
ASSERT_EQUALS(imr.isMaster, false);
@@ -155,13 +162,15 @@ TEST(ReplicaSetMonitor, IsMasterReplyPassiveSecondary) {
ASSERT(imr.normalHosts.count(HostAndPort("mongo.example:3000")));
ASSERT(imr.normalHosts.count(HostAndPort("mongo.example:3001")));
ASSERT(imr.tags.isEmpty());
+ ASSERT(!imr.electionId.isSet());
}
TEST(ReplicaSetMonitor, IsMasterReplyHiddenSecondary) {
BSONObj ismaster = BSON("setName"
<< "test"
- << "setVersion" << 1 << "ismaster" << false << "secondary" << true
- << "hosts" << BSON_ARRAY("mongo.example:3000") << "primary"
+ << "setVersion" << 2 << "electionId" << OID("7fffffff0000000000000001")
+ << "ismaster" << false << "secondary" << true << "hosts"
+ << BSON_ARRAY("mongo.example:3000") << "primary"
<< "mongo.example:3000"
<< "passive" << true << "hidden" << true << "me"
<< "mongo.example:3001"
@@ -174,20 +183,23 @@ TEST(ReplicaSetMonitor, IsMasterReplyHiddenSecondary) {
ASSERT_EQUALS(imr.ok, true);
ASSERT_EQUALS(imr.host.toString(), HostAndPort("mongo.example:3001").toString());
ASSERT_EQUALS(imr.setName, "test");
+ ASSERT_EQUALS(imr.configVersion, 2);
ASSERT_EQUALS(imr.hidden, true);
ASSERT_EQUALS(imr.secondary, true);
ASSERT_EQUALS(imr.isMaster, false);
ASSERT_EQUALS(imr.primary.toString(), HostAndPort("mongo.example:3000").toString());
ASSERT(imr.normalHosts.count(HostAndPort("mongo.example:3000")));
ASSERT(imr.tags.isEmpty());
+ ASSERT(!imr.electionId.isSet());
}
TEST(ReplicaSetMonitor, IsMasterSecondaryWithTags) {
BSONObj ismaster = BSON("setName"
<< "test"
- << "setVersion" << 1 << "ismaster" << false << "secondary" << true
- << "hosts" << BSON_ARRAY("mongo.example:3000"
- << "mongo.example:3001") << "primary"
+ << "setVersion" << 2 << "electionId" << OID("7fffffff0000000000000001")
+ << "ismaster" << false << "secondary" << true << "hosts"
+ << BSON_ARRAY("mongo.example:3000"
+ << "mongo.example:3001") << "primary"
<< "mongo.example:3000"
<< "me"
<< "mongo.example:3001"
@@ -204,6 +216,7 @@ TEST(ReplicaSetMonitor, IsMasterSecondaryWithTags) {
ASSERT_EQUALS(imr.ok, true);
ASSERT_EQUALS(imr.host.toString(), HostAndPort("mongo.example:3001").toString());
ASSERT_EQUALS(imr.setName, "test");
+ ASSERT_EQUALS(imr.configVersion, 2);
ASSERT_EQUALS(imr.hidden, false);
ASSERT_EQUALS(imr.secondary, true);
ASSERT_EQUALS(imr.isMaster, false);
@@ -212,6 +225,7 @@ TEST(ReplicaSetMonitor, IsMasterSecondaryWithTags) {
ASSERT(imr.normalHosts.count(HostAndPort("mongo.example:3001")));
ASSERT(imr.tags.hasElement("dc"));
ASSERT(imr.tags.hasElement("use"));
+ ASSERT(!imr.electionId.isSet());
ASSERT_EQUALS(imr.tags["dc"].str(), "nyc");
ASSERT_EQUALS(imr.tags["use"].str(), "production");
}
@@ -888,10 +902,10 @@ TEST(ReplicaSetMonitorTests, StalePrimaryWithObsoleteElectionId) {
BSON("setName"
<< "name"
<< "ismaster" << true << "secondary" << false
- << "electionId" << secondElectionId << "hosts"
- << BSON_ARRAY("a"
- << "b"
- << "c") << "ok" << true));
+ << "setVersion" << 1 << "electionId" << secondElectionId
+ << "hosts" << BSON_ARRAY("a"
+ << "b"
+ << "c") << "ok" << true));
Node* node = state->findNode(ns.host);
ASSERT(node);
@@ -968,4 +982,86 @@ TEST(ReplicaSetMonitor, PrimaryIsUpCheck) {
ASSERT_TRUE(rsm.isKnownToHaveGoodPrimary());
}
+/**
+ * Repl protocol version 0 and 1 compatibility checking.
+ */
+TEST(ReplicaSetMonitorTests, TwoPrimaries2ndHasNewerConfigVersion) {
+ SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet);
+ Refresher refresher(state);
+
+ auto ns = refresher.getNextStep();
+ ASSERT_EQUALS(ns.step, NextStep::CONTACT_HOST);
+ ASSERT(basicSeedsSet.count(ns.host));
+
+ refresher.receivedIsMaster(ns.host,
+ -1,
+ BSON("setName"
+ << "name"
+ << "ismaster" << true << "secondary" << false << "setVersion"
+ << 1 << "electionId" << OID("7fffffff0000000000000001")
+ << "hosts" << BSON_ARRAY("a"
+ << "b"
+ << "c") << "ok" << true));
+
+ // check that the SetState's maxElectionId == primary's electionId
+ ASSERT_EQUALS(state->maxElectionId, OID("7fffffff0000000000000001"));
+ ASSERT_EQUALS(state->configVersion, 1);
+
+ const OID primaryElectionId = OID::gen();
+
+ // Newer setVersion with a different election id. Newer setVersion should win.
+ refresher.receivedIsMaster(ns.host,
+ -1,
+ BSON("setName"
+ << "name"
+ << "ismaster" << true << "secondary" << false << "setVersion"
+ << 2 << "electionId" << primaryElectionId << "hosts"
+ << BSON_ARRAY("a"
+ << "b"
+ << "c") << "ok" << true));
+
+ ASSERT_EQUALS(state->maxElectionId, primaryElectionId);
+ ASSERT_EQUALS(state->configVersion, 2);
+}
+
+/**
+ * Repl protocol version 0 and 1 compatibility checking.
+ */
+TEST(ReplicaSetMonitorTests, TwoPrimaries2ndHasOlderConfigVersion) {
+ SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet);
+ Refresher refresher(state);
+
+ auto ns = refresher.getNextStep();
+ ASSERT_EQUALS(ns.step, NextStep::CONTACT_HOST);
+ ASSERT(basicSeedsSet.count(ns.host));
+
+ const OID primaryElectionId = OID::gen();
+ refresher.receivedIsMaster(ns.host,
+ -1,
+ BSON("setName"
+ << "name"
+ << "ismaster" << true << "secondary" << false << "electionId"
+ << primaryElectionId << "setVersion" << 2 << "hosts"
+ << BSON_ARRAY("a"
+ << "b"
+ << "c") << "ok" << true));
+
+ ASSERT_EQUALS(state->maxElectionId, primaryElectionId);
+ ASSERT_EQUALS(state->configVersion, 2);
+
+ // Older setVersion, but election id > previous election id. Newer setVersion should win.
+ refresher.receivedIsMaster(ns.host,
+ -1,
+ BSON("setName"
+ << "name"
+ << "ismaster" << true << "secondary" << false << "setVersion"
+ << 1 << "electionId" << OID("7fffffff0000000000000001")
+ << "hosts" << BSON_ARRAY("a"
+ << "b"
+ << "c") << "ok" << true));
+
+ ASSERT_EQUALS(state->maxElectionId, primaryElectionId);
+ ASSERT_EQUALS(state->configVersion, 2);
+}
+
} // namespace