summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Shuvalov <andrew.shuvalov@mongodb.com>2021-10-15 19:21:43 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-10-15 20:02:55 +0000
commitb996dfbc5135e94b38fb86014c656930cdc2f533 (patch)
tree510fc780d0b875e996aba67bfa6e3852da86b3cf
parentb69563e8b35f91441bc6b666cba1c09414b27a08 (diff)
downloadmongo-b996dfbc5135e94b38fb86014c656930cdc2f533.tar.gz
SERVER-59409 backport BACKPORT-10657: Fix race between reconfig replication and stepup part 4
-rw-r--r--jstests/sharding/reconfig_race_with_failover.js65
-rw-r--r--src/mongo/client/sdam/election_id_set_version_pair.h25
-rw-r--r--src/mongo/client/sdam/election_id_set_version_pair_test.cpp42
-rw-r--r--src/mongo/client/sdam/json_tests/sdam_tests/rs/null_election_id.json30
-rw-r--r--src/mongo/client/sdam/json_tests/sdam_tests/rs/set_version_can_rollback.json146
-rw-r--r--src/mongo/client/sdam/json_tests/sdam_tests/rs/setversion_without_electionid.json10
-rw-r--r--src/mongo/client/sdam/json_tests/sdam_tests/rs/use_setversion_without_electionid.json32
-rw-r--r--src/mongo/client/sdam/server_description.cpp10
-rw-r--r--src/mongo/client/sdam/server_description.h3
-rw-r--r--src/mongo/client/sdam/server_description_test.cpp14
-rw-r--r--src/mongo/client/sdam/topology_description.cpp45
-rw-r--r--src/mongo/client/sdam/topology_description.h27
-rw-r--r--src/mongo/client/sdam/topology_description_builder.cpp4
-rw-r--r--src/mongo/client/sdam/topology_state_machine.cpp49
-rw-r--r--src/mongo/client/sdam/topology_state_machine.h3
-rw-r--r--src/mongo/client/streamable_replica_set_monitor.cpp15
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.cpp46
17 files changed, 400 insertions, 166 deletions
diff --git a/jstests/sharding/reconfig_race_with_failover.js b/jstests/sharding/reconfig_race_with_failover.js
new file mode 100644
index 00000000000..a63c40596cf
--- /dev/null
+++ b/jstests/sharding/reconfig_race_with_failover.js
@@ -0,0 +1,65 @@
+/*
+ * Tests that if reconfig did not replicate before step up, the new primary
+ * is not stale because its new election Id takes precedence over Set version when
+ * comparing.
+ *
+ * @tags: [does_not_support_stepdowns, multiversion_incompatible]
+ */
+(function() {
+"use strict";
+load("jstests/libs/fail_point_util.js");
+load('jstests/libs/parallel_shell_helpers.js');
+load("jstests/sharding/libs/sharded_index_util.js");
+
+// Skip db hash check because secondary is left with a different config.
+TestData.skipCheckDBHashes = true;
+
+const st = new ShardingTest({shards: {rs0: {nodes: [{}, {}, {rsConfig: {priority: 0}}]}}});
+const rst = st.rs0;
+const primary = rst.getPrimary();
+if (primary !== nodes[0]) {
+ st.stop();
+ return; // For simplicity.
+}
+
+const config = rst.getReplSetConfigFromNode();
+jsTestLog(`Initial config ${tojson(config)}`);
+config.version++;
+
+const versionToBlock = config.version;
+const termToBlock = config.term;
+
+let pauseInReconfig = configureFailPoint(primary, "hangAfterReconfig");
+let maxElectionIdSetVersionPairUpdated =
+ configureFailPoint(primary, "maxElectionIdSetVersionPairUpdated");
+
+// Fail points to prevent the new config from being replicated.
+let testFpHangBeforeFetchingConfig1 = configureFailPoint(
+ nodes[1], "skipBeforeFetchingConfig", {versionAndTerm: [versionToBlock, termToBlock]});
+let testFpHangBeforeFetchingConfig2 = configureFailPoint(
+ nodes[2], "skipBeforeFetchingConfig", {versionAndTerm: [versionToBlock, termToBlock]});
+let runReconfigJoin = startParallelShell(
+ funWithArgs(function(config) {
+ jsTestLog(`Run reconfig`);
+ let result = db.getSiblingDB("admin").runCommand({replSetReconfig: config});
+ jsTestLog(`Reconfig finished, result ${tojson(result)}`);
+ }, config), primary.port);
+pauseInReconfig.wait();
+maxElectionIdSetVersionPairUpdated.wait();
+
+// After step up the new primary still has the Set at previous version.
+const nextPrimary = rst.getSecondary();
+jsTestLog("Step Up");
+nextPrimary.adminCommand({replSetStepUp: 1});
+pauseInReconfig.off();
+runReconfigJoin();
+
+testFpHangBeforeFetchingConfig1.off();
+testFpHangBeforeFetchingConfig2.off();
+
+// This command fails if mongos' RSM was stuck with stale primary unable to rollback Set version.
+let res = ShardedIndexUtil.getPerShardIndexes(st.s.getCollection("config.system.sessions"));
+jsTestLog(`Aggregate run on Mongos ${tojson(res)}`);
+
+st.stop();
+}());
diff --git a/src/mongo/client/sdam/election_id_set_version_pair.h b/src/mongo/client/sdam/election_id_set_version_pair.h
index 296f94b6116..0325eed418d 100644
--- a/src/mongo/client/sdam/election_id_set_version_pair.h
+++ b/src/mongo/client/sdam/election_id_set_version_pair.h
@@ -43,8 +43,8 @@ struct ElectionIdSetVersionPair {
return electionId && setVersion;
}
- bool allUndefined() const {
- return !electionId && !setVersion;
+ bool anyDefined() const {
+ return electionId || setVersion;
}
bool anyUndefined() const {
@@ -54,11 +54,6 @@ struct ElectionIdSetVersionPair {
BSONObj toBSON() const;
};
-inline bool electionIdEqual(const ElectionIdSetVersionPair& p1,
- const ElectionIdSetVersionPair& p2) {
- return p1.electionId && p2.electionId && (*p1.electionId).compare(*p2.electionId) == 0;
-}
-
inline bool operator<(const ElectionIdSetVersionPair& p1, const ElectionIdSetVersionPair& p2) {
if (p1.anyUndefined() && p2.allDefined()) {
return true;
@@ -82,22 +77,6 @@ inline bool setVersionWentBackwards(const ElectionIdSetVersionPair& current,
return current.setVersion && incoming.setVersion && *current.setVersion > *incoming.setVersion;
}
-/**
- * @return true if 'incoming' is RS primary and fields have consistent values.
- */
-inline bool isIncomingPrimaryConsistent(const ElectionIdSetVersionPair& current,
- const ElectionIdSetVersionPair& incoming) {
- // If Set version goes backwards the term should advance, because it means
- // failover happened. This is possible if the previous primary failed to replicate
- // new Set version before failover. When it happens, we will rollback the Set version.
- if (setVersionWentBackwards(current, incoming)) {
- return !current.electionId || !incoming.electionId ||
- (*current.electionId).compare(*incoming.electionId) < 0;
- }
-
- return true;
-}
-
inline BSONObj ElectionIdSetVersionPair::toBSON() const {
BSONObjBuilder bob;
if (electionId) {
diff --git a/src/mongo/client/sdam/election_id_set_version_pair_test.cpp b/src/mongo/client/sdam/election_id_set_version_pair_test.cpp
index 93a95c10097..dbb6550aa0e 100644
--- a/src/mongo/client/sdam/election_id_set_version_pair_test.cpp
+++ b/src/mongo/client/sdam/election_id_set_version_pair_test.cpp
@@ -49,8 +49,6 @@ public:
kNotComparable
};
- enum class Consistency { kConsistent, kInconsistent };
-
struct TestCase {
// Curent topology max fields.
const boost::optional<OID> kTerm1;
@@ -59,7 +57,6 @@ public:
const boost::optional<OID> kTerm2;
const boost::optional<int> kSet2;
const Compare compare;
- const Consistency consistent;
};
std::string log(const TestCase& t, int testNum) {
@@ -89,54 +86,51 @@ public:
ASSERT_FALSE(p1 == p2) << log(t, testNum);
break;
}
-
- const bool isConsistent = isIncomingPrimaryConsistent(p1, p2);
- ASSERT_EQ(t.consistent == Consistency::kConsistent, isConsistent) << log(t, testNum);
}
};
TEST_F(ElectionIdSetVersionPairTest, ExpectedOutcome) {
std::vector<TestCase> tests = {
// At startup, both current fields are not set. Field set > unset.
- {kNullOid, kNullSet, kOidOne, 1, Compare::kLess, Consistency::kConsistent},
+ {kNullOid, kNullSet, kOidOne, 1, Compare::kLess},
- {kOidOne, 1, kOidOne, 1, Compare::kEquals, Consistency::kConsistent},
+ {kOidOne, 1, kOidOne, 1, Compare::kEquals},
// One field is not set. This should never happen however added for better
// coverage for malformed protocol.
- {kNullOid, 1, kOidOne, 1, Compare::kLess, Consistency::kConsistent},
- {kOidOne, kNullSet, kOidOne, 1, Compare::kLess, Consistency::kConsistent},
- {kOidOne, 1, kNullOid, 1, Compare::kGreater, Consistency::kConsistent},
- {kOidOne, 1, kOidOne, kNullSet, Compare::kGreater, Consistency::kConsistent},
+ {kNullOid, 1, kOidOne, 1, Compare::kLess},
+ {kOidOne, kNullSet, kOidOne, 1, Compare::kLess},
+ {kOidOne, 1, kNullOid, 1, Compare::kGreater},
+ {kOidOne, 1, kOidOne, kNullSet, Compare::kGreater},
// Primary advanced one way or another. "Less" means current < incoming.
- {kOidOne, 1, kOidTwo, 1, Compare::kLess, Consistency::kConsistent},
- {kOidOne, 1, kOidOne, 2, Compare::kLess, Consistency::kConsistent},
+ {kOidOne, 1, kOidTwo, 1, Compare::kLess},
+ {kOidOne, 1, kOidOne, 2, Compare::kLess},
// Primary advanced but current state is incomplete.
- {kNullOid, 1, kOidTwo, 1, Compare::kLess, Consistency::kConsistent},
- {kNullOid, 1, kOidOne, 2, Compare::kLess, Consistency::kConsistent},
- {kOidOne, kNullSet, kOidTwo, 1, Compare::kLess, Consistency::kConsistent},
- {kOidOne, kNullSet, kOidOne, 2, Compare::kLess, Consistency::kConsistent},
+ {kNullOid, 1, kOidTwo, 1, Compare::kLess},
+ {kNullOid, 1, kOidOne, 2, Compare::kLess},
+ {kOidOne, kNullSet, kOidTwo, 1, Compare::kLess},
+ {kOidOne, kNullSet, kOidOne, 2, Compare::kLess},
// Primary went backwards one way or another.
// Inconsistent because Set version went backwards without Term being changed (same
// primary).
- {kOidTwo, 2, kOidTwo, 1, Compare::kGreater, Consistency::kInconsistent},
- {kOidTwo, 2, kOidOne, 2, Compare::kGreater, Consistency::kConsistent},
+ {kOidTwo, 2, kOidTwo, 1, Compare::kGreater},
+ {kOidTwo, 2, kOidOne, 2, Compare::kGreater},
// Primary went backwards with current state incomplete.
- {kNullOid, 2, kOidTwo, 1, Compare::kLess, Consistency::kConsistent},
- {kOidTwo, kNullSet, kOidOne, 1, Compare::kGreater, Consistency::kConsistent},
+ {kNullOid, 2, kOidTwo, 1, Compare::kLess},
+ {kOidTwo, kNullSet, kOidOne, 1, Compare::kGreater},
// Stale primary case when Term went backwards but Set version advanced.
// This case is 'consistent' because it's normal for stale primary. The
// important part is that 'current' > 'incoming', which makes the node 'Unknown'.
- {kOidTwo, 1, kOidOne, 2, Compare::kGreater, Consistency::kConsistent},
+ {kOidTwo, 1, kOidOne, 2, Compare::kGreater},
// Previous primary was unable to replicate the Set version before failover,
// new primary forces it to be rolled back.
- {kOidOne, 2, kOidTwo, 1, Compare::kLess, Consistency::kConsistent},
+ {kOidOne, 2, kOidTwo, 1, Compare::kLess},
};
int testNum = 0;
diff --git a/src/mongo/client/sdam/json_tests/sdam_tests/rs/null_election_id.json b/src/mongo/client/sdam/json_tests/sdam_tests/rs/null_election_id.json
index 3de0a74e413..3d6f7655576 100644
--- a/src/mongo/client/sdam/json_tests/sdam_tests/rs/null_election_id.json
+++ b/src/mongo/client/sdam/json_tests/sdam_tests/rs/null_election_id.json
@@ -120,15 +120,18 @@
"outcome": {
"servers": {
"a:27017": {
+ "type": "Unknown",
+ "setName": null,
+ "electionId": null,
+ "setVersion": null
+ },
+ "b:27017": {
"type": "RSPrimary",
"setName": "rs",
"setVersion": 1,
- "electionId": null
- },
- "b:27017": {
- "type": "Unknown",
- "setName": null,
- "electionId": null
+ "electionId": {
+ "$oid": "000000000000000000000002"
+ }
},
"c:27017": {
"type": "Unknown",
@@ -170,15 +173,18 @@
"outcome": {
"servers": {
"a:27017": {
+ "type": "Unknown",
+ "setName": null,
+ "electionId": null,
+ "setVersion": null
+ },
+ "b:27017": {
"type": "RSPrimary",
"setName": "rs",
"setVersion": 1,
- "electionId": null
- },
- "b:27017": {
- "type": "Unknown",
- "setName": null,
- "electionId": null
+ "electionId": {
+ "$oid": "000000000000000000000002"
+ }
},
"c:27017": {
"type": "Unknown",
diff --git a/src/mongo/client/sdam/json_tests/sdam_tests/rs/set_version_can_rollback.json b/src/mongo/client/sdam/json_tests/sdam_tests/rs/set_version_can_rollback.json
new file mode 100644
index 00000000000..d3fa9acb3c2
--- /dev/null
+++ b/src/mongo/client/sdam/json_tests/sdam_tests/rs/set_version_can_rollback.json
@@ -0,0 +1,146 @@
+{
+ "description": "Set version rolls back after new primary with higher election Id",
+ "uri": "mongodb://a/?replicaSet=rs",
+ "phases": [
+ {
+ "responses": [
+ [
+ "a:27017",
+ {
+ "ok": 1,
+ "ismaster": true,
+ "hosts": [
+ "a:27017",
+ "b:27017"
+ ],
+ "setName": "rs",
+ "setVersion": 2,
+ "electionId": {
+ "$oid": "000000000000000000000001"
+ },
+ "minWireVersion": 0,
+ "maxWireVersion": 6
+ }
+ ]
+ ],
+ "outcome": {
+ "servers": {
+ "a:27017": {
+ "type": "RSPrimary",
+ "setName": "rs",
+ "setVersion": 2,
+ "electionId": {
+ "$oid": "000000000000000000000001"
+ }
+ },
+ "b:27017": {
+ "type": "Unknown",
+ "setName": null,
+ "electionId": null
+ }
+ },
+ "topologyType": "ReplicaSetWithPrimary",
+ "logicalSessionTimeoutMinutes": null,
+ "setName": "rs",
+ "maxSetVersion": 2,
+ "maxElectionId": {
+ "$oid": "000000000000000000000001"
+ }
+ }
+ },
+ {
+ "_comment": "Response from new primary with newer election Id",
+ "responses": [
+ [
+ "b:27017",
+ {
+ "ok": 1,
+ "ismaster": true,
+ "hosts": [
+ "a:27017",
+ "b:27017"
+ ],
+ "setName": "rs",
+ "setVersion": 1,
+ "electionId": {
+ "$oid": "000000000000000000000002"
+ },
+ "minWireVersion": 0,
+ "maxWireVersion": 6
+ }
+ ]
+ ],
+ "outcome": {
+ "servers": {
+ "a:27017": {
+ "type": "Unknown",
+ "setName": null,
+ "electionId": null
+ },
+ "b:27017": {
+ "type": "RSPrimary",
+ "setName": "rs",
+ "setVersion": 1,
+ "electionId": {
+ "$oid": "000000000000000000000002"
+ }
+ }
+ },
+ "topologyType": "ReplicaSetWithPrimary",
+ "logicalSessionTimeoutMinutes": null,
+ "setName": "rs",
+ "maxSetVersion": 1,
+ "maxElectionId": {
+ "$oid": "000000000000000000000002"
+ }
+ }
+ },
+ {
+ "_comment": "Response from stale primary",
+ "responses": [
+ [
+ "a:27017",
+ {
+ "ok": 1,
+ "ismaster": true,
+ "hosts": [
+ "a:27017",
+ "b:27017"
+ ],
+ "setName": "rs",
+ "setVersion": 2,
+ "electionId": {
+ "$oid": "000000000000000000000001"
+ },
+ "minWireVersion": 0,
+ "maxWireVersion": 6
+ }
+ ]
+ ],
+ "outcome": {
+ "servers": {
+ "a:27017": {
+ "type": "Unknown",
+ "setName": null,
+ "electionId": null
+ },
+ "b:27017": {
+ "type": "RSPrimary",
+ "setName": "rs",
+ "setVersion": 1,
+ "electionId": {
+ "$oid": "000000000000000000000002"
+ }
+ }
+ },
+ "topologyType": "ReplicaSetWithPrimary",
+ "logicalSessionTimeoutMinutes": null,
+ "setName": "rs",
+ "maxSetVersion": 1,
+ "maxElectionId": {
+ "$oid": "000000000000000000000002"
+ }
+ }
+ }
+ ]
+}
diff --git a/src/mongo/client/sdam/json_tests/sdam_tests/rs/setversion_without_electionid.json b/src/mongo/client/sdam/json_tests/sdam_tests/rs/setversion_without_electionid.json
index 0500c6d1575..07ec55cee1c 100644
--- a/src/mongo/client/sdam/json_tests/sdam_tests/rs/setversion_without_electionid.json
+++ b/src/mongo/client/sdam/json_tests/sdam_tests/rs/setversion_without_electionid.json
@@ -61,14 +61,14 @@
"outcome": {
"servers": {
"a:27017": {
- "type": "Unknown",
- "setName": null,
+ "type": "RSPrimary",
+ "setName": "rs",
+ "setVersion": 2,
"electionId": null
},
"b:27017": {
- "type": "RSPrimary",
- "setName": "rs",
- "setVersion": 1,
+ "type": "Unknown",
+ "setName": null,
"electionId": null
}
},
diff --git a/src/mongo/client/sdam/json_tests/sdam_tests/rs/use_setversion_without_electionid.json b/src/mongo/client/sdam/json_tests/sdam_tests/rs/use_setversion_without_electionid.json
index 16225d6b83f..365af2f7e19 100644
--- a/src/mongo/client/sdam/json_tests/sdam_tests/rs/use_setversion_without_electionid.json
+++ b/src/mongo/client/sdam/json_tests/sdam_tests/rs/use_setversion_without_electionid.json
@@ -69,20 +69,23 @@
"outcome": {
"servers": {
"a:27017": {
+ "type": "RSPrimary",
+ "setName": "rs",
+ "setVersion": 1,
+ "electionId": {
+ "$oid": "000000000000000000000001"
+ }
+ },
+ "b:27017": {
"type": "Unknown",
"setName": null,
"electionId": null
- },
- "b:27017": {
- "type": "RSPrimary",
- "setName": "rs",
- "setVersion": 2
}
},
"topologyType": "ReplicaSetWithPrimary",
"logicalSessionTimeoutMinutes": null,
"setName": "rs",
- "maxSetVersion": 2,
+ "maxSetVersion": 1,
"maxElectionId": {
"$oid": "000000000000000000000001"
}
@@ -112,22 +115,25 @@
"outcome": {
"servers": {
"a:27017": {
+ "type": "RSPrimary",
+ "setName": "rs",
+ "setVersion": 1,
+ "electionId": {
+ "$oid": "000000000000000000000002"
+ }
+ },
+ "b:27017": {
"type": "Unknown",
"setName": null,
"electionId": null
- },
- "b:27017": {
- "type": "RSPrimary",
- "setName": "rs",
- "setVersion": 2
}
},
"topologyType": "ReplicaSetWithPrimary",
"logicalSessionTimeoutMinutes": null,
"setName": "rs",
- "maxSetVersion": 2,
+ "maxSetVersion": 1,
"maxElectionId": {
- "$oid": "000000000000000000000001"
+ "$oid": "000000000000000000000002"
}
}
}
diff --git a/src/mongo/client/sdam/server_description.cpp b/src/mongo/client/sdam/server_description.cpp
index 143902f021c..042ad352c28 100644
--- a/src/mongo/client/sdam/server_description.cpp
+++ b/src/mongo/client/sdam/server_description.cpp
@@ -277,16 +277,8 @@ const boost::optional<std::string>& ServerDescription::getSetName() const {
return _setName;
}
-const boost::optional<int>& ServerDescription::getSetVersion() const {
- return _setVersion;
-}
-
-const boost::optional<mongo::OID>& ServerDescription::getElectionId() const {
- return _electionId;
-}
-
const ElectionIdSetVersionPair ServerDescription::getElectionIdSetVersionPair() const {
- return ElectionIdSetVersionPair{getElectionId(), getSetVersion()};
+ return ElectionIdSetVersionPair{_electionId, _setVersion};
}
const boost::optional<HostAndPort>& ServerDescription::getPrimary() const {
diff --git a/src/mongo/client/sdam/server_description.h b/src/mongo/client/sdam/server_description.h
index 666acd1bc37..4a7505a0973 100644
--- a/src/mongo/client/sdam/server_description.h
+++ b/src/mongo/client/sdam/server_description.h
@@ -103,9 +103,6 @@ public:
const std::set<HostAndPort>& getHosts() const;
const std::set<HostAndPort>& getPassives() const;
const std::set<HostAndPort>& getArbiters() const;
- // TODO(SERVER-59409): remove next 2 methods and keep only pair getter.
- const boost::optional<int>& getSetVersion() const;
- const boost::optional<OID>& getElectionId() const;
const ElectionIdSetVersionPair getElectionIdSetVersionPair() const;
const boost::optional<TopologyVersion>& getTopologyVersion() const;
const boost::optional<TopologyDescriptionPtr> getTopologyDescription();
diff --git a/src/mongo/client/sdam/server_description_test.cpp b/src/mongo/client/sdam/server_description_test.cpp
index eac743ed6f5..b4cbc940c28 100644
--- a/src/mongo/client/sdam/server_description_test.cpp
+++ b/src/mongo/client/sdam/server_description_test.cpp
@@ -475,7 +475,8 @@ TEST_F(ServerDescriptionTestFixture, ShouldStoreSetVersionAndName) {
kBsonSetVersionName,
duration_cast<HelloRTT>(mongo::Milliseconds(40)));
auto description = ServerDescription(clockSource, response);
- ASSERT_EQUALS(kBsonSetVersionName.getIntField("setVersion"), description.getSetVersion());
+ ASSERT_EQUALS(kBsonSetVersionName.getIntField("setVersion"),
+ description.getElectionIdSetVersionPair().setVersion);
ASSERT_EQUALS(std::string(kBsonSetVersionName.getStringField("setName")),
description.getSetName());
}
@@ -484,7 +485,8 @@ TEST_F(ServerDescriptionTestFixture, ShouldStoreElectionId) {
auto response = HelloOutcome(
HostAndPort("foo:1234"), kBsonElectionId, duration_cast<HelloRTT>(mongo::Milliseconds(40)));
auto description = ServerDescription(clockSource, response);
- ASSERT_EQUALS(kBsonElectionId.getField("electionId").OID(), description.getElectionId());
+ ASSERT_EQUALS(kBsonElectionId.getField("electionId").OID(),
+ description.getElectionIdSetVersionPair().electionId);
}
TEST_F(ServerDescriptionTestFixture, ShouldStorePrimary) {
@@ -537,8 +539,8 @@ TEST_F(ServerDescriptionTestFixture, ShouldStoreCorrectDefaultValuesOnSuccess) {
ASSERT_EQUALS(static_cast<size_t>(0), description.getPassives().size());
ASSERT_EQUALS(static_cast<size_t>(0), description.getTags().size());
ASSERT_EQUALS(boost::none, description.getSetName());
- ASSERT_EQUALS(boost::none, description.getSetVersion());
- ASSERT_EQUALS(boost::none, description.getElectionId());
+ ASSERT_EQUALS(boost::none, description.getElectionIdSetVersionPair().setVersion);
+ ASSERT_EQUALS(boost::none, description.getElectionIdSetVersionPair().electionId);
ASSERT_EQUALS(boost::none, description.getPrimary());
ASSERT_EQUALS(boost::none, description.getLogicalSessionTimeoutMinutes());
ASSERT(boost::none == description.getTopologyVersion());
@@ -557,8 +559,8 @@ TEST_F(ServerDescriptionTestFixture, ShouldStoreCorrectDefaultValuesOnFailure) {
ASSERT_EQUALS(static_cast<size_t>(0), description.getPassives().size());
ASSERT_EQUALS(static_cast<size_t>(0), description.getTags().size());
ASSERT_EQUALS(boost::none, description.getSetName());
- ASSERT_EQUALS(boost::none, description.getSetVersion());
- ASSERT_EQUALS(boost::none, description.getElectionId());
+ ASSERT_EQUALS(boost::none, description.getElectionIdSetVersionPair().setVersion);
+ ASSERT_EQUALS(boost::none, description.getElectionIdSetVersionPair().electionId);
ASSERT_EQUALS(boost::none, description.getPrimary());
ASSERT_EQUALS(boost::none, description.getLogicalSessionTimeoutMinutes());
ASSERT(boost::none == description.getTopologyVersion());
diff --git a/src/mongo/client/sdam/topology_description.cpp b/src/mongo/client/sdam/topology_description.cpp
index 63ff6187edf..9dd586f8a18 100644
--- a/src/mongo/client/sdam/topology_description.cpp
+++ b/src/mongo/client/sdam/topology_description.cpp
@@ -26,17 +26,24 @@
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kNetwork
+
#include "mongo/client/sdam/topology_description.h"
-#include "mongo/client/sdam/sdam_datatypes.h"
-#include "mongo/client/sdam/server_description.h"
-#include "mongo/db/wire_version.h"
-#include "mongo/util/fail_point.h"
#include <algorithm>
#include <cstring>
#include <iterator>
#include <memory>
+#include "mongo/client/sdam/sdam_datatypes.h"
+#include "mongo/client/sdam/server_description.h"
+#include "mongo/db/wire_version.h"
+#include "mongo/logv2/log.h"
+#include "mongo/util/fail_point.h"
+
+// Checkpoint to track when election Id and Set version is changed.
+MONGO_FAIL_POINT_DEFINE(maxElectionIdSetVersionPairUpdated);
+
namespace mongo::sdam {
MONGO_FAIL_POINT_DEFINE(topologyDescriptionInstallServerDescription);
@@ -85,16 +92,22 @@ const boost::optional<std::string>& TopologyDescription::getSetName() const {
return _setName;
}
-const boost::optional<int>& TopologyDescription::getMaxSetVersion() const {
- return _maxSetVersion;
-}
-
-const boost::optional<OID>& TopologyDescription::getMaxElectionId() const {
- return _maxElectionId;
+ElectionIdSetVersionPair TopologyDescription::getMaxElectionIdSetVersionPair() const {
+ return _maxElectionIdSetVersionPair;
}
-const ElectionIdSetVersionPair TopologyDescription::getMaxElectionIdSetVersionPair() const {
- return ElectionIdSetVersionPair{getMaxElectionId(), getMaxSetVersion()};
+void TopologyDescription::updateMaxElectionIdSetVersionPair(const ElectionIdSetVersionPair& pair) {
+ if (MONGO_unlikely(maxElectionIdSetVersionPairUpdated.shouldFail())) {
+ LOGV2(5940906,
+ "Fail point maxElectionIdSetVersionPairUpdated",
+ "topologyId"_attr = _id,
+ "primaryForSet"_attr = _setName ? *_setName : std::string("Unknown"),
+ "incomingElectionId"_attr = pair.electionId,
+ "currentMaxElectionId"_attr = _maxElectionIdSetVersionPair.electionId,
+ "incomingSetVersion"_attr = pair.setVersion,
+ "currentMaxSetVersion"_attr = _maxElectionIdSetVersionPair.setVersion);
+ }
+ _maxElectionIdSetVersionPair = pair;
}
const std::vector<ServerDescriptionPtr>& TopologyDescription::getServers() const {
@@ -287,12 +300,8 @@ BSONObj TopologyDescription::toBSON() {
bson << "compatibleError" << *_compatibleError;
}
- if (_maxSetVersion) {
- bson << "maxSetVersion" << *_maxSetVersion;
- }
-
- if (_maxElectionId) {
- bson << "maxElectionId" << *_maxElectionId;
+ if (_maxElectionIdSetVersionPair.anyDefined()) {
+ bson << "maxElectionIdSetVersion" << _maxElectionIdSetVersionPair.toBSON();
}
return bson.obj();
diff --git a/src/mongo/client/sdam/topology_description.h b/src/mongo/client/sdam/topology_description.h
index b7de69401a0..d8799356513 100644
--- a/src/mongo/client/sdam/topology_description.h
+++ b/src/mongo/client/sdam/topology_description.h
@@ -67,11 +67,7 @@ public:
const UUID& getId() const;
TopologyType getType() const;
const boost::optional<std::string>& getSetName() const;
-
- // TODO(SERVER-59409): remove next 2 methods and keep only pair getter.
- const boost::optional<int>& getMaxSetVersion() const;
- const boost::optional<OID>& getMaxElectionId() const;
- const ElectionIdSetVersionPair getMaxElectionIdSetVersionPair() const;
+ ElectionIdSetVersionPair getMaxElectionIdSetVersionPair() const;
const std::vector<ServerDescriptionPtr>& getServers() const;
@@ -105,15 +101,13 @@ private:
friend bool operator==(const TopologyDescription& lhs, const TopologyDescription& rhs) {
return std::tie(lhs._setName,
lhs._type,
- lhs._maxSetVersion,
- lhs._maxElectionId,
+ lhs._maxElectionIdSetVersionPair,
lhs._servers,
lhs._compatible,
lhs._logicalSessionTimeoutMinutes) ==
std::tie(rhs._setName,
rhs._type,
- rhs._maxSetVersion,
- rhs._maxElectionId,
+ rhs._maxElectionIdSetVersionPair,
rhs._servers,
rhs._compatible,
rhs._logicalSessionTimeoutMinutes);
@@ -147,6 +141,8 @@ private:
*/
void calculateLogicalSessionTimeout();
+ void updateMaxElectionIdSetVersionPair(const ElectionIdSetVersionPair& pair);
+
// unique id for this topology
UUID _id = UUID::gen();
@@ -156,13 +152,12 @@ private:
// setName: the replica set name. Default null.
boost::optional<std::string> _setName;
- // maxSetVersion: an integer or null. The largest setVersion ever reported by a primary.
- // Default null.
- boost::optional<int> _maxSetVersion;
-
- // maxElectionId: an ObjectId or null. The largest electionId ever reported by a primary.
- // Default null.
- boost::optional<OID> _maxElectionId;
+ // The tuple consisting of:
+ // maxSetVersion: an integer or none. The largest setVersion ever reported by a primary.
+ // Note: maxSetVersion can go backwards.
+ // maxElectionId: an ObjectId or none. The largest electionId ever reported by a primary.
+ // Default {none, none}.
+ ElectionIdSetVersionPair _maxElectionIdSetVersionPair;
// servers: a set of ServerDescription instances. Default contains one server:
// "localhost:27017", ServerType Unknown.
diff --git a/src/mongo/client/sdam/topology_description_builder.cpp b/src/mongo/client/sdam/topology_description_builder.cpp
index 15ed22f3242..6afc3d8126b 100644
--- a/src/mongo/client/sdam/topology_description_builder.cpp
+++ b/src/mongo/client/sdam/topology_description_builder.cpp
@@ -55,7 +55,7 @@ TopologyDescriptionBuilder& TopologyDescriptionBuilder::withSetName(const std::s
}
TopologyDescriptionBuilder& TopologyDescriptionBuilder::withMaxSetVersion(int maxSetVersion) {
- _instance->_maxSetVersion = maxSetVersion;
+ _instance->_maxElectionIdSetVersionPair.setVersion = maxSetVersion;
return *this;
}
@@ -67,7 +67,7 @@ TopologyDescriptionBuilder& TopologyDescriptionBuilder::withServers(
TopologyDescriptionBuilder& TopologyDescriptionBuilder::withMaxElectionID(
const OID& maxElectionId) {
- _instance->_maxElectionId = maxElectionId;
+ _instance->_maxElectionIdSetVersionPair.electionId = maxElectionId;
return *this;
}
diff --git a/src/mongo/client/sdam/topology_state_machine.cpp b/src/mongo/client/sdam/topology_state_machine.cpp
index 4aa91d07637..ed4d39abddf 100644
--- a/src/mongo/client/sdam/topology_state_machine.cpp
+++ b/src/mongo/client/sdam/topology_state_machine.cpp
@@ -27,12 +27,15 @@
* it in the license file.
*/
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kNetwork
+
#include "mongo/client/sdam/topology_state_machine.h"
#include <ostream>
+#include "mongo/client/sdam/election_id_set_version_pair.h"
#include "mongo/client/sdam/sdam_test_base.h"
#include "mongo/logv2/log.h"
+#include "mongo/util/fail_point.h"
namespace mongo::sdam {
namespace {
@@ -274,28 +277,24 @@ void TopologyStateMachine::updateRSFromPrimary(TopologyDescription& topologyDesc
return;
}
- auto serverDescSetVersion = serverDescription->getSetVersion();
- auto serverDescElectionId = serverDescription->getElectionId();
- auto topologyMaxSetVersion = topologyDescription.getMaxSetVersion();
- auto topologyMaxElectionId = topologyDescription.getMaxElectionId();
- if (serverDescSetVersion && serverDescElectionId) {
- if (topologyMaxSetVersion && topologyMaxElectionId &&
- ((topologyMaxSetVersion > serverDescSetVersion) ||
- (topologyMaxSetVersion == serverDescSetVersion &&
- (*topologyMaxElectionId).compare(*serverDescElectionId) > 0))) {
- // stale primary
- installServerDescription(
- topologyDescription, std::make_shared<ServerDescription>(serverDescAddress), false);
- checkIfHasPrimary(topologyDescription, serverDescription);
- return;
- }
- modifyMaxElectionId(topologyDescription, *serverDescription->getElectionId());
+ const ElectionIdSetVersionPair incomingElectionIdSetVersion =
+ serverDescription->getElectionIdSetVersionPair();
+ const ElectionIdSetVersionPair currentMaxElectionIdSetVersion =
+ topologyDescription.getMaxElectionIdSetVersionPair();
+
+ if (incomingElectionIdSetVersion < currentMaxElectionIdSetVersion) {
+ LOGV2(5940901,
+ "Stale primary detected, marking its state as unknown",
+ "primary"_attr = serverDescription->getAddress(),
+ "incomingElectionIdSetVersion"_attr = incomingElectionIdSetVersion,
+ "currentMaxElectionIdSetVersion"_attr = currentMaxElectionIdSetVersion);
+ installServerDescription(
+ topologyDescription, std::make_shared<ServerDescription>(serverDescAddress), false);
+ checkIfHasPrimary(topologyDescription, serverDescription);
+ return;
}
- if (serverDescSetVersion &&
- (!topologyMaxSetVersion || (serverDescSetVersion > topologyMaxSetVersion))) {
- modifyMaxSetVersion(topologyDescription, *serverDescSetVersion);
- }
+ topologyDescription.updateMaxElectionIdSetVersionPair(incomingElectionIdSetVersion);
auto oldPrimaries = topologyDescription.findServers(
[serverDescAddress](const ServerDescriptionPtr& description) {
@@ -403,14 +402,4 @@ void TopologyStateMachine::installServerDescription(TopologyDescription& topolog
bool newServer) {
topologyDescription.installServerDescription(newServerDescription);
}
-
-void TopologyStateMachine::modifyMaxElectionId(TopologyDescription& topologyDescription,
- const OID& newMaxElectionId) {
- topologyDescription._maxElectionId = newMaxElectionId;
-}
-
-void TopologyStateMachine::modifyMaxSetVersion(TopologyDescription& topologyDescription,
- int& newMaxSetVersion) {
- topologyDescription._maxSetVersion = newMaxSetVersion;
-}
} // namespace mongo::sdam
diff --git a/src/mongo/client/sdam/topology_state_machine.h b/src/mongo/client/sdam/topology_state_machine.h
index c946de9c0e3..158377c3346 100644
--- a/src/mongo/client/sdam/topology_state_machine.h
+++ b/src/mongo/client/sdam/topology_state_machine.h
@@ -95,9 +95,6 @@ private:
void modifySetName(TopologyDescription& topologyDescription,
const boost::optional<std::string>& setName);
- void modifyMaxElectionId(TopologyDescription& topologyDescription, const OID& newMaxElectionId);
- void modifyMaxSetVersion(TopologyDescription& topologyDescription, int& newMaxSetVersion);
-
StateTransitionTable _stt;
SdamConfiguration _config;
};
diff --git a/src/mongo/client/streamable_replica_set_monitor.cpp b/src/mongo/client/streamable_replica_set_monitor.cpp
index 86d08e595db..a3ac65a4f8d 100644
--- a/src/mongo/client/streamable_replica_set_monitor.cpp
+++ b/src/mongo/client/streamable_replica_set_monitor.cpp
@@ -727,6 +727,21 @@ void StreamableReplicaSetMonitor::onTopologyDescriptionChangedEvent(
}
}
}
+
+ if (auto previousMaxElectionIdSetVersionPair =
+ previousDescription->getMaxElectionIdSetVersionPair(),
+ newMaxElectionIdSetVersionPair = newDescription->getMaxElectionIdSetVersionPair();
+ setVersionWentBackwards(previousMaxElectionIdSetVersionPair,
+ newMaxElectionIdSetVersionPair)) {
+ // The previous primary was unable to reach consensus for the config with
+ // higher version and it was abandoned after failover.
+ LOGV2(5940902,
+ "Max known Set version coming from new primary forces to rollback it backwards",
+ "replicaSet"_attr = getName(),
+ "newElectionIdSetVersion"_attr = newMaxElectionIdSetVersionPair.setVersion,
+ "previousMaxElectionIdSetVersion"_attr =
+ previousMaxElectionIdSetVersionPair.setVersion);
+ }
}
void StreamableReplicaSetMonitor::onServerHeartbeatSucceededEvent(const HostAndPort& hostAndPort,
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index f49c0d0d4d2..edd4a533241 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -142,6 +142,12 @@ MONGO_FAIL_POINT_DEFINE(doNotRemoveNewlyAddedOnHeartbeats);
MONGO_FAIL_POINT_DEFINE(hangDuringAutomaticReconfig);
// Make reconfig command hang before validating new config.
MONGO_FAIL_POINT_DEFINE(ReconfigHangBeforeConfigValidationCheck);
+// Blocks after reconfig runs.
+MONGO_FAIL_POINT_DEFINE(hangAfterReconfig);
+// Allows skipping fetching the config from ping sender.
+MONGO_FAIL_POINT_DEFINE(skipBeforeFetchingConfig);
+// Hang after grabbing the RSTL but before we start rejecting writes.
+MONGO_FAIL_POINT_DEFINE(stepdownHangAfterGrabbingRSTL);
// Number of times we tried to go live as a secondary.
Counter64 attemptsToBecomeSecondary;
@@ -3689,6 +3695,11 @@ Status ReplicationCoordinatorImpl::_doReplSetReconfig(OperationContext* opCtx,
configStateGuard.dismiss();
_finishReplSetReconfig(opCtx, newConfig, force, myIndex);
+ if (MONGO_unlikely(hangAfterReconfig.shouldFail())) {
+ LOGV2(5940904, "Hanging after reconfig on fail point");
+ hangAfterReconfig.pauseWhileSet();
+ }
+
return Status::OK();
}
@@ -5461,6 +5472,29 @@ bool ReplicationCoordinatorImpl::getWriteConcernMajorityShouldJournal_inlock() c
return _rsConfig.getWriteConcernMajorityShouldJournal();
}
+namespace {
+// Fail point to block and optionally skip fetching config. Supported arguments:
+// versionAndTerm: [ v, t ]
+void _handleBeforeFetchingConfig(const BSONObj& customArgs,
+ ConfigVersionAndTerm versionAndTerm,
+ bool* skipFetchingConfig) {
+ if (customArgs.hasElement("versionAndTerm")) {
+ const auto nested = customArgs["versionAndTerm"].embeddedObject();
+ std::vector<BSONElement> elements;
+ nested.elems(elements);
+ invariant(elements.size() == 2);
+ ConfigVersionAndTerm patternVersionAndTerm =
+ ConfigVersionAndTerm(elements[0].numberInt(), elements[1].numberInt());
+ if (patternVersionAndTerm == versionAndTerm) {
+ LOGV2(5940905,
+ "Failpoint is activated to skip fetching config for version and term",
+ "versionAndTerm"_attr = versionAndTerm);
+ *skipFetchingConfig = true;
+ }
+ }
+}
+} // namespace
+
Status ReplicationCoordinatorImpl::processHeartbeatV1(const ReplSetHeartbeatArgsV1& args,
ReplSetHeartbeatResponse* response) {
{
@@ -5514,8 +5548,16 @@ Status ReplicationCoordinatorImpl::processHeartbeatV1(const ReplSetHeartbeatArgs
// We cannot cancel the enqueued heartbeat, but either this one or the enqueued heartbeat
// will trigger reconfig, which cancels and reschedules all heartbeats.
else if (args.hasSender()) {
- LOGV2(21401, "Scheduling heartbeat to fetch a newer config", attr);
- _scheduleHeartbeatToTarget_inlock(senderHost, now);
+ bool inTestSkipFetchingConfig = false;
+ skipBeforeFetchingConfig.execute([&](const BSONObj& customArgs) {
+ _handleBeforeFetchingConfig(
+ customArgs, args.getConfigVersionAndTerm(), &inTestSkipFetchingConfig);
+ });
+
+ if (!inTestSkipFetchingConfig) {
+ LOGV2(21401, "Scheduling heartbeat to fetch a newer config", attr);
+ _scheduleHeartbeatToTarget_inlock(senderHost, now);
+ }
}
} else if (result.isOK() && args.getPrimaryId() >= 0 &&
(!response->hasPrimaryId() || response->getPrimaryId() != args.getPrimaryId())) {