From b996dfbc5135e94b38fb86014c656930cdc2f533 Mon Sep 17 00:00:00 2001 From: Andrew Shuvalov Date: Fri, 15 Oct 2021 19:21:43 +0000 Subject: SERVER-59409 backport BACKPORT-10657: Fix race between reconfig replication and stepup part 4 --- jstests/sharding/reconfig_race_with_failover.js | 65 +++++++++ .../client/sdam/election_id_set_version_pair.h | 25 +--- .../sdam/election_id_set_version_pair_test.cpp | 42 +++--- .../json_tests/sdam_tests/rs/null_election_id.json | 30 +++-- .../sdam_tests/rs/set_version_can_rollback.json | 146 +++++++++++++++++++++ .../rs/setversion_without_electionid.json | 10 +- .../rs/use_setversion_without_electionid.json | 32 +++-- src/mongo/client/sdam/server_description.cpp | 10 +- src/mongo/client/sdam/server_description.h | 3 - src/mongo/client/sdam/server_description_test.cpp | 14 +- src/mongo/client/sdam/topology_description.cpp | 45 ++++--- src/mongo/client/sdam/topology_description.h | 27 ++-- .../client/sdam/topology_description_builder.cpp | 4 +- src/mongo/client/sdam/topology_state_machine.cpp | 49 +++---- src/mongo/client/sdam/topology_state_machine.h | 3 - .../client/streamable_replica_set_monitor.cpp | 15 +++ src/mongo/db/repl/replication_coordinator_impl.cpp | 46 ++++++- 17 files changed, 400 insertions(+), 166 deletions(-) create mode 100644 jstests/sharding/reconfig_race_with_failover.js create mode 100644 src/mongo/client/sdam/json_tests/sdam_tests/rs/set_version_can_rollback.json diff --git a/jstests/sharding/reconfig_race_with_failover.js b/jstests/sharding/reconfig_race_with_failover.js new file mode 100644 index 00000000000..a63c40596cf --- /dev/null +++ b/jstests/sharding/reconfig_race_with_failover.js @@ -0,0 +1,65 @@ +/* + * Tests that if reconfig did not replicate before step up, the new primary + * is not stale because its new election Id takes precedence over Set version when + * comparing. + * + * @tags: [does_not_support_stepdowns, multiversion_incompatible] + */ +(function() { +"use strict"; +load("jstests/libs/fail_point_util.js"); +load('jstests/libs/parallel_shell_helpers.js'); +load("jstests/sharding/libs/sharded_index_util.js"); + +// Skip db hash check because secondary is left with a different config. +TestData.skipCheckDBHashes = true; + +const st = new ShardingTest({shards: {rs0: {nodes: [{}, {}, {rsConfig: {priority: 0}}]}}}); +const rst = st.rs0; +const primary = rst.getPrimary(); +if (primary !== nodes[0]) { + st.stop(); + return; // For simplicity. +} + +const config = rst.getReplSetConfigFromNode(); +jsTestLog(`Initial config ${tojson(config)}`); +config.version++; + +const versionToBlock = config.version; +const termToBlock = config.term; + +let pauseInReconfig = configureFailPoint(primary, "hangAfterReconfig"); +let maxElectionIdSetVersionPairUpdated = + configureFailPoint(primary, "maxElectionIdSetVersionPairUpdated"); + +// Fail points to prevent the new config from being replicated. +let testFpHangBeforeFetchingConfig1 = configureFailPoint( + nodes[1], "skipBeforeFetchingConfig", {versionAndTerm: [versionToBlock, termToBlock]}); +let testFpHangBeforeFetchingConfig2 = configureFailPoint( + nodes[2], "skipBeforeFetchingConfig", {versionAndTerm: [versionToBlock, termToBlock]}); +let runReconfigJoin = startParallelShell( + funWithArgs(function(config) { + jsTestLog(`Run reconfig`); + let result = db.getSiblingDB("admin").runCommand({replSetReconfig: config}); + jsTestLog(`Reconfig finished, result ${tojson(result)}`); + }, config), primary.port); +pauseInReconfig.wait(); +maxElectionIdSetVersionPairUpdated.wait(); + +// After step up the new primary still has the Set at previous version. +const nextPrimary = rst.getSecondary(); +jsTestLog("Step Up"); +nextPrimary.adminCommand({replSetStepUp: 1}); +pauseInReconfig.off(); +runReconfigJoin(); + +testFpHangBeforeFetchingConfig1.off(); +testFpHangBeforeFetchingConfig2.off(); + +// This command fails if mongos' RSM was stuck with stale primary unable to rollback Set version. +let res = ShardedIndexUtil.getPerShardIndexes(st.s.getCollection("config.system.sessions")); +jsTestLog(`Aggregate run on Mongos ${tojson(res)}`); + +st.stop(); +}()); diff --git a/src/mongo/client/sdam/election_id_set_version_pair.h b/src/mongo/client/sdam/election_id_set_version_pair.h index 296f94b6116..0325eed418d 100644 --- a/src/mongo/client/sdam/election_id_set_version_pair.h +++ b/src/mongo/client/sdam/election_id_set_version_pair.h @@ -43,8 +43,8 @@ struct ElectionIdSetVersionPair { return electionId && setVersion; } - bool allUndefined() const { - return !electionId && !setVersion; + bool anyDefined() const { + return electionId || setVersion; } bool anyUndefined() const { @@ -54,11 +54,6 @@ struct ElectionIdSetVersionPair { BSONObj toBSON() const; }; -inline bool electionIdEqual(const ElectionIdSetVersionPair& p1, - const ElectionIdSetVersionPair& p2) { - return p1.electionId && p2.electionId && (*p1.electionId).compare(*p2.electionId) == 0; -} - inline bool operator<(const ElectionIdSetVersionPair& p1, const ElectionIdSetVersionPair& p2) { if (p1.anyUndefined() && p2.allDefined()) { return true; @@ -82,22 +77,6 @@ inline bool setVersionWentBackwards(const ElectionIdSetVersionPair& current, return current.setVersion && incoming.setVersion && *current.setVersion > *incoming.setVersion; } -/** - * @return true if 'incoming' is RS primary and fields have consistent values. - */ -inline bool isIncomingPrimaryConsistent(const ElectionIdSetVersionPair& current, - const ElectionIdSetVersionPair& incoming) { - // If Set version goes backwards the term should advance, because it means - // failover happened. This is possible if the previous primary failed to replicate - // new Set version before failover. When it happens, we will rollback the Set version. - if (setVersionWentBackwards(current, incoming)) { - return !current.electionId || !incoming.electionId || - (*current.electionId).compare(*incoming.electionId) < 0; - } - - return true; -} - inline BSONObj ElectionIdSetVersionPair::toBSON() const { BSONObjBuilder bob; if (electionId) { diff --git a/src/mongo/client/sdam/election_id_set_version_pair_test.cpp b/src/mongo/client/sdam/election_id_set_version_pair_test.cpp index 93a95c10097..dbb6550aa0e 100644 --- a/src/mongo/client/sdam/election_id_set_version_pair_test.cpp +++ b/src/mongo/client/sdam/election_id_set_version_pair_test.cpp @@ -49,8 +49,6 @@ public: kNotComparable }; - enum class Consistency { kConsistent, kInconsistent }; - struct TestCase { // Curent topology max fields. const boost::optional kTerm1; @@ -59,7 +57,6 @@ public: const boost::optional kTerm2; const boost::optional kSet2; const Compare compare; - const Consistency consistent; }; std::string log(const TestCase& t, int testNum) { @@ -89,54 +86,51 @@ public: ASSERT_FALSE(p1 == p2) << log(t, testNum); break; } - - const bool isConsistent = isIncomingPrimaryConsistent(p1, p2); - ASSERT_EQ(t.consistent == Consistency::kConsistent, isConsistent) << log(t, testNum); } }; TEST_F(ElectionIdSetVersionPairTest, ExpectedOutcome) { std::vector tests = { // At startup, both current fields are not set. Field set > unset. - {kNullOid, kNullSet, kOidOne, 1, Compare::kLess, Consistency::kConsistent}, + {kNullOid, kNullSet, kOidOne, 1, Compare::kLess}, - {kOidOne, 1, kOidOne, 1, Compare::kEquals, Consistency::kConsistent}, + {kOidOne, 1, kOidOne, 1, Compare::kEquals}, // One field is not set. This should never happen however added for better // coverage for malformed protocol. - {kNullOid, 1, kOidOne, 1, Compare::kLess, Consistency::kConsistent}, - {kOidOne, kNullSet, kOidOne, 1, Compare::kLess, Consistency::kConsistent}, - {kOidOne, 1, kNullOid, 1, Compare::kGreater, Consistency::kConsistent}, - {kOidOne, 1, kOidOne, kNullSet, Compare::kGreater, Consistency::kConsistent}, + {kNullOid, 1, kOidOne, 1, Compare::kLess}, + {kOidOne, kNullSet, kOidOne, 1, Compare::kLess}, + {kOidOne, 1, kNullOid, 1, Compare::kGreater}, + {kOidOne, 1, kOidOne, kNullSet, Compare::kGreater}, // Primary advanced one way or another. "Less" means current < incoming. - {kOidOne, 1, kOidTwo, 1, Compare::kLess, Consistency::kConsistent}, - {kOidOne, 1, kOidOne, 2, Compare::kLess, Consistency::kConsistent}, + {kOidOne, 1, kOidTwo, 1, Compare::kLess}, + {kOidOne, 1, kOidOne, 2, Compare::kLess}, // Primary advanced but current state is incomplete. - {kNullOid, 1, kOidTwo, 1, Compare::kLess, Consistency::kConsistent}, - {kNullOid, 1, kOidOne, 2, Compare::kLess, Consistency::kConsistent}, - {kOidOne, kNullSet, kOidTwo, 1, Compare::kLess, Consistency::kConsistent}, - {kOidOne, kNullSet, kOidOne, 2, Compare::kLess, Consistency::kConsistent}, + {kNullOid, 1, kOidTwo, 1, Compare::kLess}, + {kNullOid, 1, kOidOne, 2, Compare::kLess}, + {kOidOne, kNullSet, kOidTwo, 1, Compare::kLess}, + {kOidOne, kNullSet, kOidOne, 2, Compare::kLess}, // Primary went backwards one way or another. // Inconsistent because Set version went backwards without Term being changed (same // primary). - {kOidTwo, 2, kOidTwo, 1, Compare::kGreater, Consistency::kInconsistent}, - {kOidTwo, 2, kOidOne, 2, Compare::kGreater, Consistency::kConsistent}, + {kOidTwo, 2, kOidTwo, 1, Compare::kGreater}, + {kOidTwo, 2, kOidOne, 2, Compare::kGreater}, // Primary went backwards with current state incomplete. - {kNullOid, 2, kOidTwo, 1, Compare::kLess, Consistency::kConsistent}, - {kOidTwo, kNullSet, kOidOne, 1, Compare::kGreater, Consistency::kConsistent}, + {kNullOid, 2, kOidTwo, 1, Compare::kLess}, + {kOidTwo, kNullSet, kOidOne, 1, Compare::kGreater}, // Stale primary case when Term went backwards but Set version advanced. // This case is 'consistent' because it's normal for stale primary. The // important part is that 'current' > 'incoming', which makes the node 'Unknown'. - {kOidTwo, 1, kOidOne, 2, Compare::kGreater, Consistency::kConsistent}, + {kOidTwo, 1, kOidOne, 2, Compare::kGreater}, // Previous primary was unable to replicate the Set version before failover, // new primary forces it to be rolled back. - {kOidOne, 2, kOidTwo, 1, Compare::kLess, Consistency::kConsistent}, + {kOidOne, 2, kOidTwo, 1, Compare::kLess}, }; int testNum = 0; diff --git a/src/mongo/client/sdam/json_tests/sdam_tests/rs/null_election_id.json b/src/mongo/client/sdam/json_tests/sdam_tests/rs/null_election_id.json index 3de0a74e413..3d6f7655576 100644 --- a/src/mongo/client/sdam/json_tests/sdam_tests/rs/null_election_id.json +++ b/src/mongo/client/sdam/json_tests/sdam_tests/rs/null_election_id.json @@ -120,15 +120,18 @@ "outcome": { "servers": { "a:27017": { + "type": "Unknown", + "setName": null, + "electionId": null, + "setVersion": null + }, + "b:27017": { "type": "RSPrimary", "setName": "rs", "setVersion": 1, - "electionId": null - }, - "b:27017": { - "type": "Unknown", - "setName": null, - "electionId": null + "electionId": { + "$oid": "000000000000000000000002" + } }, "c:27017": { "type": "Unknown", @@ -170,15 +173,18 @@ "outcome": { "servers": { "a:27017": { + "type": "Unknown", + "setName": null, + "electionId": null, + "setVersion": null + }, + "b:27017": { "type": "RSPrimary", "setName": "rs", "setVersion": 1, - "electionId": null - }, - "b:27017": { - "type": "Unknown", - "setName": null, - "electionId": null + "electionId": { + "$oid": "000000000000000000000002" + } }, "c:27017": { "type": "Unknown", diff --git a/src/mongo/client/sdam/json_tests/sdam_tests/rs/set_version_can_rollback.json b/src/mongo/client/sdam/json_tests/sdam_tests/rs/set_version_can_rollback.json new file mode 100644 index 00000000000..d3fa9acb3c2 --- /dev/null +++ b/src/mongo/client/sdam/json_tests/sdam_tests/rs/set_version_can_rollback.json @@ -0,0 +1,146 @@ +{ + "description": "Set version rolls back after new primary with higher election Id", + "uri": "mongodb://a/?replicaSet=rs", + "phases": [ + { + "responses": [ + [ + "a:27017", + { + "ok": 1, + "ismaster": true, + "hosts": [ + "a:27017", + "b:27017" + ], + "setName": "rs", + "setVersion": 2, + "electionId": { + "$oid": "000000000000000000000001" + }, + "minWireVersion": 0, + "maxWireVersion": 6 + } + ] + ], + "outcome": { + "servers": { + "a:27017": { + "type": "RSPrimary", + "setName": "rs", + "setVersion": 2, + "electionId": { + "$oid": "000000000000000000000001" + } + }, + "b:27017": { + "type": "Unknown", + "setName": null, + "electionId": null + } + }, + "topologyType": "ReplicaSetWithPrimary", + "logicalSessionTimeoutMinutes": null, + "setName": "rs", + "maxSetVersion": 2, + "maxElectionId": { + "$oid": "000000000000000000000001" + } + } + }, + { + "_comment": "Response from new primary with newer election Id", + "responses": [ + [ + "b:27017", + { + "ok": 1, + "ismaster": true, + "hosts": [ + "a:27017", + "b:27017" + ], + "setName": "rs", + "setVersion": 1, + "electionId": { + "$oid": "000000000000000000000002" + }, + "minWireVersion": 0, + "maxWireVersion": 6 + } + ] + ], + "outcome": { + "servers": { + "a:27017": { + "type": "Unknown", + "setName": null, + "electionId": null + }, + "b:27017": { + "type": "RSPrimary", + "setName": "rs", + "setVersion": 1, + "electionId": { + "$oid": "000000000000000000000002" + } + } + }, + "topologyType": "ReplicaSetWithPrimary", + "logicalSessionTimeoutMinutes": null, + "setName": "rs", + "maxSetVersion": 1, + "maxElectionId": { + "$oid": "000000000000000000000002" + } + } + }, + { + "_comment": "Response from stale primary", + "responses": [ + [ + "a:27017", + { + "ok": 1, + "ismaster": true, + "hosts": [ + "a:27017", + "b:27017" + ], + "setName": "rs", + "setVersion": 2, + "electionId": { + "$oid": "000000000000000000000001" + }, + "minWireVersion": 0, + "maxWireVersion": 6 + } + ] + ], + "outcome": { + "servers": { + "a:27017": { + "type": "Unknown", + "setName": null, + "electionId": null + }, + "b:27017": { + "type": "RSPrimary", + "setName": "rs", + "setVersion": 1, + "electionId": { + "$oid": "000000000000000000000002" + } + } + }, + "topologyType": "ReplicaSetWithPrimary", + "logicalSessionTimeoutMinutes": null, + "setName": "rs", + "maxSetVersion": 1, + "maxElectionId": { + "$oid": "000000000000000000000002" + } + } + } + ] +} diff --git a/src/mongo/client/sdam/json_tests/sdam_tests/rs/setversion_without_electionid.json b/src/mongo/client/sdam/json_tests/sdam_tests/rs/setversion_without_electionid.json index 0500c6d1575..07ec55cee1c 100644 --- a/src/mongo/client/sdam/json_tests/sdam_tests/rs/setversion_without_electionid.json +++ b/src/mongo/client/sdam/json_tests/sdam_tests/rs/setversion_without_electionid.json @@ -61,14 +61,14 @@ "outcome": { "servers": { "a:27017": { - "type": "Unknown", - "setName": null, + "type": "RSPrimary", + "setName": "rs", + "setVersion": 2, "electionId": null }, "b:27017": { - "type": "RSPrimary", - "setName": "rs", - "setVersion": 1, + "type": "Unknown", + "setName": null, "electionId": null } }, diff --git a/src/mongo/client/sdam/json_tests/sdam_tests/rs/use_setversion_without_electionid.json b/src/mongo/client/sdam/json_tests/sdam_tests/rs/use_setversion_without_electionid.json index 16225d6b83f..365af2f7e19 100644 --- a/src/mongo/client/sdam/json_tests/sdam_tests/rs/use_setversion_without_electionid.json +++ b/src/mongo/client/sdam/json_tests/sdam_tests/rs/use_setversion_without_electionid.json @@ -69,20 +69,23 @@ "outcome": { "servers": { "a:27017": { + "type": "RSPrimary", + "setName": "rs", + "setVersion": 1, + "electionId": { + "$oid": "000000000000000000000001" + } + }, + "b:27017": { "type": "Unknown", "setName": null, "electionId": null - }, - "b:27017": { - "type": "RSPrimary", - "setName": "rs", - "setVersion": 2 } }, "topologyType": "ReplicaSetWithPrimary", "logicalSessionTimeoutMinutes": null, "setName": "rs", - "maxSetVersion": 2, + "maxSetVersion": 1, "maxElectionId": { "$oid": "000000000000000000000001" } @@ -112,22 +115,25 @@ "outcome": { "servers": { "a:27017": { + "type": "RSPrimary", + "setName": "rs", + "setVersion": 1, + "electionId": { + "$oid": "000000000000000000000002" + } + }, + "b:27017": { "type": "Unknown", "setName": null, "electionId": null - }, - "b:27017": { - "type": "RSPrimary", - "setName": "rs", - "setVersion": 2 } }, "topologyType": "ReplicaSetWithPrimary", "logicalSessionTimeoutMinutes": null, "setName": "rs", - "maxSetVersion": 2, + "maxSetVersion": 1, "maxElectionId": { - "$oid": "000000000000000000000001" + "$oid": "000000000000000000000002" } } } diff --git a/src/mongo/client/sdam/server_description.cpp b/src/mongo/client/sdam/server_description.cpp index 143902f021c..042ad352c28 100644 --- a/src/mongo/client/sdam/server_description.cpp +++ b/src/mongo/client/sdam/server_description.cpp @@ -277,16 +277,8 @@ const boost::optional& ServerDescription::getSetName() const { return _setName; } -const boost::optional& ServerDescription::getSetVersion() const { - return _setVersion; -} - -const boost::optional& ServerDescription::getElectionId() const { - return _electionId; -} - const ElectionIdSetVersionPair ServerDescription::getElectionIdSetVersionPair() const { - return ElectionIdSetVersionPair{getElectionId(), getSetVersion()}; + return ElectionIdSetVersionPair{_electionId, _setVersion}; } const boost::optional& ServerDescription::getPrimary() const { diff --git a/src/mongo/client/sdam/server_description.h b/src/mongo/client/sdam/server_description.h index 666acd1bc37..4a7505a0973 100644 --- a/src/mongo/client/sdam/server_description.h +++ b/src/mongo/client/sdam/server_description.h @@ -103,9 +103,6 @@ public: const std::set& getHosts() const; const std::set& getPassives() const; const std::set& getArbiters() const; - // TODO(SERVER-59409): remove next 2 methods and keep only pair getter. - const boost::optional& getSetVersion() const; - const boost::optional& getElectionId() const; const ElectionIdSetVersionPair getElectionIdSetVersionPair() const; const boost::optional& getTopologyVersion() const; const boost::optional getTopologyDescription(); diff --git a/src/mongo/client/sdam/server_description_test.cpp b/src/mongo/client/sdam/server_description_test.cpp index eac743ed6f5..b4cbc940c28 100644 --- a/src/mongo/client/sdam/server_description_test.cpp +++ b/src/mongo/client/sdam/server_description_test.cpp @@ -475,7 +475,8 @@ TEST_F(ServerDescriptionTestFixture, ShouldStoreSetVersionAndName) { kBsonSetVersionName, duration_cast(mongo::Milliseconds(40))); auto description = ServerDescription(clockSource, response); - ASSERT_EQUALS(kBsonSetVersionName.getIntField("setVersion"), description.getSetVersion()); + ASSERT_EQUALS(kBsonSetVersionName.getIntField("setVersion"), + description.getElectionIdSetVersionPair().setVersion); ASSERT_EQUALS(std::string(kBsonSetVersionName.getStringField("setName")), description.getSetName()); } @@ -484,7 +485,8 @@ TEST_F(ServerDescriptionTestFixture, ShouldStoreElectionId) { auto response = HelloOutcome( HostAndPort("foo:1234"), kBsonElectionId, duration_cast(mongo::Milliseconds(40))); auto description = ServerDescription(clockSource, response); - ASSERT_EQUALS(kBsonElectionId.getField("electionId").OID(), description.getElectionId()); + ASSERT_EQUALS(kBsonElectionId.getField("electionId").OID(), + description.getElectionIdSetVersionPair().electionId); } TEST_F(ServerDescriptionTestFixture, ShouldStorePrimary) { @@ -537,8 +539,8 @@ TEST_F(ServerDescriptionTestFixture, ShouldStoreCorrectDefaultValuesOnSuccess) { ASSERT_EQUALS(static_cast(0), description.getPassives().size()); ASSERT_EQUALS(static_cast(0), description.getTags().size()); ASSERT_EQUALS(boost::none, description.getSetName()); - ASSERT_EQUALS(boost::none, description.getSetVersion()); - ASSERT_EQUALS(boost::none, description.getElectionId()); + ASSERT_EQUALS(boost::none, description.getElectionIdSetVersionPair().setVersion); + ASSERT_EQUALS(boost::none, description.getElectionIdSetVersionPair().electionId); ASSERT_EQUALS(boost::none, description.getPrimary()); ASSERT_EQUALS(boost::none, description.getLogicalSessionTimeoutMinutes()); ASSERT(boost::none == description.getTopologyVersion()); @@ -557,8 +559,8 @@ TEST_F(ServerDescriptionTestFixture, ShouldStoreCorrectDefaultValuesOnFailure) { ASSERT_EQUALS(static_cast(0), description.getPassives().size()); ASSERT_EQUALS(static_cast(0), description.getTags().size()); ASSERT_EQUALS(boost::none, description.getSetName()); - ASSERT_EQUALS(boost::none, description.getSetVersion()); - ASSERT_EQUALS(boost::none, description.getElectionId()); + ASSERT_EQUALS(boost::none, description.getElectionIdSetVersionPair().setVersion); + ASSERT_EQUALS(boost::none, description.getElectionIdSetVersionPair().electionId); ASSERT_EQUALS(boost::none, description.getPrimary()); ASSERT_EQUALS(boost::none, description.getLogicalSessionTimeoutMinutes()); ASSERT(boost::none == description.getTopologyVersion()); diff --git a/src/mongo/client/sdam/topology_description.cpp b/src/mongo/client/sdam/topology_description.cpp index 63ff6187edf..9dd586f8a18 100644 --- a/src/mongo/client/sdam/topology_description.cpp +++ b/src/mongo/client/sdam/topology_description.cpp @@ -26,17 +26,24 @@ * exception statement from all source files in the program, then also delete * it in the license file. */ +#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kNetwork + #include "mongo/client/sdam/topology_description.h" -#include "mongo/client/sdam/sdam_datatypes.h" -#include "mongo/client/sdam/server_description.h" -#include "mongo/db/wire_version.h" -#include "mongo/util/fail_point.h" #include #include #include #include +#include "mongo/client/sdam/sdam_datatypes.h" +#include "mongo/client/sdam/server_description.h" +#include "mongo/db/wire_version.h" +#include "mongo/logv2/log.h" +#include "mongo/util/fail_point.h" + +// Checkpoint to track when election Id and Set version is changed. +MONGO_FAIL_POINT_DEFINE(maxElectionIdSetVersionPairUpdated); + namespace mongo::sdam { MONGO_FAIL_POINT_DEFINE(topologyDescriptionInstallServerDescription); @@ -85,16 +92,22 @@ const boost::optional& TopologyDescription::getSetName() const { return _setName; } -const boost::optional& TopologyDescription::getMaxSetVersion() const { - return _maxSetVersion; -} - -const boost::optional& TopologyDescription::getMaxElectionId() const { - return _maxElectionId; +ElectionIdSetVersionPair TopologyDescription::getMaxElectionIdSetVersionPair() const { + return _maxElectionIdSetVersionPair; } -const ElectionIdSetVersionPair TopologyDescription::getMaxElectionIdSetVersionPair() const { - return ElectionIdSetVersionPair{getMaxElectionId(), getMaxSetVersion()}; +void TopologyDescription::updateMaxElectionIdSetVersionPair(const ElectionIdSetVersionPair& pair) { + if (MONGO_unlikely(maxElectionIdSetVersionPairUpdated.shouldFail())) { + LOGV2(5940906, + "Fail point maxElectionIdSetVersionPairUpdated", + "topologyId"_attr = _id, + "primaryForSet"_attr = _setName ? *_setName : std::string("Unknown"), + "incomingElectionId"_attr = pair.electionId, + "currentMaxElectionId"_attr = _maxElectionIdSetVersionPair.electionId, + "incomingSetVersion"_attr = pair.setVersion, + "currentMaxSetVersion"_attr = _maxElectionIdSetVersionPair.setVersion); + } + _maxElectionIdSetVersionPair = pair; } const std::vector& TopologyDescription::getServers() const { @@ -287,12 +300,8 @@ BSONObj TopologyDescription::toBSON() { bson << "compatibleError" << *_compatibleError; } - if (_maxSetVersion) { - bson << "maxSetVersion" << *_maxSetVersion; - } - - if (_maxElectionId) { - bson << "maxElectionId" << *_maxElectionId; + if (_maxElectionIdSetVersionPair.anyDefined()) { + bson << "maxElectionIdSetVersion" << _maxElectionIdSetVersionPair.toBSON(); } return bson.obj(); diff --git a/src/mongo/client/sdam/topology_description.h b/src/mongo/client/sdam/topology_description.h index b7de69401a0..d8799356513 100644 --- a/src/mongo/client/sdam/topology_description.h +++ b/src/mongo/client/sdam/topology_description.h @@ -67,11 +67,7 @@ public: const UUID& getId() const; TopologyType getType() const; const boost::optional& getSetName() const; - - // TODO(SERVER-59409): remove next 2 methods and keep only pair getter. - const boost::optional& getMaxSetVersion() const; - const boost::optional& getMaxElectionId() const; - const ElectionIdSetVersionPair getMaxElectionIdSetVersionPair() const; + ElectionIdSetVersionPair getMaxElectionIdSetVersionPair() const; const std::vector& getServers() const; @@ -105,15 +101,13 @@ private: friend bool operator==(const TopologyDescription& lhs, const TopologyDescription& rhs) { return std::tie(lhs._setName, lhs._type, - lhs._maxSetVersion, - lhs._maxElectionId, + lhs._maxElectionIdSetVersionPair, lhs._servers, lhs._compatible, lhs._logicalSessionTimeoutMinutes) == std::tie(rhs._setName, rhs._type, - rhs._maxSetVersion, - rhs._maxElectionId, + rhs._maxElectionIdSetVersionPair, rhs._servers, rhs._compatible, rhs._logicalSessionTimeoutMinutes); @@ -147,6 +141,8 @@ private: */ void calculateLogicalSessionTimeout(); + void updateMaxElectionIdSetVersionPair(const ElectionIdSetVersionPair& pair); + // unique id for this topology UUID _id = UUID::gen(); @@ -156,13 +152,12 @@ private: // setName: the replica set name. Default null. boost::optional _setName; - // maxSetVersion: an integer or null. The largest setVersion ever reported by a primary. - // Default null. - boost::optional _maxSetVersion; - - // maxElectionId: an ObjectId or null. The largest electionId ever reported by a primary. - // Default null. - boost::optional _maxElectionId; + // The tuple consisting of: + // maxSetVersion: an integer or none. The largest setVersion ever reported by a primary. + // Note: maxSetVersion can go backwards. + // maxElectionId: an ObjectId or none. The largest electionId ever reported by a primary. + // Default {none, none}. + ElectionIdSetVersionPair _maxElectionIdSetVersionPair; // servers: a set of ServerDescription instances. Default contains one server: // "localhost:27017", ServerType Unknown. diff --git a/src/mongo/client/sdam/topology_description_builder.cpp b/src/mongo/client/sdam/topology_description_builder.cpp index 15ed22f3242..6afc3d8126b 100644 --- a/src/mongo/client/sdam/topology_description_builder.cpp +++ b/src/mongo/client/sdam/topology_description_builder.cpp @@ -55,7 +55,7 @@ TopologyDescriptionBuilder& TopologyDescriptionBuilder::withSetName(const std::s } TopologyDescriptionBuilder& TopologyDescriptionBuilder::withMaxSetVersion(int maxSetVersion) { - _instance->_maxSetVersion = maxSetVersion; + _instance->_maxElectionIdSetVersionPair.setVersion = maxSetVersion; return *this; } @@ -67,7 +67,7 @@ TopologyDescriptionBuilder& TopologyDescriptionBuilder::withServers( TopologyDescriptionBuilder& TopologyDescriptionBuilder::withMaxElectionID( const OID& maxElectionId) { - _instance->_maxElectionId = maxElectionId; + _instance->_maxElectionIdSetVersionPair.electionId = maxElectionId; return *this; } diff --git a/src/mongo/client/sdam/topology_state_machine.cpp b/src/mongo/client/sdam/topology_state_machine.cpp index 4aa91d07637..ed4d39abddf 100644 --- a/src/mongo/client/sdam/topology_state_machine.cpp +++ b/src/mongo/client/sdam/topology_state_machine.cpp @@ -27,12 +27,15 @@ * it in the license file. */ #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kNetwork + #include "mongo/client/sdam/topology_state_machine.h" #include +#include "mongo/client/sdam/election_id_set_version_pair.h" #include "mongo/client/sdam/sdam_test_base.h" #include "mongo/logv2/log.h" +#include "mongo/util/fail_point.h" namespace mongo::sdam { namespace { @@ -274,28 +277,24 @@ void TopologyStateMachine::updateRSFromPrimary(TopologyDescription& topologyDesc return; } - auto serverDescSetVersion = serverDescription->getSetVersion(); - auto serverDescElectionId = serverDescription->getElectionId(); - auto topologyMaxSetVersion = topologyDescription.getMaxSetVersion(); - auto topologyMaxElectionId = topologyDescription.getMaxElectionId(); - if (serverDescSetVersion && serverDescElectionId) { - if (topologyMaxSetVersion && topologyMaxElectionId && - ((topologyMaxSetVersion > serverDescSetVersion) || - (topologyMaxSetVersion == serverDescSetVersion && - (*topologyMaxElectionId).compare(*serverDescElectionId) > 0))) { - // stale primary - installServerDescription( - topologyDescription, std::make_shared(serverDescAddress), false); - checkIfHasPrimary(topologyDescription, serverDescription); - return; - } - modifyMaxElectionId(topologyDescription, *serverDescription->getElectionId()); + const ElectionIdSetVersionPair incomingElectionIdSetVersion = + serverDescription->getElectionIdSetVersionPair(); + const ElectionIdSetVersionPair currentMaxElectionIdSetVersion = + topologyDescription.getMaxElectionIdSetVersionPair(); + + if (incomingElectionIdSetVersion < currentMaxElectionIdSetVersion) { + LOGV2(5940901, + "Stale primary detected, marking its state as unknown", + "primary"_attr = serverDescription->getAddress(), + "incomingElectionIdSetVersion"_attr = incomingElectionIdSetVersion, + "currentMaxElectionIdSetVersion"_attr = currentMaxElectionIdSetVersion); + installServerDescription( + topologyDescription, std::make_shared(serverDescAddress), false); + checkIfHasPrimary(topologyDescription, serverDescription); + return; } - if (serverDescSetVersion && - (!topologyMaxSetVersion || (serverDescSetVersion > topologyMaxSetVersion))) { - modifyMaxSetVersion(topologyDescription, *serverDescSetVersion); - } + topologyDescription.updateMaxElectionIdSetVersionPair(incomingElectionIdSetVersion); auto oldPrimaries = topologyDescription.findServers( [serverDescAddress](const ServerDescriptionPtr& description) { @@ -403,14 +402,4 @@ void TopologyStateMachine::installServerDescription(TopologyDescription& topolog bool newServer) { topologyDescription.installServerDescription(newServerDescription); } - -void TopologyStateMachine::modifyMaxElectionId(TopologyDescription& topologyDescription, - const OID& newMaxElectionId) { - topologyDescription._maxElectionId = newMaxElectionId; -} - -void TopologyStateMachine::modifyMaxSetVersion(TopologyDescription& topologyDescription, - int& newMaxSetVersion) { - topologyDescription._maxSetVersion = newMaxSetVersion; -} } // namespace mongo::sdam diff --git a/src/mongo/client/sdam/topology_state_machine.h b/src/mongo/client/sdam/topology_state_machine.h index c946de9c0e3..158377c3346 100644 --- a/src/mongo/client/sdam/topology_state_machine.h +++ b/src/mongo/client/sdam/topology_state_machine.h @@ -95,9 +95,6 @@ private: void modifySetName(TopologyDescription& topologyDescription, const boost::optional& setName); - void modifyMaxElectionId(TopologyDescription& topologyDescription, const OID& newMaxElectionId); - void modifyMaxSetVersion(TopologyDescription& topologyDescription, int& newMaxSetVersion); - StateTransitionTable _stt; SdamConfiguration _config; }; diff --git a/src/mongo/client/streamable_replica_set_monitor.cpp b/src/mongo/client/streamable_replica_set_monitor.cpp index 86d08e595db..a3ac65a4f8d 100644 --- a/src/mongo/client/streamable_replica_set_monitor.cpp +++ b/src/mongo/client/streamable_replica_set_monitor.cpp @@ -727,6 +727,21 @@ void StreamableReplicaSetMonitor::onTopologyDescriptionChangedEvent( } } } + + if (auto previousMaxElectionIdSetVersionPair = + previousDescription->getMaxElectionIdSetVersionPair(), + newMaxElectionIdSetVersionPair = newDescription->getMaxElectionIdSetVersionPair(); + setVersionWentBackwards(previousMaxElectionIdSetVersionPair, + newMaxElectionIdSetVersionPair)) { + // The previous primary was unable to reach consensus for the config with + // higher version and it was abandoned after failover. + LOGV2(5940902, + "Max known Set version coming from new primary forces to rollback it backwards", + "replicaSet"_attr = getName(), + "newElectionIdSetVersion"_attr = newMaxElectionIdSetVersionPair.setVersion, + "previousMaxElectionIdSetVersion"_attr = + previousMaxElectionIdSetVersionPair.setVersion); + } } void StreamableReplicaSetMonitor::onServerHeartbeatSucceededEvent(const HostAndPort& hostAndPort, diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index f49c0d0d4d2..edd4a533241 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -142,6 +142,12 @@ MONGO_FAIL_POINT_DEFINE(doNotRemoveNewlyAddedOnHeartbeats); MONGO_FAIL_POINT_DEFINE(hangDuringAutomaticReconfig); // Make reconfig command hang before validating new config. MONGO_FAIL_POINT_DEFINE(ReconfigHangBeforeConfigValidationCheck); +// Blocks after reconfig runs. +MONGO_FAIL_POINT_DEFINE(hangAfterReconfig); +// Allows skipping fetching the config from ping sender. +MONGO_FAIL_POINT_DEFINE(skipBeforeFetchingConfig); +// Hang after grabbing the RSTL but before we start rejecting writes. +MONGO_FAIL_POINT_DEFINE(stepdownHangAfterGrabbingRSTL); // Number of times we tried to go live as a secondary. Counter64 attemptsToBecomeSecondary; @@ -3689,6 +3695,11 @@ Status ReplicationCoordinatorImpl::_doReplSetReconfig(OperationContext* opCtx, configStateGuard.dismiss(); _finishReplSetReconfig(opCtx, newConfig, force, myIndex); + if (MONGO_unlikely(hangAfterReconfig.shouldFail())) { + LOGV2(5940904, "Hanging after reconfig on fail point"); + hangAfterReconfig.pauseWhileSet(); + } + return Status::OK(); } @@ -5461,6 +5472,29 @@ bool ReplicationCoordinatorImpl::getWriteConcernMajorityShouldJournal_inlock() c return _rsConfig.getWriteConcernMajorityShouldJournal(); } +namespace { +// Fail point to block and optionally skip fetching config. Supported arguments: +// versionAndTerm: [ v, t ] +void _handleBeforeFetchingConfig(const BSONObj& customArgs, + ConfigVersionAndTerm versionAndTerm, + bool* skipFetchingConfig) { + if (customArgs.hasElement("versionAndTerm")) { + const auto nested = customArgs["versionAndTerm"].embeddedObject(); + std::vector elements; + nested.elems(elements); + invariant(elements.size() == 2); + ConfigVersionAndTerm patternVersionAndTerm = + ConfigVersionAndTerm(elements[0].numberInt(), elements[1].numberInt()); + if (patternVersionAndTerm == versionAndTerm) { + LOGV2(5940905, + "Failpoint is activated to skip fetching config for version and term", + "versionAndTerm"_attr = versionAndTerm); + *skipFetchingConfig = true; + } + } +} +} // namespace + Status ReplicationCoordinatorImpl::processHeartbeatV1(const ReplSetHeartbeatArgsV1& args, ReplSetHeartbeatResponse* response) { { @@ -5514,8 +5548,16 @@ Status ReplicationCoordinatorImpl::processHeartbeatV1(const ReplSetHeartbeatArgs // We cannot cancel the enqueued heartbeat, but either this one or the enqueued heartbeat // will trigger reconfig, which cancels and reschedules all heartbeats. else if (args.hasSender()) { - LOGV2(21401, "Scheduling heartbeat to fetch a newer config", attr); - _scheduleHeartbeatToTarget_inlock(senderHost, now); + bool inTestSkipFetchingConfig = false; + skipBeforeFetchingConfig.execute([&](const BSONObj& customArgs) { + _handleBeforeFetchingConfig( + customArgs, args.getConfigVersionAndTerm(), &inTestSkipFetchingConfig); + }); + + if (!inTestSkipFetchingConfig) { + LOGV2(21401, "Scheduling heartbeat to fetch a newer config", attr); + _scheduleHeartbeatToTarget_inlock(senderHost, now); + } } } else if (result.isOK() && args.getPrimaryId() >= 0 && (!response->hasPrimaryId() || response->getPrimaryId() != args.getPrimaryId())) { -- cgit v1.2.1