summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSiyuan Zhou <siyuan.zhou@mongodb.com>2020-02-24 01:22:36 -0500
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-02-29 00:02:40 +0000
commit6c0645af5999ef52579c04f1b6fff422bd89da3c (patch)
treed8ce20701fef24f7ed9f2691e4972b792487f53e
parent2d8f351c2640f402027a2fc0c51628db8d4f85b1 (diff)
downloadmongo-6c0645af5999ef52579c04f1b6fff422bd89da3c.tar.gz
SERVER-46288 Reconfig in 4.2 style with the current config on FCV downgrade
- Refactoring doReplSetReconfig to accept a callback. - Use replset tag system to wait for config replication. - Revert kConfigMajority write concern. - Include all nodes including arbiters in config replication quorum. - Reconfig with term -1 using the current config on downgrade.
-rw-r--r--jstests/multiVersion/genericSetFCVUsage/set_feature_compatibility_version.js3
-rw-r--r--jstests/multiVersion/remove_config_term_on_replset_downgrade.js8
-rw-r--r--jstests/replsets/awaitable_ismaster_fcv_change.js29
-rw-r--r--src/mongo/db/commands/feature_compatibility_version.cpp11
-rw-r--r--src/mongo/db/commands/set_feature_compatibility_version_command.cpp46
-rw-r--r--src/mongo/db/repl/member_config.cpp7
-rw-r--r--src/mongo/db/repl/member_config.h1
-rw-r--r--src/mongo/db/repl/member_config_test.cpp8
-rw-r--r--src/mongo/db/repl/repl_set_config.cpp28
-rw-r--r--src/mongo/db/repl/repl_set_config.h16
-rw-r--r--src/mongo/db/repl/repl_set_config_checks.cpp18
-rw-r--r--src/mongo/db/repl/repl_set_config_checks_test.cpp10
-rw-r--r--src/mongo/db/repl/replication_coordinator.h9
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.cpp174
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.h4
-rw-r--r--src/mongo/db/repl/replication_coordinator_mock.cpp6
-rw-r--r--src/mongo/db/repl/replication_coordinator_mock.h4
-rw-r--r--src/mongo/db/repl/replication_coordinator_noop.cpp6
-rw-r--r--src/mongo/db/repl/replication_coordinator_noop.h4
-rw-r--r--src/mongo/db/repl/topology_coordinator.cpp58
-rw-r--r--src/mongo/db/repl/topology_coordinator.h28
-rw-r--r--src/mongo/db/repl/topology_coordinator_v1_test.cpp172
-rw-r--r--src/mongo/db/write_concern_options.cpp1
-rw-r--r--src/mongo/db/write_concern_options.h9
-rw-r--r--src/mongo/embedded/replication_coordinator_embedded.cpp6
-rw-r--r--src/mongo/embedded/replication_coordinator_embedded.h4
-rw-r--r--src/mongo/shell/replsettest.js4
27 files changed, 427 insertions, 247 deletions
diff --git a/jstests/multiVersion/genericSetFCVUsage/set_feature_compatibility_version.js b/jstests/multiVersion/genericSetFCVUsage/set_feature_compatibility_version.js
index d7a1c8853f0..02e6fbb9d40 100644
--- a/jstests/multiVersion/genericSetFCVUsage/set_feature_compatibility_version.js
+++ b/jstests/multiVersion/genericSetFCVUsage/set_feature_compatibility_version.js
@@ -309,8 +309,9 @@ assert.commandFailed(
st.rs0.getPrimary().discardMessagesFrom(st.configRS.getPrimary(), 1.0);
jsTestLog(
"EXPECTED TO FAIL: setFeatureCompatibilityVersion cannot be set because the shard primary is not reachable");
+// Downgrading from 4.4 needs to wait for all nodes to reconfig, so we specify a higher timeout.
assert.commandFailed(
- mongosAdminDB.runCommand({setFeatureCompatibilityVersion: lastStableFCV, maxTimeMS: 1000}));
+ mongosAdminDB.runCommand({setFeatureCompatibilityVersion: lastStableFCV, maxTimeMS: 10000}));
checkFCV(configPrimaryAdminDB, lastStableFCV, lastStableFCV /* indicates downgrade in progress */);
st.rs0.getPrimary().discardMessagesFrom(st.configRS.getPrimary(), 0.0);
diff --git a/jstests/multiVersion/remove_config_term_on_replset_downgrade.js b/jstests/multiVersion/remove_config_term_on_replset_downgrade.js
index 4315fdcc506..782c4e24fd8 100644
--- a/jstests/multiVersion/remove_config_term_on_replset_downgrade.js
+++ b/jstests/multiVersion/remove_config_term_on_replset_downgrade.js
@@ -52,7 +52,11 @@ rst.nodes.forEach(function(node) {
jsTestLog("Checking the config term on node " + tojson(node.host) + " after FCV downgrade.");
let config = node.getDB("local").getCollection("system.replset").findOne();
assert(!config.hasOwnProperty("term"), tojson(config));
+ // The configs can only differ in config versions and terms.
config.term = configInNewVersion.term;
+ // The versions differ because of the force reconfig on downgrade.
+ assert.eq(config.version, configInNewVersion.version + 1);
+ config.version = configInNewVersion.version;
assert.docEq(configInNewVersion, config);
});
@@ -66,7 +70,11 @@ rst.nodes.forEach(function(node) {
jsTestLog("Checking the config term on node " + tojson(node.host) + " after binary downgrade.");
let config = node.getDB("local").getCollection("system.replset").findOne();
assert(!config.hasOwnProperty("term"), tojson(config));
+ // The configs can only differ in config versions and terms.
config.term = configInNewVersion.term;
+ // The versions differ because of the force reconfig on downgrade.
+ assert.eq(config.version, configInNewVersion.version + 1);
+ config.version = configInNewVersion.version;
assert.docEq(configInNewVersion, config);
});
rst.stopSet();
diff --git a/jstests/replsets/awaitable_ismaster_fcv_change.js b/jstests/replsets/awaitable_ismaster_fcv_change.js
index 872e9ff33e8..5099acf462d 100644
--- a/jstests/replsets/awaitable_ismaster_fcv_change.js
+++ b/jstests/replsets/awaitable_ismaster_fcv_change.js
@@ -40,7 +40,7 @@ assert(secondaryTopologyVersion.hasOwnProperty("counter"), tojson(secondaryTopol
function runAwaitableIsMasterBeforeFCVChange(
topologyVersionField, isUpgrade, isPrimary, prevMinWireVersion, serverMaxWireVersion) {
db.getMongo().setSlaveOk();
- const firstResponse = assert.commandWorked(db.runCommand({
+ let response = assert.commandWorked(db.runCommand({
isMaster: 1,
topologyVersion: topologyVersionField,
maxAwaitTimeMS: 99999999,
@@ -48,26 +48,37 @@ function runAwaitableIsMasterBeforeFCVChange(
{minWireVersion: NumberInt(0), maxWireVersion: NumberInt(serverMaxWireVersion)},
}));
+ // On downgrade from 4.4 to 4.2, the primary will reconfig the replset and signal isMaster.
+ if (prevMinWireVersion === response.minWireVersion) {
+ jsTestLog("Min wire version didn't change: " + prevMinWireVersion + ". Retry isMaster.");
+ topologyVersionField = response.topologyVersion;
+ response = assert.commandWorked(db.runCommand({
+ isMaster: 1,
+ topologyVersion: topologyVersionField,
+ maxAwaitTimeMS: 99999999,
+ internalClient:
+ {minWireVersion: NumberInt(0), maxWireVersion: NumberInt(serverMaxWireVersion)},
+ }));
+ }
// We only expect to increment the server TopologyVersion when the minWireVersion has changed.
// This can only happen in two scenarios:
// 1. Setting featureCompatibilityVersion from downgrading to fullyDowngraded.
// 2. Setting featureCompatibilityVersion from fullyDowngraded to upgrading.
- assert.eq(
- topologyVersionField.counter + 1, firstResponse.topologyVersion.counter, firstResponse);
+ assert.eq(topologyVersionField.counter + 1, response.topologyVersion.counter, response);
const expectedIsMasterValue = isPrimary;
const expectedSecondaryValue = !isPrimary;
- assert.eq(expectedIsMasterValue, firstResponse.ismaster, firstResponse);
- assert.eq(expectedSecondaryValue, firstResponse.secondary, firstResponse);
+ assert.eq(expectedIsMasterValue, response.ismaster, response);
+ assert.eq(expectedSecondaryValue, response.secondary, response);
- const minWireVersion = firstResponse.minWireVersion;
- const maxWireVersion = firstResponse.maxWireVersion;
+ const minWireVersion = response.minWireVersion;
+ const maxWireVersion = response.maxWireVersion;
assert.neq(prevMinWireVersion, minWireVersion);
if (isUpgrade) {
// minWireVersion should always equal maxWireVersion if we have not fully downgraded FCV.
- assert.eq(minWireVersion, maxWireVersion, firstResponse);
+ assert.eq(minWireVersion, maxWireVersion, response);
} else {
- assert.eq(minWireVersion + 1, maxWireVersion, firstResponse);
+ assert.eq(minWireVersion + 1, maxWireVersion, response);
}
}
diff --git a/src/mongo/db/commands/feature_compatibility_version.cpp b/src/mongo/db/commands/feature_compatibility_version.cpp
index cffd5882666..1e06fc65e88 100644
--- a/src/mongo/db/commands/feature_compatibility_version.cpp
+++ b/src/mongo/db/commands/feature_compatibility_version.cpp
@@ -165,17 +165,6 @@ void FeatureCompatibilityVersion::onInsertOrUpdate(OperationContext* opCtx, cons
FeatureCompatibilityVersionParser::toString(newVersion));
}
- // Remove term field of config document on downgrade.
- if (newVersion == ServerGlobalParams::FeatureCompatibility::Version::kFullyDowngradedTo42 &&
- repl::ReplicationCoordinator::get(opCtx)->getReplicationMode() ==
- repl::ReplicationCoordinator::modeReplSet) {
- auto storageInterface = repl::StorageInterface::get(opCtx);
- repl::UnreplicatedWritesBlock uwb(opCtx);
- repl::TimestampedBSONObj update{BSON("$unset" << BSON("term" << 1)), Timestamp()};
- uassertStatusOK(storageInterface->updateSingleton(
- opCtx, NamespaceString::kSystemReplSetNamespace, {}, update));
- }
-
opCtx->recoveryUnit()->onCommit([opCtx, newVersion](boost::optional<Timestamp>) {
serverGlobalParams.featureCompatibility.setVersion(newVersion);
updateMinWireVersion();
diff --git a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
index b8794e24eb2..f35c67c5321 100644
--- a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
+++ b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
@@ -161,8 +161,9 @@ public:
auto waitForWCStatus = waitForWriteConcern(
opCtx,
repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp(),
- WriteConcernOptions(
- WriteConcernOptions::kMajority, WriteConcernOptions::SyncMode::UNSET, timeout),
+ WriteConcernOptions(repl::ReplSetConfig::kMajorityWriteConcernModeName,
+ WriteConcernOptions::SyncMode::UNSET,
+ timeout),
&res);
CommandHelpers::appendCommandWCStatus(result, waitForWCStatus, res);
});
@@ -305,6 +306,44 @@ public:
FeatureCompatibilityVersion::setTargetDowngrade(opCtx);
+ // Safe reconfig introduces a new "term" field in the config document. If the user tries
+ // to downgrade the replset to FCV42, the primary will initiate a reconfig without the
+ // term and wait for it to be replicated on all nodes.
+ auto replCoord = repl::ReplicationCoordinator::get(opCtx);
+ const bool isReplSet =
+ replCoord->getReplicationMode() == repl::ReplicationCoordinator::modeReplSet;
+ if (isReplSet &&
+ replCoord->getConfig().getConfigTerm() != repl::OpTime::kUninitializedTerm) {
+ // Force reconfig with term -1 to remove the 4.2 incompatible "term" field.
+ auto getNewConfig = [&](const repl::ReplSetConfig& oldConfig, long long term) {
+ auto newConfig = oldConfig;
+ newConfig.setConfigTerm(repl::OpTime::kUninitializedTerm);
+ newConfig.setConfigVersion(newConfig.getConfigVersion() + 1);
+ return newConfig;
+ };
+
+ // "force" reconfig in order to skip safety checks. This is safe since the content
+ // of config is the same.
+ LOGV2(4628800, "Downgrading replica set config.");
+ auto status = replCoord->doReplSetReconfig(opCtx, getNewConfig, true /* force */);
+ uassertStatusOKWithContext(status, "Failed to downgrade the replica set config");
+
+ LOGV2(4628801,
+ "Waiting for the downgraded replica set config to propagate to all nodes");
+ // If a write concern is given, we'll use its wTimeout. It's kNoTimeout by default.
+ WriteConcernOptions writeConcern(repl::ReplSetConfig::kConfigAllWriteConcernName,
+ WriteConcernOptions::SyncMode::NONE,
+ opCtx->getWriteConcern().wTimeout);
+ writeConcern.checkCondition = WriteConcernOptions::CheckCondition::Config;
+ repl::OpTime fakeOpTime(Timestamp(1, 1), replCoord->getTerm());
+ uassertStatusOKWithContext(
+ replCoord->awaitReplication(opCtx, fakeOpTime, writeConcern).status,
+ "Failed to wait for the downgraded replica set config to propagate to all "
+ "nodes");
+ LOGV2(4628802,
+ "The downgraded replica set config has been propagated to all nodes");
+ }
+
{
// Take the global lock in S mode to create a barrier for operations taking the
// global IX or X locks. This ensures that either
@@ -319,9 +358,6 @@ public:
if (failDowngrading.shouldFail())
return false;
- const bool isReplSet = repl::ReplicationCoordinator::get(opCtx)->getReplicationMode() ==
- repl::ReplicationCoordinator::modeReplSet;
-
if (serverGlobalParams.clusterRole == ClusterRole::ShardServer) {
LOGV2(20502, "Downgrade: dropping config.rangeDeletions collection");
migrationutil::dropRangeDeletionsCollection(opCtx);
diff --git a/src/mongo/db/repl/member_config.cpp b/src/mongo/db/repl/member_config.cpp
index 5ab43763c8c..2a9e9f0c16c 100644
--- a/src/mongo/db/repl/member_config.cpp
+++ b/src/mongo/db/repl/member_config.cpp
@@ -54,6 +54,7 @@ const std::string MemberConfig::kHorizonsFieldName = "horizons";
const std::string MemberConfig::kInternalVoterTagName = "$voter";
const std::string MemberConfig::kInternalElectableTagName = "$electable";
const std::string MemberConfig::kInternalAllTagName = "$all";
+const std::string MemberConfig::kConfigAllTagName = "$configAll";
namespace {
const std::string kLegalMemberConfigFieldNames[] = {MemberConfig::kIdFieldName,
@@ -217,6 +218,9 @@ MemberConfig::MemberConfig(const BSONObj& mcfg, ReplSetTagConfig* tagConfig) {
if (!_arbiterOnly) {
_tags.push_back(tagConfig->makeTag(kInternalAllTagName, id));
}
+
+ // Add a tag for every node, including arbiters.
+ _tags.push_back(tagConfig->makeTag(kConfigAllTagName, id));
}
Status MemberConfig::validate() const {
@@ -231,7 +235,8 @@ Status MemberConfig::validate() const {
<< " but must be 0 or 1");
}
if (_arbiterOnly) {
- if (!_tags.empty()) {
+ // Arbiters only have one internal tag.
+ if (_tags.size() != 1) {
return Status(ErrorCodes::BadValue, "Cannot set tags on arbiters.");
}
if (!isVoter()) {
diff --git a/src/mongo/db/repl/member_config.h b/src/mongo/db/repl/member_config.h
index 29b6e95232a..1994bacd753 100644
--- a/src/mongo/db/repl/member_config.h
+++ b/src/mongo/db/repl/member_config.h
@@ -66,6 +66,7 @@ public:
static const std::string kInternalVoterTagName;
static const std::string kInternalElectableTagName;
static const std::string kInternalAllTagName;
+ static const std::string kConfigAllTagName;
/**
* Construct a MemberConfig from the contents of "mcfg".
diff --git a/src/mongo/db/repl/member_config_test.cpp b/src/mongo/db/repl/member_config_test.cpp
index 30941d51544..74adeb64776 100644
--- a/src/mongo/db/repl/member_config_test.cpp
+++ b/src/mongo/db/repl/member_config_test.cpp
@@ -52,7 +52,7 @@ TEST(MemberConfig, ParseMinimalMemberConfigAndCheckDefaults) {
ASSERT_FALSE(mc.isHidden());
ASSERT_FALSE(mc.isArbiter());
ASSERT_TRUE(mc.shouldBuildIndexes());
- ASSERT_EQUALS(3U, mc.getNumTags());
+ ASSERT_EQUALS(4U, mc.getNumTags());
ASSERT_OK(mc.validate());
}
@@ -289,14 +289,16 @@ TEST(MemberConfig, ParseTags) {
<< "k2"
<< "v2")),
&tagConfig);
- ASSERT_EQUALS(5U, mc.getNumTags());
- ASSERT_EQUALS(5, std::distance(mc.tagsBegin(), mc.tagsEnd()));
+ ASSERT_EQUALS(6U, mc.getNumTags());
+ ASSERT_EQUALS(6, std::distance(mc.tagsBegin(), mc.tagsEnd()));
ASSERT_EQUALS(1, std::count(mc.tagsBegin(), mc.tagsEnd(), tagConfig.findTag("k1", "v1")));
ASSERT_EQUALS(1, std::count(mc.tagsBegin(), mc.tagsEnd(), tagConfig.findTag("k2", "v2")));
ASSERT_EQUALS(1, std::count(mc.tagsBegin(), mc.tagsEnd(), tagConfig.findTag("$voter", "0")));
ASSERT_EQUALS(1,
std::count(mc.tagsBegin(), mc.tagsEnd(), tagConfig.findTag("$electable", "0")));
ASSERT_EQUALS(1, std::count(mc.tagsBegin(), mc.tagsEnd(), tagConfig.findTag("$all", "0")));
+ ASSERT_EQUALS(1,
+ std::count(mc.tagsBegin(), mc.tagsEnd(), tagConfig.findTag("$configAll", "0")));
}
TEST(MemberConfig, ParseHorizonFields) {
diff --git a/src/mongo/db/repl/repl_set_config.cpp b/src/mongo/db/repl/repl_set_config.cpp
index a19bb6d2b5a..23add8494e8 100644
--- a/src/mongo/db/repl/repl_set_config.cpp
+++ b/src/mongo/db/repl/repl_set_config.cpp
@@ -55,7 +55,6 @@ const Milliseconds ReplSetConfig::kCatchUpTakeoverDisabled(-1);
const std::string ReplSetConfig::kConfigServerFieldName = "configsvr";
const std::string ReplSetConfig::kVersionFieldName = "version";
const std::string ReplSetConfig::kTermFieldName = "term";
-const std::string ReplSetConfig::kMajorityWriteConcernModeName = "$majority";
const Milliseconds ReplSetConfig::kDefaultHeartbeatInterval(2000);
const Seconds ReplSetConfig::kDefaultHeartbeatTimeoutPeriod(10);
const Milliseconds ReplSetConfig::kDefaultElectionTimeoutPeriod(10000);
@@ -686,8 +685,7 @@ Status ReplSetConfig::validate() const {
Status ReplSetConfig::checkIfWriteConcernCanBeSatisfied(
const WriteConcernOptions& writeConcern) const {
- if (!writeConcern.wMode.empty() && writeConcern.wMode != WriteConcernOptions::kMajority &&
- writeConcern.wMode != WriteConcernOptions::kConfigMajority) {
+ if (!writeConcern.wMode.empty() && writeConcern.wMode != WriteConcernOptions::kMajority) {
StatusWith<ReplSetTagPattern> tagPatternStatus = findCustomWriteMode(writeConcern.wMode);
if (!tagPatternStatus.isOK()) {
return tagPatternStatus.getStatus();
@@ -841,6 +839,30 @@ void ReplSetConfig::_addInternalWriteConcernModes() {
// other errors are unexpected
fassert(28694, status);
}
+
+ // $configMajority: the majority of all members including arbiters.
+ pattern = _tagConfig.makePattern();
+ status = _tagConfig.addTagCountConstraintToPattern(
+ &pattern, MemberConfig::kConfigAllTagName, _members.size() / 2 + 1);
+ if (status.isOK()) {
+ _customWriteConcernModes[kConfigMajorityWriteConcernModeName] = pattern;
+ } else if (status != ErrorCodes::NoSuchKey) {
+ // NoSuchKey means we have no $configAll-tagged nodes in this config;
+ // other errors are unexpected
+ fassert(31472, status);
+ }
+
+ // $configAll: all members including arbiters.
+ pattern = _tagConfig.makePattern();
+ status = _tagConfig.addTagCountConstraintToPattern(
+ &pattern, MemberConfig::kConfigAllTagName, _members.size());
+ if (status.isOK()) {
+ _customWriteConcernModes[kConfigAllWriteConcernName] = pattern;
+ } else if (status != ErrorCodes::NoSuchKey) {
+ // NoSuchKey means we have no $configAll-tagged nodes in this config;
+ // other errors are unexpected
+ fassert(31473, status);
+ }
}
void ReplSetConfig::_initializeConnectionString() {
diff --git a/src/mongo/db/repl/repl_set_config.h b/src/mongo/db/repl/repl_set_config.h
index 9f52c69e9d6..7f188555dc6 100644
--- a/src/mongo/db/repl/repl_set_config.h
+++ b/src/mongo/db/repl/repl_set_config.h
@@ -117,7 +117,9 @@ public:
static const std::string kConfigServerFieldName;
static const std::string kVersionFieldName;
static const std::string kTermFieldName;
- static const std::string kMajorityWriteConcernModeName;
+ static constexpr char kMajorityWriteConcernModeName[] = "$majority";
+ static constexpr char kConfigMajorityWriteConcernModeName[] = "$configMajority";
+ static constexpr char kConfigAllWriteConcernName[] = "$configAll";
// If this field is present, a repair operation potentially modified replicated data. This
// should never be included in a valid configuration document.
@@ -181,7 +183,7 @@ public:
Status checkIfWriteConcernCanBeSatisfied(const WriteConcernOptions& writeConcern) const;
/**
- * Gets the version of this configuration.
+ * Gets and sets the version of this configuration.
*
* The version number sequences configurations of the replica set, so that
* nodes may distinguish between "older" and "newer" configurations.
@@ -190,8 +192,12 @@ public:
return _version;
}
+ void setConfigVersion(long long version) {
+ _version = version;
+ }
+
/**
- * Gets the term of this configuration.
+ * Gets and sets the term of this configuration.
*
* The configuration term is the term of the primary that originally created this configuration.
* Configurations in a replica set are totally ordered by their term and configuration version.
@@ -200,6 +206,10 @@ public:
return _term;
}
+ void setConfigTerm(long long term) {
+ _term = term;
+ }
+
/**
* Gets the (version, term) pair of this configuration.
*/
diff --git a/src/mongo/db/repl/repl_set_config_checks.cpp b/src/mongo/db/repl/repl_set_config_checks.cpp
index 19648e8319a..9c7deaad980 100644
--- a/src/mongo/db/repl/repl_set_config_checks.cpp
+++ b/src/mongo/db/repl/repl_set_config_checks.cpp
@@ -195,19 +195,23 @@ Status validateSingleNodeChange(const ReplSetConfig& oldConfig, const ReplSetCon
* primary under "oldConfig" and is electable under "newConfig". Such checks that
* require knowledge of which node is executing the configuration are out of scope
* for this function.
+ *
+ * When "force" is true, skips config version check, since the version is guaranteed
+ * to be valid either by "force" reconfig command or by internal use.
*/
Status validateOldAndNewConfigsCompatible(const ReplSetConfig& oldConfig,
const ReplSetConfig& newConfig) {
invariant(newConfig.isInitialized());
invariant(oldConfig.isInitialized());
- if (oldConfig.getConfigVersion() >= newConfig.getConfigVersion()) {
- return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- str::stream()
- << "New replica set configuration version must be greater than old, but "
- << newConfig.getConfigVersion() << " is not greater than "
- << oldConfig.getConfigVersion() << " for replica set "
- << newConfig.getReplSetName());
+ if (oldConfig.getConfigVersionAndTerm() >= newConfig.getConfigVersionAndTerm()) {
+ return Status(
+ ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ str::stream()
+ << "New replica set configuration version and term must be greater than old, but "
+ << newConfig.getConfigVersionAndTerm().toString() << " is not greater than "
+ << oldConfig.getConfigVersionAndTerm().toString() << " for replica set "
+ << newConfig.getReplSetName());
}
if (oldConfig.getReplSetName() != newConfig.getReplSetName()) {
diff --git a/src/mongo/db/repl/repl_set_config_checks_test.cpp b/src/mongo/db/repl/repl_set_config_checks_test.cpp
index d48383c767a..22c17222c11 100644
--- a/src/mongo/db/repl/repl_set_config_checks_test.cpp
+++ b/src/mongo/db/repl/repl_set_config_checks_test.cpp
@@ -244,22 +244,12 @@ TEST_F(ServiceContextTest, ValidateConfigForReconfig_NewConfigVersionNumberMustB
validateConfigForReconfig(
&externalState, oldConfig, oldConfig, getGlobalServiceContext(), false)
.getStatus());
- // Forced reconfigs also do not allow this.
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(
- &externalState, oldConfig, oldConfig, getGlobalServiceContext(), true)
- .getStatus());
// Cannot reconfig from new to old (versions must increase).
ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
validateConfigForReconfig(
&externalState, newConfig, oldConfig, getGlobalServiceContext(), false)
.getStatus());
- // Forced reconfigs also do not allow this.
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(
- &externalState, newConfig, oldConfig, getGlobalServiceContext(), true)
- .getStatus());
}
TEST_F(ServiceContextTest, ValidateConfigForReconfig_NewConfigMustNotChangeSetName) {
diff --git a/src/mongo/db/repl/replication_coordinator.h b/src/mongo/db/repl/replication_coordinator.h
index 9984d3803f3..6d6d1f37c88 100644
--- a/src/mongo/db/repl/replication_coordinator.h
+++ b/src/mongo/db/repl/replication_coordinator.h
@@ -688,6 +688,15 @@ public:
const ReplSetReconfigArgs& args,
BSONObjBuilder* resultObj) = 0;
+ /**
+ * Install the new config returned by the callback "getNewConfig".
+ */
+ using GetNewConfigFn = std::function<StatusWith<ReplSetConfig>(const ReplSetConfig& oldConfig,
+ long long currentTerm)>;
+ virtual Status doReplSetReconfig(OperationContext* opCtx,
+ GetNewConfigFn getNewConfig,
+ bool force) = 0;
+
/*
* Handles an incoming replSetInitiate command. If "configObj" is empty, generates a default
* configuration to use.
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index ee2a8bd820a..2531d08a5df 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -186,26 +186,6 @@ void lockAndCall(stdx::unique_lock<Latch>* lk, const std::function<void()>& fn)
fn();
}
-/**
- * Implements the force-reconfig behavior of incrementing config version by a large random
- * number.
- */
-BSONObj incrementConfigVersionByRandom(BSONObj config) {
- BSONObjBuilder builder;
- SecureRandom generator;
- for (BSONObjIterator iter(config); iter.more(); iter.next()) {
- BSONElement elem = *iter;
- if (elem.fieldNameStringData() == ReplSetConfig::kVersionFieldName && elem.isNumber()) {
- const int random = generator.nextInt32(100'000);
- builder.appendIntOrLL(ReplSetConfig::kVersionFieldName,
- elem.numberLong() + 10'000 + random);
- } else {
- builder.append(elem);
- }
- }
- return builder.obj();
-}
-
template <typename T>
StatusOrStatusWith<T> futureGetNoThrowWithDeadline(OperationContext* opCtx,
SharedSemiFuture<T>& f,
@@ -1695,22 +1675,11 @@ bool ReplicationCoordinatorImpl::_doneWaitingForReplication_inlock(
// The syncMode cannot be unset.
invariant(writeConcern.syncMode != WriteConcernOptions::SyncMode::UNSET);
- // When waiting for the config to be replicated to a majority, we do not wait on a specific
- // OpTime. We specifically populate waiters with a null OpTime.
- invariant(writeConcern.wMode != WriteConcernOptions::kConfigMajority || opTime.isNull());
-
const bool useDurableOpTime = writeConcern.syncMode == WriteConcernOptions::SyncMode::JOURNAL;
-
if (writeConcern.wMode.empty()) {
return _topCoord->haveNumNodesReachedOpTime(
opTime, writeConcern.wNumNodes, useDurableOpTime);
}
-
- // Check that the nodes in the new config have replicated the config.
- if (writeConcern.wMode == WriteConcernOptions::kConfigMajority) {
- return _topCoord->haveMajorityReplicatedConfig();
- }
-
StringData patternName;
if (writeConcern.wMode == WriteConcernOptions::kMajority) {
if (_externalState->snapshotsEnabled() && !gTestingSnapshotBehaviorInIsolation) {
@@ -1759,9 +1728,14 @@ bool ReplicationCoordinatorImpl::_doneWaitingForReplication_inlock(
} else {
patternName = writeConcern.wMode;
}
-
auto tagPattern = uassertStatusOK(_rsConfig.findCustomWriteMode(patternName));
- return _topCoord->haveTaggedNodesReachedOpTime(opTime, tagPattern, useDurableOpTime);
+ if (writeConcern.checkCondition == WriteConcernOptions::CheckCondition::OpTime) {
+ return _topCoord->haveTaggedNodesReachedOpTime(opTime, tagPattern, useDurableOpTime);
+ } else {
+ invariant(writeConcern.checkCondition == WriteConcernOptions::CheckCondition::Config);
+ auto pred = _topCoord->makeConfigPredicate();
+ return _topCoord->haveTaggedNodesSatisfiedCondition(pred, tagPattern);
+ }
}
ReplicationCoordinator::StatusAndDuration ReplicationCoordinatorImpl::awaitReplication(
@@ -1839,22 +1813,16 @@ BSONObj ReplicationCoordinatorImpl::_getReplicationProgress(WithLock wl) const {
SharedSemiFuture<void> ReplicationCoordinatorImpl::_startWaitingForReplication(
WithLock wl, const OpTime& opTime, const WriteConcernOptions& writeConcern) {
- // When waiting for the config to be replicated to a majority, we do not wait on a specific
- // OpTime. We specifically populate waiters with a null OpTime.
- invariant(writeConcern.wMode != WriteConcernOptions::kConfigMajority || opTime.isNull());
const Mode replMode = getReplicationMode();
if (replMode == modeNone) {
// no replication check needed (validated above)
return Future<void>::makeReady();
}
-
- if (opTime.isNull() && writeConcern.wMode != WriteConcernOptions::kConfigMajority) {
- // If waiting for the empty optime, always say it's been replicated unless we're trying to
- // replicate a new config.
+ if (opTime.isNull()) {
+ // If waiting for the empty optime, always say it's been replicated.
return Future<void>::makeReady();
}
-
if (_inShutdown) {
return Future<void>::makeReady(
Status{ErrorCodes::ShutdownInProgress, "Replication is being shut down"});
@@ -1866,11 +1834,7 @@ SharedSemiFuture<void> ReplicationCoordinatorImpl::_startWaitingForReplication(
"Primary stepped down while waiting for replication"};
}
- // Checking for a config majority does not rely on waiting for a specific OpTime. So, we
- // pass a null OpTime to awaitReplication, which means the term here will always be -1.
- // Make sure we don't unnecessarily step down in this case.
- if (writeConcern.wMode != WriteConcernOptions::kConfigMajority &&
- opTime.getTerm() != _topCoord->getTerm()) {
+ if (opTime.getTerm() != _topCoord->getTerm()) {
return {
ErrorCodes::PrimarySteppedDown,
str::stream() << "Term changed from " << opTime.getTerm() << " to "
@@ -2897,6 +2861,51 @@ Status ReplicationCoordinatorImpl::processReplSetReconfig(OperationContext* opCt
"replSetReconfig admin command received from client; new config: {newConfig}",
"newConfig"_attr = args.newConfigObj);
+ auto getNewConfig = [&](const ReplSetConfig& oldConfig,
+ long long currentTerm) -> StatusWith<ReplSetConfig> {
+ ReplSetConfig newConfig;
+
+ // Only explicitly set configTerm for reconfig to this node's term if we're in FCV 4.4.
+ // Otherwise, use -1.
+ auto isUpgraded = serverGlobalParams.featureCompatibility.isVersion(
+ ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo44);
+ auto term = (!args.force && isUpgraded) ? currentTerm : OpTime::kUninitializedTerm;
+
+ // When initializing a new config through the replSetReconfig command, ignore the term
+ // field passed in through its args. Instead, use this node's term.
+ Status status = newConfig.initialize(args.newConfigObj, term, oldConfig.getReplicaSetId());
+ if (!status.isOK()) {
+ LOGV2_ERROR(21418,
+ "replSetReconfig got {status} while parsing {newConfigObj}",
+ "status"_attr = status,
+ "newConfigObj"_attr = args.newConfigObj);
+ return Status(ErrorCodes::InvalidReplicaSetConfig, status.reason());
+ }
+ if (newConfig.getReplSetName() != _settings.ourSetName()) {
+ str::stream errmsg;
+ errmsg << "Attempting to reconfigure a replica set with name "
+ << newConfig.getReplSetName() << ", but command line reports "
+ << _settings.ourSetName() << "; rejecting";
+ LOGV2_ERROR(
+ 21419, "{std_string_errmsg}", "std_string_errmsg"_attr = std::string(errmsg));
+ return Status(ErrorCodes::InvalidReplicaSetConfig, errmsg);
+ }
+
+ // Increase the config version for force reconfig.
+ if (args.force) {
+ auto version = std::max(oldConfig.getConfigVersion(), newConfig.getConfigVersion());
+ version += 10'000 + SecureRandom().nextInt32(100'000);
+ newConfig.setConfigVersion(version);
+ }
+ return newConfig;
+ };
+
+ return doReplSetReconfig(opCtx, getNewConfig, args.force);
+}
+
+Status ReplicationCoordinatorImpl::doReplSetReconfig(OperationContext* opCtx,
+ GetNewConfigFn getNewConfig,
+ bool force) {
stdx::unique_lock<Latch> lk(_mutex);
while (_rsConfigState == kConfigPreStart || _rsConfigState == kConfigStartingUp) {
@@ -2927,24 +2936,26 @@ Status ReplicationCoordinatorImpl::processReplSetReconfig(OperationContext* opCt
invariant(_rsConfig.isInitialized());
- if (!args.force && !_getMemberState_inlock().primary()) {
+ if (!force && !_getMemberState_inlock().primary()) {
return Status(ErrorCodes::NotMaster,
str::stream()
<< "replSetReconfig should only be run on PRIMARY, but my state is "
<< _getMemberState_inlock().toString()
<< "; use the \"force\" argument to override");
}
+ auto topCoordTerm = _topCoord->getTerm();
+
+ WriteConcernOptions configWriteConcern(ReplSetConfig::kConfigMajorityWriteConcernModeName,
+ WriteConcernOptions::SyncMode::NONE,
+ WriteConcernOptions::kNoTimeout);
+ configWriteConcern.checkCondition = WriteConcernOptions::CheckCondition::Config;
+ // Construct a fake OpTime that can be accepted but isn't used.
+ OpTime fakeOpTime(Timestamp(1, 1), topCoordTerm);
if (serverGlobalParams.featureCompatibility.isVersion(
ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo44) &&
- !args.force) {
- WriteConcernOptions writeConcern(
- WriteConcernOptions::kConfigMajority,
- WriteConcernOptions::SyncMode::NONE,
- // The timeout isn't used by _doneWaitingForReplication_inlock.
- WriteConcernOptions::kNoTimeout);
-
- if (!_doneWaitingForReplication_inlock(OpTime(), writeConcern)) {
+ !force) {
+ if (!_doneWaitingForReplication_inlock(fakeOpTime, configWriteConcern)) {
return Status(ErrorCodes::ConfigurationInProgress,
str::stream()
<< "Cannot run replSetReconfig because the current config: "
@@ -2986,46 +2997,21 @@ Status ReplicationCoordinatorImpl::processReplSetReconfig(OperationContext* opCt
makeGuard([&] { lockAndCall(&lk, [=] { _setConfigState_inlock(kConfigSteady); }); });
ReplSetConfig oldConfig = _rsConfig;
- // Only explicitly set configTerm for reconfig to this node's term if we're in FCV 4.4.
- // Otherwise, use -1.
- // Make sure we get the term from the topCoord under the replCoord mutex.
- auto topCoordTerm = serverGlobalParams.featureCompatibility.isVersion(
- ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo44)
- ? _topCoord->getTerm()
- : OpTime::kUninitializedTerm;
lk.unlock();
- ReplSetConfig newConfig;
- BSONObj newConfigObj = args.newConfigObj;
- if (args.force) {
- newConfigObj = incrementConfigVersionByRandom(newConfigObj);
- }
+ // Call the callback to get the new config given the old one.
+ auto newConfigStatus = getNewConfig(oldConfig, topCoordTerm);
+ Status status = newConfigStatus.getStatus();
+ if (!status.isOK())
+ return status;
+ ReplSetConfig newConfig = newConfigStatus.getValue();
BSONObj oldConfigObj = oldConfig.toBSON();
+ BSONObj newConfigObj = newConfig.toBSON();
audit::logReplSetReconfig(opCtx->getClient(), &oldConfigObj, &newConfigObj);
- // When initializing a new config through the replSetReconfig command, ignore the term
- // field passed in through its args. Instead, use this node's term.
- // If it is a force reconfig, explicitly set the term to -1.
- auto term = !args.force ? topCoordTerm : OpTime::kUninitializedTerm;
- Status status = newConfig.initialize(newConfigObj, term, oldConfig.getReplicaSetId());
- if (!status.isOK()) {
- LOGV2_ERROR(21418,
- "replSetReconfig got {status} while parsing {newConfigObj}",
- "status"_attr = status,
- "newConfigObj"_attr = newConfigObj);
- return Status(ErrorCodes::InvalidReplicaSetConfig, status.reason());
- }
- if (newConfig.getReplSetName() != _settings.ourSetName()) {
- str::stream errmsg;
- errmsg << "Attempting to reconfigure a replica set with name " << newConfig.getReplSetName()
- << ", but command line reports " << _settings.ourSetName() << "; rejecting";
- LOGV2_ERROR(21419, "{errmsg}", "errmsg"_attr = std::string(errmsg));
- return Status(ErrorCodes::InvalidReplicaSetConfig, errmsg);
- }
-
StatusWith<int> myIndex = validateConfigForReconfig(
- _externalState.get(), oldConfig, newConfig, opCtx->getServiceContext(), args.force);
+ _externalState.get(), oldConfig, newConfig, opCtx->getServiceContext(), force);
if (!myIndex.isOK()) {
LOGV2_ERROR(21420,
"replSetReconfig got {status} while validating {newConfigObj}",
@@ -3039,7 +3025,7 @@ Status ReplicationCoordinatorImpl::processReplSetReconfig(OperationContext* opCt
"replSetReconfig config object with {numMembers} members parses ok",
"numMembers"_attr = newConfig.getNumMembers());
- if (!args.force && !MONGO_unlikely(omitConfigQuorumCheck.shouldFail())) {
+ if (!force && !MONGO_unlikely(omitConfigQuorumCheck.shouldFail())) {
status = checkQuorumForReconfig(
_replExecutor.get(), newConfig, myIndex.getValue(), _topCoord->getTerm());
if (!status.isOK()) {
@@ -3058,17 +3044,15 @@ Status ReplicationCoordinatorImpl::processReplSetReconfig(OperationContext* opCt
}
configStateGuard.dismiss();
- _finishReplSetReconfig(opCtx, newConfig, args.force, myIndex.getValue());
+ _finishReplSetReconfig(opCtx, newConfig, force, myIndex.getValue());
- if (!args.force &&
+ if (!force &&
serverGlobalParams.featureCompatibility.isVersion(
ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo44)) {
// Wait for the config document to be replicated to a majority of nodes in the new
// config.
- WriteConcernOptions writeConcern(WriteConcernOptions::kConfigMajority,
- WriteConcernOptions::SyncMode::NONE,
- WriteConcernOptions::kNoTimeout);
- StatusAndDuration configAwaitStatus = awaitReplication(opCtx, OpTime(), writeConcern);
+ StatusAndDuration configAwaitStatus =
+ awaitReplication(opCtx, fakeOpTime, configWriteConcern);
uassertStatusOK(configAwaitStatus.status);
// Now that the new config has been persisted and installed in memory, wait for the latest
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index 39be77acb89..562417d99d9 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -241,6 +241,10 @@ public:
const ReplSetReconfigArgs& args,
BSONObjBuilder* resultObj) override;
+ virtual Status doReplSetReconfig(OperationContext* opCtx,
+ GetNewConfigFn getNewConfig,
+ bool force) override;
+
virtual Status processReplSetInitiate(OperationContext* opCtx,
const BSONObj& configObj,
BSONObjBuilder* resultObj) override;
diff --git a/src/mongo/db/repl/replication_coordinator_mock.cpp b/src/mongo/db/repl/replication_coordinator_mock.cpp
index b8ff01aefa4..10189e95718 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.cpp
+++ b/src/mongo/db/repl/replication_coordinator_mock.cpp
@@ -379,6 +379,12 @@ Status ReplicationCoordinatorMock::processReplSetReconfig(OperationContext* opCt
return Status::OK();
}
+Status ReplicationCoordinatorMock::doReplSetReconfig(OperationContext* opCtx,
+ GetNewConfigFn getNewConfig,
+ bool force) {
+ return Status::OK();
+}
+
Status ReplicationCoordinatorMock::processReplSetInitiate(OperationContext* opCtx,
const BSONObj& configObj,
BSONObjBuilder* resultObj) {
diff --git a/src/mongo/db/repl/replication_coordinator_mock.h b/src/mongo/db/repl/replication_coordinator_mock.h
index 80a368d25a2..57f60ece3cd 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.h
+++ b/src/mongo/db/repl/replication_coordinator_mock.h
@@ -209,6 +209,10 @@ public:
const ReplSetReconfigArgs& args,
BSONObjBuilder* resultObj);
+ virtual Status doReplSetReconfig(OperationContext* opCtx,
+ GetNewConfigFn getNewConfig,
+ bool force);
+
virtual Status processReplSetInitiate(OperationContext* opCtx,
const BSONObj& configObj,
BSONObjBuilder* resultObj);
diff --git a/src/mongo/db/repl/replication_coordinator_noop.cpp b/src/mongo/db/repl/replication_coordinator_noop.cpp
index 14089bcba5b..1605c42b131 100644
--- a/src/mongo/db/repl/replication_coordinator_noop.cpp
+++ b/src/mongo/db/repl/replication_coordinator_noop.cpp
@@ -321,6 +321,12 @@ Status ReplicationCoordinatorNoOp::processReplSetReconfig(OperationContext*,
MONGO_UNREACHABLE;
}
+Status ReplicationCoordinatorNoOp::doReplSetReconfig(OperationContext* opCtx,
+ GetNewConfigFn getNewConfig,
+ bool force) {
+ MONGO_UNREACHABLE;
+}
+
Status ReplicationCoordinatorNoOp::processReplSetInitiate(OperationContext*,
const BSONObj&,
BSONObjBuilder*) {
diff --git a/src/mongo/db/repl/replication_coordinator_noop.h b/src/mongo/db/repl/replication_coordinator_noop.h
index 4c63d307335..3bc608f6b54 100644
--- a/src/mongo/db/repl/replication_coordinator_noop.h
+++ b/src/mongo/db/repl/replication_coordinator_noop.h
@@ -183,6 +183,10 @@ public:
const ReplSetReconfigArgs&,
BSONObjBuilder*) final;
+ Status doReplSetReconfig(OperationContext* opCtx,
+ GetNewConfigFn getNewConfig,
+ bool force) final;
+
Status processReplSetInitiate(OperationContext*, const BSONObj&, BSONObjBuilder*) final;
Status processReplSetUpdatePosition(const UpdatePositionArgs&, long long*) final;
diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp
index 3ffbbe4a695..697a2136884 100644
--- a/src/mongo/db/repl/topology_coordinator.cpp
+++ b/src/mongo/db/repl/topology_coordinator.cpp
@@ -945,29 +945,6 @@ HeartbeatResponseAction TopologyCoordinator::processHeartbeatResponse(
return nextAction;
}
-bool TopologyCoordinator::haveMajorityReplicatedConfig() {
- auto configWriteMaj = _rsConfig.getWriteMajority();
- auto numNodesWithReplicatedConfig = 0;
- for (auto&& memberData : _memberData) {
- // If this member is not in our new config, do not count it in the majority
- if (_rsConfig.findMemberByID(memberData.getMemberId().getData()) == NULL) {
- continue;
- }
-
- if (memberData.getConfigVersionAndTerm() == _rsConfig.getConfigVersionAndTerm()) {
- // configVersionAndTerm comparison will compare config versions alone and ignore terms
- // if the config term is -1, which makes this compatible with 4.2 heartbeat responses.
- numNodesWithReplicatedConfig++;
- }
-
- // Once we know a majority of the nodes have replicated the config return.
- if (numNodesWithReplicatedConfig >= configWriteMaj)
- return true;
- }
-
- return false;
-}
-
bool TopologyCoordinator::haveNumNodesReachedOpTime(const OpTime& targetOpTime,
int numNodes,
bool durablyWritten) {
@@ -1016,16 +993,20 @@ bool TopologyCoordinator::haveNumNodesReachedOpTime(const OpTime& targetOpTime,
bool TopologyCoordinator::haveTaggedNodesReachedOpTime(const OpTime& opTime,
const ReplSetTagPattern& tagPattern,
bool durablyWritten) {
- ReplSetTagMatch matcher(tagPattern);
+ auto pred = makeOpTimePredicate(opTime, durablyWritten);
+ return haveTaggedNodesSatisfiedCondition(pred, tagPattern);
+}
+TopologyCoordinator::MemberPredicate TopologyCoordinator::makeOpTimePredicate(const OpTime& opTime,
+ bool durablyWritten) {
// Invariants that we only wait for an OpTime in the term that this node is currently writing
// to. In other words, we do not support waiting for an OpTime written by a previous primary
// because comparing members' lastApplied/lastDurable alone is not sufficient to tell if the
// OpTime has been replicated.
invariant(opTime.getTerm() == getMyLastAppliedOpTime().getTerm());
- for (auto&& memberData : _memberData) {
- const OpTime& memberOpTime =
+ return [=](const MemberData& memberData) {
+ auto memberOpTime =
durablyWritten ? memberData.getLastDurableOpTime() : memberData.getLastAppliedOpTime();
// In addition to checking if a member has a greater/equal timestamp field we also need to
@@ -1034,16 +1015,29 @@ bool TopologyCoordinator::haveTaggedNodesReachedOpTime(const OpTime& opTime,
// thus we do not know if the target OpTime in our previous term has been replicated to the
// member because the memberOpTime in a higher term could correspond to an operation in a
// divergent branch of history regardless of its timestamp.
- if (memberOpTime.getTerm() == opTime.getTerm() &&
- memberOpTime.getTimestamp() >= opTime.getTimestamp()) {
- // This node has reached the desired optime, now we need to check if it is a part
+ return memberOpTime.getTerm() == opTime.getTerm() &&
+ memberOpTime.getTimestamp() >= opTime.getTimestamp();
+ };
+}
+
+TopologyCoordinator::MemberPredicate TopologyCoordinator::makeConfigPredicate() {
+ return [&](const MemberData& memberData) {
+ return memberData.getConfigVersionAndTerm() == _rsConfig.getConfigVersionAndTerm();
+ };
+}
+
+bool TopologyCoordinator::haveTaggedNodesSatisfiedCondition(
+ std::function<bool(const MemberData&)> pred, const ReplSetTagPattern& tagPattern) {
+ ReplSetTagMatch matcher(tagPattern);
+
+ for (auto&& memberData : _memberData) {
+ if (pred(memberData)) {
+ // This node has satisfied the predicate, now we need to check if it is a part
// of the tagPattern.
int memberIndex = memberData.getConfigIndex();
invariant(memberIndex >= 0);
const MemberConfig& memberConfig = _rsConfig.getMemberAt(memberIndex);
- for (MemberConfig::TagIterator it = memberConfig.tagsBegin();
- it != memberConfig.tagsEnd();
- ++it) {
+ for (auto&& it = memberConfig.tagsBegin(); it != memberConfig.tagsEnd(); ++it) {
if (matcher.update(*it)) {
return true;
}
diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h
index f60d9a92044..20c2bbe8cbf 100644
--- a/src/mongo/db/repl/topology_coordinator.h
+++ b/src/mongo/db/repl/topology_coordinator.h
@@ -413,15 +413,6 @@ public:
const StatusWith<ReplSetHeartbeatResponse>& hbResponse);
/**
- * Returns whether or not at least a majority of voting, data-bearing nodes in the current
- * replica set config have replicated the config.
- * This loops through _memberData and checks if:
- * (1) The member is in the current config
- * (2) The member's config (term, version) match the current config's (term, version)
- */
- bool haveMajorityReplicatedConfig();
-
- /**
* Returns whether or not at least 'numNodes' have reached the given opTime with the same term.
* "durablyWritten" indicates whether the operation has to be durably applied.
*/
@@ -436,6 +427,25 @@ public:
const ReplSetTagPattern& tagPattern,
bool durablyWritten);
+ using MemberPredicate = std::function<bool(const MemberData&)>;
+
+ /**
+ * Return the predicate that tests if a member has reached the target OpTime.
+ */
+ MemberPredicate makeOpTimePredicate(const OpTime& opTime, bool durablyWritten);
+
+ /**
+ * Return the predicate that tests if a member has replicated the given config.
+ */
+ MemberPredicate makeConfigPredicate();
+
+ /**
+ * Returns whether or not at least one node matching the tagPattern has satisfied the given
+ * condition.
+ */
+ bool haveTaggedNodesSatisfiedCondition(MemberPredicate pred,
+ const ReplSetTagPattern& tagPattern);
+
/**
* Returns a vector of members that have applied the operation with OpTime 'op'.
* "durablyWritten" indicates whether the operation has to be durably applied.
diff --git a/src/mongo/db/repl/topology_coordinator_v1_test.cpp b/src/mongo/db/repl/topology_coordinator_v1_test.cpp
index 8902ba19ea3..5675c52d10e 100644
--- a/src/mongo/db/repl/topology_coordinator_v1_test.cpp
+++ b/src/mongo/db/repl/topology_coordinator_v1_test.cpp
@@ -5114,7 +5114,31 @@ TEST_F(TopoCoordTest, TwoNodesEligibleForElectionHandoffEqualPriorityResolveByMe
ASSERT_EQUALS(1, getTopoCoord().chooseElectionHandoffCandidate());
}
-TEST_F(TopoCoordTest, MajorityReplicatedConfigChecks) {
+class ConfigReplicationTest : public TopoCoordTest {
+public:
+ void setUp() override {
+ TopoCoordTest::setUp();
+ }
+
+ void simulateHBWithConfigVersionAndTerm(size_t remoteIndex) {
+ // Simulate a heartbeat to the remote.
+ auto member = getCurrentConfig().getMemberAt(remoteIndex);
+ getTopoCoord().prepareHeartbeatRequestV1(
+ now(), getCurrentConfig().getReplSetName(), member.getHostAndPort());
+ // Simulate heartbeat response from the remote.
+ // This will call setUpValues, which will update the memberData for the remote.
+ ReplSetHeartbeatResponse hb;
+ hb.setConfigVersion(getCurrentConfig().getConfigVersion());
+ hb.setConfigTerm(getCurrentConfig().getConfigTerm());
+ hb.setState(member.isArbiter() ? MemberState::RS_ARBITER : MemberState::RS_SECONDARY);
+ StatusWith<ReplSetHeartbeatResponse> hbResponse = StatusWith<ReplSetHeartbeatResponse>(hb);
+
+ getTopoCoord().processHeartbeatResponse(
+ now(), Milliseconds(0), member.getHostAndPort(), hbResponse);
+ }
+};
+
+TEST_F(ConfigReplicationTest, MajorityReplicatedConfigChecks) {
// Config with three nodes requires at least 2 nodes to replicate the config.
updateConfig(BSON("_id"
<< "rs0"
@@ -5130,27 +5154,21 @@ TEST_F(TopoCoordTest, MajorityReplicatedConfigChecks) {
// makeSelfPrimary() doesn't actually conduct a true election, so the term will still be 0.
makeSelfPrimary();
+ auto tagPatternStatus =
+ getCurrentConfig().findCustomWriteMode(ReplSetConfig::kConfigMajorityWriteConcernModeName);
+ ASSERT_TRUE(tagPatternStatus.isOK());
+ auto tagPattern = tagPatternStatus.getValue();
+ auto pred = getTopoCoord().makeConfigPredicate();
+
// Currently, only we have config 2 (the initial config).
// This simulates a reconfig where only the primary has written down the configVersion and
// configTerm.
- ASSERT_FALSE(getTopoCoord().haveMajorityReplicatedConfig());
+ ASSERT_FALSE(getTopoCoord().haveTaggedNodesSatisfiedCondition(pred, tagPattern));
// Simulate a heartbeat to one of the other nodes.
- getTopoCoord().prepareHeartbeatRequestV1(now(), "rs0", HostAndPort("host1"));
- // Simulate heartbeat response from host1. This will call setUpValues, which will update the
- // memberData for host1.
- ReplSetHeartbeatResponse hb;
- hb.setConfigVersion(2);
- hb.setConfigTerm(0);
- hb.setState(MemberState::RS_SECONDARY);
- StatusWith<ReplSetHeartbeatResponse> hbResponse = StatusWith<ReplSetHeartbeatResponse>(hb);
-
- now() += Milliseconds(1);
- getTopoCoord().processHeartbeatResponse(
- now(), Milliseconds(1), HostAndPort("host1"), hbResponse);
-
+ simulateHBWithConfigVersionAndTerm(1);
// Now, node 0 and node 1 should have the current config.
- ASSERT_TRUE(getTopoCoord().haveMajorityReplicatedConfig());
+ ASSERT_TRUE(getTopoCoord().haveTaggedNodesSatisfiedCondition(pred, tagPattern));
// Update _rsConfig with config(v: 3, t: 0). configTerms are the same but configVersion 3 is
// higher than 2.
@@ -5168,24 +5186,12 @@ TEST_F(TopoCoordTest, MajorityReplicatedConfigChecks) {
// Currently, only we have config 3.
// This simulates a reconfig where only the primary has written down the configVersion and
// configTerm.
- ASSERT_FALSE(getTopoCoord().haveMajorityReplicatedConfig());
+ ASSERT_FALSE(getTopoCoord().haveTaggedNodesSatisfiedCondition(pred, tagPattern));
// Simulate a heartbeat to one of the other nodes.
- getTopoCoord().prepareHeartbeatRequestV1(now(), "rs0", HostAndPort("host2"));
- // Simulate heartbeat response from host2. This will call setUpValues, which will update the
- // memberData for host2.
- ReplSetHeartbeatResponse hb2;
- hb2.setConfigVersion(3);
- hb2.setConfigTerm(0);
- hb2.setState(MemberState::RS_SECONDARY);
- StatusWith<ReplSetHeartbeatResponse> hbResponse2 = StatusWith<ReplSetHeartbeatResponse>(hb2);
-
- now() += Milliseconds(1);
- getTopoCoord().processHeartbeatResponse(
- now(), Milliseconds(1), HostAndPort("host2"), hbResponse2);
-
+ simulateHBWithConfigVersionAndTerm(2);
// Now, node 0 and node 2 should have the current config.
- ASSERT_TRUE(getTopoCoord().haveMajorityReplicatedConfig());
+ ASSERT_TRUE(getTopoCoord().haveTaggedNodesSatisfiedCondition(pred, tagPattern));
// Update _rsConfig with config(v: 4, t: 1). configTerm 1 is higher than configTerm 0.
updateConfig(BSON("_id"
@@ -5200,30 +5206,14 @@ TEST_F(TopoCoordTest, MajorityReplicatedConfigChecks) {
0);
// Currently, only we have config 4.
- // This simulates a reconfig where only the primary has written down the configVersion and
- // configTerm.
- ASSERT_FALSE(getTopoCoord().haveMajorityReplicatedConfig());
-
- // Simulate a heartbeat to one of the other nodes.
- getTopoCoord().prepareHeartbeatRequestV1(now(), "rs0", HostAndPort("host1"));
- // Simulate heartbeat response from host1. This will call setUpValues, which will update the
- // memberData for host1.
- ReplSetHeartbeatResponse hb3;
- hb3.setConfigVersion(4);
- hb3.setConfigTerm(1);
- hb3.setState(MemberState::RS_SECONDARY);
- StatusWith<ReplSetHeartbeatResponse> hbResponse3 = StatusWith<ReplSetHeartbeatResponse>(hb3);
-
- now() += Milliseconds(1);
- getTopoCoord().processHeartbeatResponse(
- now(), Milliseconds(1), HostAndPort("host1"), hbResponse3);
+ ASSERT_FALSE(getTopoCoord().haveTaggedNodesSatisfiedCondition(pred, tagPattern));
+ simulateHBWithConfigVersionAndTerm(1);
// Now, node 0 and node 1 should have the current config.
- ASSERT_TRUE(getTopoCoord().haveMajorityReplicatedConfig());
+ ASSERT_TRUE(getTopoCoord().haveTaggedNodesSatisfiedCondition(pred, tagPattern));
}
-TEST_F(TopoCoordTest, ArbiterNotIncludedInMajorityReplicatedConfigChecks) {
- // Config with only one data-bearing node only needs one node to majority replicate config.
+TEST_F(ConfigReplicationTest, ArbiterIncludedInMajorityReplicatedConfigChecks) {
updateConfig(BSON("_id"
<< "rs0"
<< "version" << 2 << "term" << 0 << "members"
@@ -5231,15 +5221,89 @@ TEST_F(TopoCoordTest, ArbiterNotIncludedInMajorityReplicatedConfigChecks) {
<< "host0:27017")
<< BSON("_id" << 1 << "host"
<< "host1:27017"
+ << "arbiterOnly" << true)
+ << BSON("_id" << 2 << "host"
+ << "host2:27017"))),
+ 0);
+
+ // makeSelfPrimary() doesn't actually conduct a true election, so the term will still be 0.
+ makeSelfPrimary();
+
+ // Currently, only the primary has config 2 (the initial config).
+ auto tagPattern =
+ getCurrentConfig().findCustomWriteMode(ReplSetConfig::kConfigMajorityWriteConcernModeName);
+ ASSERT_TRUE(tagPattern.isOK());
+ auto pred = getTopoCoord().makeConfigPredicate();
+ ASSERT_FALSE(getTopoCoord().haveTaggedNodesSatisfiedCondition(pred, tagPattern.getValue()));
+
+ simulateHBWithConfigVersionAndTerm(1);
+ ASSERT_TRUE(getTopoCoord().haveTaggedNodesSatisfiedCondition(pred, tagPattern.getValue()));
+}
+
+TEST_F(ConfigReplicationTest, ArbiterIncludedInAllReplicatedConfigChecks) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "term" << 0 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host0:27017")
+ << BSON("_id" << 1 << "host"
+ << "host1:27017")
+ << BSON("_id" << 2 << "host"
+ << "host2:27017"
<< "arbiterOnly" << true))),
0);
// makeSelfPrimary() doesn't actually conduct a true election, so the term will still be 0.
makeSelfPrimary();
- // Currently, only the primary has config 2 (the initial config). But, since arbiters do not
- // count towards the config majority, haveMajorityReplicatedConfig() should still return true.
- ASSERT_TRUE(getTopoCoord().haveMajorityReplicatedConfig());
+ // Currently, only the primary has config 2 (the initial config).
+ auto tagPattern =
+ getCurrentConfig().findCustomWriteMode(ReplSetConfig::kConfigAllWriteConcernName);
+ ASSERT_TRUE(tagPattern.isOK());
+ auto pred = getTopoCoord().makeConfigPredicate();
+ ASSERT_FALSE(getTopoCoord().haveTaggedNodesSatisfiedCondition(pred, tagPattern.getValue()));
+
+ simulateHBWithConfigVersionAndTerm(1);
+ ASSERT_FALSE(getTopoCoord().haveTaggedNodesSatisfiedCondition(pred, tagPattern.getValue()));
+
+ simulateHBWithConfigVersionAndTerm(2);
+ ASSERT_TRUE(getTopoCoord().haveTaggedNodesSatisfiedCondition(pred, tagPattern.getValue()));
+}
+
+TEST_F(ConfigReplicationTest, ArbiterAndNonVotingNodeIncludedInAllReplicatedConfigChecks) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "term" << 0 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host0:27017")
+ << BSON("_id" << 1 << "host"
+ << "host1:27017")
+ << BSON("_id" << 2 << "host"
+ << "host2:27017"
+ << "votes" << 0 << "priority" << 0)
+ << BSON("_id" << 3 << "host"
+ << "host3:27017"
+ << "arbiterOnly" << true))),
+ 0);
+
+ // makeSelfPrimary() doesn't actually conduct a true election, so the term will still be 0.
+ makeSelfPrimary();
+
+ // Currently, only the primary has config 2 (the initial config).
+ auto tagPattern =
+ getCurrentConfig().findCustomWriteMode(ReplSetConfig::kConfigAllWriteConcernName);
+ ASSERT_TRUE(tagPattern.isOK());
+ auto pred = getTopoCoord().makeConfigPredicate();
+ ASSERT_FALSE(getTopoCoord().haveTaggedNodesSatisfiedCondition(pred, tagPattern.getValue()));
+
+ simulateHBWithConfigVersionAndTerm(1);
+ ASSERT_FALSE(getTopoCoord().haveTaggedNodesSatisfiedCondition(pred, tagPattern.getValue()));
+
+ simulateHBWithConfigVersionAndTerm(2);
+ ASSERT_FALSE(getTopoCoord().haveTaggedNodesSatisfiedCondition(pred, tagPattern.getValue()));
+
+ simulateHBWithConfigVersionAndTerm(3);
+ ASSERT_TRUE(getTopoCoord().haveTaggedNodesSatisfiedCondition(pred, tagPattern.getValue()));
}
TEST_F(TopoCoordTest, ArbiterNotIncludedInW3WriteInPSSAReplSet) {
diff --git a/src/mongo/db/write_concern_options.cpp b/src/mongo/db/write_concern_options.cpp
index 8ffdd14baf7..298941fbdef 100644
--- a/src/mongo/db/write_concern_options.cpp
+++ b/src/mongo/db/write_concern_options.cpp
@@ -67,7 +67,6 @@ constexpr int WriteConcernOptions::kNoWaiting;
constexpr StringData WriteConcernOptions::kWriteConcernField;
const char WriteConcernOptions::kMajority[] = "majority";
-const char WriteConcernOptions::kConfigMajority[] = "configMajority";
const BSONObj WriteConcernOptions::Default = BSONObj();
const BSONObj WriteConcernOptions::Acknowledged(BSON("w" << W_NORMAL));
diff --git a/src/mongo/db/write_concern_options.h b/src/mongo/db/write_concern_options.h
index fbd870b1e4a..07cd6a6e1d7 100644
--- a/src/mongo/db/write_concern_options.h
+++ b/src/mongo/db/write_concern_options.h
@@ -42,6 +42,10 @@ struct WriteConcernOptions {
public:
enum class SyncMode { UNSET, NONE, FSYNC, JOURNAL };
+ // This specifies the condition to check to satisfy given tags.
+ // Users can only provide OpTime condition, the others are used internally.
+ enum class CheckCondition { OpTime, Config };
+
static constexpr int kNoTimeout = 0;
static constexpr int kNoWaiting = -1;
@@ -52,8 +56,7 @@ public:
static const BSONObj ConfigMajority;
static constexpr StringData kWriteConcernField = "writeConcern"_sd;
- static const char kMajority[]; // = "majority"
- static const char kConfigMajority[]; // = "configMajority"
+ static const char kMajority[]; // = "majority"
static constexpr Seconds kWriteConcernTimeoutSystem{15};
static constexpr Seconds kWriteConcernTimeoutMigration{30};
@@ -130,6 +133,8 @@ public:
// True if the default 'w' value of w:1 was used.
bool usedDefaultW = false;
+ CheckCondition checkCondition = CheckCondition::OpTime;
+
private:
ReadWriteConcernProvenance _provenance;
};
diff --git a/src/mongo/embedded/replication_coordinator_embedded.cpp b/src/mongo/embedded/replication_coordinator_embedded.cpp
index fabaa6f64f5..0238e7ea547 100644
--- a/src/mongo/embedded/replication_coordinator_embedded.cpp
+++ b/src/mongo/embedded/replication_coordinator_embedded.cpp
@@ -346,6 +346,12 @@ Status ReplicationCoordinatorEmbedded::processReplSetReconfig(OperationContext*,
UASSERT_NOT_IMPLEMENTED;
}
+Status ReplicationCoordinatorEmbedded::doReplSetReconfig(OperationContext* opCtx,
+ GetNewConfigFn getNewConfig,
+ bool force) {
+ UASSERT_NOT_IMPLEMENTED;
+}
+
Status ReplicationCoordinatorEmbedded::processReplSetInitiate(OperationContext*,
const BSONObj&,
BSONObjBuilder*) {
diff --git a/src/mongo/embedded/replication_coordinator_embedded.h b/src/mongo/embedded/replication_coordinator_embedded.h
index a0028c7591b..9bff367083c 100644
--- a/src/mongo/embedded/replication_coordinator_embedded.h
+++ b/src/mongo/embedded/replication_coordinator_embedded.h
@@ -190,6 +190,10 @@ public:
const ReplSetReconfigArgs&,
BSONObjBuilder*) override;
+ Status doReplSetReconfig(OperationContext* opCtx,
+ GetNewConfigFn getNewConfig,
+ bool force) override;
+
Status processReplSetInitiate(OperationContext*, const BSONObj&, BSONObjBuilder*) override;
Status processReplSetUpdatePosition(const repl::UpdatePositionArgs&, long long*) override;
diff --git a/src/mongo/shell/replsettest.js b/src/mongo/shell/replsettest.js
index e88d5fad36c..be2f5e64128 100644
--- a/src/mongo/shell/replsettest.js
+++ b/src/mongo/shell/replsettest.js
@@ -1199,6 +1199,8 @@ var ReplSetTest = function(opts) {
assert.commandWorked(
self.getPrimary().adminCommand({setFeatureCompatibilityVersion: fcv}));
checkFCV(self.getPrimary().getDB("admin"), lastStableFCV);
+ print("Fetch the config version from primay since 4.4 downgrade runs a reconfig.");
+ config.version = self.getReplSetConfigFromNode().version;
});
}
@@ -1221,7 +1223,7 @@ var ReplSetTest = function(opts) {
if (originalSettings) {
config.settings = originalSettings;
}
- config.version = 2;
+ config.version = config.version ? config.version + 1 : 2;
// Nodes started with the --configsvr flag must have configsvr = true in their config.
if (this.nodes[0].hasOwnProperty("fullOptions") &&