summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPavi Vetriselvan <pavithra.vetriselvan@mongodb.com>2021-03-31 22:52:25 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-04-20 15:42:02 +0000
commite71e821acd084d3010b59bb65a2d82ef7a7fdba4 (patch)
treed0cab2fc5108146de2714978dedde749115b0e66
parentbe75f9d33d8cb3c258fbd375a3fb1861dd9af93c (diff)
downloadmongo-e71e821acd084d3010b59bb65a2d82ef7a7fdba4.tar.gz
SERVER-54829 Avoid crashing during kUpgrading/kDowngrading state if config has incompatible field name
(cherry picked from commit f1ac83b03c330d1ef59dd13e55aeddf3c6d41b4b)
-rw-r--r--jstests/multiVersion/stepdown_interrupts_setFCV_before_reconfig.js102
-rw-r--r--src/mongo/db/commands/set_feature_compatibility_version_command.cpp5
-rw-r--r--src/mongo/db/repl/repl_set_config_checks.cpp11
-rw-r--r--src/mongo/db/repl/repl_set_config_checks.h17
-rw-r--r--src/mongo/db/repl/repl_set_config_checks_test.cpp65
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.cpp20
6 files changed, 177 insertions, 43 deletions
diff --git a/jstests/multiVersion/stepdown_interrupts_setFCV_before_reconfig.js b/jstests/multiVersion/stepdown_interrupts_setFCV_before_reconfig.js
new file mode 100644
index 00000000000..962484d5b27
--- /dev/null
+++ b/jstests/multiVersion/stepdown_interrupts_setFCV_before_reconfig.js
@@ -0,0 +1,102 @@
+/**
+ * Tests a scenario where stepdown interrupts the setFCV command after transitioning to the
+ * intermediary kUpgrading/kDowngrading state but before the reconfig to change the delay field
+ * name. In this state, a subsequent stepup attempt may have an FCV incompatible field name.
+ * Since we are expected to run the setFCV command to finish the upgrade/downgrade procedure,
+ * the delay field name will eventually be changed to the correct value, so it's safe to
+ * temporarily skip the FCV compatibility check for stepup reconfigs instead of crashing the
+ * server.
+ *
+ */
+
+(function() {
+"use strict";
+
+load("jstests/libs/parallel_shell_helpers.js");
+load("jstests/libs/fail_point_util.js");
+
+function runSetFCV(targetVersion) {
+ assert.commandFailedWithCode(db.adminCommand({setFeatureCompatibilityVersion: targetVersion}),
+ ErrorCodes.InterruptedDueToReplStateChange);
+}
+
+function checkForSecondaryDelaySecs(primary) {
+ let config = primary.adminCommand({replSetGetConfig: 1}).config;
+ assert.eq(config.members[0].secondaryDelaySecs, 0, config);
+ assert(!config.members[0].hasOwnProperty('slaveDelay'), config);
+}
+
+function checkForSlaveDelay(primary) {
+ let config = primary.adminCommand({replSetGetConfig: 1}).config;
+ assert.eq(config.members[0].slaveDelay, 0, config);
+ assert(!config.members[0].hasOwnProperty('secondaryDelaySecs'), config);
+}
+
+const replTest = new ReplSetTest({name: jsTestName(), nodes: 1});
+replTest.startSet();
+replTest.initiateWithHighElectionTimeout();
+let primary = replTest.getPrimary();
+
+jsTestLog("Testing stepdown interrupting FCV downgrade.");
+// Enable failpoint to hang after transitioning to kDowngrading.
+let failpoint = configureFailPoint(primary, 'hangAfterStartingFCVTransition');
+let setFCVThread = startParallelShell(funWithArgs(runSetFCV, lastLTSFCV), primary.port);
+failpoint.wait();
+
+// Verify the node is in a partially downgraded state by checking that the 'targetVersion'
+// was set to 'lastLTSFCV'.
+checkFCV(primary.getDB("admin"), lastLTSFCV /* version */, lastLTSFCV /* targetVersion */);
+
+// Interrupt the setFCV command with a stepdown.
+assert.commandWorked(primary.adminCommand({replSetStepDown: 1, force: true}));
+failpoint.off();
+setFCVThread();
+replTest.waitForState(primary, ReplSetTest.State.SECONDARY);
+
+// The automatic reconfig on stepup should be able to succeed, resulting in the
+// 'secondaryDelaySecs' field still being present in the config even though we're in the
+// kDowngrading FCV.
+replTest.stepUp(primary);
+primary = replTest.getPrimary();
+checkForSecondaryDelaySecs(primary);
+
+jsTestLog("Complete FCV downgrade.");
+assert.commandWorked(primary.adminCommand({setFeatureCompatibilityVersion: lastLTSFCV}));
+
+// Verify the feature compatibility version transition is complete. Once we've completed the
+// downgrade, the delay field should have been properly renamed to 'slaveDelay'.
+checkFCV(primary.getDB("admin"), lastLTSFCV);
+checkForSlaveDelay(primary);
+
+jsTestLog("Testing stepdown interrupting FCV upgrade.");
+// Enable failpoint to hang after transitioning to kUpgrading.
+failpoint = configureFailPoint(primary, 'hangAfterStartingFCVTransition');
+setFCVThread = startParallelShell(funWithArgs(runSetFCV, latestFCV), primary.port);
+failpoint.wait();
+
+// Verify the node is in a partially upgraded state by checking that the 'targetVersion'
+// was set to 'latestFCV'.
+checkFCV(primary.getDB("admin"), lastLTSFCV /* version */, latestFCV /* targetVersion */);
+
+// Interrupt the setFCV command with a stepdown.
+assert.commandWorked(primary.adminCommand({replSetStepDown: 1, force: true}));
+failpoint.off();
+setFCVThread();
+replTest.waitForState(primary, ReplSetTest.State.SECONDARY);
+
+// The automatic reconfig on stepup should be able to succeed, resulting in the 'slaveDelay'
+// field still being present in the config even though we're in the kUpgrading FCV.
+replTest.stepUp(primary);
+primary = replTest.getPrimary();
+checkForSlaveDelay(primary);
+
+jsTestLog("Complete FCV upgrade.");
+assert.commandWorked(primary.adminCommand({setFeatureCompatibilityVersion: latestFCV}));
+
+// Verify the feature compatibility version transition is complete. Once we've completed the
+// upgrade, the delay field should have been properly renamed to 'secondaryDelaySecs'.
+checkFCV(primary.getDB("admin"), latestFCV);
+checkForSecondaryDelaySecs(primary);
+
+replTest.stopSet();
+}());
diff --git a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
index a0e1d348403..2c41b6f3e3b 100644
--- a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
+++ b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
@@ -73,6 +73,7 @@ namespace {
MONGO_FAIL_POINT_DEFINE(failUpgrading);
MONGO_FAIL_POINT_DEFINE(hangWhileUpgrading);
+MONGO_FAIL_POINT_DEFINE(hangAfterStartingFCVTransition);
MONGO_FAIL_POINT_DEFINE(failDowngrading);
MONGO_FAIL_POINT_DEFINE(hangWhileDowngrading);
@@ -266,6 +267,8 @@ public:
isFromConfigServer,
true /* setTargetVersion */);
+ hangAfterStartingFCVTransition.pauseWhileSet(opCtx);
+
// If the 'useSecondaryDelaySecs' feature flag is enabled in the upgraded FCV, issue a
// reconfig to change the 'slaveDelay' field to 'secondaryDelaySecs'.
if (isReplSet && repl::feature_flags::gUseSecondaryDelaySecs.isEnabledAndIgnoreFCV() &&
@@ -371,6 +374,8 @@ public:
isFromConfigServer,
true /* setTargetVersion */);
+ hangAfterStartingFCVTransition.pauseWhileSet(opCtx);
+
// If the 'useSecondaryDelaySecs' feature flag is disabled in the downgraded FCV, issue
// a reconfig to change the 'secondaryDelaySecs' field to 'slaveDelay'.
if (isReplSet && repl::feature_flags::gUseSecondaryDelaySecs.isEnabledAndIgnoreFCV() &&
diff --git a/src/mongo/db/repl/repl_set_config_checks.cpp b/src/mongo/db/repl/repl_set_config_checks.cpp
index e425547e349..abd9b16a660 100644
--- a/src/mongo/db/repl/repl_set_config_checks.cpp
+++ b/src/mongo/db/repl/repl_set_config_checks.cpp
@@ -429,16 +429,19 @@ StatusWith<int> validateConfigForInitiate(ReplicationCoordinatorExternalState* e
Status validateConfigForReconfig(const ReplSetConfig& oldConfig,
const ReplSetConfig& newConfig,
bool force,
- bool allowSplitHorizonIP) {
+ bool allowSplitHorizonIP,
+ bool skipFCVCompatibilityCheck) {
Status status =
allowSplitHorizonIP ? newConfig.validateAllowingSplitHorizonIP() : newConfig.validate();
if (!status.isOK()) {
return status;
}
- status = isFCVCompatible(newConfig);
- if (!status.isOK()) {
- return status;
+ if (!skipFCVCompatibilityCheck) {
+ status = isFCVCompatible(newConfig);
+ if (!status.isOK()) {
+ return status;
+ }
}
status = newConfig.checkIfWriteConcernCanBeSatisfied(newConfig.getDefaultWriteConcern());
diff --git a/src/mongo/db/repl/repl_set_config_checks.h b/src/mongo/db/repl/repl_set_config_checks.h
index 616c231fc2c..3f4ea67acc3 100644
--- a/src/mongo/db/repl/repl_set_config_checks.h
+++ b/src/mongo/db/repl/repl_set_config_checks.h
@@ -96,20 +96,25 @@ StatusWith<int> validateConfigForInitiate(ReplicationCoordinatorExternalState* e
* If "allowSplitHorizonIP" is set to true, skips checking whether an IP address exists in
* split horizon configuration.
*
+ * If "skipFCVCompatibilityCheck" is set to true, skips checking whether the
+ * 'secondaryDelaySecs' field name is compatible with the current featureCompatibilityVersion.
+ *
* Returns an indicative error on validation failure.
*/
Status validateConfigForReconfig(const ReplSetConfig& oldConfig,
const ReplSetConfig& newConfig,
bool force,
- bool allowSplitHorizonIP);
+ bool allowSplitHorizonIP,
+ bool skipFCVCompatibilityCheck);
/**
- * Validates that "newConfig" is an acceptable configuration when received in a heartbeat
- * reasponse.
+ * Validates that "newConfig" is an acceptable configuration when
+ * received in a heartbeat reasponse.
*
- * If the new configuration omits the current node, but is otherwise valid, returns
- * ErrorCodes::NodeNotFound. If the configuration is wholly valid, returns Status::OK().
- * Otherwise, returns some other error status.
+ * If the new configuration omits the current node, but is
+ * otherwise valid, returns ErrorCodes::NodeNotFound. If the
+ * configuration is wholly valid, returns Status::OK(). Otherwise,
+ * returns some other error status.
*/
StatusWith<int> validateConfigForHeartbeatReconfig(
ReplicationCoordinatorExternalState* externalState,
diff --git a/src/mongo/db/repl/repl_set_config_checks_test.cpp b/src/mongo/db/repl/repl_set_config_checks_test.cpp
index e3b54553c57..e65b5079c83 100644
--- a/src/mongo/db/repl/repl_set_config_checks_test.cpp
+++ b/src/mongo/db/repl/repl_set_config_checks_test.cpp
@@ -311,16 +311,16 @@ TEST_F(ServiceContextTest, ValidateConfigForReconfig_NewConfigVersionNumberMustB
ASSERT_OK(newConfig.validate());
// Can reconfig from old to new.
- ASSERT_OK(validateConfigForReconfig(oldConfig, newConfig, false, false));
+ ASSERT_OK(validateConfigForReconfig(oldConfig, newConfig, false, false, false));
// Cannot reconfig from old to old (versions must be different).
ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(oldConfig, oldConfig, false, false));
+ validateConfigForReconfig(oldConfig, oldConfig, false, false, false));
// Cannot reconfig from new to old (versions must increase).
ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(newConfig, oldConfig, false, false));
+ validateConfigForReconfig(newConfig, oldConfig, false, false, false));
}
TEST_F(ServiceContextTest, ValidateConfigForReconfig_memberId) {
@@ -345,7 +345,7 @@ TEST_F(ServiceContextTest, ValidateConfigForReconfig_memberId) {
<< "h2"))));
ASSERT_OK(oldConfig.validate());
ASSERT_OK(newConfig.validate());
- ASSERT_OK(validateConfigForReconfig(oldConfig, newConfig, false, false));
+ ASSERT_OK(validateConfigForReconfig(oldConfig, newConfig, false, false, false));
// Case 2: Change the member config setting for the existing member with member id > 255.
oldConfig = ReplSetConfig::parse(BSON("_id"
@@ -365,7 +365,7 @@ TEST_F(ServiceContextTest, ValidateConfigForReconfig_memberId) {
<< "priority" << 0))));
ASSERT_OK(oldConfig.validate());
ASSERT_OK(newConfig.validate());
- ASSERT_OK(validateConfigForReconfig(oldConfig, newConfig, false, false));
+ ASSERT_OK(validateConfigForReconfig(oldConfig, newConfig, false, false, false));
}
@@ -400,10 +400,10 @@ TEST_F(ServiceContextTest, ValidateConfigForReconfig_NewConfigMustNotChangeSetNa
ASSERT_OK(oldConfig.validate());
ASSERT_OK(newConfig.validate());
ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(oldConfig, newConfig, false, false));
+ validateConfigForReconfig(oldConfig, newConfig, false, false, false));
// Forced reconfigs also do not allow this.
ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(newConfig, oldConfig, true, false));
+ validateConfigForReconfig(newConfig, oldConfig, true, false, false));
}
TEST_F(ServiceContextTest, ValidateConfigForReconfig_NewConfigMustNotChangeSetId) {
@@ -438,13 +438,13 @@ TEST_F(ServiceContextTest, ValidateConfigForReconfig_NewConfigMustNotChangeSetId
ASSERT_OK(oldConfig.validate());
ASSERT_OK(newConfig.validate());
- const auto status = validateConfigForReconfig(oldConfig, newConfig, false, false);
+ const auto status = validateConfigForReconfig(oldConfig, newConfig, false, false, false);
ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
ASSERT_STRING_CONTAINS(status.reason(), "New and old configurations differ in replica set ID");
// Forced reconfigs also do not allow this.
ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(newConfig, oldConfig, true, false));
+ validateConfigForReconfig(newConfig, oldConfig, true, false, false));
}
TEST_F(ServiceContextTest, ValidateConfigForReconfig_NewConfigMustNotFlipBuildIndexesFlag) {
@@ -498,13 +498,13 @@ TEST_F(ServiceContextTest, ValidateConfigForReconfig_NewConfigMustNotFlipBuildIn
ASSERT_OK(oldConfig.validate());
ASSERT_OK(newConfig.validate());
ASSERT_OK(oldConfigRefresh.validate());
- ASSERT_OK(validateConfigForReconfig(oldConfig, oldConfigRefresh, false, false));
+ ASSERT_OK(validateConfigForReconfig(oldConfig, oldConfigRefresh, false, false, false));
ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(oldConfig, newConfig, false, false));
+ validateConfigForReconfig(oldConfig, newConfig, false, false, false));
// Forced reconfigs also do not allow this.
ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(oldConfig, newConfig, true, false));
+ validateConfigForReconfig(oldConfig, newConfig, true, false, false));
}
TEST_F(ServiceContextTest, ValidateConfigForReconfig_NewConfigMustNotFlipArbiterFlag) {
@@ -555,12 +555,12 @@ TEST_F(ServiceContextTest, ValidateConfigForReconfig_NewConfigMustNotFlipArbiter
ASSERT_OK(oldConfig.validate());
ASSERT_OK(newConfig.validate());
ASSERT_OK(oldConfigRefresh.validate());
- ASSERT_OK(validateConfigForReconfig(oldConfig, oldConfigRefresh, false, false));
+ ASSERT_OK(validateConfigForReconfig(oldConfig, oldConfigRefresh, false, false, false));
ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(oldConfig, newConfig, false, false));
+ validateConfigForReconfig(oldConfig, newConfig, false, false, false));
// Forced reconfigs also do not allow this.
ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(oldConfig, newConfig, true, false));
+ validateConfigForReconfig(oldConfig, newConfig, true, false, false));
}
TEST_F(ServiceContextTest, ValidateConfigForReconfig_HostAndIdRemappingRestricted) {
@@ -608,6 +608,7 @@ TEST_F(ServiceContextTest, ValidateConfigForReconfig_HostAndIdRemappingRestricte
legalNewConfigWithNewHostAndId,
// Use 'force' since we're adding and removing a node atomically.
true,
+ false,
false));
//
@@ -625,11 +626,13 @@ TEST_F(ServiceContextTest, ValidateConfigForReconfig_HostAndIdRemappingRestricte
<< BSON("_id" << 3 << "host"
<< "h3"))));
ASSERT_OK(illegalNewConfigReusingHost.validate());
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(oldConfig, illegalNewConfigReusingHost, false, false));
+ ASSERT_EQUALS(
+ ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForReconfig(oldConfig, illegalNewConfigReusingHost, false, false, false));
// Forced reconfigs also do not allow this.
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(oldConfig, illegalNewConfigReusingHost, true, false));
+ ASSERT_EQUALS(
+ ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForReconfig(oldConfig, illegalNewConfigReusingHost, true, false, false));
//
// Here, the new config is valid, because all we've changed is the name of
// the host representing _id 2.
@@ -645,7 +648,7 @@ TEST_F(ServiceContextTest, ValidateConfigForReconfig_HostAndIdRemappingRestricte
<< BSON("_id" << 3 << "host"
<< "h3"))));
ASSERT_OK(illegalNewConfigReusingId.validate());
- ASSERT_OK(validateConfigForReconfig(oldConfig, illegalNewConfigReusingId, false, false));
+ ASSERT_OK(validateConfigForReconfig(oldConfig, illegalNewConfigReusingId, false, false, false));
}
TEST_F(ServiceContextTest, ValidateConfigForReconfig_ArbiterPriorityValueMustBeZeroOrOne) {
@@ -706,13 +709,13 @@ TEST_F(ServiceContextTest, ValidateConfigForReconfig_ArbiterPriorityValueMustBeZ
ASSERT_OK(zeroConfig.validate());
ASSERT_OK(oneConfig.validate());
ASSERT_OK(twoConfig.validate());
- ASSERT_OK(validateConfigForReconfig(oldConfig, zeroConfig, false, false));
- ASSERT_OK(validateConfigForReconfig(oldConfig, oneConfig, false, false));
+ ASSERT_OK(validateConfigForReconfig(oldConfig, zeroConfig, false, false, false));
+ ASSERT_OK(validateConfigForReconfig(oldConfig, oneConfig, false, false, false));
ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig,
- validateConfigForReconfig(oldConfig, twoConfig, false, false));
+ validateConfigForReconfig(oldConfig, twoConfig, false, false, false));
// Forced reconfigs also do not allow this.
ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig,
- validateConfigForReconfig(oldConfig, twoConfig, true, false));
+ validateConfigForReconfig(oldConfig, twoConfig, true, false, false));
}
TEST_F(ServiceContextTest, ValidateConfigForInitiate_NewConfigInvalid) {
@@ -762,10 +765,10 @@ TEST_F(ServiceContextTest, ValidateConfigForReconfig_NewConfigInvalid) {
ReplicationCoordinatorExternalStateMock presentOnceExternalState;
presentOnceExternalState.addSelf(HostAndPort("h2"));
ASSERT_EQUALS(ErrorCodes::BadValue,
- validateConfigForReconfig(oldConfig, newConfig, false, false));
+ validateConfigForReconfig(oldConfig, newConfig, false, false, false));
// Forced reconfigs also do not allow this.
ASSERT_EQUALS(ErrorCodes::BadValue,
- validateConfigForReconfig(oldConfig, newConfig, true, false));
+ validateConfigForReconfig(oldConfig, newConfig, true, false, false));
}
TEST_F(ServiceContextTest, ValidateConfigForReconfig_NewConfigWriteConcernNotSatisifiable) {
@@ -791,10 +794,10 @@ TEST_F(ServiceContextTest, ValidateConfigForReconfig_NewConfigWriteConcernNotSat
ReplicationCoordinatorExternalStateMock presentOnceExternalState;
presentOnceExternalState.addSelf(HostAndPort("h2"));
ASSERT_EQUALS(ErrorCodes::UnsatisfiableWriteConcern,
- validateConfigForReconfig(oldConfig, newConfig, false, false));
+ validateConfigForReconfig(oldConfig, newConfig, false, false, false));
// Forced reconfigs also do not allow this.
ASSERT_EQUALS(ErrorCodes::UnsatisfiableWriteConcern,
- validateConfigForReconfig(oldConfig, newConfig, true, false));
+ validateConfigForReconfig(oldConfig, newConfig, true, false, false));
}
TEST_F(ServiceContextTest, ValidateConfigForStartUp_NewConfigInvalid) {
@@ -942,7 +945,7 @@ TEST_F(ServiceContextTest, ValidateForReconfig_ForceStillNeedsValidConfig) {
ReplicationCoordinatorExternalStateMock presentOnceExternalState;
presentOnceExternalState.addSelf(HostAndPort("h2"));
ASSERT_EQUALS(ErrorCodes::BadValue,
- validateConfigForReconfig(oldConfig, newConfig, true, false));
+ validateConfigForReconfig(oldConfig, newConfig, true, false, false));
}
//
@@ -1001,7 +1004,9 @@ Status validateMemberReconfig(BSONArray oldMembers, BSONArray newMembers, BSONOb
// Do reconfig.
const bool force = false;
const bool allowSplitHorizonIP = false;
- return validateConfigForReconfig(oldConfig, newConfig, force, allowSplitHorizonIP);
+ const bool skipFCVCompatibilityCheck = false;
+ return validateConfigForReconfig(
+ oldConfig, newConfig, force, allowSplitHorizonIP, skipFCVCompatibilityCheck);
}
TEST_F(ServiceContextTest, ValidateForReconfig_SingleNodeAdditionAllowed) {
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index d6635da0710..3fd6513fdd9 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -3432,10 +3432,24 @@ Status ReplicationCoordinatorImpl::doReplSetReconfig(OperationContext* opCtx,
BSONObj newConfigObj = newConfig.toBSON();
audit::logReplSetReconfig(opCtx->getClient(), &oldConfigObj, &newConfigObj);
- // User reconfigs should not allow IP addresses in split horizons.
+ // Stepdown can interrupt the setFeatureCompatibilityVersion command after we've transitioned
+ // to the intermediary kUpgrading/kDowngrading state but before the reconfig to change the
+ // 'secondaryDelaySecs' field name. During a subsequent stepup's automatic reconfig, the config
+ // could have an incompatible field name based on the intermediate FCV, but since we must retry
+ // setFeatureCompatibilityVersion until we complete the upgrade/downgrade procedure, the
+ // reconfig to change the delay field name to the correct value will eventually succeed.
+ // Therefore, it is safe for us to skip the FCV compatibility check during the stepup reconfig
+ // to avoid crashing.
+ //
+ // (Generic FCV reference): feature flag support
+ // TODO (SERVER-53354): Remove this check once 5.0 becomes 'lastLTS'.
+ bool skipFCVCompatibilityCheck =
+ serverGlobalParams.featureCompatibility.isUpgradingOrDowngrading();
+
bool allowSplitHorizonIP = !opCtx->getClient()->hasRemote();
- Status validateStatus =
- validateConfigForReconfig(oldConfig, newConfig, force, allowSplitHorizonIP);
+
+ Status validateStatus = validateConfigForReconfig(
+ oldConfig, newConfig, force, allowSplitHorizonIP, skipFCVCompatibilityCheck);
if (!validateStatus.isOK()) {
LOGV2_ERROR(21420,
"replSetReconfig got {error} while validating {newConfig}",