Diffstat (limited to 'src/mongo/db/commands/set_feature_compatibility_version_command.cpp')
-rw-r--r-- src/mongo/db/commands/set_feature_compatibility_version_command.cpp | 294
1 file changed, 167 insertions(+), 127 deletions(-)
diff --git a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
index dba9bfc42d5..f598459ac0d 100644
--- a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
+++ b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
@@ -373,24 +373,12 @@ public:
if (request.getPhase() == SetFCVPhaseEnum::kStart) {
invariant(serverGlobalParams.clusterRole == ClusterRole::ShardServer);
- // TODO SERVER-68008: Remove collMod draining mechanism after 7.0 becomes last LTS.
- if (actualVersion > requestedVersion &&
- !feature_flags::gCollModCoordinatorV3.isEnabledOnVersion(requestedVersion)) {
- // Drain all running collMod v3 coordinator because they produce backward
- // incompatible on disk metadata
- ShardingDDLCoordinatorService::getService(opCtx)
- ->waitForCoordinatorsOfGivenTypeToComplete(
- opCtx, DDLCoordinatorTypeEnum::kCollMod);
- }
-
- // TODO SERVER-68373 remove once 7.0 becomes last LTS
- if (actualVersion > requestedVersion) {
- // Drain the QE compact coordinator because it persists state that is
- // not backwards compatible with earlier versions.
- ShardingDDLCoordinatorService::getService(opCtx)
- ->waitForCoordinatorsOfGivenTypeToComplete(
- opCtx, DDLCoordinatorTypeEnum::kCompactStructuredEncryptionData);
- }
+ // This helper function is only for actions that must be done specifically on
+ // shard servers during phase 1 of the 2-phase setFCV protocol for sharded clusters.
+ // For example, before completing phase 1, we must wait for backward-incompatible
+ // ShardingDDLCoordinators to finish.
+ // We do not expect any other feature-specific work to be done in phase 1.
+ _shardServerPhase1Tasks(opCtx, actualVersion, requestedVersion);
// If we are only running phase-1, then we are done
return true;
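As a minimal sketch of the two-phase dispatch above, the following stand-alone C++ models how a kStart request short-circuits after the shard-only tasks. Phase, runPhase1ShardTasks, and runFullTransition are hypothetical stand-ins invented for this sketch, not the command's real API.

#include <iostream>
#include <optional>

enum class Phase { kStart, kComplete };

// Hypothetical stand-ins for the work named in the hunk above.
void runPhase1ShardTasks() {
    std::cout << "drain backward-incompatible ShardingDDLCoordinators\n";
}
void runFullTransition() {
    std::cout << "run _runUpgrade or _runDowngrade\n";
}

// Phase kStart is only ever sent to shard servers; once its tasks finish,
// the command returns early and the config server later sends kComplete.
bool handleSetFCV(std::optional<Phase> phase) {
    if (phase == Phase::kStart) {
        runPhase1ShardTasks();
        return true;  // phase-1 only: we are done
    }
    runFullTransition();  // kComplete, or a transition on a non-sharded deployment
    return true;
}

int main() {
    handleSetFCV(Phase::kStart);     // shard server, phase 1
    handleSetFCV(Phase::kComplete);  // shard server, phase 2
}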
@@ -400,16 +388,10 @@ public:
invariant(serverGlobalParams.featureCompatibility.isUpgradingOrDowngrading());
invariant(!request.getPhase() || request.getPhase() == SetFCVPhaseEnum::kComplete);
- // If we are downgrading to a version that doesn't support implicit translation of
- // Timeseries collection in sharding DDL Coordinators we need to drain all ongoing
- // coordinators
- if (serverGlobalParams.clusterRole == ClusterRole::ShardServer &&
- !feature_flags::gImplicitDDLTimeseriesNssTranslation.isEnabledOnVersion(
- requestedVersion)) {
- ShardingDDLCoordinatorService::getService(opCtx)->waitForOngoingCoordinatorsToFinish(
- opCtx);
- }
-
+ // All feature-specific FCV upgrade or downgrade code should go into the respective
+ // _runUpgrade and _runDowngrade functions. Each of them has its own helper functions
+ // where all feature-specific upgrade/downgrade code should be placed. Please read the
+ // comments on the helper functions for more details on where to place the code.
if (requestedVersion > actualVersion) {
_runUpgrade(opCtx, request, changeTimestamp);
} else {
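The `requestedVersion > actualVersion` test above works because FCV enumerators are declared in ascending order, so ordinary comparison operators decide the transition's direction. A small sketch under that assumption (the enumerator names here are invented):

#include <iostream>

// Invented FCV values; the assumption, borne out by the usage above, is
// that they are declared in ascending order.
enum class FCV { kVersion_6_0 = 1, kUpgradingFrom_6_0_To_6_1, kVersion_6_1 };

void runTransition(FCV actual, FCV requested) {
    if (requested > actual) {
        std::cout << "upgrade path\n";
    } else {
        std::cout << "downgrade path\n";  // requested < actual
    }
}

int main() {
    runTransition(FCV::kVersion_6_0, FCV::kVersion_6_1);  // upgrade
    runTransition(FCV::kVersion_6_1, FCV::kVersion_6_0);  // downgrade
}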
@@ -447,10 +429,42 @@ public:
}
private:
+ // This helper function is only for actions that must be done specifically on
+ // shard servers during phase 1 of the 2-phase setFCV protocol for sharded clusters.
+ // For example, before completing phase 1, we must wait for backward-incompatible
+ // ShardingDDLCoordinators to finish. This is important in order to ensure that no
+ // shard that is currently a participant of such a backward-incompatible
+ // ShardingDDLCoordinator can transition to the fully downgraded state (and thus
+ // possibly downgrade its binary) while the coordinator is still in progress.
+ // The fact that the FCV has already transitioned to kDowngrading ensures that no
+ // new backward-incompatible ShardingDDLCoordinators can start.
+ // We do not expect any other feature-specific work to be done in phase 1.
+ void _shardServerPhase1Tasks(OperationContext* opCtx,
+ multiversion::FeatureCompatibilityVersion actualVersion,
+ multiversion::FeatureCompatibilityVersion requestedVersion) {
+ // TODO SERVER-68008: Remove collMod draining mechanism after 7.0 becomes last LTS.
+ if (actualVersion > requestedVersion &&
+ !feature_flags::gCollModCoordinatorV3.isEnabledOnVersion(requestedVersion)) {
+ // Drain all running collMod v3 coordinators because they produce
+ // backward-incompatible on-disk metadata.
+ ShardingDDLCoordinatorService::getService(opCtx)
+ ->waitForCoordinatorsOfGivenTypeToComplete(opCtx, DDLCoordinatorTypeEnum::kCollMod);
+ }
+
+ // TODO SERVER-68373 remove once 7.0 becomes last LTS
+ if (actualVersion > requestedVersion) {
+ // Drain the QE compact coordinator because it persists state that is
+ // not backwards compatible with earlier versions.
+ ShardingDDLCoordinatorService::getService(opCtx)
+ ->waitForCoordinatorsOfGivenTypeToComplete(
+ opCtx, DDLCoordinatorTypeEnum::kCompactStructuredEncryptionData);
+ }
+ }
+
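waitForCoordinatorsOfGivenTypeToComplete itself is not part of this diff. As a rough model of the drain it provides, here is a toy registry that blocks until no coordinator of a given type is live; the real service tracks coordinators quite differently, so treat every name below as this sketch's own.

#include <condition_variable>
#include <map>
#include <mutex>

enum class DDLCoordinatorType { kCollMod, kCompactStructuredEncryptionData };

class CoordinatorRegistry {
public:
    void onStart(DDLCoordinatorType t) {
        std::lock_guard lk(_m);
        ++_active[t];
    }
    void onComplete(DDLCoordinatorType t) {
        {
            std::lock_guard lk(_m);
            --_active[t];
        }
        _cv.notify_all();
    }
    // Blocks until every live coordinator of type t has completed. The wait
    // terminates because the FCV is already kDowngrading when phase 1 runs,
    // so no new coordinator of a gated type can start in the meantime.
    void waitForTypeToComplete(DDLCoordinatorType t) {
        std::unique_lock lk(_m);
        _cv.wait(lk, [&] { return _active[t] == 0; });
    }

private:
    std::mutex _m;
    std::condition_variable _cv;
    std::map<DDLCoordinatorType, int> _active;
};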
// This helper function is for any actions that should be done before taking the FCV full
// transition lock in S mode. It is required that the code in this helper function is idempotent
// and could be done after _runDowngrade even if it failed at any point in the middle of
- // _userCollectionsDowngradeCleanup or _internalServerDowngradeCleanup.
+ // _userCollectionsUassertsForDowngrade or _internalServerCleanupForDowngrade.
void _prepareForUpgrade(OperationContext* opCtx) {
if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
return;
@@ -464,16 +478,16 @@ private:
// This helper function is for any user collections uasserts, creations, or deletions that need
// to happen during the upgrade. It is required that the code in this helper function is
// idempotent and could be done after _runDowngrade even if it failed at any point in the middle
- // of _userCollectionsDowngradeCleanup or _internalServerDowngradeCleanup.
- void _userCollectionsUpgradeUassertsAndUpdates() {
+ // of _userCollectionsUassertsForDowngrade or _internalServerCleanupForDowngrade.
+ void _userCollectionsWorkForUpgrade() {
return;
}
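These helpers keep insisting on idempotency because a setFCV that fails partway through is simply retried. In miniature, using a hypothetical marker collection rather than anything in the real catalog:

#include <cassert>
#include <set>
#include <string>

// Running the update once or many times yields the same catalog state,
// which is what lets a retried upgrade pass through this step again.
void ensureMarkerCollection(std::set<std::string>& catalog) {
    catalog.insert("admin.new_feature_metadata");  // no-op when present
}

int main() {
    std::set<std::string> catalog;
    ensureMarkerCollection(catalog);
    ensureMarkerCollection(catalog);  // retry after a hypothetical failure
    assert(catalog.size() == 1);
}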
// This helper function is for updating metadata to make sure the new features in the
// upgraded version work for sharded and non-sharded clusters. It is required that the code
// in this helper function is idempotent and could be done after _runDowngrade even if it
- // failed at any point in the middle of _userCollectionsDowngradeCleanup or
- // _internalServerDowngradeCleanup.
+ // failed at any point in the middle of _userCollectionsUassertsForDowngrade or
+ // _internalServerCleanupForDowngrade.
void _completeUpgrade(OperationContext* opCtx,
const multiversion::FeatureCompatibilityVersion requestedVersion) {
if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
@@ -496,6 +510,17 @@ private:
}
}
+ // _runUpgrade performs all the upgrade-specific code for setFCV. Any new feature-specific
+ // upgrade code should be placed in the _runUpgrade helper functions:
+ // * _prepareForUpgrade: for any upgrade actions that should be done before taking the FCV
+ //   full transition lock in S mode
+ // * _userCollectionsWorkForUpgrade: for any user collections uasserts, creations, or
+ //   deletions that need to happen during the upgrade. This happens after the FCV full
+ //   transition lock.
+ // * _completeUpgrade: for updating metadata to make sure the new features in the upgraded
+ //   version work for sharded and non-sharded clusters
+ // Please read the comments on those helper functions for more details on what should be
+ // placed in each function.
void _runUpgrade(OperationContext* opCtx,
const SetFeatureCompatibilityVersion& request,
boost::optional<Timestamp> changeTimestamp) {
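A sketch of the ordering the comment above prescribes, with stub functions that merely echo each step; the real _runUpgrade also coordinates with shards and commits the FCV document, which this sketch omits.

#include <iostream>

// Stubs standing in for the helpers named in the comment above.
void prepareForUpgrade() { std::cout << "1. prepare (before FCV lock)\n"; }
void userCollectionsWorkForUpgrade() { std::cout << "3. user collections work\n"; }
void completeUpgrade() { std::cout << "4. complete (metadata updates)\n"; }

void runUpgradeSketch() {
    prepareForUpgrade();
    std::cout << "2. take + release FCV full transition lock (S mode)\n";
    userCollectionsWorkForUpgrade();
    completeUpgrade();
}

int main() { runUpgradeSketch(); }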
@@ -509,7 +534,7 @@ private:
// This helper function is for any actions that should be done before taking the FCV full
// transition lock in S mode. It is required that the code in this helper function is
// idempotent and could be done after _runDowngrade even if it failed at any point in the
- // middle of _userCollectionsDowngradeCleanup or _internalServerDowngradeCleanup.
+ // middle of _userCollectionsUassertsForDowngrade or _internalServerCleanupForDowngrade.
_prepareForUpgrade(opCtx);
{
@@ -526,10 +551,10 @@ private:
// This helper function is for any user collections uasserts, creations, or deletions that
// need to happen during the upgrade. It is required that the code in this helper function
// is idempotent and could be done after _runDowngrade even if it failed at any point in the
- // middle of _userCollectionsDowngradeCleanup or _internalServerDowngradeCleanup.
+ // middle of _userCollectionsUassertsForDowngrade or _internalServerCleanupForDowngrade.
if (serverGlobalParams.clusterRole == ClusterRole::ShardServer ||
serverGlobalParams.clusterRole == ClusterRole::None) {
- _userCollectionsUpgradeUassertsAndUpdates();
+ _userCollectionsWorkForUpgrade();
}
uassert(ErrorCodes::Error(549180),
@@ -553,8 +578,8 @@ private:
// This helper function is for updating metadata to make sure the new features in the
// upgraded version work for sharded and non-sharded clusters. It is required that the code
// in this helper function is idempotent and could be done after _runDowngrade even if it
- // failed at any point in the middle of _userCollectionsDowngradeCleanup or
- // _internalServerDowngradeCleanup.
+ // failed at any point in the middle of _userCollectionsUassertsForDowngrade or
+ // _internalServerCleanupForDowngrade.
_completeUpgrade(opCtx, requestedVersion);
hangWhileUpgrading.pauseWhileSet(opCtx);
@@ -596,108 +621,106 @@ private:
// InterruptedDueToReplStateChange) or ErrorCodes::CannotDowngrade. The uasserts added to this
// helper function can only have the CannotDowngrade error code, indicating that the user must
// manually clean up some user data in order to retry the FCV downgrade.
- void _uassertUserDataAndSettingsReadyForDowngrade(
+ void _userCollectionsUassertsForDowngrade(
OperationContext* opCtx, const multiversion::FeatureCompatibilityVersion requestedVersion) {
if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
return;
} else if (serverGlobalParams.clusterRole == ClusterRole::ShardServer ||
serverGlobalParams.clusterRole == ClusterRole::None) {
- _userCollectionsUasserts(opCtx, requestedVersion);
- }
- }
+ if (!feature_flags::gTimeseriesScalabilityImprovements.isEnabledOnVersion(
+ requestedVersion)) {
+ for (const auto& dbName : DatabaseHolder::get(opCtx)->getNames()) {
+ Lock::DBLock dbLock(opCtx, dbName, MODE_IX);
+ catalog::forEachCollectionFromDb(
+ opCtx,
+ dbName,
+ MODE_S,
+ [&](const CollectionPtr& collection) {
+ invariant(collection->getTimeseriesOptions());
+
+ auto indexCatalog = collection->getIndexCatalog();
+ auto indexIt = indexCatalog->getIndexIterator(
+ opCtx,
+ IndexCatalog::InclusionPolicy::kReady |
+ IndexCatalog::InclusionPolicy::kUnfinished);
+
+ while (indexIt->more()) {
+ auto indexEntry = indexIt->next();
+ // Fail to downgrade if the time-series collection has a partial,
+ // TTL index.
+ if (indexEntry->descriptor()->isPartial()) {
+ // TODO (SERVER-67659): Remove partial, TTL index check once
+ // FCV 7.0 becomes last-lts.
+ uassert(
+ ErrorCodes::CannotDowngrade,
+ str::stream()
+ << "Cannot downgrade the cluster when there are "
+ "secondary "
+ "TTL indexes with partial filters on time-series "
+ "collections. Drop all partial, TTL indexes on "
+ "time-series collections before downgrading. First "
+ "detected incompatible index name: '"
+ << indexEntry->descriptor()->indexName()
+ << "' on collection: '"
+ << collection->ns().getTimeseriesViewNamespace() << "'",
+ !indexEntry->descriptor()->infoObj().hasField(
+ IndexDescriptor::kExpireAfterSecondsFieldName));
+ }
+ }
+ return true;
+ },
+ [&](const CollectionPtr& collection) {
+ return collection->getTimeseriesOptions() != boost::none;
+ });
+ }
+ }
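The time-series check just added boils down to one predicate per index: an index blocks the downgrade when it is both partial and TTL. A self-contained model of that predicate follows; the IndexSpec struct is invented for this sketch, whereas the real code inspects the descriptor's infoObj for expireAfterSeconds.

#include <stdexcept>
#include <string>
#include <vector>

struct IndexSpec {
    std::string name;
    bool isPartial = false;
    bool hasExpireAfterSeconds = false;  // i.e. the index is TTL
};

// Throws (modelling uassert with CannotDowngrade) on the first index that
// is partial and TTL, naming it so the user knows what to drop.
void uassertNoPartialTTLIndexes(const std::vector<IndexSpec>& indexes,
                                const std::string& collName) {
    for (const auto& ix : indexes) {
        if (ix.isPartial && ix.hasExpireAfterSeconds) {
            throw std::runtime_error(
                "CannotDowngrade: drop partial TTL index '" + ix.name +
                "' on time-series collection '" + collName + "' first");
        }
    }
}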
- void _userCollectionsUasserts(
- OperationContext* opCtx, const multiversion::FeatureCompatibilityVersion requestedVersion) {
- if (!feature_flags::gTimeseriesScalabilityImprovements.isEnabledOnVersion(
- requestedVersion)) {
+ // Block downgrade for collections with encrypted fields
+ // TODO SERVER-67760: Remove once FCV 7.0 becomes last-lts.
for (const auto& dbName : DatabaseHolder::get(opCtx)->getNames()) {
Lock::DBLock dbLock(opCtx, dbName, MODE_IX);
catalog::forEachCollectionFromDb(
- opCtx,
- dbName,
- MODE_S,
- [&](const CollectionPtr& collection) {
- invariant(collection->getTimeseriesOptions());
-
- auto indexCatalog = collection->getIndexCatalog();
- auto indexIt = indexCatalog->getIndexIterator(
- opCtx,
- IndexCatalog::InclusionPolicy::kReady |
- IndexCatalog::InclusionPolicy::kUnfinished);
-
- while (indexIt->more()) {
- auto indexEntry = indexIt->next();
- // Fail to downgrade if the time-series collection has a partial, TTL
- // index.
- if (indexEntry->descriptor()->isPartial()) {
- // TODO (SERVER-67659): Remove partial, TTL index check once FCV 7.0
- // becomes last-lts.
- uassert(
- ErrorCodes::CannotDowngrade,
- str::stream()
- << "Cannot downgrade the cluster when there are secondary "
- "TTL indexes with partial filters on time-series "
- "collections. Drop all partial, TTL indexes on "
- "time-series collections before downgrading. First "
- "detected incompatible index name: '"
- << indexEntry->descriptor()->indexName()
- << "' on collection: '"
- << collection->ns().getTimeseriesViewNamespace() << "'",
- !indexEntry->descriptor()->infoObj().hasField(
- IndexDescriptor::kExpireAfterSecondsFieldName));
- }
- }
+ opCtx, dbName, MODE_X, [&](const CollectionPtr& collection) {
+ auto& efc = collection->getCollectionOptions().encryptedFieldConfig;
- return true;
- },
- [&](const CollectionPtr& collection) {
- return collection->getTimeseriesOptions() != boost::none;
- });
- }
- }
-
- // Block downgrade for collections with encrypted fields
- // TODO SERVER-67760: Remove once FCV 7.0 becomes last-lts.
- for (const auto& dbName : DatabaseHolder::get(opCtx)->getNames()) {
- Lock::DBLock dbLock(opCtx, dbName, MODE_IX);
- catalog::forEachCollectionFromDb(
- opCtx, dbName, MODE_X, [&](const CollectionPtr& collection) {
- auto& efc = collection->getCollectionOptions().encryptedFieldConfig;
-
- uassert(ErrorCodes::CannotDowngrade,
+ uassert(
+ ErrorCodes::CannotDowngrade,
str::stream()
<< "Cannot downgrade the cluster as collection " << collection->ns()
<< " has 'encryptedFields' with range indexes",
!(efc.has_value() && hasQueryType(efc.get(), QueryTypeEnum::Range)));
- return true;
- });
- }
-
- if (!feature_flags::gfeatureFlagCappedCollectionsRelaxedSize.isEnabled(
- serverGlobalParams.featureCompatibility)) {
- for (const auto& dbName : DatabaseHolder::get(opCtx)->getNames()) {
- Lock::DBLock dbLock(opCtx, dbName, MODE_IX);
- catalog::forEachCollectionFromDb(
- opCtx,
- dbName,
- MODE_S,
- [&](const CollectionPtr& collection) {
- uasserted(
- ErrorCodes::CannotDowngrade,
- str::stream()
- << "Cannot downgrade the cluster when there are capped "
- "collection with a size that is non multiple of 256 bytes. "
- "Drop or resize the following collection: '"
- << collection->ns() << "'");
return true;
- },
- [&](const CollectionPtr& collection) {
- return collection->isCapped() && collection->getCappedMaxSize() % 256 != 0;
});
}
+
+ if (!feature_flags::gfeatureFlagCappedCollectionsRelaxedSize.isEnabledOnVersion(
+ requestedVersion)) {
+ for (const auto& dbName : DatabaseHolder::get(opCtx)->getNames()) {
+ Lock::DBLock dbLock(opCtx, dbName, MODE_IX);
+ catalog::forEachCollectionFromDb(
+ opCtx,
+ dbName,
+ MODE_S,
+ [&](const CollectionPtr& collection) {
+ uasserted(
+ ErrorCodes::CannotDowngrade,
+ str::stream()
+ << "Cannot downgrade the cluster when there are capped "
+ "collection with a size that is non multiple of 256 bytes. "
+ "Drop or resize the following collection: '"
+ << collection->ns() << "'");
+ return true;
+ },
+ [&](const CollectionPtr& collection) {
+ return collection->isCapped() &&
+ collection->getCappedMaxSize() % 256 != 0;
+ });
+ }
+ }
}
}
+
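The capped-collection filter above is worth isolating: only a capped collection whose maximum size is not a multiple of 256 bytes trips the uasserted callback. A self-contained version of that predicate:

#include <cstdint>

// Mirrors the filter lambda above.
bool cappedSizeBlocksDowngrade(bool isCapped, std::int64_t cappedMaxSize) {
    return isCapped && cappedMaxSize % 256 != 0;
}

int main() {
    bool ok = !cappedSizeBlocksDowngrade(true, 512)     // 512 = 2 * 256
        && cappedSizeBlocksDowngrade(true, 1000)        // 1000 % 256 == 232
        && !cappedSizeBlocksDowngrade(false, 1000);     // not capped: exempt
    return ok ? 0 : 1;
}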
// This helper function is for any internal server downgrade cleanup, such as dropping
// collections or aborting. This cleanup will happen after user collection downgrade
// cleanup. The code in this helper function is required to be idempotent in case the node
@@ -710,7 +733,7 @@ private:
// is consistent on disk and for reads/writes) or with an fassert (indicating a server bug and
// that the data is corrupted). ManualInterventionRequired and fasserts are errors that are not
// expected to occur in practice, but if they did, they would turn into a Support case.
- void _internalServerDowngradeCleanup(
+ void _internalServerCleanupForDowngrade(
OperationContext* opCtx, const multiversion::FeatureCompatibilityVersion requestedVersion) {
if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
_dropInternalGlobalIndexesCollection(opCtx, requestedVersion);
@@ -720,6 +743,14 @@ private:
// be able to apply the oplog entries correctly.
abortAllReshardCollection(opCtx);
} else if (serverGlobalParams.clusterRole == ClusterRole::ShardServer) {
+ // If we are downgrading to a version that doesn't support implicit translation of
+ // timeseries collections in sharding DDL coordinators, we need to drain all ongoing
+ // coordinators.
+ if (!feature_flags::gImplicitDDLTimeseriesNssTranslation.isEnabledOnVersion(
+ requestedVersion)) {
+ ShardingDDLCoordinatorService::getService(opCtx)
+ ->waitForOngoingCoordinatorsToFinish(opCtx);
+ }
_dropInternalGlobalIndexesCollection(opCtx, requestedVersion);
} else {
return;
@@ -731,14 +762,14 @@ private:
// TODO SERVER-67392: Remove when 7.0 branches-out.
// Coordinators that commit indexes to the CSRS must be drained before this point. Older
// FCVs must not find cluster-wide indexes.
- if (requestedVersion == GenericFCV::kLastLTS) {
+ if (!feature_flags::gGlobalIndexesShardingCatalog.isEnabledOnVersion(requestedVersion)) {
NamespaceString indexCatalogNss;
if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
indexCatalogNss = NamespaceString::kConfigsvrIndexCatalogNamespace;
} else {
indexCatalogNss = NamespaceString::kShardIndexCatalogNamespace;
}
- LOGV2(6280502, "Droping global indexes collection", "nss"_attr = indexCatalogNss);
+ LOGV2(6280502, "Dropping global indexes collection", "nss"_attr = indexCatalogNss);
DropReply dropReply;
const auto deletionStatus =
dropCollection(opCtx,
@@ -785,6 +816,16 @@ private:
}
}
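Several hunks in this diff replace a hard-coded `requestedVersion == GenericFCV::kLastLTS` with `isEnabledOnVersion(requestedVersion)`. A sketch of why that is more robust, assuming (as the usage suggests) that a version-gated flag is enabled exactly from its threshold version onward; both the enum values and the flag type below are invented.

#include <cassert>

enum class FCV { kV60 = 1, kV61, kV62 };  // ascending order

// Assumed semantics of a version-gated feature flag.
struct VersionGatedFlag {
    FCV thresholdVersion;
    bool isEnabledOnVersion(FCV v) const { return v >= thresholdVersion; }
};

int main() {
    // If the flag's threshold later moves from kV62 to kV61, this check
    // stays correct, whereas '== kLastLTS' would silently stop matching.
    VersionGatedFlag gSomeFlag{FCV::kV62};
    assert(!gSomeFlag.isEnabledOnVersion(FCV::kV61));  // downgrade target: clean up
    assert(gSomeFlag.isEnabledOnVersion(FCV::kV62));   // feature stays available
}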
+ // _runDowngrade performs all the downgrade-specific code for setFCV. Any new
+ // feature-specific downgrade code should be placed in the _runDowngrade helper functions:
+ // * _prepareForDowngrade: for any downgrade actions that should be done before taking the
+ //   FCV full transition lock in S mode
+ // * _userCollectionsUassertsForDowngrade: for any checks on user data or settings that
+ //   will uassert if users need to manually clean up user data or settings
+ // * _internalServerCleanupForDowngrade: for any internal server downgrade cleanup
+ // Please read the comments on those helper functions for more details on what should be
+ // placed in each function.
void _runDowngrade(OperationContext* opCtx,
const SetFeatureCompatibilityVersion& request,
boost::optional<Timestamp> changeTimestamp) {
@@ -838,7 +879,7 @@ private:
// InterruptedDueToReplStateChange) or ErrorCodes::CannotDowngrade. The uasserts added to
// this helper function can only have the CannotDowngrade error code, indicating that the
// user must manually clean up some user data in order to retry the FCV downgrade.
- _uassertUserDataAndSettingsReadyForDowngrade(opCtx, requestedVersion);
+ _userCollectionsUassertsForDowngrade(opCtx, requestedVersion);
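The contract spelled out around this call, in miniature: this step may fail only in ways the user can act on. The exception type below is hypothetical; the real code expresses the same idea with uassert and ErrorCodes::CannotDowngrade.

#include <iostream>
#include <stdexcept>
#include <string>

// Hypothetical stand-in for ErrorCodes::CannotDowngrade: the only failure
// this step is allowed to surface, and one the user can fix and retry.
struct CannotDowngrade : std::runtime_error {
    explicit CannotDowngrade(const std::string& what) : std::runtime_error(what) {}
};

void userCollectionsUassertsSketch(bool hasIncompatibleData) {
    if (hasIncompatibleData) {
        throw CannotDowngrade(
            "drop the incompatible indexes, then retry setFeatureCompatibilityVersion");
    }
    // otherwise: data is ready, the downgrade may proceed
}

int main() {
    try {
        userCollectionsUassertsSketch(true);
    } catch (const CannotDowngrade& ex) {
        std::cout << "user action required: " << ex.what() << "\n";
    }
}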
// This helper function is for any internal server downgrade cleanup, such as dropping
// collections or aborting. This cleanup will happen after user collection downgrade
@@ -854,13 +895,12 @@ private:
// bug and that the data is corrupted). ManualInterventionRequired and fasserts are errors
// that are not expected to occur in practice, but if they did, they would turn into a
// Support case.
- _internalServerDowngradeCleanup(opCtx, requestedVersion);
+ _internalServerCleanupForDowngrade(opCtx, requestedVersion);
if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
// Tell the shards to enter phase-2 of setFCV (fully downgraded)
_sendSetFCVRequestToShards(opCtx, request, changeTimestamp, SetFCVPhaseEnum::kComplete);
-
- if (requestedVersion == GenericFCV::kLastLTS) {
+ if (!feature_flags::gNoMoreAutoSplitter.isEnabledOnVersion(requestedVersion)) {
// chunkResizeAsyncTask is only used by config servers as part of internal server
// downgrade cleanup. Waiting for the task to complete is put at the end of
// _runDowngrade instead of inside _internalServerCleanupForDowngrade because the task