diff options
author | Benety Goh <benety@mongodb.com> | 2022-08-08 21:18:34 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-08-08 21:40:35 +0000 |
commit | abdedd367e2f331816354563f8ae95f6bb35c51d (patch) | |
tree | 9113059752235094cee6a1ac91338d835fbb195b | |
parent | 9fbd63a46952ffc8d2a86c312c203cc3643fd842 (diff) | |
download | mongo-abdedd367e2f331816354563f8ae95f6bb35c51d.tar.gz |
SERVER-68522 Prevent 5.0 binary from starting in FCV 4.4 with misconfigured TTL index (tag: r5.0.11-rc0)
6 files changed, 264 insertions, 0 deletions
diff --git a/jstests/multiVersion/ttl_expire_nan_initial_sync.js b/jstests/multiVersion/ttl_expire_nan_initial_sync.js new file mode 100644 index 00000000000..78ff4295cd1 --- /dev/null +++ b/jstests/multiVersion/ttl_expire_nan_initial_sync.js @@ -0,0 +1,47 @@ +/** + * Tests that adding a node running a 5.0+ binary to an existing 4.4 cluster containing + * a TTL index with NaN for 'expireAfterSeconds' will trigger a fassert on startup. + * + * @tags: [ + * requires_replication, + * ] + */ +(function() { +'use strict'; + +load('jstests/noPassthrough/libs/index_build.js'); + +const rst = new ReplSetTest({ + nodes: [{binVersion: 'last-lts'}], +}); +rst.startSet(); +rst.initiate(); + +let primary = rst.getPrimary(); +const db = primary.getDB('test'); +const coll = db.t; + +assert.commandWorked(coll.createIndex({t: 1}, {expireAfterSeconds: NaN})); +assert.commandWorked(coll.insert({_id: 0, t: ISODate()})); + +const newNode = rst.add({ + binVersion: 'latest', + rsConfig: {votes: 0, priority: 0}, + setParameter: {numInitialSyncAttempts: 1}, +}); +rst.reInitiate(); + +// Confirm that we are unable to use a 5.0+ server binary to join the replica set. +assert.soon(() => { + return rawMongoProgramOutput().search(/Fatal assertion/) >= 0; +}); +rst.stop(newNode, /*signal=*/undefined, {allowedExitCode: MongoRunner.EXIT_ABRUPT}); + +// Failed startup logs should contain details on the invalid TTL index. 
+assert.gte( + rawMongoProgramOutput().search( + /Fatal assertion.*40088.*CannotCreateIndex.*t_1.*TTL indexes cannot have NaN 'expireAfterSeconds'/), + 0); + +rst.stopSet(); +})(); diff --git a/jstests/multiVersion/ttl_expire_nan_mixed_cluster.js b/jstests/multiVersion/ttl_expire_nan_mixed_cluster.js new file mode 100644 index 00000000000..2cd07a9fee7 --- /dev/null +++ b/jstests/multiVersion/ttl_expire_nan_mixed_cluster.js @@ -0,0 +1,40 @@ +/** + * Tests that a mixed cluster in FCV 4.4 containing a TTL index with NaN for 'expireAfterSeconds' + * will fail to replicate the TTL index to a secondary running a 5.0+ binary. + * + * @tags: [ + * requires_replication, + * ] + */ +(function() { +'use strict'; + +load('jstests/noPassthrough/libs/index_build.js'); + +const rst = new ReplSetTest({ + nodes: [{binVersion: 'last-lts'}, {binVersion: 'latest', rsConfig: {votes: 0, priority: 0}}], +}); +rst.startSet(); +rst.initiate(); + +let primary = rst.getPrimary(); +const db = primary.getDB('test'); +const coll = db.t; + +assert.commandWorked(coll.createIndex({t: 1}, {expireAfterSeconds: NaN})); +assert.commandWorked(coll.insert({_id: 0, t: ISODate()})); + +// The secondary should fail to create the TTL index with NaN for 'expireAfterSeconds' during +// oplog application and shut down with a fatal assertion. 
+assert.soon(() => { + return rawMongoProgramOutput().search(/Fatal assertion/) >= 0; +}); +const secondary = rst.getSecondary(); +rst.stop(secondary, /*signal=*/undefined, {allowedExitCode: MongoRunner.EXIT_ABRUPT}); +assert.gte( + rawMongoProgramOutput().search( + /Fatal assertion.*34437.*CannotCreateIndex.*t_1.*TTL indexes cannot have NaN 'expireAfterSeconds'/), + 0); + +rst.stopSet(); +})(); diff --git a/jstests/multiVersion/ttl_expire_nan_upgrade_fails_on_startup.js b/jstests/multiVersion/ttl_expire_nan_upgrade_fails_on_startup.js new file mode 100644 index 00000000000..7c99a8a3065 --- /dev/null +++ b/jstests/multiVersion/ttl_expire_nan_upgrade_fails_on_startup.js @@ -0,0 +1,56 @@ +/** + * Tests that upgrading a server containing a TTL index with NaN for 'expireAfterSeconds' + * will trigger a fassert on startup. + * + * @tags: [ + * requires_persistence, + * requires_replication, + * ] + */ +(function() { +'use strict'; + +load('jstests/noPassthrough/libs/index_build.js'); + +const rst = new ReplSetTest({ + nodes: [{binVersion: 'last-lts'}, {binVersion: 'last-lts', rsConfig: {votes: 0, priority: 0}}], +}); +rst.startSet(); +rst.initiate(); + +let primary = rst.getPrimary(); +const db = primary.getDB('test'); +const coll = db.t; + +assert.commandWorked(coll.createIndex({t: 1}, {expireAfterSeconds: NaN})); +assert.commandWorked(coll.insert({_id: 0, t: ISODate()})); + +// Force checkpoint in storage engine to ensure index is part of the catalog +// in finished state at startup. +rst.awaitReplication(); +const secondary = rst.getSecondary(); +assert.commandWorked(secondary.adminCommand({fsync: 1})); + +// Restart the secondary with a 5.0+ binary. Since the node is not expected to complete its +// startup procedure, we wait for the fassert to show up in the logs before using ReplSetTest.stop() +// to check the process exit code. 
+rst.restart( + secondary, {binVersion: 'latest', waitForConnect: false}, /*signal=*/undefined, /*wait=*/false); +assert.soon(() => { + return rawMongoProgramOutput().search(/Fatal assertion/) >= 0; +}); +rst.stop(secondary, /*signal=*/undefined, {allowedExitCode: MongoRunner.EXIT_ABORT}); + +// Failed startup logs should contain details on the invalid TTL index. +let logs = rawMongoProgramOutput(); +assert.gte(logs.search( + /6852200.*Found an existing TTL index with NaN 'expireAfterSeconds' in the catalog/), + 0); +assert.gte( + logs.search( + /6852201.*TTL indexes with NaN 'expireAfterSeconds' are not supported under FCV 4.4/), + 0); +assert.gte(logs.search(/Fatal assertion.*6852202/), 0); + +rst.stopSet(); +})(); diff --git a/jstests/noPassthrough/ttl_expire_nan_downgrade.js b/jstests/noPassthrough/ttl_expire_nan_downgrade.js new file mode 100644 index 00000000000..62b5895a773 --- /dev/null +++ b/jstests/noPassthrough/ttl_expire_nan_downgrade.js @@ -0,0 +1,36 @@ +/** + * Tests that the cluster cannot be downgraded when there are TTL indexes with + * NaN for 'expireAfterSeconds'. 
+ * + * @tags: [ + * requires_fcv_50, + * requires_replication, + * ] + */ +(function() { +'use strict'; + +const rst = new ReplSetTest({ + nodes: [{}, {rsConfig: {votes: 0, priority: 0}}], +}); +rst.startSet(); +rst.initiate(); + +let primary = rst.getPrimary(); +const db = primary.getDB('test'); +const coll = db.t; + +assert.commandWorked(coll.createIndex({t: 1}, {expireAfterSeconds: NaN})); +assert.commandWorked(coll.insert({_id: 0, t: ISODate()})); + +assert.commandFailedWithCode(db.adminCommand({setFeatureCompatibilityVersion: lastLTSFCV}), + ErrorCodes.CannotDowngrade); + +assert.commandWorked( + db.runCommand({collMod: coll.getName(), index: {name: 't_1', expireAfterSeconds: 60}})); + +assert.commandWorked(db.adminCommand({setFeatureCompatibilityVersion: lastLTSFCV})); +assert.commandWorked(db.adminCommand({setFeatureCompatibilityVersion: latestFCV})); + +rst.stopSet(); +})(); diff --git a/src/mongo/db/catalog/index_catalog_impl.cpp b/src/mongo/db/catalog/index_catalog_impl.cpp index 371680f0af8..bd9025afb10 100644 --- a/src/mongo/db/catalog/index_catalog_impl.cpp +++ b/src/mongo/db/catalog/index_catalog_impl.cpp @@ -128,6 +128,33 @@ Status IndexCatalogImpl::init(OperationContext* opCtx, Collection* collection) { } auto descriptor = std::make_unique<IndexDescriptor>(_getAccessMethodName(keyPattern), spec); + // TTL indexes with NaN 'expireAfterSeconds' cause problems in multiversion settings. 
+ if (spec.hasField(IndexDescriptor::kExpireAfterSecondsFieldName)) { + if (spec[IndexDescriptor::kExpireAfterSecondsFieldName].isNaN()) { + LOGV2_OPTIONS(6852200, + {logv2::LogTag::kStartupWarnings}, + "Found an existing TTL index with NaN 'expireAfterSeconds' in the " + "catalog.", + "ns"_attr = collection->ns(), + "uuid"_attr = collection->uuid(), + "index"_attr = indexName, + "spec"_attr = spec); + using FCV = ServerGlobalParams::FeatureCompatibility; + const auto& fcv = serverGlobalParams.featureCompatibility; + if (fcv.isVersionInitialized() && + fcv.isLessThanOrEqualTo(FCV::Version::kFullyDowngradedTo44)) { + LOGV2_ERROR(6852201, + "TTL indexes with NaN 'expireAfterSeconds' are not supported " + "under FCV 4.4 on a 5.0+ binary.", + "ns"_attr = collection->ns(), + "uuid"_attr = collection->uuid(), + "index"_attr = indexName, + "spec"_attr = spec); + fassertFailed(6852202); + } + } + } + // TTL indexes are not compatible with capped collections. if (spec.hasField(IndexDescriptor::kExpireAfterSecondsFieldName) && !collection->isCapped()) { @@ -775,6 +802,20 @@ Status IndexCatalogImpl::_isSpecOk(OperationContext* opCtx, } } + // TTL indexes with NaN 'expireAfterSeconds' cause problems in multiversion settings. 
+ if (spec.hasField(IndexDescriptor::kExpireAfterSecondsFieldName)) { + if (spec[IndexDescriptor::kExpireAfterSecondsFieldName].isNaN()) { + using FCV = ServerGlobalParams::FeatureCompatibility; + const auto& fcv = serverGlobalParams.featureCompatibility; + if (fcv.isVersionInitialized() && + fcv.isLessThanOrEqualTo(FCV::Version::kFullyDowngradedTo44)) { + return Status(ErrorCodes::CannotCreateIndex, + "TTL indexes cannot have NaN 'expireAfterSeconds' under FCV 4.4 " + "on a 5.0+ binary."); + } + } + } + // --- only storage engine checks allowed below this ---- BSONElement storageEngineElement = spec.getField("storageEngine"); diff --git a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp index 31cce141b6e..aedd869061c 100644 --- a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp +++ b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp @@ -35,6 +35,7 @@ #include "mongo/db/auth/authorization_session.h" #include "mongo/db/catalog/coll_mod.h" +#include "mongo/db/catalog/collection_catalog_helper.h" #include "mongo/db/catalog/database.h" #include "mongo/db/catalog/database_holder.h" #include "mongo/db/catalog/drop_collection.h" @@ -677,6 +678,48 @@ private: } } + /** + * TTL indexes with NaN 'expireAfterSeconds' are only supported in 5.0. If the user tries to + * downgrade the cluster to an earlier version, they must first remove all TTL indexes with + * NaN 'expireAfterSeconds'. 
+ */ + void _disallowTTLIndexesWithNaNExpireAfterSecondsOnDowngrade(OperationContext* opCtx) { + auto collCatalog = CollectionCatalog::get(opCtx); + for (const auto& db : collCatalog->getAllDbNames()) { + for (auto collIt = collCatalog->begin(opCtx, db); collIt != collCatalog->end(opCtx); + ++collIt) { + NamespaceStringOrUUID collName( + collCatalog->lookupNSSByUUID(opCtx, collIt.uuid().get()).get()); + AutoGetCollectionForRead coll(opCtx, collName); + if (!coll) { + continue; + } + + auto idxCatalog = coll->getIndexCatalog(); + auto iter = idxCatalog->getIndexIterator(opCtx, /*includeUnfinished=*/true); + while (iter->more()) { + opCtx->checkForInterrupt(); + auto entry = iter->next(); + auto desc = entry->descriptor(); + const auto& spec = desc->infoObj(); + if (!spec.hasField(IndexDescriptor::kExpireAfterSecondsFieldName)) { + continue; + } + uassert( + ErrorCodes::CannotDowngrade, + fmt::format("Cannot downgrade the cluster when there are TTL indexes with " + "NaN 'expireAfterSeconds' in the catalog; drop all TTL indexes " + "with NaN 'expireAfterSeconds' before downgrading. " + "First detected index: namespace: {}, UUID: {}, index: {}", + coll->ns().toString(), + coll->uuid().toString(), + spec.toString()), + !spec[IndexDescriptor::kExpireAfterSecondsFieldName].isNaN()); + } + } + } + } + void _runDowngrade(OperationContext* opCtx, const SetFeatureCompatibilityVersion& request, boost::optional<Timestamp> changeTimestamp) { @@ -719,6 +762,7 @@ private: }); } + _disallowTTLIndexesWithNaNExpireAfterSecondsOnDowngrade(opCtx); // TODO (SERVER-56171): Remove once 5.0 is last-lts. removeTimeseriesEntriesFromConfigTransactions(opCtx); |