diff options
author | Matthew Saltz <matthew.saltz@mongodb.com> | 2022-10-17 15:12:30 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-10-17 16:57:31 +0000 |
commit | 16344f9a8ea2264190454a9c880ff8b08730b8ac (patch) | |
tree | 396d588c7816e3950daee0b7c58a952caf88098f | |
parent | c73958b4f0ba6180f920105d77af0a51ebd456e2 (diff) | |
download | mongo-16344f9a8ea2264190454a9c880ff8b08730b8ac.tar.gz |
SERVER-69091 Handle invalid TTL expireAfterSeconds values uniformly
-rw-r--r-- | jstests/noPassthrough/ttl_expire_nan.js | 120 | ||||
-rw-r--r-- | jstests/noPassthrough/ttl_expire_nan_warning_on_startup.js | 5 | ||||
-rw-r--r-- | jstests/noPassthrough/ttl_invalid_expire_after_secs.js | 133 | ||||
-rw-r--r-- | src/mongo/db/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/catalog/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/catalog/coll_mod_index.cpp | 21 | ||||
-rw-r--r-- | src/mongo/db/catalog/index_build_block.cpp | 14 | ||||
-rw-r--r-- | src/mongo/db/catalog/index_catalog_impl.cpp | 47 | ||||
-rw-r--r-- | src/mongo/db/catalog/index_key_validate.cpp | 72 | ||||
-rw-r--r-- | src/mongo/db/catalog/index_key_validate.h | 2 | ||||
-rw-r--r-- | src/mongo/db/commands/create_indexes_cmd.cpp | 9 | ||||
-rw-r--r-- | src/mongo/db/ttl.cpp | 38 | ||||
-rw-r--r-- | src/mongo/db/ttl_collection_cache.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/ttl_collection_cache.h | 18 | ||||
-rw-r--r-- | src/mongo/db/ttl_collection_cache_test.cpp | 16 |
15 files changed, 287 insertions, 215 deletions
diff --git a/jstests/noPassthrough/ttl_expire_nan.js b/jstests/noPassthrough/ttl_expire_nan.js deleted file mode 100644 index 8e07b9a1c2b..00000000000 --- a/jstests/noPassthrough/ttl_expire_nan.js +++ /dev/null @@ -1,120 +0,0 @@ -/** - * Tests TTL indexes with NaN for 'expireAfterSeconds'. - * - * Existing TTL indexes from older versions of the server may contain a NaN for the duration. - * Newer server versions (5.0+) normalize the TTL duration to 0. - * - * @tags: [ - * requires_replication, - * ] - */ -(function() { -'use strict'; - -load("jstests/libs/fail_point_util.js"); -load('jstests/noPassthrough/libs/index_build.js'); - -const rst = new ReplSetTest({ - nodes: [{}, {rsConfig: {votes: 0, priority: 0}}], - nodeOptions: {setParameter: {ttlMonitorSleepSecs: 5}}, - // Sync from primary only so that we have a well-defined node to check listIndexes behavior. - settings: {chainingAllowed: false}, -}); -rst.startSet(); -rst.initiate(); - -let primary = rst.getPrimary(); -const db = primary.getDB('test'); -const coll = db.t; - -// The test cases here revolve around having a TTL index in the catalog with a NaN -// 'expireAfterSeconds'. The current createIndexes behavior will overwrite NaN with int32::max -// unless we use a fail point. -const fp = configureFailPoint(primary, 'skipTTLIndexNaNExpireAfterSecondsValidation'); -try { - assert.commandWorked(coll.createIndex({t: 1}, {expireAfterSeconds: NaN})); -} finally { - fp.off(); -} - -assert.commandWorked(coll.insert({_id: 0, t: ISODate()})); - -// Wait for "TTL indexes require the expire field to be numeric, skipping TTL job" log message. -checkLog.containsJson(primary, 22542, {ns: coll.getFullName()}); - -// TTL index should be replicated to the secondary with a NaN 'expireAfterSeconds'. -const secondary = rst.getSecondary(); -checkLog.containsJson(secondary, 20384, { - namespace: coll.getFullName(), - properties: (spec) => { - jsTestLog('TTL index on secondary: ' + tojson(spec)); - return isNaN(spec.expireAfterSeconds); - } -}); - -assert.eq( - coll.countDocuments({}), 1, 'ttl index with NaN duration should not remove any documents.'); - -// Confirm that TTL index is replicated with a non-zero 'expireAfterSeconds' during initial sync. -const newNode = rst.add({rsConfig: {votes: 0, priority: 0}}); -rst.reInitiate(); -rst.waitForState(newNode, ReplSetTest.State.SECONDARY); -rst.awaitReplication(); -let newNodeTestDB = newNode.getDB(db.getName()); -let newNodeColl = newNodeTestDB.getCollection(coll.getName()); -const newNodeIndexes = IndexBuildTest.assertIndexes(newNodeColl, 2, ['_id_', 't_1']); -const newNodeSpec = newNodeIndexes.t_1; -jsTestLog('TTL index on initial sync node: ' + tojson(newNodeSpec)); -assert(newNodeSpec.hasOwnProperty('expireAfterSeconds'), - 'Index was not replicated as a TTL index during initial sync.'); -assert.gt(newNodeSpec.expireAfterSeconds, - 0, - 'NaN expireAferSeconds was replicated as zero during initial sync.'); - -// Check that listIndexes on the primary logged a "Fixing expire field from TTL index spec" message -// during the NaN 'expireAfterSeconds' conversion. -checkLog.containsJson(primary, 6835900, {namespace: coll.getFullName()}); - -// Confirm that a node with an existing TTL index with NaN 'expireAfterSeconds' will convert the -// duration on the TTL index from NaN to a large positive value when it becomes the primary node. -// When stepping down the primary, we use 'force' because there's no other electable node. -// Subsequently, we wait for the stepped down node to become primary again. -// To confirm that the TTL index has been fixed, we check the oplog for a collMod operation on the -// TTL index that changes the `expireAfterSeconds` field from NaN to a large positive value. -assert.commandWorked(primary.adminCommand({replSetStepDown: 5, force: true})); -primary = rst.waitForPrimary(); -const collModOplogEntries = - rst.findOplog(primary, - { - op: 'c', - ns: coll.getDB().getCollection('$cmd').getFullName(), - 'o.collMod': coll.getName(), - 'o.index.name': 't_1', - 'o.index.expireAfterSeconds': newNodeSpec.expireAfterSeconds - }, - /*limit=*/1) - .toArray(); -assert.eq(collModOplogEntries.length, - 1, - 'TTL index with NaN expireAfterSeconds was not fixed using collMod during step-up: ' + - tojson(rst.findOplog(primary, {op: {$ne: 'n'}}, /*limit=*/10).toArray())); - -// Confirm that createIndexes will overwrite a NaN 'expireAfterSeconds' in a TTL index before saving -// it to the catalog and replicating it downstream. -const coll2 = db.w; -assert.commandWorked(coll2.createIndex({t: 1}, {expireAfterSeconds: NaN})); -assert.commandWorked(coll2.insert({_id: 0, t: ISODate()})); - -// TTL index should be replicated to the secondary with a non-NaN 'expireAfterSeconds'. -checkLog.containsJson(secondary, 20384, { - namespace: coll2.getFullName(), - properties: (spec) => { - jsTestLog('TTL index on secondary (with overwritten NaN expireAfterSeconds): ' + - tojson(spec)); - return spec.hasOwnProperty('expireAfterSeconds') && !isNaN(spec.expireAfterSeconds) && - spec.expireAfterSeconds === newNodeSpec.expireAfterSeconds; - } -}); - -rst.stopSet(); -})(); diff --git a/jstests/noPassthrough/ttl_expire_nan_warning_on_startup.js b/jstests/noPassthrough/ttl_expire_nan_warning_on_startup.js index a285adc69a5..bf97a4a2913 100644 --- a/jstests/noPassthrough/ttl_expire_nan_warning_on_startup.js +++ b/jstests/noPassthrough/ttl_expire_nan_warning_on_startup.js @@ -24,11 +24,14 @@ const coll = db.t; // The test cases here revolve around having a TTL index in the catalog with a NaN // 'expireAfterSeconds'. The current createIndexes behavior will overwrite NaN with int32::max // unless we use a fail point. -const fp = configureFailPoint(primary, 'skipTTLIndexNaNExpireAfterSecondsValidation'); +const fp = configureFailPoint(primary, 'skipTTLIndexValidationOnCreateIndex'); +const fp2 = + configureFailPoint(primary, 'skipTTLIndexInvalidExpireAfterSecondsValidationForCreateIndex'); try { assert.commandWorked(coll.createIndex({t: 1}, {expireAfterSeconds: NaN})); } finally { fp.off(); + fp2.off(); } assert.commandWorked(coll.insert({_id: 0, t: ISODate()})); diff --git a/jstests/noPassthrough/ttl_invalid_expire_after_secs.js b/jstests/noPassthrough/ttl_invalid_expire_after_secs.js new file mode 100644 index 00000000000..32c582a3e98 --- /dev/null +++ b/jstests/noPassthrough/ttl_invalid_expire_after_secs.js @@ -0,0 +1,133 @@ +/** + * Tests TTL indexes with invalid values for 'expireAfterSeconds'. + * + * @tags: [ + * requires_replication, + * ] + */ +(function() { +'use strict'; + +load("jstests/libs/fail_point_util.js"); +load('jstests/noPassthrough/libs/index_build.js'); + +function test(expireAfterSecondsVal) { + jsTestLog("Testing expireAfterSeconds = " + expireAfterSecondsVal); + + const rst = new ReplSetTest({ + nodes: [{}, {rsConfig: {votes: 0, priority: 0}}], + nodeOptions: {setParameter: {ttlMonitorSleepSecs: 5}}, + // Sync from primary only so that we have a well-defined node to check listIndexes behavior. + settings: {chainingAllowed: false}, + }); + rst.startSet(); + rst.initiate(); + + let primary = rst.getPrimary(); + const db = primary.getDB('test'); + const coll = db.t; + + // The test cases here revolve around having a TTL index in the catalog with an invalid + // 'expireAfterSeconds'. The current createIndexes behavior will reject index creation for + // invalid values of expireAfterSeconds, so we use a failpoint to disable that checking to + // simulate a value leftover from very old MongoDB versions. + const fp = configureFailPoint(primary, 'skipTTLIndexValidationOnCreateIndex'); + const fp2 = configureFailPoint(primary, + 'skipTTLIndexInvalidExpireAfterSecondsValidationForCreateIndex'); + try { + assert.commandWorked(coll.createIndex({t: 1}, {expireAfterSeconds: expireAfterSecondsVal})); + } finally { + fp.off(); + fp2.off(); + } + + assert.commandWorked(coll.insert({_id: 0, t: ISODate()})); + + // Log the contents of the catalog for debugging purposes in case of failure. + let catalogContents = coll.aggregate([{$listCatalog: {}}]).toArray(); + jsTestLog("Catalog contents: " + tojson(catalogContents)); + + // Wait for "Skipping TTL job due to invalid index spec" log message. + checkLog.containsJson(primary, 6909100, {ns: coll.getFullName()}); + + // TTL index should be replicated to the secondary with an invalid 'expireAfterSeconds'. + const secondary = rst.getSecondary(); + checkLog.containsJson(secondary, 20384, { + namespace: coll.getFullName(), + properties: (spec) => { + jsTestLog('TTL index on secondary: ' + tojson(spec)); + // NaN does not equal NaN, so we have to use the isNaN function for this check. + if (isNaN(expireAfterSecondsVal)) + return isNaN(spec.expireAfterSeconds); + else + return spec.expireAfterSeconds == expireAfterSecondsVal; + } + }); + + assert.eq(coll.countDocuments({}), + 1, + 'ttl index with invalid expireAfterSeconds should not remove any documents.'); + + // Confirm that TTL index is replicated with a non-zero 'expireAfterSeconds' during initial + // sync. + const newNode = rst.add({rsConfig: {votes: 0, priority: 0}}); + rst.reInitiate(); + rst.waitForState(newNode, ReplSetTest.State.SECONDARY); + rst.awaitReplication(); + let newNodeTestDB = newNode.getDB(db.getName()); + let newNodeColl = newNodeTestDB.getCollection(coll.getName()); + const newNodeIndexes = IndexBuildTest.assertIndexes(newNodeColl, 2, ['_id_', 't_1']); + const newNodeSpec = newNodeIndexes.t_1; + jsTestLog('TTL index on initial sync node: ' + tojson(newNodeSpec)); + assert(newNodeSpec.hasOwnProperty('expireAfterSeconds'), + 'Index was not replicated as a TTL index during initial sync.'); + assert.eq( + newNodeSpec.expireAfterSeconds, + 2147483647, // This is the "disabled" value for expireAfterSeconds + expireAfterSecondsVal + + ' expireAferSeconds was replicated as something other than disabled during initial sync.'); + + // Check that listIndexes on the primary logged a "Fixing expire field from TTL index spec" + // message during the invalid 'expireAfterSeconds' conversion. + checkLog.containsJson(primary, 6835900, {namespace: coll.getFullName()}); + + // Confirm that a node with an existing TTL index with an invalid 'expireAfterSeconds' will + // convert the duration on the TTL index from the invalid value to a large positive value when + // it becomes the primary node. When stepping down the primary, we use 'force' because there's + // no other electable node. Subsequently, we wait for the stepped down node to become primary + // again. To confirm that the TTL index has been fixed, we check the oplog for a collMod + // operation on the TTL index that changes the `expireAfterSeconds` field from the invalid value + // to a large positive value. + assert.commandWorked(primary.adminCommand({replSetStepDown: 5, force: true})); + primary = rst.waitForPrimary(); + + // Log the contents of the catalog for debugging purposes in case of failure. + let newPrimaryColl = primary.getDB(db.getName()).getCollection(coll.getName()); + const newPrimaryCatalogContents = newPrimaryColl.aggregate([{$listCatalog: {}}]).toArray(); + jsTestLog("Catalog contents on new primary: " + tojson(newPrimaryCatalogContents)); + + const collModOplogEntries = + rst.findOplog(primary, + { + op: 'c', + ns: newPrimaryColl.getDB().getCollection('$cmd').getFullName(), + 'o.collMod': coll.getName(), + 'o.index.name': 't_1', + 'o.index.expireAfterSeconds': newNodeSpec.expireAfterSeconds + }, + /*limit=*/1) + .toArray(); + assert.eq(collModOplogEntries.length, + 1, + 'TTL index with ' + expireAfterSecondsVal + + ' expireAfterSeconds was not fixed using collMod during step-up: ' + + tojson(rst.findOplog(primary, {op: {$ne: 'n'}}, /*limit=*/10).toArray())); + + rst.stopSet(); +} + +test(NaN); +const maxDouble = 1.7976931348623157e+308; +test(maxDouble); +test(-maxDouble); +})(); diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript index 9110fc06a56..0b46b67c425 100644 --- a/src/mongo/db/SConscript +++ b/src/mongo/db/SConscript @@ -1322,6 +1322,7 @@ env.Library( ], LIBDEPS_PRIVATE=[ 'catalog/catalog_helpers', + 'catalog/index_key_validate', 'commands/fsync_locked', 'commands/server_status_core', 'ops/write_ops', diff --git a/src/mongo/db/catalog/SConscript b/src/mongo/db/catalog/SConscript index 27a82a4039d..f8ba8c26dfa 100644 --- a/src/mongo/db/catalog/SConscript +++ b/src/mongo/db/catalog/SConscript @@ -208,6 +208,7 @@ env.Library( '$BUILD_DIR/mongo/db/vector_clock', 'collection_catalog', 'collection_query_info', + 'index_key_validate', ], ) diff --git a/src/mongo/db/catalog/coll_mod_index.cpp b/src/mongo/db/catalog/coll_mod_index.cpp index a7f0097052d..a4748228d3c 100644 --- a/src/mongo/db/catalog/coll_mod_index.cpp +++ b/src/mongo/db/catalog/coll_mod_index.cpp @@ -31,6 +31,7 @@ #include "mongo/db/catalog/coll_mod_index.h" #include "mongo/db/catalog/cannot_convert_index_to_unique_info.h" +#include "mongo/db/catalog/index_key_validate.h" #include "mongo/db/catalog/throttle_cursor.h" #include "mongo/db/index/index_access_method.h" #include "mongo/db/storage/index_entry_comparison.h" @@ -69,10 +70,12 @@ void _processCollModIndexRequestExpireAfterSeconds(OperationContext* opCtx, // Do not refer to 'idx' within this commit handler as it may be be invalidated by // IndexCatalog::refreshEntry(). opCtx->recoveryUnit()->onCommit( - [ttlCache, uuid = coll->uuid(), indexName = idx->indexName(), indexExpireAfterSeconds]( - auto _) { + [ttlCache, uuid = coll->uuid(), indexName = idx->indexName()](auto _) { + // We assume the expireAfterSeconds field is valid, because we've already done + // validation of this field. ttlCache->registerTTLInfo( - uuid, TTLCollectionCache::Info{indexName, /*isExpireAfterSecondsNaN=*/false}); + uuid, + TTLCollectionCache::Info{indexName, /*isExpireAfterSecondsInvalid=*/false}); }); // Change the value of "expireAfterSeconds" on disk. @@ -81,9 +84,12 @@ void _processCollModIndexRequestExpireAfterSeconds(OperationContext* opCtx, return; } - // If the current `expireAfterSeconds` is NaN, it can never be equal to + // If the current `expireAfterSeconds` is invalid, it can never be equal to // 'indexExpireAfterSeconds'. - if (oldExpireSecsElement.isNaN()) { + if (auto status = index_key_validate::validateExpireAfterSeconds( + oldExpireSecsElement, + index_key_validate::ValidateExpireAfterSecondsMode::kSecondaryTTLIndex); + !status.isOK()) { // Setting *oldExpireSecs is mostly for informational purposes. // We could also use index_key_validate::kExpireAfterSecondsForInactiveTTLIndex but // 0 is more consistent with the previous safeNumberLong() behavior and avoids potential @@ -99,8 +105,9 @@ void _processCollModIndexRequestExpireAfterSeconds(OperationContext* opCtx, auto ttlCache = &TTLCollectionCache::get(opCtx->getServiceContext()); const auto& coll = autoColl->getCollection(); opCtx->recoveryUnit()->onCommit( - [ttlCache, uuid = coll->uuid(), indexName = idx->indexName(), indexExpireAfterSeconds]( - auto _) { ttlCache->unsetTTLIndexExpireAfterSecondsNaN(uuid, indexName); }); + [ttlCache, uuid = coll->uuid(), indexName = idx->indexName()](auto _) { + ttlCache->unsetTTLIndexExpireAfterSecondsInvalid(uuid, indexName); + }); return; } diff --git a/src/mongo/db/catalog/index_build_block.cpp b/src/mongo/db/catalog/index_build_block.cpp index 375d7e59e31..bc8baba0dda 100644 --- a/src/mongo/db/catalog/index_build_block.cpp +++ b/src/mongo/db/catalog/index_build_block.cpp @@ -36,6 +36,7 @@ #include "mongo/db/audit.h" #include "mongo/db/catalog/collection.h" +#include "mongo/db/catalog/index_key_validate.h" #include "mongo/db/catalog_raii.h" #include "mongo/db/db_raii.h" #include "mongo/db/index/index_descriptor.h" @@ -290,14 +291,15 @@ void IndexBuildBlock::success(OperationContext* opCtx, Collection* collection) { } // Add the index to the TTLCollectionCache upon successfully committing the index build. - // TTL indexes are not compatible with capped collections. - // Note that TTL deletion is supported on capped clustered collections via bounded - // collection scan, which does not use an index. + // TTL indexes are not compatible with capped collections. Note that TTL deletion is + // supported on capped clustered collections via bounded collection scan, which does not + // use an index. if (spec.hasField(IndexDescriptor::kExpireAfterSecondsFieldName) && !coll->isCapped()) { + auto validateStatus = index_key_validate::validateExpireAfterSeconds( + spec[IndexDescriptor::kExpireAfterSecondsFieldName], + index_key_validate::ValidateExpireAfterSecondsMode::kSecondaryTTLIndex); TTLCollectionCache::get(svcCtx).registerTTLInfo( - coll->uuid(), - TTLCollectionCache::Info{ - indexName, spec[IndexDescriptor::kExpireAfterSecondsFieldName].isNaN()}); + coll->uuid(), TTLCollectionCache::Info{indexName, !validateStatus.isOK()}); } }); } diff --git a/src/mongo/db/catalog/index_catalog_impl.cpp b/src/mongo/db/catalog/index_catalog_impl.cpp index 15b83e6f336..a57f0348ae8 100644 --- a/src/mongo/db/catalog/index_catalog_impl.cpp +++ b/src/mongo/db/catalog/index_catalog_impl.cpp @@ -243,30 +243,35 @@ Status IndexCatalogImpl::_init(OperationContext* opCtx, auto descriptor = std::make_unique<IndexDescriptor>(_getAccessMethodName(keyPattern), spec); - // TTL indexes with NaN 'expireAfterSeconds' cause problems in multiversion settings. if (spec.hasField(IndexDescriptor::kExpireAfterSecondsFieldName)) { - if (spec[IndexDescriptor::kExpireAfterSecondsFieldName].isNaN()) { - LOGV2_OPTIONS(6852200, - {logv2::LogTag::kStartupWarnings}, - "Found an existing TTL index with NaN 'expireAfterSeconds' in the " - "catalog.", - "ns"_attr = collection->ns(), - "uuid"_attr = collection->uuid(), - "index"_attr = indexName, - "spec"_attr = spec); + // TTL indexes with an invalid 'expireAfterSeconds' field cause problems in multiversion + // settings. + auto hasInvalidExpireAfterSeconds = + !index_key_validate::validateExpireAfterSeconds( + spec[IndexDescriptor::kExpireAfterSecondsFieldName], + index_key_validate::ValidateExpireAfterSecondsMode::kSecondaryTTLIndex) + .isOK(); + if (hasInvalidExpireAfterSeconds) { + LOGV2_OPTIONS( + 6852200, + {logv2::LogTag::kStartupWarnings}, + "Found an existing TTL index with invalid 'expireAfterSeconds' in the " + "catalog.", + "ns"_attr = collection->ns(), + "uuid"_attr = collection->uuid(), + "index"_attr = indexName, + "spec"_attr = spec); } - } - // TTL indexes are not compatible with capped collections. - // Note that TTL deletion is supported on capped clustered collections via bounded - // collection scan, which does not use an index. - if (spec.hasField(IndexDescriptor::kExpireAfterSecondsFieldName) && - !collection->isCapped()) { - TTLCollectionCache::get(opCtx->getServiceContext()) - .registerTTLInfo( - collection->uuid(), - TTLCollectionCache::Info{ - indexName, spec[IndexDescriptor::kExpireAfterSecondsFieldName].isNaN()}); + // TTL indexes are not compatible with capped collections. + // Note that TTL deletion is supported on capped clustered collections via bounded + // collection scan, which does not use an index. + if (!collection->isCapped()) { + TTLCollectionCache::get(opCtx->getServiceContext()) + .registerTTLInfo( + collection->uuid(), + TTLCollectionCache::Info{indexName, hasInvalidExpireAfterSeconds}); + } } bool ready = collection->isIndexReady(indexName); diff --git a/src/mongo/db/catalog/index_key_validate.cpp b/src/mongo/db/catalog/index_key_validate.cpp index e814ceb3141..1819871df3d 100644 --- a/src/mongo/db/catalog/index_key_validate.cpp +++ b/src/mongo/db/catalog/index_key_validate.cpp @@ -71,9 +71,9 @@ namespace { // specification. MONGO_FAIL_POINT_DEFINE(skipIndexCreateFieldNameValidation); -// When the skipTTLIndexNaNExpireAfterSecondsValidation failpoint is enabled, validation for -// TTL index 'expireAfterSeconds' will be disabled. -MONGO_FAIL_POINT_DEFINE(skipTTLIndexNaNExpireAfterSecondsValidation); +// When the skipTTLIndexInvalidExpireAfterSecondsValidationForCreateIndex failpoint is enabled, +// validation for TTL index 'expireAfterSeconds' will be disabled in certain codepaths. +MONGO_FAIL_POINT_DEFINE(skipTTLIndexInvalidExpireAfterSecondsValidationForCreateIndex); static const std::set<StringData> allowedIdIndexFieldNames = { IndexDescriptor::kCollationFieldName, @@ -290,7 +290,9 @@ BSONObj repairIndexSpec(const NamespaceString& ns, "indexSpec"_attr = redact(indexSpec)); builder->appendBool(fieldName, true); } else if (IndexDescriptor::kExpireAfterSecondsFieldName == fieldName && - !(indexSpecElem.isNumber() && !indexSpecElem.isNaN())) { + !validateExpireAfterSeconds(indexSpecElem, + ValidateExpireAfterSecondsMode::kSecondaryTTLIndex) + .isOK()) { LOGV2_WARNING(6835900, "Fixing expire field from TTL index spec", "namespace"_attr = redact(ns.toString()), @@ -310,7 +312,7 @@ StatusWith<BSONObj> validateIndexSpec(OperationContext* opCtx, const BSONObj& in bool hasKeyPatternField = false; bool hasIndexNameField = false; bool hasNamespaceField = false; - bool isTTLIndexWithNaNExpireAfterSeconds = false; + bool isTTLIndexWithInvalidExpireAfterSeconds = false; bool hasVersionField = false; bool hasCollationField = false; bool hasWeightsField = false; @@ -569,8 +571,12 @@ StatusWith<BSONObj> validateIndexSpec(OperationContext* opCtx, const BSONObj& in str::stream() << "The field '" << indexSpecElemFieldName << "' must be a number, but got " << typeName(indexSpecElem.type())}; - } else if (IndexDescriptor::kExpireAfterSecondsFieldName == indexSpecElemFieldName) { - isTTLIndexWithNaNExpireAfterSeconds = indexSpecElem.isNaN(); + } else if (IndexDescriptor::kExpireAfterSecondsFieldName == indexSpecElemFieldName && + !validateExpireAfterSeconds(indexSpecElem, + ValidateExpireAfterSecondsMode::kSecondaryTTLIndex) + .isOK() && + !skipTTLIndexInvalidExpireAfterSecondsValidationForCreateIndex.shouldFail()) { + isTTLIndexWithInvalidExpireAfterSeconds = true; } else { // We can assume field name is valid at this point. Validation of fieldname is handled // prior to this in validateIndexSpecFieldNames(). @@ -642,10 +648,9 @@ StatusWith<BSONObj> validateIndexSpec(OperationContext* opCtx, const BSONObj& in modifiedSpec = modifiedSpec.removeField(IndexDescriptor::kNamespaceFieldName); } - if (isTTLIndexWithNaNExpireAfterSeconds && - !skipTTLIndexNaNExpireAfterSecondsValidation.shouldFail()) { + if (isTTLIndexWithInvalidExpireAfterSeconds) { // We create a new index specification with the 'expireAfterSeconds' field set as - // kExpireAfterSecondsForInactiveTTLIndex if the current value is NaN. A similar + // kExpireAfterSecondsForInactiveTTLIndex if the current value is invalid. A similar // treatment is done in repairIndexSpec(). This rewrites the 'expireAfterSeconds' // value to be compliant with the 'safeInt' IDL type for the listIndexes response. BSONObjBuilder builder; @@ -856,6 +861,38 @@ Status validateExpireAfterSeconds(std::int64_t expireAfterSeconds, return Status::OK(); } +Status validateExpireAfterSeconds(BSONElement expireAfterSeconds, + ValidateExpireAfterSecondsMode mode) { + if (!expireAfterSeconds.isNumber()) { + return {ErrorCodes::CannotCreateIndex, + str::stream() << "TTL index '" << IndexDescriptor::kExpireAfterSecondsFieldName + << "' option must be numeric, but received a type of '" + << typeName(expireAfterSeconds.type())}; + } + + if (expireAfterSeconds.isNaN()) { + return {ErrorCodes::CannotCreateIndex, + str::stream() << "TTL index '" << IndexDescriptor::kExpireAfterSecondsFieldName + << "' option must not be NaN"}; + } + + // Clustered indexes allow 64-bit integers for expireAfterSeconds, but secondary indexes only + // allow 32-bit integers, so we check the range here for secondary indexes. + if (mode == ValidateExpireAfterSecondsMode::kSecondaryTTLIndex && + expireAfterSeconds.safeNumberInt() != expireAfterSeconds.safeNumberLong()) { + return {ErrorCodes::CannotCreateIndex, + str::stream() << "TTL index '" << IndexDescriptor::kExpireAfterSecondsFieldName + << "' must be within the range of a 32-bit integer"}; + } + + if (auto status = validateExpireAfterSeconds(expireAfterSeconds.safeNumberLong(), mode); + !status.isOK()) { + return {ErrorCodes::CannotCreateIndex, str::stream() << status.reason()}; + } + + return Status::OK(); +} + bool isIndexTTL(const BSONObj& indexSpec) { return indexSpec.hasField(IndexDescriptor::kExpireAfterSecondsFieldName); } @@ -865,22 +902,11 @@ Status validateIndexSpecTTL(const BSONObj& indexSpec) { return Status::OK(); } - const BSONElement expireAfterSecondsElt = - indexSpec[IndexDescriptor::kExpireAfterSecondsFieldName]; - if (!expireAfterSecondsElt.isNumber()) { - return {ErrorCodes::CannotCreateIndex, - str::stream() << "TTL index '" << IndexDescriptor::kExpireAfterSecondsFieldName - << "' option must be numeric, but received a type of '" - << typeName(expireAfterSecondsElt.type()) - << "'. Index spec: " << indexSpec}; - } - if (auto status = - validateExpireAfterSeconds(expireAfterSecondsElt.safeNumberLong(), + validateExpireAfterSeconds(indexSpec[IndexDescriptor::kExpireAfterSecondsFieldName], ValidateExpireAfterSecondsMode::kSecondaryTTLIndex); !status.isOK()) { - return {ErrorCodes::CannotCreateIndex, - str::stream() << status.reason() << ". Index spec: " << indexSpec}; + return status.withContext(str::stream() << ". Index spec: " << indexSpec); } const BSONObj key = indexSpec["key"].Obj(); diff --git a/src/mongo/db/catalog/index_key_validate.h b/src/mongo/db/catalog/index_key_validate.h index d7be951e9ad..f6b5f37978f 100644 --- a/src/mongo/db/catalog/index_key_validate.h +++ b/src/mongo/db/catalog/index_key_validate.h @@ -137,6 +137,8 @@ enum class ValidateExpireAfterSecondsMode { Status validateExpireAfterSeconds(std::int64_t expireAfterSeconds, ValidateExpireAfterSecondsMode mode); +Status validateExpireAfterSeconds(BSONElement expireAfterSeconds, + ValidateExpireAfterSecondsMode mode); /** * Returns true if 'indexSpec' refers to a TTL index. */ diff --git a/src/mongo/db/commands/create_indexes_cmd.cpp b/src/mongo/db/commands/create_indexes_cmd.cpp index 9d867dd340d..6752ecb0207 100644 --- a/src/mongo/db/commands/create_indexes_cmd.cpp +++ b/src/mongo/db/commands/create_indexes_cmd.cpp @@ -91,6 +91,9 @@ MONGO_FAIL_POINT_DEFINE(hangAfterIndexBuildAbort); // through the IndexBuildsCoordinator. MONGO_FAIL_POINT_DEFINE(hangCreateIndexesBeforeStartingIndexBuild); + +MONGO_FAIL_POINT_DEFINE(skipTTLIndexValidationOnCreateIndex); + constexpr auto kCommandName = "createIndexes"_sd; constexpr auto kAllIndexesAlreadyExist = "all indexes already exist"_sd; constexpr auto kIndexAlreadyExists = "index already exists"_sd; @@ -177,6 +180,12 @@ void appendFinalIndexFieldsToResult(CreateIndexesReply* reply, void validateTTLOptions(OperationContext* opCtx, const NamespaceString& ns, const CreateIndexesCommand& cmd) { + if (MONGO_unlikely(skipTTLIndexValidationOnCreateIndex.shouldFail())) { + LOGV2( + 6909101, "Skipping TTL index validation due to failpoint", "cmd"_attr = cmd.toBSON({})); + return; + } + const auto clusteredAndCapped = [&](LockMode mode) { AutoGetCollection collection(opCtx, ns, mode); if (collection) { diff --git a/src/mongo/db/ttl.cpp b/src/mongo/db/ttl.cpp index eb78bb6f0b6..04499465034 100644 --- a/src/mongo/db/ttl.cpp +++ b/src/mongo/db/ttl.cpp @@ -177,18 +177,16 @@ const IndexDescriptor* getValidTTLIndex(OperationContext* opCtx, return nullptr; } - BSONElement secondsExpireElt = spec[IndexDescriptor::kExpireAfterSecondsFieldName]; - if (!secondsExpireElt.isNumber() || secondsExpireElt.isNaN()) { - LOGV2_ERROR( - 22542, - "TTL indexes require the expire field to be numeric and not a NaN, skipping TTL job", - "ns"_attr = collection->ns(), - "uuid"_attr = collection->uuid(), - "field"_attr = IndexDescriptor::kExpireAfterSecondsFieldName, - "type"_attr = typeName(secondsExpireElt.type()), - "index"_attr = spec); + if (auto status = index_key_validate::validateIndexSpecTTL(spec); !status.isOK()) { + LOGV2_ERROR(6909100, + "Skipping TTL job due to invalid index spec", + "reason"_attr = status.reason(), + "ns"_attr = collection->ns(), + "uuid"_attr = collection->uuid(), + "index"_attr = spec); return nullptr; } + return desc; } @@ -726,13 +724,14 @@ void TTLMonitor::onStepUp(OperationContext* opCtx) { if (info.isClustered()) { continue; } - if (!info.isExpireAfterSecondsNaN()) { + + if (!info.isExpireAfterSecondsInvalid()) { continue; } auto indexName = info.getIndexName(); LOGV2(6847700, - "Running collMod to fix TTL index with NaN 'expireAfterSeconds'.", + "Running collMod to fix TTL index with invalid 'expireAfterSeconds'.", "ns"_attr = *nss, "uuid"_attr = uuid, "name"_attr = indexName, @@ -740,7 +739,7 @@ void TTLMonitor::onStepUp(OperationContext* opCtx) { index_key_validate::kExpireAfterSecondsForInactiveTTLIndex); // Compose collMod command to amend 'expireAfterSeconds' to same value that - // would be used by listIndexes() to convert the NaN value in the catalog. + // would be used by listIndexes() to convert a NaN value in the catalog. CollModIndex collModIndex; collModIndex.setName(StringData{indexName}); collModIndex.setExpireAfterSeconds(mongo::durationCount<Seconds>( @@ -753,12 +752,13 @@ void TTLMonitor::onStepUp(OperationContext* opCtx) { uassertStatusOK( processCollModCommand(opCtx, {nss->db(), uuid}, collModCmd, &builder)); auto result = builder.obj(); - LOGV2(6847701, - "Successfully fixed TTL index with NaN 'expireAfterSeconds' using collMod", - "ns"_attr = *nss, - "uuid"_attr = uuid, - "name"_attr = indexName, - "result"_attr = result); + LOGV2( + 6847701, + "Successfully fixed TTL index with invalid 'expireAfterSeconds' using collMod", + "ns"_attr = *nss, + "uuid"_attr = uuid, + "name"_attr = indexName, + "result"_attr = result); } } catch (const ExceptionForCat<ErrorCategory::Interruption>&) { // The exception is relevant to the entire TTL monitoring process, not just the specific diff --git a/src/mongo/db/ttl_collection_cache.cpp b/src/mongo/db/ttl_collection_cache.cpp index 43d55a46301..966700ec862 100644 --- a/src/mongo/db/ttl_collection_cache.cpp +++ b/src/mongo/db/ttl_collection_cache.cpp @@ -105,7 +105,8 @@ void TTLCollectionCache::deregisterTTLClusteredIndex(UUID uuid) { TTLCollectionCache::Info{TTLCollectionCache::ClusteredId{}}); } -void TTLCollectionCache::unsetTTLIndexExpireAfterSecondsNaN(UUID uuid, const IndexName& indexName) { +void TTLCollectionCache::unsetTTLIndexExpireAfterSecondsInvalid(UUID uuid, + const IndexName& indexName) { stdx::lock_guard<Latch> lock(_ttlInfosLock); auto infoIt = _ttlInfos.find(uuid); if (infoIt == _ttlInfos.end()) { @@ -115,7 +116,7 @@ void TTLCollectionCache::unsetTTLIndexExpireAfterSecondsNaN(UUID uuid, const Ind auto&& infoVec = infoIt->second; for (auto&& info : infoVec) { if (!info.isClustered() && info.getIndexName() == indexName) { - info.unsetExpireAfterSecondsNaN(); + info.unsetExpireAfterSecondsInvalid(); break; } } diff --git a/src/mongo/db/ttl_collection_cache.h b/src/mongo/db/ttl_collection_cache.h index a8f00b6c6a6..f1a5fe12476 100644 --- a/src/mongo/db/ttl_collection_cache.h +++ b/src/mongo/db/ttl_collection_cache.h @@ -56,28 +56,28 @@ public: // Specifies how a collection should expire data with TTL. class Info { public: - explicit Info(ClusteredId) : _isClustered(true), _isExpireAfterSecondsNaN(false) {} - Info(IndexName indexName, bool isExpireAfterSecondsNaN) + explicit Info(ClusteredId) : _isClustered(true), _isExpireAfterSecondsInvalid(false) {} + Info(IndexName indexName, bool isExpireAfterSecondsInvalid) : _isClustered(false), _indexName(std::move(indexName)), - _isExpireAfterSecondsNaN(isExpireAfterSecondsNaN) {} + _isExpireAfterSecondsInvalid(isExpireAfterSecondsInvalid) {} bool isClustered() const { return _isClustered; } IndexName getIndexName() const { return _indexName; } - bool isExpireAfterSecondsNaN() const { - return _isExpireAfterSecondsNaN; + bool isExpireAfterSecondsInvalid() const { + return _isExpireAfterSecondsInvalid; } - void unsetExpireAfterSecondsNaN() { - _isExpireAfterSecondsNaN = false; + void unsetExpireAfterSecondsInvalid() { + _isExpireAfterSecondsInvalid = false; } private: bool _isClustered; IndexName _indexName; - bool _isExpireAfterSecondsNaN; + bool _isExpireAfterSecondsInvalid; }; // Caller is responsible for ensuring no duplicates are registered. @@ -89,7 +89,7 @@ public: * Resets expireAfterSeconds flag on TTL index. * For idempotency, this has no effect if index is not found. */ - void unsetTTLIndexExpireAfterSecondsNaN(UUID uuid, const IndexName& indexName); + void unsetTTLIndexExpireAfterSecondsInvalid(UUID uuid, const IndexName& indexName); using InfoMap = stdx::unordered_map<UUID, std::vector<Info>, UUID::Hash>; InfoMap getTTLInfos(); diff --git a/src/mongo/db/ttl_collection_cache_test.cpp b/src/mongo/db/ttl_collection_cache_test.cpp index 747d1371887..d252faa6b7b 100644 --- a/src/mongo/db/ttl_collection_cache_test.cpp +++ b/src/mongo/db/ttl_collection_cache_test.cpp @@ -39,9 +39,11 @@ TEST(TTLCollectionCacheTest, Basic) { auto uuidCollA = UUID::gen(); auto uuidCollB = UUID::gen(); - auto infoIndexA1 = TTLCollectionCache::Info{"collA_ttl_1", /*isExpireAfterSecondsNaN=*/false}; + auto infoIndexA1 = + TTLCollectionCache::Info{"collA_ttl_1", /*isExpireAfterSecondsInvalid=*/false}; auto infoClusteredA = TTLCollectionCache::Info{TTLCollectionCache::ClusteredId{}}; - auto infoIndexB1 = TTLCollectionCache::Info("collB_ttl_1", /*isExpireAfterSecondsNaN=*/true); + auto infoIndexB1 = + TTLCollectionCache::Info("collB_ttl_1", /*isExpireAfterSecondsInvalid=*/true); // Confirm registerTTLInfo() behavior using getTTLInfo(). cache.registerTTLInfo(uuidCollA, infoIndexA1); @@ -54,7 +56,7 @@ TEST(TTLCollectionCacheTest, Basic) { ASSERT_EQ(infos[uuidCollA].size(), 2U); ASSERT_FALSE(infos[uuidCollA][0].isClustered()); ASSERT_EQ(infos[uuidCollA][0].getIndexName(), "collA_ttl_1"); - ASSERT_FALSE(infos[uuidCollA][0].isExpireAfterSecondsNaN()); + ASSERT_FALSE(infos[uuidCollA][0].isExpireAfterSecondsInvalid()); ASSERT(infos[uuidCollA][1].isClustered()); ASSERT_EQ(infos.count(uuidCollB), 1U); @@ -62,16 +64,16 @@ TEST(TTLCollectionCacheTest, Basic) { ASSERT_FALSE(infos[uuidCollB][0].isClustered()); ASSERT_EQ(infos[uuidCollB][0].getIndexName(), "collB_ttl_1"); - ASSERT(infos[uuidCollB][0].isExpireAfterSecondsNaN()); + ASSERT(infos[uuidCollB][0].isExpireAfterSecondsInvalid()); - // Check that we can reset '_isExpireAfterSecondsNaN()' on the TTL index. - cache.unsetTTLIndexExpireAfterSecondsNaN(uuidCollB, infoIndexB1.getIndexName()); + // Check that we can reset '_isExpireAfterSecondsInvalid()' on the TTL index. + cache.unsetTTLIndexExpireAfterSecondsInvalid(uuidCollB, infoIndexB1.getIndexName()); infos = cache.getTTLInfos(); ASSERT_EQ(infos.size(), 2U); ASSERT_EQ(infos.count(uuidCollB), 1U); ASSERT_EQ(infos[uuidCollB].size(), 1U); ASSERT_EQ(infos[uuidCollB][0].getIndexName(), "collB_ttl_1"); - ASSERT_FALSE(infos[uuidCollB][0].isExpireAfterSecondsNaN()); + ASSERT_FALSE(infos[uuidCollB][0].isExpireAfterSecondsInvalid()); // Check deregisterTTLInfo(). TTLCollectionCache should clean up // UUIDs that no longer have any TTL infos registered. |