diff options
author | Faustoleyva54 <fausto.leyva@mongodb.com> | 2022-06-10 15:56:36 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-06-10 16:58:59 +0000 |
commit | 7ad95de6d0d0ff06f5161f7d5e3243b0cdd5e9cc (patch) | |
tree | 2976e75cc7ad9bb036d119c7c6f52435ff0a603b | |
parent | d94f6725598eac9b617bd5b1d383254f174b089a (diff) | |
download | mongo-7ad95de6d0d0ff06f5161f7d5e3243b0cdd5e9cc.tar.gz |
SERVER-66692 Allow creating TTL index over timeField with partialFilterExpression defined over metaField
-rw-r--r-- | jstests/core/timeseries/timeseries_index_ttl_partial.js | 144 | ||||
-rw-r--r-- | jstests/libs/parallelTester.js | 4 | ||||
-rw-r--r-- | jstests/noPassthrough/timeseries_expires_with_partial_index.js | 119 | ||||
-rw-r--r-- | src/mongo/db/exec/bucket_unpacker.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/exec/bucket_unpacker.h | 5 | ||||
-rw-r--r-- | src/mongo/db/timeseries/timeseries_commands_conversion_helper.cpp | 62 |
6 files changed, 326 insertions, 12 deletions
diff --git a/jstests/core/timeseries/timeseries_index_ttl_partial.js b/jstests/core/timeseries/timeseries_index_ttl_partial.js new file mode 100644 index 00000000000..b8364919b33 --- /dev/null +++ b/jstests/core/timeseries/timeseries_index_ttl_partial.js @@ -0,0 +1,144 @@ +/** + * Tests the creation of partial, TTL indexes on a time-series collection. + * + * @tags: [ + * does_not_support_stepdowns, + * does_not_support_transactions, + * featureFlagTimeseriesScalabilityImprovements, + * ] + */ +(function() { +"use strict"; + +load("jstests/core/timeseries/libs/timeseries.js"); + +const collName = "timeseries_index_ttl_partial"; +const indexName = "partialTTLIndex"; +const coll = db.getCollection(collName); +const bucketsColl = db.getCollection("system.buckets." + collName); + +const timeFieldName = "tm"; +const metaFieldName = "mm"; +const timeSpec = { + [timeFieldName]: 1 +}; +const metaSpec = { + [metaFieldName]: 1 +}; + +const expireAfterSeconds = NumberLong(400); + +const resetTsColl = function(extraOptions = {}) { + coll.drop(); + + let options = {timeseries: {timeField: timeFieldName, metaField: metaFieldName}}; + assert.commandWorked(db.createCollection(coll.getName(), Object.merge(options, extraOptions))); +}; + +(function invalidTTLIndexes() { + resetTsColl(); + + let options = {name: indexName, expireAfterSeconds: 3600}; + // TTL indexes on the time field are only allowed in conjunction with partialFilterExpressions + // on the metafield. + assert.commandFailedWithCode(coll.createIndex(timeSpec, options), ErrorCodes.InvalidOptions); + + // TTL indexes on the metafield are not allowed. 
+ assert.commandFailedWithCode(coll.createIndex(metaSpec, options), ErrorCodes.InvalidOptions); +}()); + +(function partialTTLIndexesShouldSucceed() { + resetTsColl(); + const options = { + name: indexName, + partialFilterExpression: {[metaFieldName]: {$gt: 5}}, + expireAfterSeconds: expireAfterSeconds + }; + + // Creating a TTL index on time, with a partial filter expression on the metaField should + // succeed. + assert.commandWorked(coll.createIndex( + timeSpec, Object.merge(options, {expireAfterSeconds: expireAfterSeconds}))); + let indexes = coll.getIndexes().filter(ix => ix.name === indexName); + assert.eq(1, indexes.length, tojson(indexes)); + + let partialTTLIndex = indexes[0]; + assert.eq(indexName, partialTTLIndex.name, tojson(partialTTLIndex)); + assert.eq(timeSpec, partialTTLIndex.key, tojson(partialTTLIndex)); + assert.eq(expireAfterSeconds, partialTTLIndex.expireAfterSeconds, tojson(partialTTLIndex)); + + resetTsColl({expireAfterSeconds: 3600}); + + // Creating an index on time (on a time-series collection created with the expireAfterSeconds + // parameter) with a partial filter expression on the metaField should succeed. 
+ assert.commandWorked(coll.createIndex(timeSpec, options)); + indexes = coll.getIndexes().filter(ix => ix.name === indexName); + assert.eq(1, indexes.length, tojson(indexes)); + + partialTTLIndex = indexes[0]; + assert.eq(indexName, partialTTLIndex.name, tojson(partialTTLIndex)); + assert.eq(timeSpec, partialTTLIndex.key, tojson(partialTTLIndex)); + assert.eq(expireAfterSeconds, partialTTLIndex.expireAfterSeconds, tojson(partialTTLIndex)); +}()); + +(function invalidPartialTTLIndexesShouldFail() { + resetTsColl(); + + const currentData = ISODate(); + const filterOnData = { + name: indexName, + partialFilterExpression: {"data": {$gt: 5}}, + expireAfterSeconds: expireAfterSeconds + }; + const filterOnMeta = { + name: indexName, + partialFilterExpression: {[metaFieldName]: {$gt: 5}}, + expireAfterSeconds: expireAfterSeconds + }; + const filterOnMetaAndData = { + name: indexName, + partialFilterExpression: {[metaFieldName]: {$gt: 5}, "data": {$gt: 5}}, + expireAfterSeconds: expireAfterSeconds + }; + const filterOnTime = { + name: indexName, + partialFilterExpression: {[timeFieldName]: {$gt: currentData}}, + expireAfterSeconds: expireAfterSeconds + }; + const dataSpec = {"data": 1}; + + // These cases have valid index specs on the time field but invalid partialFilterExpressions. + { + // A TTL index on time requires the partialFilterExpression to be on the metadata field. + assert.commandFailedWithCode(coll.createIndex(timeSpec, filterOnData), + ErrorCodes.InvalidOptions); + + // A TTL index on time requires a partialFilterExpression on the metadata field only, no + // compound expressions. + assert.commandFailedWithCode(coll.createIndex(timeSpec, filterOnMetaAndData), + ErrorCodes.InvalidOptions); + + // A partialFilterExpression on the timeField is not allowed. 
+ assert.commandFailedWithCode(coll.createIndex(timeSpec, filterOnTime), + ErrorCodes.InvalidOptions); + } + + const timeAndMetaSpec = Object.merge(timeSpec, metaSpec); + const timeAndDataSpec = Object.merge(timeSpec, dataSpec); + // These cases have valid partialFilterExpressions but invalid index specs. + { + // TTL indexes are only allowed on the time field. + assert.commandFailedWithCode(coll.createIndex(metaSpec, filterOnMeta), + ErrorCodes.InvalidOptions); + assert.commandFailedWithCode(coll.createIndex(dataSpec, filterOnMeta), + ErrorCodes.InvalidOptions); + + // TTL indexes are not allowed on compound indexes (even if a time field exists in the + // spec). + assert.commandFailedWithCode(coll.createIndex(timeAndMetaSpec, filterOnMeta), + ErrorCodes.CannotCreateIndex); + assert.commandFailedWithCode(coll.createIndex(timeAndDataSpec, filterOnMeta), + ErrorCodes.CannotCreateIndex); + } +}()); +})(); diff --git a/jstests/libs/parallelTester.js b/jstests/libs/parallelTester.js index 8974cf569dd..e6bf0a29498 100644 --- a/jstests/libs/parallelTester.js +++ b/jstests/libs/parallelTester.js @@ -244,6 +244,10 @@ if (typeof _threadInject != "undefined") { "collmod_convert_to_unique_apply_ops.js", "collmod_convert_to_unique_violations.js", "collmod_convert_to_unique_violations_size_limit.js", + + // TODO (SERVER-63228): Remove this exclusion once the feature flag is enabled by + // default. + "timeseries/timeseries_index_ttl_partial.js", ]); // Get files, including files in subdirectories. 
diff --git a/jstests/noPassthrough/timeseries_expires_with_partial_index.js b/jstests/noPassthrough/timeseries_expires_with_partial_index.js new file mode 100644 index 00000000000..c5bd39b2387 --- /dev/null +++ b/jstests/noPassthrough/timeseries_expires_with_partial_index.js @@ -0,0 +1,119 @@ +/** + * Tests that a time-series collection created with the 'expireAfterSeconds' option will remove + * buckets older than 'expireAfterSeconds' based on the bucket creation time while also regarding + * the partial filter on the metafield. + * + * @tags: [ + * does_not_support_stepdowns, + * does_not_support_transactions, + * featureFlagTimeseriesScalabilityImprovements + * ] + */ +(function() { +"use strict"; + +load('jstests/libs/fixture_helpers.js'); // For 'FixtureHelpers' +load("jstests/libs/clustered_collections/clustered_collection_util.js"); +load("jstests/core/timeseries/libs/timeseries.js"); + +const conn = MongoRunner.runMongod({setParameter: 'ttlMonitorSleepSecs=1'}); +const testDB = conn.getDB(jsTestName()); +assert.commandWorked(testDB.dropDatabase()); + +TimeseriesTest.run((insert) => { + const coll = testDB.timeseries_expires_with_partial_index; + const bucketsColl = testDB.getCollection('system.buckets.' 
+ coll.getName()); + + const timeFieldName = 'tm'; + const metaFieldName = "mm"; + const indexName = "partialTTLIndex"; + const timeSpec = {[timeFieldName]: 1}; + const expireAfterSecond = NumberLong(1); + const expireAfterSeconds = NumberLong(10000); + + const startDate = new Date(); + const expiredDate = new Date(startDate - ((expireAfterSeconds / 2) * 1000)); + const collectionTTLExpiredDate = new Date(startDate - ((expireAfterSeconds * 2) * 1000)); + const futureDate = new Date(startDate.getTime() + (10000 * 10)); + + assert.lt(expiredDate, startDate); + assert.gt(futureDate, startDate); + + const expiredDoc = {_id: 0, [timeFieldName]: expiredDate, [metaFieldName]: 8, x: 0}; + const expiredDocLowMeta = {_id: 1, [timeFieldName]: expiredDate, [metaFieldName]: 0, x: 1}; + const collectionTTLExpiredDocLowMeta = + {_id: 2, [timeFieldName]: collectionTTLExpiredDate, [metaFieldName]: 0, x: 2}; + const futureDoc = {_id: 3, [timeFieldName]: futureDate, [metaFieldName]: 10, x: 3}; + + const partialIndexOptions = { + name: indexName, + partialFilterExpression: {[metaFieldName]: {$gt: 5}}, + expireAfterSeconds: expireAfterSecond + }; + + const checkInsertion = function(coll, doc, expectDeletion) { + jsTestLog("Inserting doc into collection."); + const prevCount = bucketsColl.find().itcount(); + assert.commandWorked(insert(coll, doc), 'failed to insert doc: ' + tojson(doc)); + + // Wait for the TTL monitor to process the indexes. + jsTestLog("Waiting for TTL monitor to process..."); + ClusteredCollectionUtil.waitForTTL(testDB); + + // Check the number of bucket documents. + const expectedCount = (expectDeletion) ? 
prevCount : prevCount + 1; + const bucketDocs = bucketsColl.find().sort({'control.min._id': 1}).toArray(); + + assert.eq(expectedCount, bucketDocs.length, bucketDocs); + jsTestLog("Doc deleted: " + expectDeletion + "."); + }; + + const testTTLIndex = function(coll) { + // Inserts a measurement with a time in the past to ensure the measurement will be removed + // immediately. + checkInsertion(coll, expiredDoc, true); + + // Inserts a measurement that does not meet the partialFilterExpression to ensure it will + // not be removed (even though it is 'expired'). + checkInsertion(coll, expiredDocLowMeta, false); + + // Inserts a measurement with a time in the future to ensure the measurement is not removed. + checkInsertion(coll, futureDoc, false); + }; + + { + coll.drop(); + assert.commandWorked(testDB.createCollection( + coll.getName(), {timeseries: {timeField: timeFieldName, metaField: metaFieldName}})); + assert.contains(bucketsColl.getName(), testDB.getCollectionNames()); + + // Create a TTL index on time, with a partial filter expression on the metaField. + assert.commandWorked(coll.createIndex(timeSpec, partialIndexOptions)); + } + + // Test the TTL Deleter on a time-series collection with a TTL index and partialFilter. + testTTLIndex(coll); + + { + coll.drop(); + assert.commandWorked(testDB.createCollection(coll.getName(), { + timeseries: {timeField: timeFieldName, metaField: metaFieldName}, + expireAfterSeconds: expireAfterSeconds + })); + assert.contains(bucketsColl.getName(), testDB.getCollectionNames()); + + // Create a secondary TTL index on time, with a partial filter expression on the metaField. + assert.commandWorked(coll.createIndex(timeSpec, partialIndexOptions)); + } + + // Test the TTL Deleter on a time-series collection with a TTL index and partialFilter and a + // pre-existing TTL index. 
+ testTTLIndex(coll); + + // As a sanity check, verify that the TTL deleter deletes a document that does not match the + // partial filter but is expired with respect to the collection TTL index. + checkInsertion(coll, collectionTTLExpiredDocLowMeta, true); +}); + +MongoRunner.stopMongod(conn); +})(); diff --git a/src/mongo/db/exec/bucket_unpacker.cpp b/src/mongo/db/exec/bucket_unpacker.cpp index 0651aae78ee..6d96dd51676 100644 --- a/src/mongo/db/exec/bucket_unpacker.cpp +++ b/src/mongo/db/exec/bucket_unpacker.cpp @@ -606,7 +606,7 @@ std::unique_ptr<MatchExpression> BucketSpec::createPredicatesOnBucketLevelField( return handleIneligible(policy, matchExpr, "can't handle this predicate"); } -BSONObj BucketSpec::pushdownPredicate( +std::pair<bool, BSONObj> BucketSpec::pushdownPredicate( const boost::intrusive_ptr<ExpressionContext>& expCtx, const TimeseriesOptions& tsOptions, ExpressionContext::CollationMatchesDefault collationMatchesDefault, @@ -666,7 +666,7 @@ BSONObj BucketSpec::pushdownPredicate( metaOnlyPredicate->serialize(&result); if (bucketMetricPredicate) bucketMetricPredicate->serialize(&result); - return result.obj(); + return std::make_pair(bucketMetricPredicate.get(), result.obj()); } class BucketUnpacker::UnpackingImpl { diff --git a/src/mongo/db/exec/bucket_unpacker.h b/src/mongo/db/exec/bucket_unpacker.h index 287bd9f2540..7e32629407d 100644 --- a/src/mongo/db/exec/bucket_unpacker.h +++ b/src/mongo/db/exec/bucket_unpacker.h @@ -167,8 +167,11 @@ public: * * When using IneligiblePredicatePolicy::kIgnore, if the predicate can't be pushed down, it * returns null. When using IneligiblePredicatePolicy::kError it raises a user error. + * + * Returns a pair: a boolean that is true when the result contains a metric predicate, and the + * bucket-level predicate itself. 
*/ - static BSONObj pushdownPredicate( + static std::pair<bool, BSONObj> pushdownPredicate( const boost::intrusive_ptr<ExpressionContext>& expCtx, const TimeseriesOptions& tsOptions, ExpressionContext::CollationMatchesDefault collationMatchesDefault, diff --git a/src/mongo/db/timeseries/timeseries_commands_conversion_helper.cpp b/src/mongo/db/timeseries/timeseries_commands_conversion_helper.cpp index 7b8899a101a..6481aa7bbf9 100644 --- a/src/mongo/db/timeseries/timeseries_commands_conversion_helper.cpp +++ b/src/mongo/db/timeseries/timeseries_commands_conversion_helper.cpp @@ -50,6 +50,28 @@ namespace { NamespaceString makeTimeseriesBucketsNamespace(const NamespaceString& nss) { return nss.isTimeseriesBucketsCollection() ? nss : nss.makeTimeseriesBucketsNamespace(); } + +/** + * Converts the key field on time to 'control.min.$timeField' field. Depends on error checking from + * 'createBucketsIndexSpecFromTimeseriesIndexSpec()', which should be called before this function. + */ +BSONObj convertToTTLTimeField(const BSONObj& origKeyField, StringData timeField) { + BSONObjBuilder keyBuilder; + uassert(ErrorCodes::CannotCreateIndex, + str::stream() << "TTL indexes are single-field indexes, compound indexes do " + "not support TTL. 
Index spec: " + << origKeyField, + origKeyField.nFields() == 1); + + const auto& firstElem = origKeyField.firstElement(); + uassert(ErrorCodes::InvalidOptions, + "TTL indexes on non-time fields are not supported on time-series collections", + firstElem.fieldName() == timeField); + + keyBuilder.appendAs(firstElem, + str::stream() << timeseries::kControlMinFieldNamePrefix << timeField); + return keyBuilder.obj(); +} } // namespace @@ -83,7 +105,12 @@ CreateIndexesCommand makeTimeseriesCreateIndexesCommand(OperationContext* opCtx, std::vector<mongo::BSONObj> indexes; for (const auto& origIndex : origIndexes) { BSONObjBuilder builder; + BSONObj keyField; + BSONObj originalKeyField; + bool isTTLIndex = false; + bool hasPartialFilterOnMetaField = false; bool includeOriginalSpec = false; + for (const auto& elem : origIndex) { if (elem.fieldNameStringData() == IndexDescriptor::kPartialFilterExprFieldName) { if (feature_flags::gTimeseriesMetricIndexes.isEnabled( @@ -135,7 +162,7 @@ CreateIndexesCommand makeTimeseriesCreateIndexesCommand(OperationContext* opCtx, // planner, this will be true. 
bool assumeNoMixedSchemaData = true; - BSONObj bucketPred = + auto [hasMetricPred, bucketPred] = BucketSpec::pushdownPredicate(expCtx, options, collationMatchesDefault, @@ -144,6 +171,9 @@ CreateIndexesCommand makeTimeseriesCreateIndexesCommand(OperationContext* opCtx, includeMetaField, assumeNoMixedSchemaData, BucketSpec::IneligiblePredicatePolicy::kError); + + hasPartialFilterOnMetaField = !hasMetricPred; + builder.append(IndexDescriptor::kPartialFilterExprFieldName, bucketPred); continue; } @@ -171,11 +201,11 @@ CreateIndexesCommand makeTimeseriesCreateIndexesCommand(OperationContext* opCtx, } if (elem.fieldNameStringData() == IndexDescriptor::kExpireAfterSecondsFieldName) { - uasserted(ErrorCodes::InvalidOptions, - "TTL indexes are not supported on time-series collections"); + isTTLIndex = true; + builder.append(elem); + continue; } - if (elem.fieldNameStringData() == IndexDescriptor::kUniqueFieldName) { uassert(ErrorCodes::InvalidOptions, "Unique indexes are not supported on time-series collections", @@ -183,13 +213,16 @@ CreateIndexesCommand makeTimeseriesCreateIndexesCommand(OperationContext* opCtx, } if (elem.fieldNameStringData() == NewIndexSpec::kKeyFieldName) { - auto pluginName = IndexNames::findPluginName(elem.Obj()); + originalKeyField = elem.Obj(); + + auto pluginName = IndexNames::findPluginName(originalKeyField); uassert(ErrorCodes::InvalidOptions, "Text indexes are not supported on time-series collections", pluginName != IndexNames::TEXT); auto bucketsIndexSpecWithStatus = - timeseries::createBucketsIndexSpecFromTimeseriesIndexSpec(options, elem.Obj()); + timeseries::createBucketsIndexSpecFromTimeseriesIndexSpec(options, + originalKeyField); uassert(ErrorCodes::CannotCreateIndex, str::stream() << bucketsIndexSpecWithStatus.getStatus().toString() << " Command request: " << redact(origCmd.toBSON({})), @@ -201,9 +234,7 @@ CreateIndexesCommand makeTimeseriesCreateIndexesCommand(OperationContext* opCtx, << bucketsIndexSpecWithStatus.getValue()))) { 
includeOriginalSpec = true; } - - builder.append(NewIndexSpec::kKeyFieldName, - std::move(bucketsIndexSpecWithStatus.getValue())); + keyField = std::move(bucketsIndexSpecWithStatus.getValue()); continue; } @@ -212,6 +243,19 @@ CreateIndexesCommand makeTimeseriesCreateIndexesCommand(OperationContext* opCtx, builder.append(elem); } + if (isTTLIndex) { + uassert(ErrorCodes::InvalidOptions, + "TTL indexes are not supported on time-series collections", + feature_flags::gTimeseriesScalabilityImprovements.isEnabled( + serverGlobalParams.featureCompatibility)); + uassert(ErrorCodes::InvalidOptions, + "TTL indexes on time-series collections require a partialFilterExpression on " + "the metaField", + hasPartialFilterOnMetaField); + keyField = convertToTTLTimeField(originalKeyField, options.getTimeField()); + } + builder.append(NewIndexSpec::kKeyFieldName, std::move(keyField)); + if (feature_flags::gTimeseriesMetricIndexes.isEnabled( serverGlobalParams.featureCompatibility) && includeOriginalSpec) { |