author    | David Percy <david.percy@mongodb.com> | 2021-08-23 19:53:37 +0000
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-01-21 04:24:53 +0000
commit    | 610898a1ccec0afc3d38f7c29f2553d5b6102d30 (patch)
tree      | 6f480562ae4bcee8ff3c4daf07eadba5a56ef9e0
parent    | e446bef952d2ef42ae799e9b88c877b3fe0be6e4 (diff)
download  | mongo-610898a1ccec0afc3d38f7c29f2553d5b6102d30.tar.gz
SERVER-59163 Allow creating partial indexes on time-series collections
27 files changed, 1738 insertions, 210 deletions
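In brief, this change lets `partialFilterExpression` reference time, metadata, and measurement fields on a time-series collection, translating the event-level filter into a bucket-level filter on the underlying `system.buckets` collection. A minimal mongo-shell sketch of the feature, distilled from the new tests below (collection name is illustrative, and `featureFlagTimeseriesMetricIndexes` must be enabled):

```js
const coll = db.ts_example;  // hypothetical collection name
coll.drop();
assert.commandWorked(db.createCollection(
    coll.getName(), {timeseries: {timeField: 'time', metaField: 'm'}}));

// Index measurement 'a', but only for events where b < 12. Internally this becomes
// a partial index on the buckets collection, filtered on 'control.min.b'.
assert.commandWorked(coll.createIndex({a: 1}, {partialFilterExpression: {b: {$lt: 12}}}));

// A query whose predicate is a subset of the filter is eligible to use the index;
// in the unsharded case the new test asserts an IXSCAN on a_1 for this shape.
coll.find({a: {$lt: 999}, b: {$lt: 11}}).explain();
```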
diff --git a/jstests/core/timeseries/timeseries_index_partial.js b/jstests/core/timeseries/timeseries_index_partial.js new file mode 100644 index 00000000000..3a637282253 --- /dev/null +++ b/jstests/core/timeseries/timeseries_index_partial.js @@ -0,0 +1,565 @@ +/** + * Test creating and using partial indexes, on a time-series collection. + * + * @tags: [ + * does_not_support_stepdowns, + * does_not_support_transactions, + * requires_fcv_52, + * ] + */ +(function() { +"use strict"; + +load("jstests/core/timeseries/libs/timeseries.js"); +load("jstests/libs/analyze_plan.js"); + +if (!TimeseriesTest.timeseriesMetricIndexesEnabled(db.getMongo())) { + jsTestLog( + "Skipped test as the featureFlagTimeseriesMetricIndexes feature flag is not enabled."); + return; +} + +const coll = db.timeseries_index_partial; +const timeField = 'time'; +const metaField = 'm'; + +coll.drop(); +assert.commandWorked(db.createCollection(coll.getName(), {timeseries: {timeField, metaField}})); +const buckets = db.getCollection('system.buckets.' + coll.getName()); +let extraIndexes = []; +let extraBucketIndexes = []; +if (FixtureHelpers.isSharded(buckets)) { + // If the collection is sharded, expect an implicitly-created index on time. + // It will appear differently in listIndexes depending on whether you look at the time-series + // collection or the buckets collection. + extraIndexes.push({ + "v": 2, + "key": {"time": 1}, + "name": "control.min.time_1", + }); + extraBucketIndexes.push({ + "v": 2, + "key": {"control.min.time": 1}, + "name": "control.min.time_1", + }); +} +assert.sameMembers(coll.getIndexes(), extraIndexes); +assert.sameMembers(buckets.getIndexes(), extraBucketIndexes); + +assert.commandWorked(coll.insert([ + // In bucket A, some but not all documents match the partial filter. + {[timeField]: ISODate('2000-01-01T00:00:00Z'), [metaField]: {bucket: 'A'}, a: 0, b: 20}, + {[timeField]: ISODate('2000-01-01T00:00:01Z'), [metaField]: {bucket: 'A'}, a: 1, b: 16}, + {[timeField]: ISODate('2000-01-01T00:00:02Z'), [metaField]: {bucket: 'A'}, a: 2, b: 12}, + {[timeField]: ISODate('2000-01-01T00:00:03Z'), [metaField]: {bucket: 'A'}, a: 3, b: 8}, + {[timeField]: ISODate('2000-01-01T00:00:04Z'), [metaField]: {bucket: 'A'}, a: 4, b: 4}, + + // In bucket B, no documents match the partial filter. + {[timeField]: ISODate('2000-01-01T00:00:00Z'), [metaField]: {bucket: 'B'}, a: 5, b: 99}, + {[timeField]: ISODate('2000-01-01T00:00:01Z'), [metaField]: {bucket: 'B'}, a: 6, b: 99}, + {[timeField]: ISODate('2000-01-01T00:00:02Z'), [metaField]: {bucket: 'B'}, a: 7, b: 99}, + {[timeField]: ISODate('2000-01-01T00:00:03Z'), [metaField]: {bucket: 'B'}, a: 8, b: 99}, + {[timeField]: ISODate('2000-01-01T00:00:04Z'), [metaField]: {bucket: 'B'}, a: 9, b: 99}, + + // In bucket C, every document matches the partial filter. + {[timeField]: ISODate('2000-01-01T00:00:00Z'), [metaField]: {bucket: 'C'}, a: 10, b: 0}, + {[timeField]: ISODate('2000-01-01T00:00:01Z'), [metaField]: {bucket: 'C'}, a: 11, b: 0}, + {[timeField]: ISODate('2000-01-01T00:00:02Z'), [metaField]: {bucket: 'C'}, a: 12, b: 0}, + {[timeField]: ISODate('2000-01-01T00:00:03Z'), [metaField]: {bucket: 'C'}, a: 13, b: 0}, + {[timeField]: ISODate('2000-01-01T00:00:04Z'), [metaField]: {bucket: 'C'}, a: 14, b: 0}, +])); +assert.eq(15, coll.count()); +assert.eq(3, buckets.count()); + +// Expected partialFilterExpression to be an object. +assert.commandFailedWithCode(coll.createIndex({a: 1}, {partialFilterExpression: 123}), [10065]); + +// Test creating and using a partial index. 
+{ + // Make sure the query uses the {a: 1} index. + function checkPlan(predicate) { + const explain = coll.find(predicate).explain(); + const scan = getAggPlanStage(explain, 'IXSCAN'); + const indexes = buckets.getIndexes(); + assert(scan, + "Expected an index scan for predicate: " + tojson(predicate) + + " but got: " + tojson(explain) + "\nAvailable indexes were: " + tojson(indexes)); + assert.eq(scan.indexName, "a_1", scan); + } + // Make sure the query results match a collection-scan plan. + function checkResults(predicate) { + const result = coll.aggregate({$match: predicate}).toArray(); + const unindexed = + coll.aggregate([{$_internalInhibitOptimization: {}}, {$match: predicate}]).toArray(); + assert.docEq(result, unindexed); + } + function checkPlanAndResults(predicate) { + checkPlan(predicate); + checkResults(predicate); + } + const check = checkPlanAndResults; + + // Test some predicates on a metric field. + { + assert.commandWorked(coll.createIndex({a: 1}, {partialFilterExpression: {b: {$lt: 12}}})); + // Query predicate mentions partialFilterExpression exactly. + // The extra predicate on 'a' is necessary for the multiplanner to think an {a: 1} index is + // relevant. + check({a: {$lt: 999}, b: {$lt: 12}}); + // Query predicate is a subset of partialFilterExpression. + check({a: {$lt: 999}, b: {$lt: 11}}); + + assert.commandWorked(coll.dropIndex({a: 1})); + assert.commandWorked(coll.createIndex({a: 1}, {partialFilterExpression: {b: {$lte: 12}}})); + check({a: {$lt: 999}, b: {$lte: 12}}); + check({a: {$lt: 999}, b: {$lte: 11}}); + + assert.commandWorked(coll.dropIndex({a: 1})); + assert.commandWorked(coll.createIndex({a: 1}, {partialFilterExpression: {b: {$gt: 12}}})); + check({a: {$lt: 999}, b: {$gt: 12}}); + check({a: {$lt: 999}, b: {$gt: 13}}); + + assert.commandWorked(coll.dropIndex({a: 1})); + assert.commandWorked(coll.createIndex({a: 1}, {partialFilterExpression: {b: {$gte: 12}}})); + check({a: {$lt: 999}, b: {$gte: 12}}); + check({a: {$lt: 999}, b: {$gte: 13}}); + } + + // Test some predicates on the time field. + { + const t0 = ISODate('2000-01-01T00:00:00Z'); + const t1 = ISODate('2000-01-01T00:00:01Z'); + const t2 = ISODate('2000-01-01T00:00:02Z'); + + // When the collection is sharded, there is an index on time that can win, instead of the + // partial index. So only check the results in that case, not the plan. + const check = FixtureHelpers.isSharded(buckets) ? 
checkResults : checkPlanAndResults; + + assert.commandWorked(coll.dropIndex({a: 1})); + assert.commandWorked( + coll.createIndex({a: 1}, {partialFilterExpression: {[timeField]: {$lt: t1}}})); + check({a: {$lt: 999}, [timeField]: {$lt: t1}}); + check({a: {$lt: 999}, [timeField]: {$lt: t0}}); + + assert.commandWorked(coll.dropIndex({a: 1})); + assert.commandWorked( + coll.createIndex({a: 1}, {partialFilterExpression: {[timeField]: {$lte: t1}}})); + check({a: {$lt: 999}, [timeField]: {$lte: t1}}); + check({a: {$lt: 999}, [timeField]: {$lte: t0}}); + + assert.commandWorked(coll.dropIndex({a: 1})); + assert.commandWorked( + coll.createIndex({a: 1}, {partialFilterExpression: {[timeField]: {$gt: t1}}})); + check({a: {$lt: 999}, [timeField]: {$gt: t1}}); + check({a: {$lt: 999}, [timeField]: {$gt: t2}}); + + assert.commandWorked(coll.dropIndex({a: 1})); + assert.commandWorked( + coll.createIndex({a: 1}, {partialFilterExpression: {[timeField]: {$gte: t1}}})); + check({a: {$lt: 999}, [timeField]: {$gte: t1}}); + check({a: {$lt: 999}, [timeField]: {$gte: t2}}); + } + + assert.commandWorked(coll.dropIndex({a: 1})); + assert.sameMembers(coll.getIndexes(), extraIndexes); + assert.sameMembers(buckets.getIndexes(), extraBucketIndexes); +} + +// Check that partialFilterExpression can use a mixture of metadata, time, and measurement fields, +// and that this is translated to the bucket-level predicate we expect. +assert.commandWorked(coll.createIndex({a: 1}, { + partialFilterExpression: { + $and: [ + {time: {$gt: ISODate("2000-01-01")}}, + {[metaField + '.bucket']: {$gte: "B"}}, + {b: {$gte: 0}}, + ] + } +})); +assert.docEq(buckets.getIndexes(), extraBucketIndexes.concat([ + { + "v": 2, + "key": {"control.min.a": 1, "control.max.a": 1}, + "name": "a_1", + + "partialFilterExpression": { + // Meta predicates are pushed down verbatim. + ['meta.bucket']: {"$gte": "B"}, + $and: [ + { + $and: [ + // $gt on time creates a bound on the max time. + {"control.max.time": {"$_internalExprGt": ISODate("2000-01-01T00:00:00Z")}}, + // We also have a bound on the min time, derived from bucketMaxSpanSeconds. + {"control.min.time": {"$_internalExprGt": ISODate("1999-12-31T23:00:00Z")}}, + // The min time is also encoded in the _id, so we have a bound on that as + // well. + {"_id": {"$gt": ObjectId("386d3570ffffffffffffffff")}}, + ] + }, + // $gt on a non-time field can only bound the control.max for that field. + {"control.max.b": {"$_internalExprGte": 0}}, + ] + }, + "originalSpec": { + key: {a: 1}, + name: "a_1", + partialFilterExpression: { + $and: [ + {time: {$gt: ISODate("2000-01-01")}}, + {[metaField + '.bucket']: {$gte: "B"}}, + {b: {$gte: 0}}, + ] + }, + v: 2, + } + }, +])); + +// Test how partialFilterExpression interacts with collation. +{ + const numericCollation = {locale: "en_US", numericOrdering: true}; + coll.drop(); + assert.commandWorked(db.createCollection(coll.getName(), { + timeseries: {timeField, metaField}, + collation: numericCollation, + })); + assert.commandWorked(coll.insert([ + {[timeField]: ISODate(), [metaField]: {x: "1000", y: 1}, a: "120"}, + {[timeField]: ISODate(), [metaField]: {x: "1000", y: 2}, a: "3"}, + {[timeField]: ISODate(), [metaField]: {x: "500", y: 3}, a: "120"}, + {[timeField]: ISODate(), [metaField]: {x: "500", y: 4}, a: "3"}, + ])); + + // Queries on the collection use the collection's collation by default. 
+ assert.docEq( + coll.find({}, {_id: 0, [metaField + '.x']: 1}).sort({[metaField + '.x']: 1}).toArray(), [ + {[metaField]: {x: "500"}}, + {[metaField]: {x: "500"}}, + {[metaField]: {x: "1000"}}, + {[metaField]: {x: "1000"}}, + ]); + assert.docEq(coll.find({}, {_id: 0, a: 1}).sort({a: 1}).toArray(), [ + {a: "3"}, + {a: "3"}, + {a: "120"}, + {a: "120"}, + ]); + + // Specifying a collation and partialFilterExpression together fails, even if the collation + // matches the collection's default collation. + assert.commandFailedWithCode(coll.createIndex({a: 1}, { + name: "a_lt_25_simple", + collation: {locale: "simple"}, + partialFilterExpression: {a: {$lt: "25"}} + }), + [5916300]); + assert.commandFailedWithCode( + coll.createIndex({a: 1}, { + name: "a_lt_25_numeric", + collation: numericCollation, + partialFilterExpression: {a: {$lt: "25"}} + }), + // The default collation is also numeric, so this index is equivalent to the previous. + [5916300]); + + assert.commandWorked(coll.createIndex( + {a: 1}, {name: "a_lt_25_default", partialFilterExpression: {a: {$lt: "25"}}})); + + // Verify that the index contains what we expect. + assert.docEq(coll.find({}, {_id: 0, a: 1}).hint("a_lt_25_default").toArray(), + [{a: "3"}, {a: "3"}]); + + // Verify that the index is used when possible. + function checkPlanAndResult({predicate, collation, stageName, indexName, expectedResults}) { + let cur = coll.find(predicate, {_id: 0, a: 1}); + if (collation) { + cur.collation(collation); + } + + const plan = cur.explain(); + const stage = getAggPlanStage(plan, stageName); + assert(stage, "Expected a " + stageName + " stage: " + tojson(plan)); + if (indexName) { + assert.eq(stage.indexName, indexName, stage); + } + + const results = cur.toArray(); + assert.docEq(results, expectedResults); + } + + // a < "25" can use the index, since the collations match. + checkPlanAndResult({ + predicate: {a: {$lt: "25"}}, + collation: null, + stageName: 'IXSCAN', + indexName: 'a_lt_25_default', + expectedResults: [{a: "3"}, {a: "3"}], + }); + checkPlanAndResult({ + predicate: {a: {$lt: "25"}}, + collation: numericCollation, + stageName: 'IXSCAN', + indexName: 'a_lt_25_default', + expectedResults: [{a: "3"}, {a: "3"}], + }); + + // Likewise a < "24" can use the index. + checkPlanAndResult({ + predicate: {a: {$lt: "24"}}, + collation: null, + stageName: 'IXSCAN', + indexName: 'a_lt_25_default', + expectedResults: [{a: "3"}, {a: "3"}], + }); + checkPlanAndResult({ + predicate: {a: {$lt: "24"}}, + collation: numericCollation, + stageName: 'IXSCAN', + indexName: 'a_lt_25_default', + expectedResults: [{a: "3"}, {a: "3"}], + }); + + // a < "30" can't use the index; it's not a subset. + checkPlanAndResult({ + predicate: {a: {$lt: "30"}}, + collation: null, + stageName: 'COLLSCAN', + indexName: null, + expectedResults: [{a: "3"}, {a: "3"}], + }); + checkPlanAndResult({ + predicate: {a: {$lt: "30"}}, + collation: numericCollation, + stageName: 'COLLSCAN', + indexName: null, + expectedResults: [{a: "3"}, {a: "3"}], + }); + + // a < "100" also can't use the index, because according to the numeric collation, "20" < "100". + // Note that if we were using a simple collation we'd get the opposite outcome: "100" < "20", + // because it would compare strings lexicographically instead of numerically. 
+ checkPlanAndResult({ + predicate: {a: {$lt: "100"}}, + collation: null, + stageName: 'COLLSCAN', + indexName: null, + expectedResults: [{a: "3"}, {a: "3"}], + }); + checkPlanAndResult({ + predicate: {a: {$lt: "100"}}, + collation: numericCollation, + stageName: 'COLLSCAN', + indexName: null, + expectedResults: [{a: "3"}, {a: "3"}], + }); +} + +// Test which types of predicates are allowed, and test that the bucket-level +// partialFilterExpression is what we expect. +{ + assert.commandWorked(coll.dropIndex({a: 1})); + assert.eq(coll.getIndexes(), extraIndexes); + function checkPredicateDisallowed(predicate) { + assert.commandFailedWithCode(coll.createIndex({a: 1}, {partialFilterExpression: predicate}), + [5916301]); + } + function checkPredicateOK({input: predicate, output: expectedBucketPredicate}) { + const name = 'example_pushdown_index'; + assert.commandWorked(coll.createIndex({a: 1}, {name, partialFilterExpression: predicate})); + const indexes = buckets.getIndexes().filter(ix => ix.name === name); + assert.eq(1, indexes.length, "Expected 1 index but got " + tojson(indexes)); + const actualBucketPredicate = indexes[0].partialFilterExpression; + assert.eq(actualBucketPredicate, + expectedBucketPredicate, + "Expected the bucket-level predicate to be " + tojson(expectedBucketPredicate) + + " but it was " + tojson(actualBucketPredicate)); + assert.commandWorked(coll.dropIndex(name)); + } + // A trivial, empty predicate is fine. + // It doesn't seem useful but it's allowed on a normal collection. + checkPredicateOK({ + input: {}, + output: {}, + }); + // Comparison with non-scalar. + checkPredicateDisallowed({a: {}}); + checkPredicateDisallowed({a: {b: 3}}); + checkPredicateDisallowed({a: []}); + checkPredicateDisallowed({a: [1]}); + checkPredicateDisallowed({a: [1, 2]}); + // Always-false predicate on time (which is always a Date). + checkPredicateDisallowed({time: 7}); + checkPredicateDisallowed({time: "abc"}); + + // Scalar $eq is equivalent to a conjunction of $lte and $gte. + checkPredicateOK({ + input: {a: 5}, + output: { + $and: [ + {'control.min.a': {$_internalExprLte: 5}}, + {'control.max.a': {$_internalExprGte: 5}}, + ] + } + }); + + // Comparisons with null/missing are not implemented. These would be slightly more complicated + // because {$eq: null} actually means "null, or missing, or undefined", + // while {$_internalExprEq: null} only matches null. + checkPredicateDisallowed({a: null}); + checkPredicateDisallowed({a: {$eq: null}}); + checkPredicateDisallowed({a: {$ne: null}}); + + // Regex queries are not allowed, but {$eq: /.../} is a simple scalar match, not a regex query. + checkPredicateDisallowed({a: /a/}); + checkPredicateOK({ + input: {a: {$eq: /a/}}, + output: { + $and: [ + {'control.min.a': {$_internalExprLte: /a/}}, + {'control.max.a': {$_internalExprGte: /a/}}, + ] + } + }); + + // The min/max for a field is present iff at least one event in the bucket has that field. + // So {$exists: true} queries can be mapped to the min/max for that field. + // This can be used as an alternative to sparse indexes. + checkPredicateOK({ + input: {a: {$exists: true}}, + output: { + $and: [ + {'control.min.a': {$exists: true}}, + {'control.max.a': {$exists: true}}, + ] + } + }); + // However, this means we can't push down {$exists: false}. A bucket where the min/max for a + // field is non-missing may contain a mixture of missing / non-missing, so we can't exclude it + // on the basis of the control fields. 
+ checkPredicateDisallowed({a: {$exists: false}}); + + // $or on metadata, metric, or both. + checkPredicateOK({ + input: {$or: [{[metaField + '.a']: {$lt: 5}}, {[metaField + '.b']: {$lt: 6}}]}, + output: {$or: [{'meta.a': {$lt: 5}}, {'meta.b': {$lt: 6}}]}, + }); + checkPredicateOK({ + input: {$or: [{'a': {$lt: 5}}, {b: {$lt: 6}}]}, + output: { + $or: [ + {'control.min.a': {$_internalExprLt: 5}}, + {'control.min.b': {$_internalExprLt: 6}}, + ] + } + }); + checkPredicateOK({ + input: {$or: [{'a': {$lt: 5}}, {[metaField + '.b']: {$lt: 6}}]}, + output: { + $or: [ + {'control.min.a': {$_internalExprLt: 5}}, + {'meta.b': {$lt: 6}}, + ] + } + }); + + // If any argument of the $or is disallowed, we report the error even when an always-true + // predicate appears in it. + checkPredicateDisallowed({$or: [{}, {a: {}}]}); + + // $in on metadata is fine. + checkPredicateOK({ + input: {[metaField + '.a']: {$in: [1, 2, 5]}}, + output: {'meta.a': {$in: [1, 2, 5]}}, + }); + + // $in on a metric is slightly complicated. $in is equivalent to a disjunction of {a: _}. + // In a typical case this is the same as a disjunction of $eq: + checkPredicateOK({ + input: {a: {$in: [1, 2, 5]}}, + output: { + $or: [ + { + $and: [ + {'control.min.a': {$_internalExprLte: 1}}, + {'control.max.a': {$_internalExprGte: 1}}, + ] + }, + { + $and: [ + {'control.min.a': {$_internalExprLte: 2}}, + {'control.max.a': {$_internalExprGte: 2}}, + ] + }, + { + $and: [ + {'control.min.a': {$_internalExprLte: 5}}, + {'control.max.a': {$_internalExprGte: 5}}, + ] + }, + ] + }, + }); + // Since {a: null} is not implemented, neither is {$in: [null]}. + checkPredicateDisallowed({a: {$in: [null]}}); + // {a: {$in: [/abc/]}} is equivalent to {a: /abc/} which executes the regex, and is not allowed. + checkPredicateDisallowed({a: {$in: [/abc/]}}); + + // Predicates on time are pushed down, and also some extra predicates are inferred. + // These inferred predicates don't necessarily result in fewer buckets being indexed: + // we're just following the same rules for createIndex that we use when optimizing a query. 
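The checks that follow pin down these time translations. As a worked illustration of where the `_id` bounds in them come from (a sketch with a hypothetical helper, not code from this commit): a bucket `_id` is an ObjectId whose leading 4 bytes are a timestamp in seconds, and a bucket's minimum event time can be at most `bucketMaxSpanSeconds` (3600 for the default 'seconds' granularity) before the predicate's bound.

```js
// Hypothetical helper reproducing the ObjectId bounds asserted below.
function minBucketIdForTime(date, maxSpanSeconds) {
    // Seconds since epoch of the earliest possible bucket-min time.
    const seconds = Math.floor(date.getTime() / 1000) - maxSpanSeconds;
    // An ObjectId's first 4 bytes are that timestamp; pad the rest with zeros.
    return ObjectId(seconds.toString(16).padStart(8, '0') + '0000000000000000');
}

// For {time: {$gte: ISODate('2020-01-01T00:00:00Z')}} with a 3600s span:
// 1577836800 - 3600 = 1577833200 = 0x5e0bd2f0
minBucketIdForTime(ISODate('2020-01-01T00:00:00Z'), 3600);
// => ObjectId("5e0bd2f00000000000000000"), matching the $gte case below; the $gt
// case pads with 'ffffffffffffffff' instead, so that exact ties are excluded.
```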
+ checkPredicateOK({ + input: {[timeField]: {$lt: ISODate('2020-01-01T00:00:00Z')}}, + output: { + $and: [ + {"control.min.time": {"$_internalExprLt": ISODate("2020-01-01T00:00:00Z")}}, + {"control.max.time": {"$_internalExprLt": ISODate("2020-01-01T01:00:00Z")}}, + {"_id": {"$lt": ObjectId("5e0be1000000000000000000")}}, + ] + } + }); + checkPredicateOK({ + input: {[timeField]: {$gt: ISODate('2020-01-01T00:00:00Z')}}, + output: { + $and: [ + {"control.max.time": {"$_internalExprGt": ISODate("2020-01-01T00:00:00Z")}}, + {"control.min.time": {"$_internalExprGt": ISODate("2019-12-31T23:00:00Z")}}, + {"_id": {"$gt": ObjectId("5e0bd2f0ffffffffffffffff")}}, + ] + } + }); + checkPredicateOK({ + input: {[timeField]: {$lte: ISODate('2020-01-01T00:00:00Z')}}, + output: { + $and: [ + {"control.min.time": {"$_internalExprLte": ISODate("2020-01-01T00:00:00Z")}}, + {"control.max.time": {"$_internalExprLte": ISODate("2020-01-01T01:00:00Z")}}, + {"_id": {"$lte": ObjectId("5e0be100ffffffffffffffff")}}, + ] + } + }); + checkPredicateOK({ + input: {[timeField]: {$gte: ISODate('2020-01-01T00:00:00Z')}}, + output: { + $and: [ + {"control.max.time": {"$_internalExprGte": ISODate("2020-01-01T00:00:00Z")}}, + {"control.min.time": {"$_internalExprGte": ISODate("2019-12-31T23:00:00Z")}}, + {"_id": {"$gte": ObjectId("5e0bd2f00000000000000000")}}, + ] + } + }); + checkPredicateOK({ + input: {[timeField]: {$eq: ISODate('2020-01-01T00:00:00Z')}}, + output: { + $and: [ + {"control.min.time": {"$_internalExprLte": ISODate("2020-01-01T00:00:00Z")}}, + {"control.min.time": {"$_internalExprGte": ISODate("2019-12-31T23:00:00Z")}}, + {"control.max.time": {"$_internalExprGte": ISODate("2020-01-01T00:00:00Z")}}, + {"control.max.time": {"$_internalExprLte": ISODate("2020-01-01T01:00:00Z")}}, + {"_id": {"$lte": ObjectId("5e0be100ffffffffffffffff")}}, + {"_id": {"$gte": ObjectId("5e0bd2f00000000000000000")}}, + ] + } + }); +} +})(); diff --git a/jstests/core/timeseries/timeseries_index_spec.js b/jstests/core/timeseries/timeseries_index_spec.js index eaf7065d3e9..109be8f856a 100644 --- a/jstests/core/timeseries/timeseries_index_spec.js +++ b/jstests/core/timeseries/timeseries_index_spec.js @@ -77,20 +77,19 @@ TimeseriesTest.run(() => { assert.commandWorked(coll.createIndex({x: 1})); verifyAndDropIndex(/*isDowngradeCompatible=*/false); - assert.commandWorked( - coll.createIndex({x: 1}, {partialFilterExpression: {x: {$type: "number"}}})); + assert.commandWorked(coll.createIndex({x: 1}, {partialFilterExpression: {x: {$gt: 5}}})); verifyAndDropIndex(/*isDowngradeCompatible=*/false); - assert.commandWorked(coll.createIndex({[timeFieldName]: 1}, - {partialFilterExpression: {x: {$type: "number"}}})); + assert.commandWorked( + coll.createIndex({[timeFieldName]: 1}, {partialFilterExpression: {x: {$gt: 5}}})); verifyAndDropIndex(/*isDowngradeCompatible=*/false); - assert.commandWorked(coll.createIndex({[metaFieldName]: 1}, - {partialFilterExpression: {x: {$type: "number"}}})); + assert.commandWorked( + coll.createIndex({[metaFieldName]: 1}, {partialFilterExpression: {x: {$gt: 5}}})); verifyAndDropIndex(/*isDowngradeCompatible=*/false); - assert.commandWorked(coll.createIndex({[metaFieldName]: 1, x: 1}, - {partialFilterExpression: {x: {$type: "number"}}})); + assert.commandWorked( + coll.createIndex({[metaFieldName]: 1, x: 1}, {partialFilterExpression: {x: {$gt: 5}}})); verifyAndDropIndex(/*isDowngradeCompatible=*/false); } diff --git a/jstests/core/timeseries/timeseries_predicates.js b/jstests/core/timeseries/timeseries_predicates.js new 
file mode 100644 index 00000000000..93a1e479b1a --- /dev/null +++ b/jstests/core/timeseries/timeseries_predicates.js @@ -0,0 +1,221 @@ +/** + * Test the input/output behavior of some predicates on time-series collections. + * + * @tags: [ + * does_not_support_stepdowns, + * does_not_support_transactions, + * requires_fcv_52, + * ] + */ +(function() { +"use strict"; + +load("jstests/core/timeseries/libs/timeseries.js"); + +const coll = db.timeseries_predicates_normal; +const tsColl = db.timeseries_predicates_timeseries; +coll.drop(); +tsColl.drop(); +assert.commandWorked( + db.createCollection(tsColl.getName(), {timeseries: {timeField: 'time', metaField: 'meta'}})); +const bucketsColl = db.getCollection('system.buckets.' + tsColl.getName()); + +// Test that 'predicate' behaves correctly on the example documents, +// by comparing the result on a time-series collection against a normal collection. +function checkPredicateResult(predicate, documents) { + assert.commandWorked(coll.deleteMany({})); + bucketsColl.deleteMany({}); + assert.commandWorked(coll.insert(documents)); + assert.commandWorked(tsColl.insert(documents)); + + const normalResult = coll.aggregate({$match: predicate}).toArray(); + const tsResult = tsColl.aggregate({$match: predicate}).toArray(); + assert.sameMembers(normalResult, tsResult); +} + +// Test that 'predicate' behaves correctly, no matter how the documents are bucketed: +// insert the documents with different combinations of metadata to change how they are bucketed. +// 'documents' should be small, since this runs 2^N tests. +function checkAllBucketings(predicate, documents) { + for (const doc of documents) { + doc._id = ObjectId(); + doc.time = ISODate(); + } + + // For N documents, there are 2^N ways to assign them to buckets A and B. + const numDocs = documents.length; + const numBucketings = 1 << numDocs; + for (let bucketing = 0; bucketing < numBucketings; ++bucketing) { + // The ith bit tells you how to assign documents[i]. + const labeledDocs = documents.map( + (doc, i) => Object.merge(doc, {meta: {bucket: bucketing & (1 << i)}}, true /*deep*/)); + checkPredicateResult(predicate, labeledDocs); + } +} + +// $in +checkAllBucketings({x: {$in: [2, 3, 4]}}, [ + {x: 1}, + {x: 2}, + {x: 3}, + {x: 42}, +]); + +// $exists: true +checkAllBucketings({x: {$exists: true}}, [ + {}, + {x: 2}, + {x: ISODate()}, + {x: null}, + {x: undefined}, +]); + +// Test $or... +{ + // ... on metric + meta. + checkAllBucketings({ + $or: [ + {x: {$lt: 0}}, + {'meta.y': {$gt: 0}}, + ] + }, + [ + {x: +1, meta: {y: -1}}, + {x: +1, meta: {y: +1}}, + {x: -1, meta: {y: -1}}, + {x: -1, meta: {y: +1}}, + ]); + + // ... when one argument can't be pushed down. + checkAllBucketings({ + $or: [ + {x: {$lt: 0}}, + {y: {$exists: false}}, + ] + }, + [ + {x: -1}, + {x: +1}, + {x: -1, y: 'asdf'}, + {x: +1, y: 'asdf'}, + ]); + + // ... when neither argument can be pushed down. + checkAllBucketings({ + $or: [ + {x: {$exists: false}}, + {y: {$exists: false}}, + ] + }, + [ + {}, + {x: 'qwer'}, + {y: 'asdf'}, + {x: 'qwer', y: 'asdf'}, + ]); +} + +// Test $and... +{ + // ... on metric + meta. + checkAllBucketings({ + $and: [ + {x: {$lt: 0}}, + {'meta.y': {$gt: 0}}, + ] + }, + [ + {x: +1, meta: {y: -1}}, + {x: +1, meta: {y: +1}}, + {x: -1, meta: {y: -1}}, + {x: -1, meta: {y: +1}}, + ]); + + // ... when one argument can't be pushed down. + checkAllBucketings({ + $and: [ + {x: {$lt: 0}}, + {y: {$exists: false}}, + ] + }, + [ + {x: -1}, + {x: +1}, + {x: -1, y: 'asdf'}, + {x: +1, y: 'asdf'}, + ]); + + // ... 
when neither argument can be pushed down. + checkAllBucketings({ + $and: [ + {x: {$exists: false}}, + {y: {$exists: false}}, + ] + }, + [ + {}, + {x: 'qwer'}, + {y: 'asdf'}, + {x: 'qwer', y: 'asdf'}, + ]); +} + +// Test nested $and / $or. +checkAllBucketings({ + // The top-level $or prevents us from splitting into 2 top-level $match stages. + $or: [ + { + $and: [ + {'meta.a': {$gt: 0}}, + {'x': {$lt: 0}}, + ] + }, + { + $and: [ + {'meta.b': {$gte: 0}}, + {time: {$gt: ISODate('2020-01-01')}}, + ] + }, + ] +}, + [ + {meta: {a: -1, b: -1}, x: -1, time: ISODate('2020-02-01')}, + {meta: {a: -1, b: -1}, x: -1, time: ISODate('2019-12-31')}, + {meta: {a: -1, b: -1}, x: +1, time: ISODate('2020-02-01')}, + {meta: {a: -1, b: -1}, x: +1, time: ISODate('2019-12-31')}, + + {meta: {a: +1, b: -1}, x: -1, time: ISODate('2020-02-01')}, + {meta: {a: +1, b: -1}, x: -1, time: ISODate('2019-12-31')}, + {meta: {a: +1, b: -1}, x: +1, time: ISODate('2020-02-01')}, + {meta: {a: +1, b: -1}, x: +1, time: ISODate('2019-12-31')}, + ]); + +// Test nested $and / $or where some leaf predicates cannot be pushed down. +checkAllBucketings({ + $or: [ + { + $and: [ + {'meta.a': {$gt: 0}}, + {'x': {$exists: false}}, + ] + }, + { + $and: [ + {'meta.b': {$gte: 0}}, + {time: {$gt: ISODate('2020-01-01')}}, + ] + }, + ] +}, + [ + {meta: {a: -1, b: -1}, time: ISODate('2020-02-01')}, + {meta: {a: -1, b: -1}, time: ISODate('2019-12-31')}, + {meta: {a: -1, b: -1}, x: 'asdf', time: ISODate('2020-02-01')}, + {meta: {a: -1, b: -1}, x: 'asdf', time: ISODate('2019-12-31')}, + + {meta: {a: +1, b: -1}, time: ISODate('2020-02-01')}, + {meta: {a: +1, b: -1}, time: ISODate('2019-12-31')}, + {meta: {a: +1, b: -1}, x: 'asdf', time: ISODate('2020-02-01')}, + {meta: {a: +1, b: -1}, x: 'asdf', time: ISODate('2019-12-31')}, + ]); +})(); diff --git a/jstests/multiVersion/upgrade_downgrade_timeseries_collection_from_last_lts.js b/jstests/multiVersion/upgrade_downgrade_timeseries_collection_from_last_lts.js index affcfe4e3bd..e714a38d698 100644 --- a/jstests/multiVersion/upgrade_downgrade_timeseries_collection_from_last_lts.js +++ b/jstests/multiVersion/upgrade_downgrade_timeseries_collection_from_last_lts.js @@ -1,8 +1,11 @@ /** * Tests that upgrading time-series collections created using the last-lts binary warns about * potentially mixed-schema data when building secondary indexes on time-series measurements on the - * latest binary. Additionally, tests that downgrading FCV from 5.2 removes the + * latest binary. Additionally, tests that downgrading FCV from 5.3 removes the * 'timeseriesBucketsMayHaveMixedSchemaData' catalog entry flag from time-series collections. + * + * Also, tests that upgrading a time-series collection with no mixed-schema data allows metric + * indexes to be created. */ (function() { "use strict"; @@ -50,7 +53,7 @@ if (!TimeseriesTest.timeseriesMetricIndexesEnabled(primary)) { return; } -// Building indexes on time-series measurements is only supported in FCV >= 5.2. +// Building indexes on time-series measurements is only supported in FCV >= 5.3. jsTest.log("Setting FCV to 'latestFCV'"); assert.commandWorked(primary.adminCommand({setFeatureCompatibilityVersion: latestFCV})); @@ -62,34 +65,62 @@ assert(checkLog.checkContainsWithCountJson(primary, 6057601, {setting: true}, /* assert( checkLog.checkContainsWithCountJson(secondary, 6057601, {setting: true}, /*expectedCount=*/1)); +// Creating an index on time does not involve checking for mixed-schema data. 
assert.commandWorked(coll.createIndex({[timeField]: 1}, {name: "time_1"})); assert(checkLog.checkContainsWithCountJson( primary, 6057502, {namespace: bucketCollName}, /*expectedCount=*/0)); +// Creating an index on metadata does not involve checking for mixed-schema data. assert.commandWorked(coll.createIndex({[metaField]: 1}, {name: "meta_1"})); assert(checkLog.checkContainsWithCountJson( primary, 6057502, {namespace: bucketCollName}, /*expectedCount=*/0)); -assert.commandFailedWithCode(coll.createIndex({x: 1}, {name: "x_1"}), ErrorCodes.CannotCreateIndex); - -// May have mixed-schema data. +// Creating a partial index, on metadata and time only, does not involve checking for mixed-schema +// data. +assert.commandWorked(coll.createIndex({[timeField]: 1, [metaField]: 1}, { + name: "time_1_meta_1_partial", + partialFilterExpression: {[timeField]: {$gt: ISODate()}, [metaField]: 1} +})); assert(checkLog.checkContainsWithCountJson( - primary, 6057502, {namespace: bucketCollName}, /*expectedCount=*/1)); - -// Mixed-schema data detected. -assert(checkLog.checkContainsWithCountJson( - primary, 6057700, {namespace: bucketCollName}, /*expectedCount=*/1)); + primary, 6057502, {namespace: bucketCollName}, /*expectedCount=*/0)); +// Creating a metric index requires checking for mixed-schema data. +assert.commandFailedWithCode(coll.createIndex({x: 1}, {name: "x_1"}), ErrorCodes.CannotCreateIndex); +assert(checkLog.checkContainsWithCountJson(primary, + 6057502 /* May have mixed-schema data. */, + {namespace: bucketCollName}, + /*expectedCount=*/1)); +assert(checkLog.checkContainsWithCountJson(primary, + 6057700 /* Mixed-schema data detected. */, + {namespace: bucketCollName}, + /*expectedCount=*/1)); + +// A compound index that includes a metric counts as a metric index. assert.commandFailedWithCode(coll.createIndex({[timeField]: 1, x: 1}, {name: "time_1_x_1"}), ErrorCodes.CannotCreateIndex); - -// May have mixed-schema data. -assert(checkLog.checkContainsWithCountJson( - primary, 6057502, {namespace: bucketCollName}, /*expectedCount=*/2)); - -// Mixed-schema data detected. -assert(checkLog.checkContainsWithCountJson( - primary, 6057700, {namespace: bucketCollName}, /*expectedCount=*/2)); +assert(checkLog.checkContainsWithCountJson(primary, + 6057502 /* May have mixed-schema data. */, + {namespace: bucketCollName}, + /*expectedCount=*/2)); +assert(checkLog.checkContainsWithCountJson(primary, + 6057700 /* Mixed-schema data detected. */, + {namespace: bucketCollName}, + /*expectedCount=*/2)); + +// A partialFilterExpression on a metric makes it a metric index, even if the index key is +// metadata+time only. +assert.commandFailedWithCode( + coll.createIndex({[timeField]: 1, [metaField]: 1}, + {name: "time_1_meta_1_partial_metric", partialFilterExpression: {x: 1}}), + ErrorCodes.CannotCreateIndex); +assert(checkLog.checkContainsWithCountJson(primary, + 6057502 /* May have mixed-schema data. */, + {namespace: bucketCollName}, + /*expectedCount=*/3)); +assert(checkLog.checkContainsWithCountJson(primary, + 6057700 /* Mixed-schema data detected. */, + {namespace: bucketCollName}, + /*expectedCount=*/3)); // Check that the catalog entry flag doesn't get set to false. assert( @@ -99,10 +130,41 @@ assert( // The FCV downgrade process removes the catalog entry flag from time-series collections.
jsTest.log("Setting FCV to 'lastLTSFCV'"); -assert.commandWorked(primary.adminCommand({setFeatureCompatibilityVersion: lastLTSFCV})); +{ + // However, the first attempt at downgrading fails because a partial index still exists. + assert.commandFailedWithCode(primary.adminCommand({setFeatureCompatibilityVersion: lastLTSFCV}), + ErrorCodes.CannotDowngrade); + // Once we remove the index, downgrading succeeds. + assert.commandWorked(coll.dropIndex("time_1_meta_1_partial")); + assert.commandWorked(primary.adminCommand({setFeatureCompatibilityVersion: lastLTSFCV})); +} assert(checkLog.checkContainsWithCountJson(primary, 6057601, {setting: null}, /*expectedCount=*/1)); assert( checkLog.checkContainsWithCountJson(secondary, 6057601, {setting: null}, /*expectedCount=*/1)); +// Check that upgrading a clean collection allows all kinds of indexes to be created +// (time, metadata, metric). +jsTest.log("Testing upgrade of a non-mixed-schema collection."); +coll.drop(); +assert.commandWorked( + db.createCollection(collName, {timeseries: {timeField: timeField, metaField: metaField}})); +assert.commandWorked(coll.insert([ + {[timeField]: ISODate(), [metaField]: 1, x: 1}, + {[timeField]: ISODate(), [metaField]: 1, x: 2}, + {[timeField]: ISODate(), [metaField]: 1, x: 3}, +])); +assert.commandWorked(primary.adminCommand({setFeatureCompatibilityVersion: latestFCV})); +assert.commandWorked(coll.createIndex({[timeField]: 1}, {name: "time_1"})); +assert.commandWorked(coll.createIndex({[metaField]: 1}, {name: "meta_1"})); +assert.commandWorked(coll.createIndex({[timeField]: 1, [metaField]: 1}, { + name: "time_1_meta_1_partial", + partialFilterExpression: {[timeField]: {$gt: ISODate()}, [metaField]: 1} +})); +assert.commandWorked(coll.createIndex({x: 1}, {name: "x_1"})); +assert.commandWorked(coll.createIndex({[timeField]: 1, x: 1}, {name: "time_1_x_1"})); +assert.commandWorked( + coll.createIndex({[timeField]: 1, [metaField]: 1}, + {name: "time_1_meta_1_partial_metric", partialFilterExpression: {x: 1}})); + rst.stopSet(); -}());
\ No newline at end of file +}()); diff --git a/jstests/noPassthrough/timeseries_measurement_indexes_downgrade.js b/jstests/noPassthrough/timeseries_measurement_indexes_downgrade.js index 4889af415e6..8ada752669a 100644 --- a/jstests/noPassthrough/timeseries_measurement_indexes_downgrade.js +++ b/jstests/noPassthrough/timeseries_measurement_indexes_downgrade.js @@ -117,33 +117,33 @@ checkIndexForDowngrade(lastContinuousFCV, true, false); // Partial indexes are not supported in versions earlier than v5.2. assert.commandWorked( - coll.createIndex({[timeFieldName]: 1}, {partialFilterExpression: {x: {$type: "number"}}})); + coll.createIndex({[timeFieldName]: 1}, {partialFilterExpression: {x: {$gt: 5}}})); checkIndexForDowngrade(lastLTSFCV, false, false); assert.commandWorked( - coll.createIndex({[timeFieldName]: 1}, {partialFilterExpression: {x: {$type: "number"}}})); + coll.createIndex({[timeFieldName]: 1}, {partialFilterExpression: {x: {$gt: 5}}})); checkIndexForDowngrade(lastContinuousFCV, true, false); assert.commandWorked( - coll.createIndex({[metaFieldName]: 1}, {partialFilterExpression: {x: {$type: "number"}}})); + coll.createIndex({[metaFieldName]: 1}, {partialFilterExpression: {x: {$gt: 5}}})); checkIndexForDowngrade(lastLTSFCV, false, false); assert.commandWorked( - coll.createIndex({[metaFieldName]: 1}, {partialFilterExpression: {x: {$type: "number"}}})); + coll.createIndex({[metaFieldName]: 1}, {partialFilterExpression: {x: {$gt: 5}}})); checkIndexForDowngrade(lastContinuousFCV, true, false); -assert.commandWorked(coll.createIndex({x: 1}, {partialFilterExpression: {x: {$type: "number"}}})); +assert.commandWorked(coll.createIndex({x: 1}, {partialFilterExpression: {x: {$gt: 5}}})); checkIndexForDowngrade(lastLTSFCV, false, false); -assert.commandWorked(coll.createIndex({x: 1}, {partialFilterExpression: {x: {$type: "number"}}})); +assert.commandWorked(coll.createIndex({x: 1}, {partialFilterExpression: {x: {$gt: 5}}})); checkIndexForDowngrade(lastContinuousFCV, true, false); -assert.commandWorked(coll.createIndex({[metaFieldName]: 1, x: 1}, - {partialFilterExpression: {x: {$type: "number"}}})); +assert.commandWorked( + coll.createIndex({[metaFieldName]: 1, x: 1}, {partialFilterExpression: {x: {$gt: 5}}})); checkIndexForDowngrade(lastLTSFCV, false, false); -assert.commandWorked(coll.createIndex({x: 1, [metaFieldName]: 1}, - {partialFilterExpression: {x: {$type: "number"}}})); +assert.commandWorked( + coll.createIndex({x: 1, [metaFieldName]: 1}, {partialFilterExpression: {x: {$gt: 5}}})); checkIndexForDowngrade(lastContinuousFCV, true, false); MongoRunner.stopMongod(conn); diff --git a/jstests/sharding/timeseries_query.js b/jstests/sharding/timeseries_query.js index f350522cf6f..4cf3b1c28c5 100644 --- a/jstests/sharding/timeseries_query.js +++ b/jstests/sharding/timeseries_query.js @@ -291,14 +291,13 @@ function runQuery( expectedShards: [otherShard.shardName] }); - // OR queries are currently not re-written. So expect the query to be sent to both the shards, - // and do a full collection scan on each shard. + // OR queries are rewritten if all their arguments are. 
runQuery({ query: {$or: [{time: ISODate("2019-11-11")}, {time: ISODate("2019-11-12")}]}, expectedDocs: 2, - expectedShards: [primaryShard.shardName, otherShard.shardName], - expectQueryRewrite: false, - expectCollScan: true, + expectedShards: [primaryShard.shardName], + expectQueryRewrite: true, + expectCollScan: false, }); assert(coll.drop()); diff --git a/src/mongo/db/catalog/collection_impl.cpp b/src/mongo/db/catalog/collection_impl.cpp index eb18ed1212c..8ed4e53ae99 100644 --- a/src/mongo/db/catalog/collection_impl.cpp +++ b/src/mongo/db/catalog/collection_impl.cpp @@ -2017,8 +2017,8 @@ Status CollectionImpl::prepareForIndexBuild(OperationContext* opCtx, if (getTimeseriesOptions() && feature_flags::gTimeseriesMetricIndexes.isEnabledAndIgnoreFCV() && serverGlobalParams.featureCompatibility.isFCVUpgradingToOrAlreadyLatest() && - timeseries::doesBucketsIndexIncludeKeyOnMeasurement(*getTimeseriesOptions(), - spec->infoObj())) { + timeseries::doesBucketsIndexIncludeMeasurement( + opCtx, ns(), *getTimeseriesOptions(), spec->infoObj())) { invariant(_metadata->timeseriesBucketsMayHaveMixedSchemaData); if (*_metadata->timeseriesBucketsMayHaveMixedSchemaData) { LOGV2(6057502, diff --git a/src/mongo/db/catalog/index_catalog_impl.cpp b/src/mongo/db/catalog/index_catalog_impl.cpp index 09ae09ed303..093a1055e76 100644 --- a/src/mongo/db/catalog/index_catalog_impl.cpp +++ b/src/mongo/db/catalog/index_catalog_impl.cpp @@ -582,19 +582,12 @@ Status _checkValidFilterExpressions(const MatchExpression* expression, } return Status::OK(); case MatchExpression::GEO: - if (timeseriesMetricIndexesFeatureFlagEnabled) { - return Status::OK(); - } - return Status(ErrorCodes::CannotCreateIndex, - str::stream() << "Expression not supported in partial index: " - << expression->debugString()); case MatchExpression::INTERNAL_BUCKET_GEO_WITHIN: - if (timeseriesMetricIndexesFeatureFlagEnabled) { - return Status::OK(); - } - return Status(ErrorCodes::CannotCreateIndex, - str::stream() << "Expression not supported in partial index: " - << expression->debugString()); + case MatchExpression::INTERNAL_EXPR_EQ: + case MatchExpression::INTERNAL_EXPR_LT: + case MatchExpression::INTERNAL_EXPR_LTE: + case MatchExpression::INTERNAL_EXPR_GT: + case MatchExpression::INTERNAL_EXPR_GTE: case MatchExpression::MATCH_IN: if (timeseriesMetricIndexesFeatureFlagEnabled) { return Status::OK(); diff --git a/src/mongo/db/catalog/multi_index_block.cpp b/src/mongo/db/catalog/multi_index_block.cpp index 91d8f1224e3..2ffc1ad92b0 100644 --- a/src/mongo/db/catalog/multi_index_block.cpp +++ b/src/mongo/db/catalog/multi_index_block.cpp @@ -275,7 +275,8 @@ StatusWith<std::vector<BSONObj>> MultiIndexBlock::init( boost::optional<TimeseriesOptions> options = collection->getTimeseriesOptions(); if (options && serverGlobalParams.featureCompatibility.isFCVUpgradingToOrAlreadyLatest() && - timeseries::doesBucketsIndexIncludeKeyOnMeasurement(*options, info)) { + timeseries::doesBucketsIndexIncludeMeasurement( + opCtx, collection->ns(), *options, info)) { invariant(collection->getTimeseriesBucketsMayHaveMixedSchemaData()); _containsIndexBuildOnTimeseriesMeasurement = true; } diff --git a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp index 7b3a65167de..9b59a40ee76 100644 --- a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp +++ b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp @@ -609,18 +609,20 @@ private: // in 5.2 and up. 
If the user tries to downgrade the cluster to an // earlier version, they must first remove all incompatible secondary // indexes on time-series measurements. - uassert(ErrorCodes::CannotDowngrade, - str::stream() - << "Cannot downgrade the cluster when there are secondary " - "indexes on time-series measurements present. Drop all " - "secondary indexes on time-series measurements before " - "downgrading. First detected incompatible index name: '" - << indexEntry->descriptor()->indexName() - << "' on collection: '" - << collection->ns().getTimeseriesViewNamespace() << "'", - timeseries::isBucketsIndexSpecCompatibleForDowngrade( - *collection->getTimeseriesOptions(), - indexEntry->descriptor()->infoObj())); + uassert( + ErrorCodes::CannotDowngrade, + str::stream() + << "Cannot downgrade the cluster when there are secondary " + "indexes on time-series measurements present, or when there " + "are partial indexes on a time-series collection. Drop all " + "secondary indexes on time-series measurements, and all " + "partial indexes on time-series collections, before " + "downgrading. First detected incompatible index name: '" + << indexEntry->descriptor()->indexName() << "' on collection: '" + << collection->ns().getTimeseriesViewNamespace() << "'", + timeseries::isBucketsIndexSpecCompatibleForDowngrade( + *collection->getTimeseriesOptions(), + indexEntry->descriptor()->infoObj())); if (auto filter = indexEntry->getFilterExpression()) { auto status = IndexCatalogImpl::checkValidFilterExpressions( diff --git a/src/mongo/db/exec/SConscript b/src/mongo/db/exec/SConscript index 1a920757dad..89c4d304015 100644 --- a/src/mongo/db/exec/SConscript +++ b/src/mongo/db/exec/SConscript @@ -58,6 +58,7 @@ env.Library( ], LIBDEPS = [ "$BUILD_DIR/mongo/db/matcher/expressions", + "$BUILD_DIR/mongo/db/timeseries/timeseries_options", "document_value/document_value", ], LIBDEPS_PRIVATE = [ diff --git a/src/mongo/db/exec/bucket_unpacker.cpp b/src/mongo/db/exec/bucket_unpacker.cpp index f663b49f46d..2627b680e3c 100644 --- a/src/mongo/db/exec/bucket_unpacker.cpp +++ b/src/mongo/db/exec/bucket_unpacker.cpp @@ -36,12 +36,17 @@ #include "mongo/db/matcher/expression_geo.h" #include "mongo/db/matcher/expression_internal_bucket_geo_within.h" #include "mongo/db/matcher/expression_internal_expr_comparison.h" +#include "mongo/db/matcher/expression_parser.h" #include "mongo/db/matcher/expression_tree.h" +#include "mongo/db/matcher/extensions_callback_noop.h" #include "mongo/db/pipeline/expression.h" #include "mongo/db/timeseries/timeseries_constants.h" +#include "mongo/db/timeseries/timeseries_options.h" namespace mongo { +using IneligiblePredicatePolicy = BucketSpec::IneligiblePredicatePolicy; + bool BucketSpec::fieldIsComputed(StringData field) const { return std::any_of(computedMetaProjFields.begin(), computedMetaProjFields.end(), [&](auto& s) { return s == field || expression::isPathPrefixOf(field, s) || @@ -102,6 +107,42 @@ auto constructObjectIdValue(const BSONElement& rhs, int bucketMaxSpanSeconds) { MONGO_UNREACHABLE_TASSERT(5756800); } +/** + * Makes a disjunction of the given predicates. + * + * - The result is non-null; it may be an OrMatchExpression with zero children. + * - Any trivially-false arguments are omitted. + * - If only one argument is nontrivial, returns that argument rather than adding an extra + * OrMatchExpression around it. 
+ */ +std::unique_ptr<MatchExpression> makeOr(std::vector<std::unique_ptr<MatchExpression>> predicates) { + std::vector<std::unique_ptr<MatchExpression>> nontrivial; + for (auto&& p : predicates) { + if (!p->isTriviallyFalse()) + nontrivial.push_back(std::move(p)); + } + + if (nontrivial.size() == 1) + return std::move(nontrivial[0]); + + return std::make_unique<OrMatchExpression>(std::move(nontrivial)); +} + +std::unique_ptr<MatchExpression> handleIneligible(IneligiblePredicatePolicy policy, + const MatchExpression* matchExpr, + StringData message) { + switch (policy) { + case IneligiblePredicatePolicy::kError: + uasserted( + 5916301, + "Error translating non-metadata time-series predicate to operate on buckets: " + + message + ": " + matchExpr->serialize().toString()); + case IneligiblePredicatePolicy::kIgnore: + return nullptr; + } + MONGO_UNREACHABLE_TASSERT(5916307); +} + /* * Creates a predicate that ensures that if there exists a subpath of matchExprPath such that the * type of `control.min.subpath` is not the same as `control.max.subpath` then we will match that @@ -118,7 +159,7 @@ std::unique_ptr<MatchExpression> createTypeEqualityPredicate( std::vector<std::unique_ptr<MatchExpression>> typeEqualityPredicates; if (assumeNoMixedSchemaData) - return std::make_unique<OrMatchExpression>(std::move(typeEqualityPredicates)); + return makeOr(std::move(typeEqualityPredicates)); FieldPath matchExprField(matchExprPath); using namespace timeseries; @@ -158,7 +199,7 @@ std::unique_ptr<MatchExpression> createTypeEqualityPredicate( pExpCtx.get(), maxPath, pExpCtx->variablesParseState))))), pExpCtx)); } - return std::make_unique<OrMatchExpression>(std::move(typeEqualityPredicates)); + return makeOr(std::move(typeEqualityPredicates)); } std::unique_ptr<MatchExpression> createComparisonPredicate( @@ -167,7 +208,9 @@ std::unique_ptr<MatchExpression> createComparisonPredicate( int bucketMaxSpanSeconds, ExpressionContext::CollationMatchesDefault collationMatchesDefault, boost::intrusive_ptr<ExpressionContext> pExpCtx, - bool assumeNoMixedSchemaData) { + bool haveComputedMetaField, + bool assumeNoMixedSchemaData, + IneligiblePredicatePolicy policy) { using namespace timeseries; const auto matchExprPath = matchExpr->path(); const auto matchExprData = matchExpr->getData(); @@ -176,39 +219,59 @@ std::unique_ptr<MatchExpression> createComparisonPredicate( // MatchExpressions use a comparator that treats field-order as significant. Because of this we // will not perform this optimization on queries with operands of compound types. if (matchExprData.type() == BSONType::Object || matchExprData.type() == BSONType::Array) - return nullptr; + return handleIneligible(policy, matchExpr, "operand can't be an object or array"_sd); // MatchExpressions have special comparison semantics regarding null, in that {$eq: null} will // match all documents where the field is either null or missing. Because this is different // from both the comparison semantics that InternalExprComparison expressions and the control's // min and max fields use, we will not perform this optimization on queries with null operands. if (matchExprData.type() == BSONType::jstNULL) - return nullptr; + return handleIneligible(policy, matchExpr, "can't handle {$eq: null}"_sd); // The control field's min and max are chosen based on the collation of the collection. If the // query's collation does not match the collection's collation and the query operand is a // string or compound type (skipped above) we will not perform this optimization. 
if (collationMatchesDefault == ExpressionContext::CollationMatchesDefault::kNo && matchExprData.type() == BSONType::String) { - return nullptr; + return handleIneligible( + policy, matchExpr, "can't handle string comparison with a non-default collation"_sd); } - // We must avoid mapping predicates on the meta field onto the control field. + // We must avoid mapping predicates on the meta field onto the control field. These should be + // mapped to the meta field instead. + // + // You might think these were handled earlier, by splitting the match expression into a + // metadata-only part, and measurement/time-only part. However, splitting a $match into two + // sequential $matches only works when splitting a conjunction. A predicate like + // {$or: [ {a: 5}, {meta.b: 5} ]} cannot be split, and can't be metadata-only, so we have to + // handle it here. if (bucketSpec.metaField() && (matchExprPath == bucketSpec.metaField().get() || - expression::isPathPrefixOf(bucketSpec.metaField().get(), matchExprPath))) - return nullptr; + expression::isPathPrefixOf(bucketSpec.metaField().get(), matchExprPath))) { + + if (haveComputedMetaField) + return handleIneligible(policy, matchExpr, "can't handle a computed meta field"); + + auto result = matchExpr->shallowClone(); + expression::applyRenamesToExpression( + result.get(), + {{bucketSpec.metaField().get(), timeseries::kBucketMetaFieldName.toString()}}); + return result; + } // We must avoid mapping predicates on fields computed via $addFields or a computed $project. if (bucketSpec.fieldIsComputed(matchExprPath.toString())) { - return nullptr; + return handleIneligible(policy, matchExpr, "can't handle a computed field"); } const auto isTimeField = (matchExprPath == bucketSpec.timeField()); if (isTimeField && matchExprData.type() != BSONType::Date) { // Users are not allowed to insert non-date measurements into time field. So this query // would not match anything. We do not need to optimize for this case. 
- return nullptr; + return handleIneligible( + policy, + matchExpr, + "This predicate will never be true, because the time field always contains a Date"); } BSONObj minTime; @@ -255,7 +318,7 @@ std::unique_ptr<MatchExpression> createComparisonPredicate( kBucketIdFieldName, constructObjectIdValue<GTEMatchExpression>(matchExprData, bucketMaxSpanSeconds))) - : std::make_unique<OrMatchExpression>(makeVector<std::unique_ptr<MatchExpression>>( + : makeOr(makeVector<std::unique_ptr<MatchExpression>>( makePredicate(MatchExprPredicate<InternalExprLTEMatchExpression>( minPath, matchExprData), MatchExprPredicate<InternalExprGTEMatchExpression>( @@ -284,7 +347,7 @@ std::unique_ptr<MatchExpression> createComparisonPredicate( kBucketIdFieldName, constructObjectIdValue<GTMatchExpression>(matchExprData, bucketMaxSpanSeconds))) - : std::make_unique<OrMatchExpression>(makeVector<std::unique_ptr<MatchExpression>>( + : makeOr(makeVector<std::unique_ptr<MatchExpression>>( std::make_unique<InternalExprGTMatchExpression>(maxPath, matchExprData), createTypeEqualityPredicate( pExpCtx, matchExprPath, assumeNoMixedSchemaData))); @@ -310,7 +373,7 @@ std::unique_ptr<MatchExpression> createComparisonPredicate( kBucketIdFieldName, constructObjectIdValue<GTEMatchExpression>(matchExprData, bucketMaxSpanSeconds))) - : std::make_unique<OrMatchExpression>(makeVector<std::unique_ptr<MatchExpression>>( + : makeOr(makeVector<std::unique_ptr<MatchExpression>>( std::make_unique<InternalExprGTEMatchExpression>(maxPath, matchExprData), createTypeEqualityPredicate( pExpCtx, matchExprPath, assumeNoMixedSchemaData))); @@ -335,7 +398,7 @@ std::unique_ptr<MatchExpression> createComparisonPredicate( kBucketIdFieldName, constructObjectIdValue<LTMatchExpression>(matchExprData, bucketMaxSpanSeconds))) - : std::make_unique<OrMatchExpression>(makeVector<std::unique_ptr<MatchExpression>>( + : makeOr(makeVector<std::unique_ptr<MatchExpression>>( std::make_unique<InternalExprLTMatchExpression>(minPath, matchExprData), createTypeEqualityPredicate( pExpCtx, matchExprPath, assumeNoMixedSchemaData))); @@ -360,7 +423,7 @@ std::unique_ptr<MatchExpression> createComparisonPredicate( kBucketIdFieldName, constructObjectIdValue<LTEMatchExpression>(matchExprData, bucketMaxSpanSeconds))) - : std::make_unique<OrMatchExpression>(makeVector<std::unique_ptr<MatchExpression>>( + : makeOr(makeVector<std::unique_ptr<MatchExpression>>( std::make_unique<InternalExprLTEMatchExpression>(minPath, matchExprData), createTypeEqualityPredicate( pExpCtx, matchExprPath, assumeNoMixedSchemaData))); @@ -380,19 +443,25 @@ std::unique_ptr<MatchExpression> BucketSpec::createPredicatesOnBucketLevelField( int bucketMaxSpanSeconds, ExpressionContext::CollationMatchesDefault collationMatchesDefault, const boost::intrusive_ptr<ExpressionContext>& pExpCtx, - bool assumeNoMixedSchemaData) { + bool haveComputedMetaField, + bool assumeNoMixedSchemaData, + IneligiblePredicatePolicy policy) { + + tassert(5916304, "BucketSpec::createPredicatesOnBucketLevelField nullptr", matchExpr); + if (matchExpr->matchType() == MatchExpression::AND) { auto nextAnd = static_cast<const AndMatchExpression*>(matchExpr); auto andMatchExpr = std::make_unique<AndMatchExpression>(); for (size_t i = 0; i < nextAnd->numChildren(); i++) { - if (auto child = createPredicatesOnBucketLevelField( - nextAnd->getChild(i), - bucketSpec, - bucketMaxSpanSeconds, - collationMatchesDefault, - pExpCtx, - assumeNoMixedSchemaData)) { + if (auto child = createPredicatesOnBucketLevelField(nextAnd->getChild(i), + bucketSpec, + 
bucketMaxSpanSeconds, + collationMatchesDefault, + pExpCtx, + haveComputedMetaField, + assumeNoMixedSchemaData, + policy)) { andMatchExpr->add(std::move(child)); } } @@ -402,15 +471,55 @@ std::unique_ptr<MatchExpression> BucketSpec::createPredicatesOnBucketLevelField( if (andMatchExpr->numChildren() > 0) { return andMatchExpr; } + + // No error message here: an empty AND is valid. + return nullptr; + } else if (matchExpr->matchType() == MatchExpression::OR) { + // Given {$or: [A, B]}, suppose A, B can be pushed down as A', B'. + // If an event matches {$or: [A, B]} then either: + // - it matches A, which means any bucket containing it matches A' + // - it matches B, which means any bucket containing it matches B' + // So {$or: [A', B']} will capture all the buckets we need to satisfy {$or: [A, B]}. + auto nextOr = static_cast<const OrMatchExpression*>(matchExpr); + auto result = std::make_unique<OrMatchExpression>(); + + bool alwaysTrue = false; + for (size_t i = 0; i < nextOr->numChildren(); i++) { + auto child = createPredicatesOnBucketLevelField(nextOr->getChild(i), + bucketSpec, + bucketMaxSpanSeconds, + collationMatchesDefault, + pExpCtx, + haveComputedMetaField, + assumeNoMixedSchemaData, + policy); + if (child) { + result->add(std::move(child)); + } else { + // Since this argument is always-true, the entire OR is always-true. + alwaysTrue = true; + + // Only short circuit if we're uninterested in reporting errors. + if (policy == IneligiblePredicatePolicy::kIgnore) + break; + } + } + if (alwaysTrue) + return nullptr; + + // No special case for an empty OR: returning nullptr would be incorrect because it + // means 'always-true', here. + return result; } else if (ComparisonMatchExpression::isComparisonMatchExpression(matchExpr) || ComparisonMatchExpressionBase::isInternalExprComparison(matchExpr->matchType())) { - return createComparisonPredicate( - static_cast<const ComparisonMatchExpressionBase*>(matchExpr), - bucketSpec, - bucketMaxSpanSeconds, - pExpCtx->collationMatchesDefault, - pExpCtx, - assumeNoMixedSchemaData); + return createComparisonPredicate(static_cast<const ComparisonMatchExpression*>(matchExpr), + bucketSpec, + bucketMaxSpanSeconds, + collationMatchesDefault, + pExpCtx, + haveComputedMetaField, + assumeNoMixedSchemaData, + policy); } else if (matchExpr->matchType() == MatchExpression::GEO) { auto& geoExpr = static_cast<const GeoMatchExpression*>(matchExpr)->getGeoExpression(); if (geoExpr.getPred() == GeoExpression::WITHIN || @@ -418,9 +527,115 @@ std::unique_ptr<MatchExpression> BucketSpec::createPredicatesOnBucketLevelField( return std::make_unique<InternalBucketGeoWithinMatchExpression>( geoExpr.getGeometryPtr(), geoExpr.getField()); } + } else if (matchExpr->matchType() == MatchExpression::EXISTS) { + if (assumeNoMixedSchemaData) { + // We know that every field that appears in an event will also appear in the min/max. + auto result = std::make_unique<AndMatchExpression>(); + result->add(std::make_unique<ExistsMatchExpression>( + std::string{timeseries::kControlMinFieldNamePrefix} + matchExpr->path())); + result->add(std::make_unique<ExistsMatchExpression>( + std::string{timeseries::kControlMaxFieldNamePrefix} + matchExpr->path())); + return result; + } else { + // At time of writing, we only pass 'kError' when creating a partial index, and + // we know the collection will have no mixed-schema buckets by the time the index is + // done building. 
+ tassert(5916305, + "Can't push down {$exists: true} when the collection may have mixed-schema " + "buckets.", + policy != IneligiblePredicatePolicy::kError); + return nullptr; + } + } else if (matchExpr->matchType() == MatchExpression::MATCH_IN) { + // {a: {$in: [X, Y]}} is equivalent to {$or: [ {a: X}, {a: Y} ]}. + // {$in: [/a/]} is interpreted as a regex query. + // {$in: [null]} matches any nullish value. + const auto* inExpr = static_cast<const InMatchExpression*>(matchExpr); + if (inExpr->hasRegex()) + return handleIneligible( + policy, matchExpr, "can't handle $regex predicate (inside $in predicate)"); + if (inExpr->hasNull()) + return handleIneligible( + policy, matchExpr, "can't handle {$eq: null} predicate (inside $in predicate)"); + + auto result = std::make_unique<OrMatchExpression>(); + for (auto&& elem : inExpr->getEqualities()) { + // If inExpr is {$in: [X, Y]} then the elems are '0: X' and '1: Y'. + auto eq = std::make_unique<EqualityMatchExpression>( + inExpr->path(), elem, nullptr /*annotation*/, inExpr->getCollator()); + result->add(createComparisonPredicate(eq.get(), + bucketSpec, + bucketMaxSpanSeconds, + collationMatchesDefault, + pExpCtx, + haveComputedMetaField, + assumeNoMixedSchemaData, + policy)); + } + return result; } + return handleIneligible(policy, matchExpr, "can't handle this predicate"); +} + +BSONObj BucketSpec::pushdownPredicate( + const boost::intrusive_ptr<ExpressionContext>& expCtx, + const TimeseriesOptions& tsOptions, + ExpressionContext::CollationMatchesDefault collationMatchesDefault, + const BSONObj& predicate, + bool haveComputedMetaField, + bool assumeNoMixedSchemaData, + IneligiblePredicatePolicy policy) { + + auto allowedFeatures = MatchExpressionParser::kDefaultSpecialFeatures; + auto matchExpr = uassertStatusOK( + MatchExpressionParser::parse(predicate, expCtx, ExtensionsCallbackNoop(), allowedFeatures)); + + auto metaField = haveComputedMetaField ? boost::none : tsOptions.getMetaField(); + auto [metaOnlyPredicate, metricPredicate] = [&] { + if (!metaField) { + // If there's no metadata field, then none of the predicates are metadata-only + // predicates. + return std::make_pair(std::unique_ptr<MatchExpression>(nullptr), std::move(matchExpr)); + } - return nullptr; + return expression::splitMatchExpressionBy( + std::move(matchExpr), + {metaField->toString()}, + {{metaField->toString(), timeseries::kBucketMetaFieldName.toString()}}, + expression::isOnlyDependentOn); + }(); + + int maxSpanSeconds = tsOptions.getBucketMaxSpanSeconds() + ? *tsOptions.getBucketMaxSpanSeconds() + : timeseries::getMaxSpanSecondsFromGranularity(tsOptions.getGranularity()); + + std::unique_ptr<MatchExpression> bucketMetricPredicate = metricPredicate + ? createPredicatesOnBucketLevelField( + metricPredicate.get(), + BucketSpec{ + tsOptions.getTimeField().toString(), + metaField.map([](StringData s) { return s.toString(); }), + // Since we are operating on a collection, not a query-result, there are no + // inclusion/exclusion projections we need to apply to the buckets before + // unpacking. + {}, + // And there are no computed projections. 
+                {},
+            },
+            maxSpanSeconds,
+            collationMatchesDefault,
+            expCtx,
+            haveComputedMetaField,
+            assumeNoMixedSchemaData,
+            policy)
+        : nullptr;
+
+    BSONObjBuilder result;
+    if (metaOnlyPredicate)
+        metaOnlyPredicate->serialize(&result);
+    if (bucketMetricPredicate)
+        bucketMetricPredicate->serialize(&result);
+    return result.obj();
 }
 
 class BucketUnpacker::UnpackingImpl {
diff --git a/src/mongo/db/exec/bucket_unpacker.h b/src/mongo/db/exec/bucket_unpacker.h
index 05223ace00d..bd1894c2ac1 100644
--- a/src/mongo/db/exec/bucket_unpacker.h
+++ b/src/mongo/db/exec/bucket_unpacker.h
@@ -69,6 +69,17 @@ public:
     // Returns whether 'field' depends on a pushed down $addFields or computed $project.
     bool fieldIsComputed(StringData field) const;
 
+    // Says what to do when an event-level predicate cannot be mapped to a bucket-level predicate.
+    enum class IneligiblePredicatePolicy {
+        // When optimizing a query, it's fine if some predicates can't be pushed down. We'll still
+        // run the predicate after unpacking, so the results will be correct.
+        kIgnore,
+        // When creating a partial index, it's misleading if we can't handle a predicate: the user
+        // expects every predicate in the partialFilterExpression to contribute, somehow, to making
+        // the index smaller.
+        kError,
+    };
+
     /**
      * Takes a predicate after $_internalUnpackBucket on a bucketed field as an argument and
      * attempts to map it to a new predicate on the 'control' field. For example, the predicate
@@ -84,6 +95,10 @@ public:
      * ]}
      *
     * If the provided predicate is ineligible for this mapping, the function will return a nullptr.
+     * This should be interpreted as an always-true predicate.
+     *
+     * When using IneligiblePredicatePolicy::kIgnore, if the predicate can't be pushed down, it
+     * returns null. When using IneligiblePredicatePolicy::kError it raises a user error.
      */
     static std::unique_ptr<MatchExpression> createPredicatesOnBucketLevelField(
         const MatchExpression* matchExpr,
@@ -91,7 +106,37 @@ public:
         int bucketMaxSpanSeconds,
         ExpressionContext::CollationMatchesDefault collationMatchesDefault,
         const boost::intrusive_ptr<ExpressionContext>& pExpCtx,
-        bool assumeNoMixedSchemaData);
+        bool haveComputedMetaField,
+        bool assumeNoMixedSchemaData,
+        IneligiblePredicatePolicy policy);
+
+    /**
+     * Converts an event-level predicate to a bucket-level predicate, such that
+     *
+     * {$unpackBucket ...} {$match: <event-level predicate>}
+     *
+     * gives the same result as
+     *
+     * {$match: <bucket-level predicate>} {$unpackBucket ...} {$match: <event-level predicate>}
+     *
+     * This means the bucket-level predicate must include every bucket that might contain an event
+     * matching the event-level predicate.
+     *
+     * This helper is used when creating a partial index on a time-series collection: logically,
+     * we index only events that match the event-level partialFilterExpression, but physically we
+     * index any bucket that matches the bucket-level partialFilterExpression.
+     *
+     * When using IneligiblePredicatePolicy::kIgnore, if the predicate can't be pushed down, it
+     * returns null. When using IneligiblePredicatePolicy::kError it raises a user error.
+     */
+    static BSONObj pushdownPredicate(
+        const boost::intrusive_ptr<ExpressionContext>& expCtx,
+        const TimeseriesOptions& tsOptions,
+        ExpressionContext::CollationMatchesDefault collationMatchesDefault,
+        const BSONObj& predicate,
+        bool haveComputedMetaField,
+        bool assumeNoMixedSchemaData,
+        IneligiblePredicatePolicy policy);
 
     // The set of field names in the data region that should be included or excluded.
    std::set<std::string> fieldSet;
diff --git a/src/mongo/db/matcher/expression_algo.cpp b/src/mongo/db/matcher/expression_algo.cpp
index c0595c7f825..c3d0e777c1f 100644
--- a/src/mongo/db/matcher/expression_algo.cpp
+++ b/src/mongo/db/matcher/expression_algo.cpp
@@ -27,6 +27,8 @@
  * it in the license file.
  */
 
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery
+
 #include "mongo/platform/basic.h"
 
 #include "mongo/base/checked_cast.h"
@@ -42,6 +44,7 @@
 #include "mongo/db/pipeline/dependencies.h"
 #include "mongo/db/query/collation/collation_index_key.h"
 #include "mongo/db/query/collation/collator_interface.h"
+#include "mongo/logv2/log.h"
 
 namespace mongo {
 
@@ -133,9 +136,88 @@ bool _isSubsetOf(const ComparisonMatchExpression* lhs, const ComparisonMatchExpr
     }
 }
 
+bool _isSubsetOfInternalExpr(const ComparisonMatchExpressionBase* lhs,
+                             const ComparisonMatchExpressionBase* rhs) {
+    // An expression can only match a subset of the documents matched by another if they are
+    // comparing the same field.
+    if (lhs->path() != rhs->path()) {
+        return false;
+    }
+
+    const BSONElement lhsData = lhs->getData();
+    const BSONElement rhsData = rhs->getData();
+
+    if (!CollatorInterface::collatorsMatch(lhs->getCollator(), rhs->getCollator()) &&
+        CollationIndexKey::isCollatableType(lhsData.type())) {
+        return false;
+    }
+
+    int cmp = lhsData.woCompare(
+        rhsData, BSONElement::ComparisonRules::kConsiderFieldName, rhs->getCollator());
+
+    // Check whether the two expressions are equivalent.
+    if (lhs->matchType() == rhs->matchType() && cmp == 0) {
+        return true;
+    }
+
+    switch (rhs->matchType()) {
+        case MatchExpression::INTERNAL_EXPR_LT:
+        case MatchExpression::INTERNAL_EXPR_LTE:
+            switch (lhs->matchType()) {
+                case MatchExpression::INTERNAL_EXPR_LT:
+                case MatchExpression::INTERNAL_EXPR_LTE:
+                case MatchExpression::INTERNAL_EXPR_EQ:
+                    // An LTE bound admits equality with the bound; an LT bound does not.
+                    if (rhs->matchType() == MatchExpression::INTERNAL_EXPR_LTE) {
+                        return cmp <= 0;
+                    }
+                    return cmp < 0;
+                default:
+                    return false;
+            }
+        case MatchExpression::INTERNAL_EXPR_GT:
+        case MatchExpression::INTERNAL_EXPR_GTE:
+            switch (lhs->matchType()) {
+                case MatchExpression::INTERNAL_EXPR_GT:
+                case MatchExpression::INTERNAL_EXPR_GTE:
+                case MatchExpression::INTERNAL_EXPR_EQ:
+                    if (rhs->matchType() == MatchExpression::INTERNAL_EXPR_GTE) {
+                        return cmp >= 0;
+                    }
+                    return cmp > 0;
+                default:
+                    return false;
+            }
+        default:
+            return false;
+    }
+}
+
 /**
  * Returns true if the documents matched by 'lhs' are a subset of the documents matched by
  * 'rhs', i.e. a document matched by 'lhs' must also be matched by 'rhs', and false otherwise.
+ *
+ * This overload handles the $_internalExpr family of comparisons.
+ */
+bool _isSubsetOfInternalExpr(const MatchExpression* lhs, const ComparisonMatchExpressionBase* rhs) {
+    // An expression can only match a subset of the documents matched by another if they are
+    // comparing the same field.
+    if (lhs->path() != rhs->path()) {
+        return false;
+    }
+
+    if (ComparisonMatchExpressionBase::isInternalExprComparison(lhs->matchType())) {
+        return _isSubsetOfInternalExpr(static_cast<const ComparisonMatchExpressionBase*>(lhs), rhs);
+    }
+
+    return false;
+}
+
+/**
+ * Returns true if the documents matched by 'lhs' are a subset of the documents matched by
+ * 'rhs', i.e. a document matched by 'lhs' must also be matched by 'rhs', and false otherwise.
+ *
+ * This overload handles comparisons such as $lt, $eq, $gte, but not $_internalExprLt, etc.
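+ * For example, {a: {$lt: 5}} matches a subset of the documents matched by
+ * {a: {$lte: 5}}, but not vice versa.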
 */
bool _isSubsetOf(const MatchExpression* lhs, const ComparisonMatchExpression* rhs) {
    // An expression can only match a subset of the documents matched by another if they are
@@ -508,6 +590,10 @@ bool isSubsetOf(const MatchExpression* lhs, const MatchExpression* rhs) {
         return _isSubsetOf(lhs, static_cast<const ComparisonMatchExpression*>(rhs));
     }
 
+    if (ComparisonMatchExpressionBase::isInternalExprComparison(rhs->matchType())) {
+        return _isSubsetOfInternalExpr(lhs, static_cast<const ComparisonMatchExpressionBase*>(rhs));
+    }
+
     if (rhs->matchType() == MatchExpression::EXISTS) {
         return _isSubsetOf(lhs, static_cast<const ExistsMatchExpression*>(rhs));
     }
@@ -555,7 +641,7 @@ bool isIndependentOf(const MatchExpression& expr, const std::set<std::string>& p
     });
 }
 
-bool isOnlyDependentOn(const MatchExpression& expr, const std::set<std::string>& roots) {
+bool isOnlyDependentOn(const MatchExpression& expr, const std::set<std::string>& pathSet) {
     // Any expression types that do not have renaming implemented cannot have their independence
     // evaluated here. See applyRenamesToExpression().
     if (!hasOnlyRenameableMatchExpressionChildren(expr)) {
@@ -564,11 +650,11 @@ bool isOnlyDependentOn(const MatchExpression& expr, const std::set<std::string>&
     auto depsTracker = DepsTracker{};
     expr.addDependencies(&depsTracker);
 
-    return std::all_of(
-        depsTracker.fields.begin(), depsTracker.fields.end(), [&roots](auto&& field) {
-            auto fieldRef = FieldRef{field};
-            return !fieldRef.empty() && roots.find(fieldRef.getPart(0).toString()) != roots.end();
+    return std::all_of(depsTracker.fields.begin(), depsTracker.fields.end(), [&](auto&& field) {
+        return std::any_of(pathSet.begin(), pathSet.end(), [&](auto&& path) {
+            return path == field || isPathPrefixOf(path, field);
         });
+    });
 }
 
 std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitMatchExpressionBy(
diff --git a/src/mongo/db/matcher/expression_algo.h b/src/mongo/db/matcher/expression_algo.h
index fb703066206..31bc8f9e227 100644
--- a/src/mongo/db/matcher/expression_algo.h
+++ b/src/mongo/db/matcher/expression_algo.h
@@ -93,9 +93,9 @@ bool isSplittableBy(const MatchExpression& expr, const std::set<std::string>& pa
 bool isIndependentOf(const MatchExpression& expr, const std::set<std::string>& pathSet);
 
 /**
- * Determine if 'expr' is reliant only upon paths rooted in 'roots'.
+ * Determine if 'expr' is reliant only upon paths from 'pathSet'.
 */
-bool isOnlyDependentOn(const MatchExpression& expr, const std::set<std::string>& roots);
+bool isOnlyDependentOn(const MatchExpression& expr, const std::set<std::string>& pathSet);
 
 /**
 * Returns whether the path represented by 'first' is a prefix of the path represented by 'second'.
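The renamed 'pathSet' parameter also gets broader semantics: a dependency is now accepted on an exact path match or when a path in the set is a path prefix of the dependency, rather than only comparing the first path component against a set of top-level roots. A minimal sketch of the intended behavior (hypothetical inputs, not actual test cases):

// Given pathSet = {'meta.a'}:
//   expression {'meta.a': {$gt: 1}}   -> true  (exact match)
//   expression {'meta.a.b': {$gt: 1}} -> true  ('meta.a' is a path prefix of 'meta.a.b')
//   expression {'meta.b': {$gt: 1}}   -> false
// Under the old root-based check, a set containing the dotted path 'meta.a'
// could never match anything, since only the first component ('meta') of each
// dependency was compared against the set.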
diff --git a/src/mongo/db/matcher/expression_leaf.h b/src/mongo/db/matcher/expression_leaf.h index fdcc14e10b0..015dae13010 100644 --- a/src/mongo/db/matcher/expression_leaf.h +++ b/src/mongo/db/matcher/expression_leaf.h @@ -702,6 +702,10 @@ public: return _hasNull; } + bool hasRegex() const { + return !_regexes.empty(); + } + bool hasEmptyArray() const { return _hasEmptyArray; } diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript index fd3b9ecbb49..0492feff232 100644 --- a/src/mongo/db/pipeline/SConscript +++ b/src/mongo/db/pipeline/SConscript @@ -356,6 +356,7 @@ pipelineEnv.Library( '$BUILD_DIR/mongo/db/storage/encryption_hooks', '$BUILD_DIR/mongo/db/storage/index_entry_comparison', '$BUILD_DIR/mongo/db/storage/storage_options', + '$BUILD_DIR/mongo/db/timeseries/timeseries_options', '$BUILD_DIR/mongo/db/update/update_document_diff', '$BUILD_DIR/mongo/db/views/resolved_view', '$BUILD_DIR/mongo/s/is_mongos', @@ -375,7 +376,6 @@ pipelineEnv.Library( '$BUILD_DIR/mongo/db/repl/image_collection_entry', '$BUILD_DIR/mongo/db/sorter/sorter_idl', '$BUILD_DIR/mongo/db/timeseries/timeseries_conversion_util', - '$BUILD_DIR/mongo/db/timeseries/timeseries_options', '$BUILD_DIR/mongo/rpc/command_status', ] ) diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp index 055b7be6fa6..1fa5ff224dd 100644 --- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp +++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp @@ -58,6 +58,7 @@ #include "mongo/db/pipeline/lite_parsed_document_source.h" #include "mongo/db/query/util/make_data_structure.h" #include "mongo/db/timeseries/timeseries_constants.h" +#include "mongo/db/timeseries/timeseries_options.h" #include "mongo/logv2/log.h" #include "mongo/util/duration.h" #include "mongo/util/time_support.h" @@ -511,12 +512,15 @@ std::pair<BSONObj, bool> DocumentSourceInternalUnpackBucket::extractOrBuildProje std::unique_ptr<MatchExpression> DocumentSourceInternalUnpackBucket::createPredicatesOnBucketLevelField( const MatchExpression* matchExpr) const { - return BucketSpec::createPredicatesOnBucketLevelField(matchExpr, - _bucketUnpacker.bucketSpec(), - _bucketMaxSpanSeconds, - pExpCtx->collationMatchesDefault, - pExpCtx, - _assumeNoMixedSchemaData); + return BucketSpec::createPredicatesOnBucketLevelField( + matchExpr, + _bucketUnpacker.bucketSpec(), + _bucketMaxSpanSeconds, + pExpCtx->collationMatchesDefault, + pExpCtx, + haveComputedMetaField(), + _assumeNoMixedSchemaData, + BucketSpec::IneligiblePredicatePolicy::kIgnore); } std::pair<boost::intrusive_ptr<DocumentSourceMatch>, boost::intrusive_ptr<DocumentSourceMatch>> @@ -645,6 +649,12 @@ DocumentSourceInternalUnpackBucket::rewriteGroupByMinMax(Pipeline::SourceContain return {}; } +bool DocumentSourceInternalUnpackBucket::haveComputedMetaField() const { + return _bucketUnpacker.bucketSpec().metaField() && + _bucketUnpacker.bucketSpec().fieldIsComputed( + _bucketUnpacker.bucketSpec().metaField().get()); +} + Pipeline::SourceContainer::iterator DocumentSourceInternalUnpackBucket::doOptimizeAt( Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) { invariant(*itr == this); @@ -655,8 +665,7 @@ Pipeline::SourceContainer::iterator DocumentSourceInternalUnpackBucket::doOptimi // Some optimizations may not be safe to do if we have computed the metaField via an $addFields // or a computed $project. 
We won't do those optimizations if 'haveComputedMetaField' is true. - bool haveComputedMetaField = _bucketUnpacker.bucketSpec().metaField() && - _bucketUnpacker.bucketSpec().fieldIsComputed(_bucketUnpacker.bucketSpec().metaField().get()); + bool haveComputedMetaField = this->haveComputedMetaField(); // Before any other rewrites for the current stage, consider reordering with $sort. if (auto sortPtr = dynamic_cast<DocumentSourceSort*>(std::next(itr)->get())) { diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h index 0c5a17b4e23..bebe67b4a20 100644 --- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h +++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h @@ -190,6 +190,7 @@ public: private: GetNextResult doGetNext() final; + bool haveComputedMetaField() const; // If buckets contained a mixed type schema along some path, we have to push down special // predicates in order to ensure correctness. diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp index d3a3030be8d..ffe291dcbd0 100644 --- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp +++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp @@ -57,8 +57,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ASSERT_BSONOBJ_EQ(predicate->serialize(true), fromjson("{$or: [ {'control.max.a': {$_internalExprGt: 1}}," - "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," - "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}")); + "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," + "{$type: [ \"$control.max.a\" ]} ]}} ]}")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -78,8 +78,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ASSERT_BSONOBJ_EQ(predicate->serialize(true), fromjson("{$or: [ {'control.max.a': {$_internalExprGte: 1}}," - "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," - "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}")); + "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," + "{$type: [ \"$control.max.a\" ]} ]}} ]}")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -99,8 +99,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ASSERT_BSONOBJ_EQ(predicate->serialize(true), fromjson("{$or: [ {'control.min.a': {$_internalExprLt: 1}}," - "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," - "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}")); + "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," + "{$type: [ \"$control.max.a\" ]} ]}} ]}")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -120,8 +120,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ASSERT_BSONOBJ_EQ(predicate->serialize(true), fromjson("{$or: [ {'control.min.a': {$_internalExprLte: 1}}," - "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," - "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}")); + "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," + "{$type: [ \"$control.max.a\" ]} ]}} ]}")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -142,8 +142,50 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ASSERT_BSONOBJ_EQ(predicate->serialize(true), fromjson("{$or: [ 
{$and:[{'control.min.a': {$_internalExprLte: 1}}," "{'control.max.a': {$_internalExprGte: 1}}]}," - "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," - "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}")); + "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," + "{$type: [ \"$control.max.a\" ]} ]}} ]}")); +} + +TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, + OptimizeMapsINPredicatesOnControlField) { + auto pipeline = + Pipeline::parse(makeVector(fromjson("{$_internalUnpackBucket: {exclude: [], timeField: " + "'time', bucketMaxSpanSeconds: 3600}}"), + fromjson("{$match: {a: {$in: [1, 2]}}}")), + getExpCtx()); + auto& container = pipeline->getSources(); + + ASSERT_EQ(pipeline->getSources().size(), 2U); + + auto original = dynamic_cast<DocumentSourceMatch*>(container.back().get()); + auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) + ->createPredicatesOnBucketLevelField(original->getMatchExpression()); + + ASSERT(predicate); + auto expected = fromjson( + "{$or: [" + " {$or: [" + " {$and: [" + " {'control.min.a': {$_internalExprLte: 1}}," + " {'control.max.a': {$_internalExprGte: 1}}" + " ]}," + " {$expr: {$ne: [" + " {$type: [ \"$control.min.a\" ]}," + " {$type: [ \"$control.max.a\" ]}" + " ]}}" + " ]}," + " {$or: [" + " {$and: [" + " {'control.min.a': {$_internalExprLte: 2}}," + " {'control.max.a': {$_internalExprGte: 2}}" + " ]}," + " {$expr: {$ne: [" + " {$type: [ \"$control.min.a\" ]}," + " {$type: [ \"$control.max.a\" ]}" + " ]}}" + " ]}" + "]}"); + ASSERT_BSONOBJ_EQ(predicate->serialize(true), expected); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -168,8 +210,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ASSERT_BSONOBJ_EQ(predicate->serialize(true), fromjson("{$or: [ {'control.max.a': {$_internalExprGt: 1}}," - "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," - "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}")); + "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," + "{$type: [ \"$control.max.a\" ]} ]}} ]}")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -194,8 +236,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ASSERT_BSONOBJ_EQ(predicate->serialize(true), fromjson("{$or: [ {'control.max.a': {$_internalExprGte: 1}}," - "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," - "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}")); + "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," + "{$type: [ \"$control.max.a\" ]} ]}} ]}")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -220,8 +262,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ASSERT_BSONOBJ_EQ(predicate->serialize(true), fromjson("{$or: [ {'control.min.a': {$_internalExprLt: 1}}," - "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," - "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}")); + "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," + "{$type: [ \"$control.max.a\" ]} ]}} ]}")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -246,8 +288,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ASSERT_BSONOBJ_EQ(predicate->serialize(true), fromjson("{$or: [ {'control.min.a': {$_internalExprLte: 1}}," - "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," - "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}")); + "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," + "{$type: [ \"$control.max.a\" ]} ]}} ]}")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -273,8 +315,8 @@ 
TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ASSERT_BSONOBJ_EQ(predicate->serialize(true), fromjson("{$or: [ {$and:[{'control.min.a': {$_internalExprLte: 1}}," "{'control.max.a': {$_internalExprGte: 1}}]}," - "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," - "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}")); + "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," + "{$type: [ \"$control.max.a\" ]} ]}} ]}")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -294,11 +336,11 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ASSERT_BSONOBJ_EQ(predicate->serialize(true), fromjson("{$and: [ {$or: [ {'control.max.b': {$_internalExprGt: 1}}," - "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.b\" ]}," - "{$type: [ \"$control.max.b\" ]} ]}} ]} ]}," + "{$expr: {$ne: [ {$type: [ \"$control.min.b\" ]}," + "{$type: [ \"$control.max.b\" ]} ]}} ]}," "{$or: [ {'control.min.a': {$_internalExprLt: 5}}," - "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," - "{$type: [ \"$control.max.a\" ]} ]}} ]} ]} ]}")); + "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," + "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -335,9 +377,13 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ->createPredicatesOnBucketLevelField(original->getMatchExpression()); ASSERT_BSONOBJ_EQ(predicate->serialize(true), - fromjson("{$or: [ {'control.max.b': {$_internalExprGt: 1}}," - "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.b\" ]}," - "{$type: [ \"$control.max.b\" ]} ]}} ]} ]}")); + fromjson("{$or: [" + " {'control.max.b': {$_internalExprGt: 1}}," + " {$expr: {$ne: [" + " {$type: [ \"$control.min.b\" ]}," + " {$type: [ \"$control.max.b\" ]}" + " ]}}" + "]}")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -358,14 +404,132 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ASSERT_BSONOBJ_EQ(predicate->serialize(true), fromjson("{$and: [ {$or: [ {'control.max.b': {$_internalExprGte: 2}}," - "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.b\" ]}," - "{$type: [ \"$control.max.b\" ]} ]}} ]} ]}," + "{$expr: {$ne: [ {$type: [ \"$control.min.b\" ]}," + "{$type: [ \"$control.max.b\" ]} ]}} ]}," "{$and: [ {$or: [ {'control.max.b': {$_internalExprGt: 1}}," - "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.b\" ]}," - "{$type: [ \"$control.max.b\" ]} ]}} ]} ]}," + "{$expr: {$ne: [ {$type: [ \"$control.min.b\" ]}," + "{$type: [ \"$control.max.b\" ]} ]}} ]}," "{$or: [ {'control.min.a': {$_internalExprLt: 5}}," - "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," - "{$type: [ \"$control.max.a\" ]} ]}} ]} ]} ]} ]}")); + "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," + "{$type: [ \"$control.max.a\" ]} ]}} ]} ]} ]}")); +} + +TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, + OptimizeMapsOrWithPushableChildrenOnControlField) { + auto pipeline = + Pipeline::parse(makeVector(fromjson("{$_internalUnpackBucket: {exclude: [], timeField: " + "'time', bucketMaxSpanSeconds: 3600}}"), + fromjson("{$match: {$or: [{b: {$gt: 1}}, {a: {$lt: 5}}]}}")), + getExpCtx()); + auto& container = pipeline->getSources(); + + ASSERT_EQ(pipeline->getSources().size(), 2U); + + auto original = dynamic_cast<DocumentSourceMatch*>(container.back().get()); + auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) + ->createPredicatesOnBucketLevelField(original->getMatchExpression()); + + ASSERT(predicate); + ASSERT_BSONOBJ_EQ(predicate->serialize(true), 
+ fromjson("{$or: [" + " {$or: [" + " {'control.max.b': {$_internalExprGt: 1}}," + " {$expr: {$ne: [" + " {$type: [ \"$control.min.b\" ]}," + " {$type: [ \"$control.max.b\" ]}" + " ]}}" + " ]}," + " {$or: [" + " {'control.min.a': {$_internalExprLt: 5}}," + " {$expr: {$ne: [" + " {$type: [ \"$control.min.a\" ]}," + " {$type: [ \"$control.max.a\" ]}" + " ]}}" + " ]}" + "]}")); +} + +TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, + OptimizeDoesNotMapOrWithUnpushableChildrenOnControlField) { + auto pipeline = + Pipeline::parse(makeVector(fromjson("{$_internalUnpackBucket: {exclude: [], timeField: " + "'time', bucketMaxSpanSeconds: 3600}}"), + fromjson("{$match: {$or: [{b: {$ne: 1}}, {a: {$ne: 5}}]}}")), + getExpCtx()); + auto& container = pipeline->getSources(); + + ASSERT_EQ(pipeline->getSources().size(), 2U); + + auto original = dynamic_cast<DocumentSourceMatch*>(container.back().get()); + auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) + ->createPredicatesOnBucketLevelField(original->getMatchExpression()); + + ASSERT(predicate == nullptr); +} + +TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, + OptimizeDoesNotMapOrWithPushableAndUnpushableChildrenOnControlField) { + auto pipeline = + Pipeline::parse(makeVector(fromjson("{$_internalUnpackBucket: {exclude: [], timeField: " + "'time', bucketMaxSpanSeconds: 3600}}"), + fromjson("{$match: {$or: [{b: {$gt: 1}}, {a: {$ne: 5}}]}}")), + getExpCtx()); + auto& container = pipeline->getSources(); + + ASSERT_EQ(pipeline->getSources().size(), 2U); + + auto original = dynamic_cast<DocumentSourceMatch*>(container.back().get()); + auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) + ->createPredicatesOnBucketLevelField(original->getMatchExpression()); + + // When a predicate can't be pushed down, it's the same as pushing down a trivially-true + // predicate. So when any child of an $or can't be pushed down, we could generate something like + // {$or: [ ... {$alwaysTrue: {}}, ... ]}, but then we might as well not push down the whole $or. 
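+    // Here {a: {$ne: 5}} is the child that can't be pushed down, so although
+    // {b: {$gt: 1}} maps to control-field bounds, the $or as a whole does not.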
+ ASSERT(predicate == nullptr); +} + +TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, + OptimizeMapsNestedOrWithPushableChildrenOnControlField) { + auto pipeline = Pipeline::parse( + makeVector( + fromjson("{$_internalUnpackBucket: {exclude: [], timeField: 'time', " + "bucketMaxSpanSeconds: 3600}}"), + fromjson("{$match: {$or: [{b: {$gte: 2}}, {$or: [{b: {$gt: 1}}, {a: {$lt: 5}}]}]}}")), + getExpCtx()); + auto& container = pipeline->getSources(); + + ASSERT_EQ(pipeline->getSources().size(), 2U); + + auto original = dynamic_cast<DocumentSourceMatch*>(container.back().get()); + auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) + ->createPredicatesOnBucketLevelField(original->getMatchExpression()); + + ASSERT_BSONOBJ_EQ(predicate->serialize(true), + fromjson("{$or: [" + " {$or: [" + " {'control.max.b': {$_internalExprGte: 2}}," + " {$expr: {$ne: [" + " {$type: [ \"$control.min.b\" ]}," + " {$type: [ \"$control.max.b\" ]}" + " ]}}" + " ]}," + " {$or: [" + " {$or: [" + " {'control.max.b': {$_internalExprGt: 1}}," + " {$expr: {$ne: [" + " {$type: [ \"$control.min.b\" ]}," + " {$type: [ \"$control.max.b\" ]}" + " ]}}" + " ]}," + " {$or: [" + " {'control.min.a': {$_internalExprLt: 5}}," + " {$expr: {$ne: [" + " {$type: [ \"$control.min.a\" ]}," + " {$type: [ \"$control.max.a\" ]}" + " ]}}" + " ]}" + " ]}" + "]}")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -410,14 +574,14 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ASSERT_BSONOBJ_EQ( stages[0], fromjson("{$match: {$and: [ {$or: [ {'control.max.b': {$_internalExprGte: 2}}," - "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.b\" ]}," - "{$type: [ \"$control.max.b\" ]} ]}} ]} ]}," + "{$expr: {$ne: [ {$type: [ \"$control.min.b\" ]}," + "{$type: [ \"$control.max.b\" ]} ]}} ]}," "{$or: [ {'control.max.c': {$_internalExprGt: 1}}," - "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.c\" ]}," - "{$type: [ \"$control.max.c\" ]} ]}} ]} ]}," + "{$expr: {$ne: [ {$type: [ \"$control.min.c\" ]}," + "{$type: [ \"$control.max.c\" ]} ]}} ]}," "{$or: [ {'control.min.a': {$_internalExprLt: 5}}," - "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," - "{$type: [ \"$control.max.a\" ]} ]}} ]} ]} ]}}")); + "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," + "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}}")); ASSERT_BSONOBJ_EQ(stages[1], unpackBucketObj); ASSERT_BSONOBJ_EQ(stages[2], matchObj); } @@ -491,7 +655,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - ASSERT(predicate == nullptr); + // Meta predicates are mapped to the meta field, not the control min/max fields. + ASSERT_BSONOBJ_EQ(predicate->serialize(true), fromjson("{meta: {$gt: 5}}")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -509,7 +674,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - ASSERT(predicate == nullptr); + // Meta predicates are mapped to the meta field, not the control min/max fields. 
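+    // (The user-visible metaField is stored as 'meta' in the buckets
+    // collection, so e.g. a predicate on 'myMeta.foo' maps to 'meta.foo'.)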
+ ASSERT_BSONOBJ_EQ(predicate->serialize(true), fromjson("{'meta.foo': {$gt: 5}}")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -528,9 +694,16 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ->createPredicatesOnBucketLevelField(original->getMatchExpression()); ASSERT_BSONOBJ_EQ(predicate->serialize(true), - fromjson("{$or: [ {'control.max.a': {$_internalExprGt: 1}}," - "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," - "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}")); + fromjson("{$and: [" + " {$or: [" + " {'control.max.a': {$_internalExprGt: 1}}," + " {$expr: {$ne: [" + " {$type: [ \"$control.min.a\" ]}," + " {$type: [ \"$control.max.a\" ]}" + " ]}}" + " ]}," + " {meta: {$eq: 5}}" + "]}")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, OptimizeMapsTimePredicatesOnId) { diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/optimize_pipeline_test.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/optimize_pipeline_test.cpp index 7bf4bde0eb0..cfe92c09f5f 100644 --- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/optimize_pipeline_test.cpp +++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/optimize_pipeline_test.cpp @@ -84,26 +84,45 @@ TEST_F(OptimizePipeline, MetaMatchPushedDown) { ASSERT_BSONOBJ_EQ(unpack, serialized[1]); } -TEST_F(OptimizePipeline, MixedMatchOnlyControlPredicatesPushedDown) { +TEST_F(OptimizePipeline, MixedMatchOr) { auto unpack = fromjson( "{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta', " "bucketMaxSpanSeconds: 3600}}"); auto match = fromjson( - "{$match: {$and: [{x: {$lte: 1}}, {$or: [{'myMeta.a': " - "{$gt: 1}}, {y: {$lt: 1}}]}]}}"); + "{$match: {$and: [" + " {x: {$lte: 1}}," + " {$or: [" + // This $or is mixed: it contains both metadata and metric predicates. + " {'myMeta.a': {$gt: 1}}," + " {y: {$lt: 1}}" + " ]}" + "]}}"); auto pipeline = Pipeline::parse(makeVector(unpack, match), getExpCtx()); ASSERT_EQ(2u, pipeline->getSources().size()); pipeline->optimizePipeline(); - // We should fail to push down the $match on meta because of the $or clause. We should still be - // able to map the predicate on 'x' to a predicate on the control field. auto stages = pipeline->writeExplainOps(ExplainOptions::Verbosity::kQueryPlanner); ASSERT_EQ(3u, stages.size()); - ASSERT_BSONOBJ_EQ(fromjson("{$match: {$or: [ {'control.min.x': {$_internalExprLte: 1}}," - "{$expr: {$ne: [ {$type: [ \"$control.min.x\" ]}," - "{$type: [ \"$control.max.x\" ] } ] } } ] }}"), - stages[0].getDocument().toBson()); + auto expected = fromjson( + "{$match: {$and: [" + // Result of pushing down {x: {$lte: 1}}. + " {$or: [" + " {'control.min.x': {$_internalExprLte: 1}}," + " {$expr: {$ne: [ {$type: [ \"$control.min.x\" ]}," + " {$type: [ \"$control.max.x\" ]}" + " ]}}" + " ]}," + // Result of pushing down the $or predicate. 
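+        // The meta disjunct survives as a plain comparison on 'meta.a', while
+        // the metric disjunct on 'y' becomes control-field bounds plus the
+        // mixed-schema type check.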
+ " {$or: [" + " {'meta.a': {$gt: 1}}," + " {'control.min.y': {$_internalExprLt: 1}}," + " {$expr: {$ne: [ {$type: [ \"$control.min.y\" ]}," + " {$type: [ \"$control.max.y\" ]}" + " ]}}" + " ]}" + "]}}"); + ASSERT_BSONOBJ_EQ(expected, stages[0].getDocument().toBson()); ASSERT_BSONOBJ_EQ(unpack, stages[1].getDocument().toBson()); ASSERT_BSONOBJ_EQ(match, stages[2].getDocument().toBson()); } @@ -113,20 +132,21 @@ TEST_F(OptimizePipeline, MixedMatchOnlyMetaMatchPushedDown) { "{$_internalUnpackBucket: { exclude: [], timeField: 'time', metaField: 'myMeta', " "bucketMaxSpanSeconds: 3600}}"); auto pipeline = Pipeline::parse( - makeVector(unpack, fromjson("{$match: {myMeta: {$gte: 0, $lte: 5}, a: {$in: [1, 2, 3]}}}")), + makeVector(unpack, + fromjson("{$match: {myMeta: {$gte: 0, $lte: 5}, a: {$type: \"string\"}}}")), getExpCtx()); ASSERT_EQ(2u, pipeline->getSources().size()); pipeline->optimizePipeline(); // We should push down the $match on the metaField but not the predicate on '$a', which is - // ineligible because of the $in. + // ineligible because of the $type. auto serialized = pipeline->serializeToBson(); ASSERT_EQ(3u, serialized.size()); ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [{meta: {$gte: 0}}, {meta: {$lte: 5}}]}}"), serialized[0]); ASSERT_BSONOBJ_EQ(unpack, serialized[1]); - ASSERT_BSONOBJ_EQ(fromjson("{$match: {a: {$in: [1, 2, 3]}}}"), serialized[2]); + ASSERT_BSONOBJ_EQ(fromjson("{$match: {a: {$type: [ 2 ]}}}"), serialized[2]); } TEST_F(OptimizePipeline, MultipleMatchesPushedDown) { @@ -242,11 +262,17 @@ TEST_F(OptimizePipeline, SortThenMixedMatchPushedDown) { // We should push down both the $sort and parts of the $match. auto serialized = pipeline->serializeToBson(); ASSERT_EQ(4u, serialized.size()); - ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [{meta: {$eq: 'abc'}}," - "{$or: [ {'control.max.a': {$_internalExprGte: 5}}," - "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," - "{$type: [ \"$control.max.a\" ]} ]}} ]} ]} ]}}"), - serialized[0]); + auto expected = fromjson( + "{$match: {$and: [" + " {meta: {$eq: 'abc'}}," + " {$or: [" + " {'control.max.a': {$_internalExprGte: 5}}," + " {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," + " {$type: [ \"$control.max.a\" ]}" + " ]}}" + " ]}" + "]}}"); + ASSERT_BSONOBJ_EQ(expected, serialized[0]); ASSERT_BSONOBJ_EQ(fromjson("{$sort: {meta: -1}}"), serialized[1]); ASSERT_BSONOBJ_EQ(unpack, serialized[2]); ASSERT_BSONOBJ_EQ(fromjson("{$match: {a: {$gte: 5}}}"), serialized[3]); @@ -437,7 +463,7 @@ TEST_F(OptimizePipeline, ComputedProjectThenMetaMatchNotPushedDown) { pipeline->optimizePipeline(); - // We should push down both the project and internalize the remaining project, but we can't + // We should both push down the project and internalize the remaining project, but we can't // push down the meta match due to the (now invalid) renaming. 
auto serialized = pipeline->serializeToBson(); ASSERT_EQ(3u, serialized.size()); diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp index 5e492dba424..8f31d177a53 100644 --- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp +++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp @@ -261,10 +261,16 @@ TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, OptimizeSplitsMatchAndMaps // $_internalUnpackBucket and merged. auto serialized = pipeline->serializeToBson(); ASSERT_EQ(3u, serialized.size()); - ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [{meta: {$gte: 0}}, {meta: {$lte: 5}}, " - "{$or: [ {'control.min.a': {$_internalExprLte: 4}}," - "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," - "{$type: [ \"$control.max.a\" ]} ]}} ]} ]} ]}}"), + ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [" + " {meta: {$gte: 0}}," + " {meta: {$lte: 5}}," + " {$or: [" + " {'control.min.a': {$_internalExprLte: 4}}," + " {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," + " {$type: [ \"$control.max.a\" ]}" + " ]}}" + " ]}" + "]}}"), serialized[0]); ASSERT_BSONOBJ_EQ(unpack, serialized[1]); ASSERT_BSONOBJ_EQ(fromjson("{$match: {a: {$lte: 4}}}"), serialized[2]); @@ -293,8 +299,13 @@ TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, "{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta', " "bucketMaxSpanSeconds: 3600}}"); auto match = fromjson( - "{$match: {$and: [{x: {$lte: 1}}, {$or: [{'myMeta.a': " - "{$gt: 1}}, {y: {$lt: 1}}]}]}}"); + "{$match: {$and: [" + " {x: {$lte: 1}}," + " {$or: [" + " {'myMeta.a': {$gt: 1}}," + " {y: {$lt: 1}}" + " ]}" + "]}}"); auto pipeline = Pipeline::parse(makeVector(unpack, match), getExpCtx()); ASSERT_EQ(2u, pipeline->getSources().size()); @@ -304,10 +315,25 @@ TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, // map the predicate on 'x' to a predicate on the control field. auto serialized = pipeline->serializeToBson(); ASSERT_EQ(3u, serialized.size()); - ASSERT_BSONOBJ_EQ(fromjson("{$match: {$or: [ {'control.min.x': {$_internalExprLte: 1}}," - "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.x\" ]}," - "{$type: [ \"$control.max.x\" ]} ]}} ]} ]}}"), - serialized[0]); + auto expected = fromjson( + "{$match: {$and: [" + // Result of pushing down {x: {$lte: 1}}. + " {$or: [" + " {'control.min.x': {$_internalExprLte: 1}}," + " {$expr: {$ne: [ {$type: [ \"$control.min.x\" ]}," + " {$type: [ \"$control.max.x\" ]} ]}}" + " ]}," + // Result of pushing down {$or ... myMeta.a ... y ...}. 
+ " {$or: [" + " {'meta.a': {$gt: 1}}," + " {$or: [" + " {'control.min.y': {$_internalExprLt: 1}}," + " {$expr: {$ne: [ {$type: [ \"$control.min.y\" ]}," + " {$type: [ \"$control.max.y\" ]} ]}}" + " ]}" + " ]}" + "]}}"); + ASSERT_BSONOBJ_EQ(expected, serialized[0]); ASSERT_BSONOBJ_EQ(unpack, serialized[1]); ASSERT_BSONOBJ_EQ(match, serialized[2]); } diff --git a/src/mongo/db/timeseries/SConscript b/src/mongo/db/timeseries/SConscript index a9cc40ef985..84745946b65 100644 --- a/src/mongo/db/timeseries/SConscript +++ b/src/mongo/db/timeseries/SConscript @@ -56,6 +56,7 @@ env.Library( ], LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/base', + '$BUILD_DIR/mongo/db/pipeline/pipeline', ], ) diff --git a/src/mongo/db/timeseries/timeseries_commands_conversion_helper.cpp b/src/mongo/db/timeseries/timeseries_commands_conversion_helper.cpp index e857ab8a3a8..dc6714f9ab1 100644 --- a/src/mongo/db/timeseries/timeseries_commands_conversion_helper.cpp +++ b/src/mongo/db/timeseries/timeseries_commands_conversion_helper.cpp @@ -33,6 +33,9 @@ #include "mongo/db/index/index_descriptor.h" #include "mongo/db/index_names.h" +#include "mongo/db/pipeline/document_source_internal_unpack_bucket.h" +#include "mongo/db/query/collation/collator_factory_interface.h" +#include "mongo/db/query/collation/collator_interface.h" #include "mongo/db/storage/storage_parameters_gen.h" #include "mongo/db/timeseries/timeseries_constants.h" #include "mongo/db/timeseries/timeseries_index_schema_conversion_functions.h" @@ -83,12 +86,59 @@ CreateIndexesCommand makeTimeseriesCreateIndexesCommand(OperationContext* opCtx, if (elem.fieldNameStringData() == IndexDescriptor::kPartialFilterExprFieldName) { if (feature_flags::gTimeseriesMetricIndexes.isEnabledAndIgnoreFCV() && serverGlobalParams.featureCompatibility.isFCVUpgradingToOrAlreadyLatest()) { - // Partial indexes are not supported in FCV < 5.2. isBucketsIndexSpecCompatibleForDowngrade = false; } else { uasserted(ErrorCodes::InvalidOptions, "Partial indexes are not supported on time-series collections"); } + + uassert(ErrorCodes::CannotCreateIndex, + "Partial indexes on time-series collections require FCV 5.3", + feature_flags::gTimeseriesMetricIndexes.isEnabled( + serverGlobalParams.featureCompatibility)); + BSONObj pred = elem.Obj(); + + // If the createIndexes command specifies a collation for this index, then that + // collation affects how we should interpret expressions in the partial filter + // ($gt, $lt, etc). + if (auto collatorSpec = origIndex[NewIndexSpec::kCollationFieldName]) { + uasserted( + 5916300, + std::string{"On a time-series collection, partialFilterExpression and "} + + NewIndexSpec::kCollationFieldName + " arguments are incompatible"_sd); + } + // Since no collation was specified in the command, we know the index collation will + // match the collection's collation. + auto collationMatchesDefault = ExpressionContext::CollationMatchesDefault::kYes; + + // Even though the index collation will match the collection's collation, we don't + // know whether or not that collation is simple. However, I think we can correctly + // rewrite the filter expression without knowing this... Looking up the correct + // value would require handling mongos and mongod separately. + std::unique_ptr<CollatorInterface> collator{nullptr}; + + auto expCtx = make_intrusive<ExpressionContext>(opCtx, std::move(collator), origNs); + expCtx->collationMatchesDefault = collationMatchesDefault; + + // partialFilterExpression is evaluated against a collection, so there are no + // computed fields. 
+ bool haveComputedMetaField = false; + + // As part of building the index, we verify that the collection does not contain + // any mixed-schema buckets. So by the time the index is visible to the query + // planner, this will be true. + bool assumeNoMixedSchemaData = true; + + BSONObj bucketPred = + BucketSpec::pushdownPredicate(expCtx, + options, + collationMatchesDefault, + pred, + haveComputedMetaField, + assumeNoMixedSchemaData, + BucketSpec::IneligiblePredicatePolicy::kError); + builder.append(IndexDescriptor::kPartialFilterExprFieldName, bucketPred); + continue; } if (elem.fieldNameStringData() == IndexDescriptor::kSparseFieldName) { @@ -149,6 +199,9 @@ CreateIndexesCommand makeTimeseriesCreateIndexesCommand(OperationContext* opCtx, std::move(bucketsIndexSpecWithStatus.getValue())); continue; } + + // Any index option that's not explicitly banned, and not handled specially, we pass + // through unchanged. builder.append(elem); } diff --git a/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.cpp b/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.cpp index 570ab781281..1dff8c06341 100644 --- a/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.cpp +++ b/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.cpp @@ -32,6 +32,8 @@ #include "mongo/db/timeseries/timeseries_index_schema_conversion_functions.h" #include "mongo/db/index_names.h" +#include "mongo/db/matcher/expression_algo.h" +#include "mongo/db/matcher/expression_parser.h" #include "mongo/db/storage/storage_parameters_gen.h" #include "mongo/db/timeseries/timeseries_constants.h" #include "mongo/db/timeseries/timeseries_gen.h" @@ -400,11 +402,13 @@ bool isBucketsIndexSpecCompatibleForDowngrade(const TimeseriesOptions& timeserie /*timeseriesMetricIndexesFeatureFlagEnabled=*/false) != boost::none; } -bool doesBucketsIndexIncludeKeyOnMeasurement(const TimeseriesOptions& timeseriesOptions, - const BSONObj& bucketsIndex) { - if (!bucketsIndex.hasField(kKeyFieldName)) { - return false; - } +bool doesBucketsIndexIncludeMeasurement(OperationContext* opCtx, + const NamespaceString& bucketNs, + const TimeseriesOptions& timeseriesOptions, + const BSONObj& bucketsIndex) { + tassert(5916306, + str::stream() << "Index spec has no 'key': " << bucketsIndex.toString(), + bucketsIndex.hasField(kKeyFieldName)); auto timeField = timeseriesOptions.getTimeField(); auto metaField = timeseriesOptions.getMetaField(); @@ -413,22 +417,58 @@ bool doesBucketsIndexIncludeKeyOnMeasurement(const TimeseriesOptions& timeseries << timeseries::kControlMinFieldNamePrefix << timeField; const std::string controlMaxTimeField = str::stream() << timeseries::kControlMaxFieldNamePrefix << timeField; + static const std::string idField = "_id"; - const BSONObj keyObj = bucketsIndex.getField(kKeyFieldName).Obj(); - for (const auto& elem : keyObj) { - if (elem.fieldNameStringData() == controlMinTimeField || - elem.fieldNameStringData() == controlMaxTimeField) { - continue; + auto isMeasurementField = [&](StringData name) -> bool { + if (name == controlMinTimeField || name == controlMaxTimeField) { + return false; } if (metaField) { - if (elem.fieldNameStringData() == timeseries::kBucketMetaFieldName || - elem.fieldNameStringData().startsWith(timeseries::kBucketMetaFieldName + ".")) { - continue; + if (name == timeseries::kBucketMetaFieldName || + name.startsWith(timeseries::kBucketMetaFieldName + ".")) { + return false; } } return true; + }; + + // Check index key. 
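+    // Any key field other than the control min/max time fields and the 'meta'
+    // fields is, by elimination, a measurement field.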
+ const BSONObj keyObj = bucketsIndex.getField(kKeyFieldName).Obj(); + for (const auto& elem : keyObj) { + if (isMeasurementField(elem.fieldNameStringData())) + return true; + } + + // Check partial filter expression. + if (auto filterElem = bucketsIndex[kPartialFilterExpressionFieldName]) { + tassert(5916302, + str::stream() << "Partial filter expression is not an object: " << filterElem, + filterElem.type() == BSONType::Object); + + auto expCtx = make_intrusive<ExpressionContext>(opCtx, nullptr /* collator */, bucketNs); + + MatchExpressionParser::AllowedFeatureSet allowedFeatures = + MatchExpressionParser::kDefaultSpecialFeatures; + + // TODO SERVER-53380 convert to tassertStatusOK. + auto statusWithFilter = MatchExpressionParser::parse( + filterElem.Obj(), expCtx, ExtensionsCallbackNoop{}, allowedFeatures); + tassert(5916303, + str::stream() << "Partial filter expression failed to parse: " + << statusWithFilter.getStatus(), + statusWithFilter.isOK()); + auto filter = std::move(statusWithFilter.getValue()); + + if (!expression::isOnlyDependentOn(*filter, + {std::string{timeseries::kBucketMetaFieldName}, + controlMinTimeField, + controlMaxTimeField, + idField})) { + // Partial filter expression depends on a non-time, non-metadata field. + return true; + } } return false; diff --git a/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.h b/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.h index f51e52db0bc..ca9fc2e1a77 100644 --- a/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.h +++ b/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.h @@ -77,11 +77,15 @@ bool isBucketsIndexSpecCompatibleForDowngrade(const TimeseriesOptions& timeserie const BSONObj& bucketsIndex); /** - * Returns true if 'bucketsIndex' contains a key on a measurement field, excluding the time field. - * This is helpful to detect if the 'bucketsIndex' contains a key on a field that was allowed to - * have mixed-schema data in MongoDB versions < 5.2. + * Returns true if 'bucketsIndex' uses a measurement field, excluding the time field. Checks both + * the index key and the partialFilterExpression, if present. + * + * This is helpful to detect if the 'bucketsIndex' relies on a field that was allowed to have + * mixed-schema data in MongoDB versions < 5.2. */ -bool doesBucketsIndexIncludeKeyOnMeasurement(const TimeseriesOptions& timeseriesOptions, - const BSONObj& bucketsIndex); +bool doesBucketsIndexIncludeMeasurement(OperationContext* opCtx, + const NamespaceString& bucketNs, + const TimeseriesOptions& timeseriesOptions, + const BSONObj& bucketsIndex); } // namespace mongo::timeseries diff --git a/src/mongo/shell/types.js b/src/mongo/shell/types.js index 571f40d8af6..6a0f78dad6a 100644 --- a/src/mongo/shell/types.js +++ b/src/mongo/shell/types.js @@ -276,6 +276,8 @@ Object.extend = function(dst, src, deep) { eval("v = " + tojson(v)); } else if ("floatApprox" in v) { // convert NumberLong properly eval("v = " + tojson(v)); + } else if (v.constructor === Date) { // convert Date properly + eval("v = " + tojson(v)); } else { v = Object.extend(typeof (v.length) == "number" ? [] : {}, v, true); } |
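Taken together, the new pushdownPredicate path is what turns a user's event-level partialFilterExpression into a bucket-level filter at index-creation time. A rough mongo-shell sketch of the user-visible effect (collection and field names are illustrative, and the serialized filter is abbreviated):

// Create a time-series collection and a partial index on a metric field.
db.createCollection('ts', {timeseries: {timeField: 'time', metaField: 'm'}});
db.ts.createIndex({a: 1}, {partialFilterExpression: {b: {$gte: 100}}});

// On the system.buckets.ts collection, the index key maps to the control
// bounds of 'a', and the filter is rewritten to a bucket-level predicate
// roughly like:
//   {'control.max.b': {$_internalExprGte: 100}}
// No mixed-schema disjunct is needed here, because the index build verifies
// that the collection contains no mixed-schema buckets.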