author    David Percy <david.percy@mongodb.com>    2021-08-23 19:53:37 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com>    2022-01-21 04:24:53 +0000
commit    610898a1ccec0afc3d38f7c29f2553d5b6102d30
tree      6f480562ae4bcee8ff3c4daf07eadba5a56ef9e0
parent    e446bef952d2ef42ae799e9b88c877b3fe0be6e4
download  mongo-610898a1ccec0afc3d38f7c29f2553d5b6102d30.tar.gz
SERVER-59163 Allow creating partial indexes on time-series collections
 jstests/core/timeseries/timeseries_index_partial.js                           | 565
 jstests/core/timeseries/timeseries_index_spec.js                              |  15
 jstests/core/timeseries/timeseries_predicates.js                              | 221
 jstests/multiVersion/upgrade_downgrade_timeseries_collection_from_last_lts.js | 102
 jstests/noPassthrough/timeseries_measurement_indexes_downgrade.js             |  20
 jstests/sharding/timeseries_query.js                                          |   9
 src/mongo/db/catalog/collection_impl.cpp                                      |   4
 src/mongo/db/catalog/index_catalog_impl.cpp                                   |  17
 src/mongo/db/catalog/multi_index_block.cpp                                    |   3
 src/mongo/db/commands/set_feature_compatibility_version_command.cpp           |  26
 src/mongo/db/exec/SConscript                                                  |   1
 src/mongo/db/exec/bucket_unpacker.cpp                                         | 279
 src/mongo/db/exec/bucket_unpacker.h                                           |  47
 src/mongo/db/matcher/expression_algo.cpp                                      |  96
 src/mongo/db/matcher/expression_algo.h                                        |   4
 src/mongo/db/matcher/expression_leaf.h                                        |   4
 src/mongo/db/pipeline/SConscript                                              |   2
 src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp              |  25
 src/mongo/db/pipeline/document_source_internal_unpack_bucket.h                |   1
 src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp | 261
 src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/optimize_pipeline_test.cpp | 62
 src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp | 46
 src/mongo/db/timeseries/SConscript                                            |   1
 src/mongo/db/timeseries/timeseries_commands_conversion_helper.cpp             |  55
 src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.cpp      |  66
 src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.h        |  14
 src/mongo/shell/types.js                                                      |   2
 27 files changed, 1738 insertions(+), 210 deletions(-)
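
The headline change: createIndex on a time-series collection now accepts partialFilterExpression,
and the filter is translated into a predicate on the underlying system.buckets collection. A
minimal shell sketch of the new capability (collection and field names here are illustrative,
not taken from this patch):

    db.createCollection('weather', {timeseries: {timeField: 'time', metaField: 'sensor'}});
    // Index only the measurements whose temperature exceeds a threshold.
    assert.commandWorked(db.weather.createIndex(
        {temperature: 1}, {partialFilterExpression: {temperature: {$gt: 100}}}));
    // On system.buckets.weather this becomes an index on
    // {'control.min.temperature': 1, 'control.max.temperature': 1} whose filter
    // bounds only control.max.temperature, since $gt on a metric can only bound
    // the bucket's max (see the tests below).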
diff --git a/jstests/core/timeseries/timeseries_index_partial.js b/jstests/core/timeseries/timeseries_index_partial.js
new file mode 100644
index 00000000000..3a637282253
--- /dev/null
+++ b/jstests/core/timeseries/timeseries_index_partial.js
@@ -0,0 +1,565 @@
+/**
+ * Test creating and using partial indexes on a time-series collection.
+ *
+ * @tags: [
+ * does_not_support_stepdowns,
+ * does_not_support_transactions,
+ * requires_fcv_52,
+ * ]
+ */
+(function() {
+"use strict";
+
+load("jstests/core/timeseries/libs/timeseries.js");
+load("jstests/libs/analyze_plan.js");
+
+if (!TimeseriesTest.timeseriesMetricIndexesEnabled(db.getMongo())) {
+ jsTestLog(
+ "Skipped test as the featureFlagTimeseriesMetricIndexes feature flag is not enabled.");
+ return;
+}
+
+const coll = db.timeseries_index_partial;
+const timeField = 'time';
+const metaField = 'm';
+
+coll.drop();
+assert.commandWorked(db.createCollection(coll.getName(), {timeseries: {timeField, metaField}}));
+const buckets = db.getCollection('system.buckets.' + coll.getName());
+let extraIndexes = [];
+let extraBucketIndexes = [];
+if (FixtureHelpers.isSharded(buckets)) {
+ // If the collection is sharded, expect an implicitly-created index on time.
+ // It will appear differently in listIndexes depending on whether you look at the time-series
+ // collection or the buckets collection.
+ extraIndexes.push({
+ "v": 2,
+ "key": {"time": 1},
+ "name": "control.min.time_1",
+ });
+ extraBucketIndexes.push({
+ "v": 2,
+ "key": {"control.min.time": 1},
+ "name": "control.min.time_1",
+ });
+}
+assert.sameMembers(coll.getIndexes(), extraIndexes);
+assert.sameMembers(buckets.getIndexes(), extraBucketIndexes);
+
+assert.commandWorked(coll.insert([
+ // In bucket A, some but not all documents match the partial filter.
+ {[timeField]: ISODate('2000-01-01T00:00:00Z'), [metaField]: {bucket: 'A'}, a: 0, b: 20},
+ {[timeField]: ISODate('2000-01-01T00:00:01Z'), [metaField]: {bucket: 'A'}, a: 1, b: 16},
+ {[timeField]: ISODate('2000-01-01T00:00:02Z'), [metaField]: {bucket: 'A'}, a: 2, b: 12},
+ {[timeField]: ISODate('2000-01-01T00:00:03Z'), [metaField]: {bucket: 'A'}, a: 3, b: 8},
+ {[timeField]: ISODate('2000-01-01T00:00:04Z'), [metaField]: {bucket: 'A'}, a: 4, b: 4},
+
+ // In bucket B, no documents match the partial filter.
+ {[timeField]: ISODate('2000-01-01T00:00:00Z'), [metaField]: {bucket: 'B'}, a: 5, b: 99},
+ {[timeField]: ISODate('2000-01-01T00:00:01Z'), [metaField]: {bucket: 'B'}, a: 6, b: 99},
+ {[timeField]: ISODate('2000-01-01T00:00:02Z'), [metaField]: {bucket: 'B'}, a: 7, b: 99},
+ {[timeField]: ISODate('2000-01-01T00:00:03Z'), [metaField]: {bucket: 'B'}, a: 8, b: 99},
+ {[timeField]: ISODate('2000-01-01T00:00:04Z'), [metaField]: {bucket: 'B'}, a: 9, b: 99},
+
+ // In bucket C, every document matches the partial filter.
+ {[timeField]: ISODate('2000-01-01T00:00:00Z'), [metaField]: {bucket: 'C'}, a: 10, b: 0},
+ {[timeField]: ISODate('2000-01-01T00:00:01Z'), [metaField]: {bucket: 'C'}, a: 11, b: 0},
+ {[timeField]: ISODate('2000-01-01T00:00:02Z'), [metaField]: {bucket: 'C'}, a: 12, b: 0},
+ {[timeField]: ISODate('2000-01-01T00:00:03Z'), [metaField]: {bucket: 'C'}, a: 13, b: 0},
+ {[timeField]: ISODate('2000-01-01T00:00:04Z'), [metaField]: {bucket: 'C'}, a: 14, b: 0},
+]));
+assert.eq(15, coll.count());
+assert.eq(3, buckets.count());
+
+// Expected partialFilterExpression to be an object.
+assert.commandFailedWithCode(coll.createIndex({a: 1}, {partialFilterExpression: 123}), [10065]);
+
+// Test creating and using a partial index.
+{
+ // Make sure the query uses the {a: 1} index.
+ function checkPlan(predicate) {
+ const explain = coll.find(predicate).explain();
+ const scan = getAggPlanStage(explain, 'IXSCAN');
+ const indexes = buckets.getIndexes();
+ assert(scan,
+ "Expected an index scan for predicate: " + tojson(predicate) +
+ " but got: " + tojson(explain) + "\nAvailable indexes were: " + tojson(indexes));
+ assert.eq(scan.indexName, "a_1", scan);
+ }
+ // Make sure the query results match a collection-scan plan.
+ function checkResults(predicate) {
+ const result = coll.aggregate({$match: predicate}).toArray();
+ const unindexed =
+ coll.aggregate([{$_internalInhibitOptimization: {}}, {$match: predicate}]).toArray();
+ assert.docEq(result, unindexed);
+ }
+ function checkPlanAndResults(predicate) {
+ checkPlan(predicate);
+ checkResults(predicate);
+ }
+ const check = checkPlanAndResults;
+
+ // Test some predicates on a metric field.
+ {
+ assert.commandWorked(coll.createIndex({a: 1}, {partialFilterExpression: {b: {$lt: 12}}}));
+ // Query predicate mentions partialFilterExpression exactly.
+ // The extra predicate on 'a' is necessary for the multiplanner to think an {a: 1} index is
+ // relevant.
+ check({a: {$lt: 999}, b: {$lt: 12}});
+ // Query predicate is a subset of partialFilterExpression.
+ check({a: {$lt: 999}, b: {$lt: 11}});
+
+ assert.commandWorked(coll.dropIndex({a: 1}));
+ assert.commandWorked(coll.createIndex({a: 1}, {partialFilterExpression: {b: {$lte: 12}}}));
+ check({a: {$lt: 999}, b: {$lte: 12}});
+ check({a: {$lt: 999}, b: {$lte: 11}});
+
+ assert.commandWorked(coll.dropIndex({a: 1}));
+ assert.commandWorked(coll.createIndex({a: 1}, {partialFilterExpression: {b: {$gt: 12}}}));
+ check({a: {$lt: 999}, b: {$gt: 12}});
+ check({a: {$lt: 999}, b: {$gt: 13}});
+
+ assert.commandWorked(coll.dropIndex({a: 1}));
+ assert.commandWorked(coll.createIndex({a: 1}, {partialFilterExpression: {b: {$gte: 12}}}));
+ check({a: {$lt: 999}, b: {$gte: 12}});
+ check({a: {$lt: 999}, b: {$gte: 13}});
+ }
+
+ // Test some predicates on the time field.
+ {
+ const t0 = ISODate('2000-01-01T00:00:00Z');
+ const t1 = ISODate('2000-01-01T00:00:01Z');
+ const t2 = ISODate('2000-01-01T00:00:02Z');
+
+ // When the collection is sharded, there is an index on time that can win, instead of the
+ // partial index. So only check the results in that case, not the plan.
+ const check = FixtureHelpers.isSharded(buckets) ? checkResults : checkPlanAndResults;
+
+ assert.commandWorked(coll.dropIndex({a: 1}));
+ assert.commandWorked(
+ coll.createIndex({a: 1}, {partialFilterExpression: {[timeField]: {$lt: t1}}}));
+ check({a: {$lt: 999}, [timeField]: {$lt: t1}});
+ check({a: {$lt: 999}, [timeField]: {$lt: t0}});
+
+ assert.commandWorked(coll.dropIndex({a: 1}));
+ assert.commandWorked(
+ coll.createIndex({a: 1}, {partialFilterExpression: {[timeField]: {$lte: t1}}}));
+ check({a: {$lt: 999}, [timeField]: {$lte: t1}});
+ check({a: {$lt: 999}, [timeField]: {$lte: t0}});
+
+ assert.commandWorked(coll.dropIndex({a: 1}));
+ assert.commandWorked(
+ coll.createIndex({a: 1}, {partialFilterExpression: {[timeField]: {$gt: t1}}}));
+ check({a: {$lt: 999}, [timeField]: {$gt: t1}});
+ check({a: {$lt: 999}, [timeField]: {$gt: t2}});
+
+ assert.commandWorked(coll.dropIndex({a: 1}));
+ assert.commandWorked(
+ coll.createIndex({a: 1}, {partialFilterExpression: {[timeField]: {$gte: t1}}}));
+ check({a: {$lt: 999}, [timeField]: {$gte: t1}});
+ check({a: {$lt: 999}, [timeField]: {$gte: t2}});
+ }
+
+ assert.commandWorked(coll.dropIndex({a: 1}));
+ assert.sameMembers(coll.getIndexes(), extraIndexes);
+ assert.sameMembers(buckets.getIndexes(), extraBucketIndexes);
+}
+
+// Check that partialFilterExpression can use a mixture of metadata, time, and measurement fields,
+// and that this is translated to the bucket-level predicate we expect.
+assert.commandWorked(coll.createIndex({a: 1}, {
+ partialFilterExpression: {
+ $and: [
+ {time: {$gt: ISODate("2000-01-01")}},
+ {[metaField + '.bucket']: {$gte: "B"}},
+ {b: {$gte: 0}},
+ ]
+ }
+}));
+assert.docEq(buckets.getIndexes(), extraBucketIndexes.concat([
+ {
+ "v": 2,
+ "key": {"control.min.a": 1, "control.max.a": 1},
+ "name": "a_1",
+
+ "partialFilterExpression": {
+ // Meta predicates are pushed down verbatim.
+ ['meta.bucket']: {"$gte": "B"},
+ $and: [
+ {
+ $and: [
+ // $gt on time creates a bound on the max time.
+ {"control.max.time": {"$_internalExprGt": ISODate("2000-01-01T00:00:00Z")}},
+ // We also have a bound on the min time, derived from bucketMaxSpanSeconds.
+ {"control.min.time": {"$_internalExprGt": ISODate("1999-12-31T23:00:00Z")}},
+ // The min time is also encoded in the _id, so we have a bound on that as
+ // well.
+ {"_id": {"$gt": ObjectId("386d3570ffffffffffffffff")}},
+ ]
+ },
+ // $gt on a non-time field can only bound the control.max for that field.
+ {"control.max.b": {"$_internalExprGte": 0}},
+ ]
+ },
+ "originalSpec": {
+ key: {a: 1},
+ name: "a_1",
+ partialFilterExpression: {
+ $and: [
+ {time: {$gt: ISODate("2000-01-01")}},
+ {[metaField + '.bucket']: {$gte: "B"}},
+ {b: {$gte: 0}},
+ ]
+ },
+ v: 2,
+ }
+ },
+]));
+
+// Test how partialFilterExpression interacts with collation.
+{
+ const numericCollation = {locale: "en_US", numericOrdering: true};
+ coll.drop();
+ assert.commandWorked(db.createCollection(coll.getName(), {
+ timeseries: {timeField, metaField},
+ collation: numericCollation,
+ }));
+ assert.commandWorked(coll.insert([
+ {[timeField]: ISODate(), [metaField]: {x: "1000", y: 1}, a: "120"},
+ {[timeField]: ISODate(), [metaField]: {x: "1000", y: 2}, a: "3"},
+ {[timeField]: ISODate(), [metaField]: {x: "500", y: 3}, a: "120"},
+ {[timeField]: ISODate(), [metaField]: {x: "500", y: 4}, a: "3"},
+ ]));
+
+ // Queries on the collection use the collection's collation by default.
+ assert.docEq(
+ coll.find({}, {_id: 0, [metaField + '.x']: 1}).sort({[metaField + '.x']: 1}).toArray(), [
+ {[metaField]: {x: "500"}},
+ {[metaField]: {x: "500"}},
+ {[metaField]: {x: "1000"}},
+ {[metaField]: {x: "1000"}},
+ ]);
+ assert.docEq(coll.find({}, {_id: 0, a: 1}).sort({a: 1}).toArray(), [
+ {a: "3"},
+ {a: "3"},
+ {a: "120"},
+ {a: "120"},
+ ]);
+
+ // Specifying a collation and partialFilterExpression together fails, even if the collation
+ // matches the collection's default collation.
+ assert.commandFailedWithCode(coll.createIndex({a: 1}, {
+ name: "a_lt_25_simple",
+ collation: {locale: "simple"},
+ partialFilterExpression: {a: {$lt: "25"}}
+ }),
+ [5916300]);
+ assert.commandFailedWithCode(
+ coll.createIndex({a: 1}, {
+ name: "a_lt_25_numeric",
+ collation: numericCollation,
+ partialFilterExpression: {a: {$lt: "25"}}
+ }),
+ // The default collation is also numeric, so this index is equivalent to the previous.
+ [5916300]);
+
+ assert.commandWorked(coll.createIndex(
+ {a: 1}, {name: "a_lt_25_default", partialFilterExpression: {a: {$lt: "25"}}}));
+
+ // Verify that the index contains what we expect.
+ assert.docEq(coll.find({}, {_id: 0, a: 1}).hint("a_lt_25_default").toArray(),
+ [{a: "3"}, {a: "3"}]);
+
+ // Verify that the index is used when possible.
+ function checkPlanAndResult({predicate, collation, stageName, indexName, expectedResults}) {
+ let cur = coll.find(predicate, {_id: 0, a: 1});
+ if (collation) {
+ cur.collation(collation);
+ }
+
+ const plan = cur.explain();
+ const stage = getAggPlanStage(plan, stageName);
+ assert(stage, "Expected a " + stageName + " stage: " + tojson(plan));
+ if (indexName) {
+ assert.eq(stage.indexName, indexName, stage);
+ }
+
+ const results = cur.toArray();
+ assert.docEq(results, expectedResults);
+ }
+
+ // a < "25" can use the index, since the collations match.
+ checkPlanAndResult({
+ predicate: {a: {$lt: "25"}},
+ collation: null,
+ stageName: 'IXSCAN',
+ indexName: 'a_lt_25_default',
+ expectedResults: [{a: "3"}, {a: "3"}],
+ });
+ checkPlanAndResult({
+ predicate: {a: {$lt: "25"}},
+ collation: numericCollation,
+ stageName: 'IXSCAN',
+ indexName: 'a_lt_25_default',
+ expectedResults: [{a: "3"}, {a: "3"}],
+ });
+
+ // Likewise a < "24" can use the index.
+ checkPlanAndResult({
+ predicate: {a: {$lt: "24"}},
+ collation: null,
+ stageName: 'IXSCAN',
+ indexName: 'a_lt_25_default',
+ expectedResults: [{a: "3"}, {a: "3"}],
+ });
+ checkPlanAndResult({
+ predicate: {a: {$lt: "24"}},
+ collation: numericCollation,
+ stageName: 'IXSCAN',
+ indexName: 'a_lt_25_default',
+ expectedResults: [{a: "3"}, {a: "3"}],
+ });
+
+ // a < "30" can't use the index; it's not a subset.
+ checkPlanAndResult({
+ predicate: {a: {$lt: "30"}},
+ collation: null,
+ stageName: 'COLLSCAN',
+ indexName: null,
+ expectedResults: [{a: "3"}, {a: "3"}],
+ });
+ checkPlanAndResult({
+ predicate: {a: {$lt: "30"}},
+ collation: numericCollation,
+ stageName: 'COLLSCAN',
+ indexName: null,
+ expectedResults: [{a: "3"}, {a: "3"}],
+ });
+
+ // a < "100" also can't use the index, because under the numeric collation "25" < "100",
+ // so {a: {$lt: "100"}} is not a subset of the filter {a: {$lt: "25"}}. Under a simple
+ // collation we'd get the opposite outcome: "100" < "25", because strings would compare
+ // lexicographically instead of numerically.
+ checkPlanAndResult({
+ predicate: {a: {$lt: "100"}},
+ collation: null,
+ stageName: 'COLLSCAN',
+ indexName: null,
+ expectedResults: [{a: "3"}, {a: "3"}],
+ });
+ checkPlanAndResult({
+ predicate: {a: {$lt: "100"}},
+ collation: numericCollation,
+ stageName: 'COLLSCAN',
+ indexName: null,
+ expectedResults: [{a: "3"}, {a: "3"}],
+ });
+}
+
+// Test which types of predicates are allowed, and test that the bucket-level
+// partialFilterExpression is what we expect.
+{
+ assert.commandWorked(coll.dropIndex({a: 1}));
+ assert.eq(coll.getIndexes(), extraIndexes);
+ function checkPredicateDisallowed(predicate) {
+ assert.commandFailedWithCode(coll.createIndex({a: 1}, {partialFilterExpression: predicate}),
+ [5916301]);
+ }
+ function checkPredicateOK({input: predicate, output: expectedBucketPredicate}) {
+ const name = 'example_pushdown_index';
+ assert.commandWorked(coll.createIndex({a: 1}, {name, partialFilterExpression: predicate}));
+ const indexes = buckets.getIndexes().filter(ix => ix.name === name);
+ assert.eq(1, indexes.length, "Expected 1 index but got " + tojson(indexes));
+ const actualBucketPredicate = indexes[0].partialFilterExpression;
+ assert.eq(actualBucketPredicate,
+ expectedBucketPredicate,
+ "Expected the bucket-level predicate to be " + tojson(expectedBucketPredicate) +
+ " but it was " + tojson(actualBucketPredicate));
+ assert.commandWorked(coll.dropIndex(name));
+ }
+ // A trivial, empty predicate is fine.
+ // It doesn't seem useful but it's allowed on a normal collection.
+ checkPredicateOK({
+ input: {},
+ output: {},
+ });
+ // Comparison with non-scalar.
+ checkPredicateDisallowed({a: {}});
+ checkPredicateDisallowed({a: {b: 3}});
+ checkPredicateDisallowed({a: []});
+ checkPredicateDisallowed({a: [1]});
+ checkPredicateDisallowed({a: [1, 2]});
+ // Always-false predicate on time (which is always a Date).
+ checkPredicateDisallowed({time: 7});
+ checkPredicateDisallowed({time: "abc"});
+
+ // Scalar $eq is equivalent to a conjunction of $lte and $gte.
+ checkPredicateOK({
+ input: {a: 5},
+ output: {
+ $and: [
+ {'control.min.a': {$_internalExprLte: 5}},
+ {'control.max.a': {$_internalExprGte: 5}},
+ ]
+ }
+ });
+
+ // Comparisons with null/missing are not implemented. These would be slightly more complicated
+ // because {$eq: null} actually means "null, or missing, or undefined",
+ // while {$_internalExprEq: null} only matches null.
+ checkPredicateDisallowed({a: null});
+ checkPredicateDisallowed({a: {$eq: null}});
+ checkPredicateDisallowed({a: {$ne: null}});
+
+ // Regex queries are not allowed, but {$eq: /.../} is a simple scalar match, not a regex query.
+ checkPredicateDisallowed({a: /a/});
+ checkPredicateOK({
+ input: {a: {$eq: /a/}},
+ output: {
+ $and: [
+ {'control.min.a': {$_internalExprLte: /a/}},
+ {'control.max.a': {$_internalExprGte: /a/}},
+ ]
+ }
+ });
+
+ // The min/max for a field is present iff at least one event in the bucket has that field.
+ // So {$exists: true} queries can be mapped to the min/max for that field.
+ // This can be used as an alternative to sparse indexes.
+ checkPredicateOK({
+ input: {a: {$exists: true}},
+ output: {
+ $and: [
+ {'control.min.a': {$exists: true}},
+ {'control.max.a': {$exists: true}},
+ ]
+ }
+ });
+ // However, this means we can't push down {$exists: false}. A bucket where the min/max for a
+ // field is non-missing may contain a mixture of missing / non-missing, so we can't exclude it
+ // on the basis of the control fields.
+ checkPredicateDisallowed({a: {$exists: false}});
+
+ // $or on metadata, metric, or both.
+ checkPredicateOK({
+ input: {$or: [{[metaField + '.a']: {$lt: 5}}, {[metaField + '.b']: {$lt: 6}}]},
+ output: {$or: [{'meta.a': {$lt: 5}}, {'meta.b': {$lt: 6}}]},
+ });
+ checkPredicateOK({
+ input: {$or: [{'a': {$lt: 5}}, {b: {$lt: 6}}]},
+ output: {
+ $or: [
+ {'control.min.a': {$_internalExprLt: 5}},
+ {'control.min.b': {$_internalExprLt: 6}},
+ ]
+ }
+ });
+ checkPredicateOK({
+ input: {$or: [{'a': {$lt: 5}}, {[metaField + '.b']: {$lt: 6}}]},
+ output: {
+ $or: [
+ {'control.min.a': {$_internalExprLt: 5}},
+ {'meta.b': {$lt: 6}},
+ ]
+ }
+ });
+
+ // If any argument of the $or is disallowed, we report the error even when an always-true
+ // predicate appears in it.
+ checkPredicateDisallowed({$or: [{}, {a: {}}]});
+
+ // $in on metadata is fine.
+ checkPredicateOK({
+ input: {[metaField + '.a']: {$in: [1, 2, 5]}},
+ output: {'meta.a': {$in: [1, 2, 5]}},
+ });
+
+ // $in on a metric is slightly complicated. $in is equivalent to a disjunction of {a: _}.
+ // In a typical case this is the same as a disjunction of $eq:
+ checkPredicateOK({
+ input: {a: {$in: [1, 2, 5]}},
+ output: {
+ $or: [
+ {
+ $and: [
+ {'control.min.a': {$_internalExprLte: 1}},
+ {'control.max.a': {$_internalExprGte: 1}},
+ ]
+ },
+ {
+ $and: [
+ {'control.min.a': {$_internalExprLte: 2}},
+ {'control.max.a': {$_internalExprGte: 2}},
+ ]
+ },
+ {
+ $and: [
+ {'control.min.a': {$_internalExprLte: 5}},
+ {'control.max.a': {$_internalExprGte: 5}},
+ ]
+ },
+ ]
+ },
+ });
+ // Since {a: null} is not implemented, neither is {$in: [null]}.
+ checkPredicateDisallowed({a: {$in: [null]}});
+ // {a: {$in: [/abc/]}} is equivalent to {a: /abc/} which executes the regex, and is not allowed.
+ checkPredicateDisallowed({a: {$in: [/abc/]}});
+
+ // Predicates on time are pushed down, and also some extra predicates are inferred.
+ // These inferred predicates don't necessarily result in fewer buckets being indexed:
+ // we're just following the same rules for createIndex that we use when optimizing a query.
+ checkPredicateOK({
+ input: {[timeField]: {$lt: ISODate('2020-01-01T00:00:00Z')}},
+ output: {
+ $and: [
+ {"control.min.time": {"$_internalExprLt": ISODate("2020-01-01T00:00:00Z")}},
+ {"control.max.time": {"$_internalExprLt": ISODate("2020-01-01T01:00:00Z")}},
+ {"_id": {"$lt": ObjectId("5e0be1000000000000000000")}},
+ ]
+ }
+ });
+ checkPredicateOK({
+ input: {[timeField]: {$gt: ISODate('2020-01-01T00:00:00Z')}},
+ output: {
+ $and: [
+ {"control.max.time": {"$_internalExprGt": ISODate("2020-01-01T00:00:00Z")}},
+ {"control.min.time": {"$_internalExprGt": ISODate("2019-12-31T23:00:00Z")}},
+ {"_id": {"$gt": ObjectId("5e0bd2f0ffffffffffffffff")}},
+ ]
+ }
+ });
+ checkPredicateOK({
+ input: {[timeField]: {$lte: ISODate('2020-01-01T00:00:00Z')}},
+ output: {
+ $and: [
+ {"control.min.time": {"$_internalExprLte": ISODate("2020-01-01T00:00:00Z")}},
+ {"control.max.time": {"$_internalExprLte": ISODate("2020-01-01T01:00:00Z")}},
+ {"_id": {"$lte": ObjectId("5e0be100ffffffffffffffff")}},
+ ]
+ }
+ });
+ checkPredicateOK({
+ input: {[timeField]: {$gte: ISODate('2020-01-01T00:00:00Z')}},
+ output: {
+ $and: [
+ {"control.max.time": {"$_internalExprGte": ISODate("2020-01-01T00:00:00Z")}},
+ {"control.min.time": {"$_internalExprGte": ISODate("2019-12-31T23:00:00Z")}},
+ {"_id": {"$gte": ObjectId("5e0bd2f00000000000000000")}},
+ ]
+ }
+ });
+ checkPredicateOK({
+ input: {[timeField]: {$eq: ISODate('2020-01-01T00:00:00Z')}},
+ output: {
+ $and: [
+ {"control.min.time": {"$_internalExprLte": ISODate("2020-01-01T00:00:00Z")}},
+ {"control.min.time": {"$_internalExprGte": ISODate("2019-12-31T23:00:00Z")}},
+ {"control.max.time": {"$_internalExprGte": ISODate("2020-01-01T00:00:00Z")}},
+ {"control.max.time": {"$_internalExprLte": ISODate("2020-01-01T01:00:00Z")}},
+ {"_id": {"$lte": ObjectId("5e0be100ffffffffffffffff")}},
+ {"_id": {"$gte": ObjectId("5e0bd2f00000000000000000")}},
+ ]
+ }
+ });
+}
+})();
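
The ObjectId bounds asserted above can be derived by hand: a bucket's _id embeds the bucket's
minimum timestamp in its leading four bytes. A sketch of the derivation (the helper name is
illustrative, not part of the patch):

    // {time: {$lt: ISODate('2020-01-01T00:00:00Z')}}  =>  {_id: {$lt: idLowerBound(...)}}
    function idLowerBound(date) {
        const seconds = Math.floor(date.getTime() / 1000);
        return ObjectId(seconds.toString(16).padStart(8, '0') + '0'.repeat(16));
    }
    idLowerBound(ISODate('2020-01-01T00:00:00Z'));  // ObjectId("5e0be1000000000000000000")
    // For $gt and $gte the timestamp is shifted back by bucketMaxSpanSeconds (3600 here),
    // and for $gt and $lte the trailing bytes are all 0xff rather than zero, matching
    // ObjectId("5e0bd2f0ffffffffffffffff") and ObjectId("5e0be100ffffffffffffffff") above.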
diff --git a/jstests/core/timeseries/timeseries_index_spec.js b/jstests/core/timeseries/timeseries_index_spec.js
index eaf7065d3e9..109be8f856a 100644
--- a/jstests/core/timeseries/timeseries_index_spec.js
+++ b/jstests/core/timeseries/timeseries_index_spec.js
@@ -77,20 +77,19 @@ TimeseriesTest.run(() => {
assert.commandWorked(coll.createIndex({x: 1}));
verifyAndDropIndex(/*isDowngradeCompatible=*/false);
- assert.commandWorked(
- coll.createIndex({x: 1}, {partialFilterExpression: {x: {$type: "number"}}}));
+ assert.commandWorked(coll.createIndex({x: 1}, {partialFilterExpression: {x: {$gt: 5}}}));
verifyAndDropIndex(/*isDowngradeCompatible=*/false);
- assert.commandWorked(coll.createIndex({[timeFieldName]: 1},
- {partialFilterExpression: {x: {$type: "number"}}}));
+ assert.commandWorked(
+ coll.createIndex({[timeFieldName]: 1}, {partialFilterExpression: {x: {$gt: 5}}}));
verifyAndDropIndex(/*isDowngradeCompatible=*/false);
- assert.commandWorked(coll.createIndex({[metaFieldName]: 1},
- {partialFilterExpression: {x: {$type: "number"}}}));
+ assert.commandWorked(
+ coll.createIndex({[metaFieldName]: 1}, {partialFilterExpression: {x: {$gt: 5}}}));
verifyAndDropIndex(/*isDowngradeCompatible=*/false);
- assert.commandWorked(coll.createIndex({[metaFieldName]: 1, x: 1},
- {partialFilterExpression: {x: {$type: "number"}}}));
+ assert.commandWorked(
+ coll.createIndex({[metaFieldName]: 1, x: 1}, {partialFilterExpression: {x: {$gt: 5}}}));
verifyAndDropIndex(/*isDowngradeCompatible=*/false);
}
diff --git a/jstests/core/timeseries/timeseries_predicates.js b/jstests/core/timeseries/timeseries_predicates.js
new file mode 100644
index 00000000000..93a1e479b1a
--- /dev/null
+++ b/jstests/core/timeseries/timeseries_predicates.js
@@ -0,0 +1,221 @@
+/**
+ * Test the input/output behavior of some predicates on time-series collections.
+ *
+ * @tags: [
+ * does_not_support_stepdowns,
+ * does_not_support_transactions,
+ * requires_fcv_52,
+ * ]
+ */
+(function() {
+"use strict";
+
+load("jstests/core/timeseries/libs/timeseries.js");
+
+const coll = db.timeseries_predicates_normal;
+const tsColl = db.timeseries_predicates_timeseries;
+coll.drop();
+tsColl.drop();
+assert.commandWorked(
+ db.createCollection(tsColl.getName(), {timeseries: {timeField: 'time', metaField: 'meta'}}));
+const bucketsColl = db.getCollection('system.buckets.' + tsColl.getName());
+
+// Test that 'predicate' behaves correctly on the example documents,
+// by comparing the result on a time-series collection against a normal collection.
+function checkPredicateResult(predicate, documents) {
+ assert.commandWorked(coll.deleteMany({}));
+ bucketsColl.deleteMany({});
+ assert.commandWorked(coll.insert(documents));
+ assert.commandWorked(tsColl.insert(documents));
+
+ const normalResult = coll.aggregate({$match: predicate}).toArray();
+ const tsResult = tsColl.aggregate({$match: predicate}).toArray();
+ assert.sameMembers(normalResult, tsResult);
+}
+
+// Test that 'predicate' behaves correctly, no matter how the documents are bucketed:
+// insert the documents with different combinations of metadata to change how they are bucketed.
+// 'documents' should be small, since this runs 2^N tests.
+function checkAllBucketings(predicate, documents) {
+ for (const doc of documents) {
+ doc._id = ObjectId();
+ doc.time = ISODate();
+ }
+
+ // For N documents, there are 2^N ways to assign them to buckets A and B.
+ const numDocs = documents.length;
+ const numBucketings = 1 << numDocs;
+ for (let bucketing = 0; bucketing < numBucketings; ++bucketing) {
+ // The ith bit tells you how to assign documents[i].
+ const labeledDocs = documents.map(
+ (doc, i) => Object.merge(doc, {meta: {bucket: bucketing & (1 << i)}}, true /*deep*/));
+ checkPredicateResult(predicate, labeledDocs);
+ }
+}
+
+// $in
+checkAllBucketings({x: {$in: [2, 3, 4]}}, [
+ {x: 1},
+ {x: 2},
+ {x: 3},
+ {x: 42},
+]);
+
+// $exists: true
+checkAllBucketings({x: {$exists: true}}, [
+ {},
+ {x: 2},
+ {x: ISODate()},
+ {x: null},
+ {x: undefined},
+]);
+
+// Test $or...
+{
+ // ... on metric + meta.
+ checkAllBucketings({
+ $or: [
+ {x: {$lt: 0}},
+ {'meta.y': {$gt: 0}},
+ ]
+ },
+ [
+ {x: +1, meta: {y: -1}},
+ {x: +1, meta: {y: +1}},
+ {x: -1, meta: {y: -1}},
+ {x: -1, meta: {y: +1}},
+ ]);
+
+ // ... when one argument can't be pushed down.
+ checkAllBucketings({
+ $or: [
+ {x: {$lt: 0}},
+ {y: {$exists: false}},
+ ]
+ },
+ [
+ {x: -1},
+ {x: +1},
+ {x: -1, y: 'asdf'},
+ {x: +1, y: 'asdf'},
+ ]);
+
+ // ... when neither argument can be pushed down.
+ checkAllBucketings({
+ $or: [
+ {x: {$exists: false}},
+ {y: {$exists: false}},
+ ]
+ },
+ [
+ {},
+ {x: 'qwer'},
+ {y: 'asdf'},
+ {x: 'qwer', y: 'asdf'},
+ ]);
+}
+
+// Test $and...
+{
+ // ... on metric + meta.
+ checkAllBucketings({
+ $and: [
+ {x: {$lt: 0}},
+ {'meta.y': {$gt: 0}},
+ ]
+ },
+ [
+ {x: +1, meta: {y: -1}},
+ {x: +1, meta: {y: +1}},
+ {x: -1, meta: {y: -1}},
+ {x: -1, meta: {y: +1}},
+ ]);
+
+ // ... when one argument can't be pushed down.
+ checkAllBucketings({
+ $and: [
+ {x: {$lt: 0}},
+ {y: {$exists: false}},
+ ]
+ },
+ [
+ {x: -1},
+ {x: +1},
+ {x: -1, y: 'asdf'},
+ {x: +1, y: 'asdf'},
+ ]);
+
+ // ... when neither argument can be pushed down.
+ checkAllBucketings({
+ $and: [
+ {x: {$exists: false}},
+ {y: {$exists: false}},
+ ]
+ },
+ [
+ {},
+ {x: 'qwer'},
+ {y: 'asdf'},
+ {x: 'qwer', y: 'asdf'},
+ ]);
+}
+
+// Test nested $and / $or.
+checkAllBucketings({
+ // The top-level $or prevents us from splitting into 2 top-level $match stages.
+ $or: [
+ {
+ $and: [
+ {'meta.a': {$gt: 0}},
+ {'x': {$lt: 0}},
+ ]
+ },
+ {
+ $and: [
+ {'meta.b': {$gte: 0}},
+ {time: {$gt: ISODate('2020-01-01')}},
+ ]
+ },
+ ]
+},
+ [
+ {meta: {a: -1, b: -1}, x: -1, time: ISODate('2020-02-01')},
+ {meta: {a: -1, b: -1}, x: -1, time: ISODate('2019-12-31')},
+ {meta: {a: -1, b: -1}, x: +1, time: ISODate('2020-02-01')},
+ {meta: {a: -1, b: -1}, x: +1, time: ISODate('2019-12-31')},
+
+ {meta: {a: +1, b: -1}, x: -1, time: ISODate('2020-02-01')},
+ {meta: {a: +1, b: -1}, x: -1, time: ISODate('2019-12-31')},
+ {meta: {a: +1, b: -1}, x: +1, time: ISODate('2020-02-01')},
+ {meta: {a: +1, b: -1}, x: +1, time: ISODate('2019-12-31')},
+ ]);
+
+// Test nested $and / $or where some leaf predicates cannot be pushed down.
+checkAllBucketings({
+ $or: [
+ {
+ $and: [
+ {'meta.a': {$gt: 0}},
+ {'x': {$exists: false}},
+ ]
+ },
+ {
+ $and: [
+ {'meta.b': {$gte: 0}},
+ {time: {$gt: ISODate('2020-01-01')}},
+ ]
+ },
+ ]
+},
+ [
+ {meta: {a: -1, b: -1}, time: ISODate('2020-02-01')},
+ {meta: {a: -1, b: -1}, time: ISODate('2019-12-31')},
+ {meta: {a: -1, b: -1}, x: 'asdf', time: ISODate('2020-02-01')},
+ {meta: {a: -1, b: -1}, x: 'asdf', time: ISODate('2019-12-31')},
+
+ {meta: {a: +1, b: -1}, time: ISODate('2020-02-01')},
+ {meta: {a: +1, b: -1}, time: ISODate('2019-12-31')},
+ {meta: {a: +1, b: -1}, x: 'asdf', time: ISODate('2020-02-01')},
+ {meta: {a: +1, b: -1}, x: 'asdf', time: ISODate('2019-12-31')},
+ ]);
+})();
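
The $or cases above rely on the rewrite being conservative: if an event matches either branch,
its bucket matches the corresponding rewritten branch, so the disjunction of the rewritten
branches keeps every needed bucket (possibly some extras, which the unpack stage filters out
event by event). A sketch of the bucket-level form for the metric + meta case, following the
translations asserted in timeseries_index_partial.js:

    // Event-level predicate: {$or: [{x: {$lt: 0}}, {'meta.y': {$gt: 0}}]}
    const bucketLevelOr = {
        $or: [
            {'control.min.x': {$_internalExprLt: 0}},  // metric: bound the bucket minimum
            {'meta.y': {$gt: 0}},                      // metadata: passed through verbatim
        ]
    };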
diff --git a/jstests/multiVersion/upgrade_downgrade_timeseries_collection_from_last_lts.js b/jstests/multiVersion/upgrade_downgrade_timeseries_collection_from_last_lts.js
index affcfe4e3bd..e714a38d698 100644
--- a/jstests/multiVersion/upgrade_downgrade_timeseries_collection_from_last_lts.js
+++ b/jstests/multiVersion/upgrade_downgrade_timeseries_collection_from_last_lts.js
@@ -1,8 +1,11 @@
/**
* Tests that upgrading time-series collections created using the last-lts binary warns about
* potentially mixed-schema data when building secondary indexes on time-series measurements on the
- * latest binary. Additionally, tests that downgrading FCV from 5.2 removes the
+ * latest binary. Additionally, tests that downgrading FCV from 5.3 removes the
* 'timeseriesBucketsMayHaveMixedSchemaData' catalog entry flag from time-series collections.
+ *
+ * Also, tests that upgrading a time-series collection with no mixed-schema data allows metric
+ * indexes to be created.
*/
(function() {
"use strict";
@@ -50,7 +53,7 @@ if (!TimeseriesTest.timeseriesMetricIndexesEnabled(primary)) {
return;
}
-// Building indexes on time-series measurements is only supported in FCV >= 5.2.
+// Building indexes on time-series measurements is only supported in FCV >= 5.3.
jsTest.log("Setting FCV to 'latestFCV'");
assert.commandWorked(primary.adminCommand({setFeatureCompatibilityVersion: latestFCV}));
@@ -62,34 +65,62 @@ assert(checkLog.checkContainsWithCountJson(primary, 6057601, {setting: true}, /*
assert(
checkLog.checkContainsWithCountJson(secondary, 6057601, {setting: true}, /*expectedCount=*/1));
+// Creating an index on time does not involve checking for mixed-schema data.
assert.commandWorked(coll.createIndex({[timeField]: 1}, {name: "time_1"}));
assert(checkLog.checkContainsWithCountJson(
primary, 6057502, {namespace: bucketCollName}, /*expectedCount=*/0));
+// Creating an index on metadata does not involve checking for mixed-schema data.
assert.commandWorked(coll.createIndex({[metaField]: 1}, {name: "meta_1"}));
assert(checkLog.checkContainsWithCountJson(
primary, 6057502, {namespace: bucketCollName}, /*expectedCount=*/0));
-assert.commandFailedWithCode(coll.createIndex({x: 1}, {name: "x_1"}), ErrorCodes.CannotCreateIndex);
-
-// May have mixed-schema data.
+// Creating a partial index on metadata and time only does not involve checking for
+// mixed-schema data.
+assert.commandWorked(coll.createIndex({[timeField]: 1, [metaField]: 1}, {
+ name: "time_1_meta_1_partial",
+ partialFilterExpression: {[timeField]: {$gt: ISODate()}, [metaField]: 1}
+}));
assert(checkLog.checkContainsWithCountJson(
- primary, 6057502, {namespace: bucketCollName}, /*expectedCount=*/1));
-
-// Mixed-schema data detected.
-assert(checkLog.checkContainsWithCountJson(
- primary, 6057700, {namespace: bucketCollName}, /*expectedCount=*/1));
+ primary, 6057502, {namespace: bucketCollName}, /*expectedCount=*/0));
+// Creating a metric index requires checking for mixed-schema data.
+assert.commandFailedWithCode(coll.createIndex({x: 1}, {name: "x_1"}), ErrorCodes.CannotCreateIndex);
+assert(checkLog.checkContainsWithCountJson(primary,
+ 6057502 /* May have mixed-schema data. */,
+ {namespace: bucketCollName},
+ /*expectedCount=*/1));
+assert(checkLog.checkContainsWithCountJson(primary,
+ 6057700 /* Mixed-schema data detected. */,
+ {namespace: bucketCollName},
+ /*expectedCount=*/1));
+
+// A compound index that includes a metric counts as a metric index.
assert.commandFailedWithCode(coll.createIndex({[timeField]: 1, x: 1}, {name: "time_1_x_1"}),
ErrorCodes.CannotCreateIndex);
-
-// May have mixed-schema data.
-assert(checkLog.checkContainsWithCountJson(
- primary, 6057502, {namespace: bucketCollName}, /*expectedCount=*/2));
-
-// Mixed-schema data detected.
-assert(checkLog.checkContainsWithCountJson(
- primary, 6057700, {namespace: bucketCollName}, /*expectedCount=*/2));
+assert(checkLog.checkContainsWithCountJson(primary,
+ 6057502 /* May have mixed-schema data. */,
+ {namespace: bucketCollName},
+ /*expectedCount=*/2));
+assert(checkLog.checkContainsWithCountJson(primary,
+ 6057700 /* Mixed-schema data detected. */,
+ {namespace: bucketCollName},
+ /*expectedCount=*/2));
+
+// A partialFilterExpression on a metric makes it a metric index, even if the index key is
+// metadata+time only.
+assert.commandFailedWithCode(
+ coll.createIndex({[timeField]: 1, [metaField]: 1},
+ {name: "time_1_meta_1_partial_metric", partialFilterExpression: {x: 1}}),
+ ErrorCodes.CannotCreateIndex);
+assert(checkLog.checkContainsWithCountJson(primary,
+ 6057502 /* May have mixed-schema data. */,
+ {namespace: bucketCollName},
+ /*expectedCount=*/3));
+assert(checkLog.checkContainsWithCountJson(primary,
+ 6057700 /* Mixed-schema data detected. */,
+ {namespace: bucketCollName},
+ /*expectedCount=*/3));
// Check that the catalog entry flag doesn't get set to false.
assert(
@@ -99,10 +130,41 @@ assert(
// The FCV downgrade process removes the catalog entry flag from time-series collections.
jsTest.log("Setting FCV to 'lastLTSFCV'");
-assert.commandWorked(primary.adminCommand({setFeatureCompatibilityVersion: lastLTSFCV}));
+{
+ // However, the first attempt at downgrading fails because a partial index still exists.
+ assert.commandFailedWithCode(primary.adminCommand({setFeatureCompatibilityVersion: lastLTSFCV}),
+ ErrorCodes.CannotDowngrade);
+ // Once we remove the index, downgrading succeeds.
+ assert.commandWorked(coll.dropIndex("time_1_meta_1_partial"));
+ assert.commandWorked(primary.adminCommand({setFeatureCompatibilityVersion: lastLTSFCV}));
+}
assert(checkLog.checkContainsWithCountJson(primary, 6057601, {setting: null}, /*expectedCount=*/1));
assert(
checkLog.checkContainsWithCountJson(secondary, 6057601, {setting: null}, /*expectedCount=*/1));
+// Check that upgrading a clean collection allows all kinds of indexes to be created
+// (time, metadata, metric).
+jsTest.log("Testing upgrade of a non-mixed-schema collection.");
+coll.drop();
+assert.commandWorked(
+ db.createCollection(collName, {timeseries: {timeField: timeField, metaField: metaField}}));
+assert.commandWorked(coll.insert([
+ {[timeField]: ISODate(), [metaField]: 1, x: 1},
+ {[timeField]: ISODate(), [metaField]: 1, x: 2},
+ {[timeField]: ISODate(), [metaField]: 1, x: 3},
+]));
+assert.commandWorked(primary.adminCommand({setFeatureCompatibilityVersion: latestFCV}));
+assert.commandWorked(coll.createIndex({[timeField]: 1}, {name: "time_1"}));
+assert.commandWorked(coll.createIndex({[metaField]: 1}, {name: "meta_1"}));
+assert.commandWorked(coll.createIndex({[timeField]: 1, [metaField]: 1}, {
+ name: "time_1_meta_1_partial",
+ partialFilterExpression: {[timeField]: {$gt: ISODate()}, [metaField]: 1}
+}));
+assert.commandWorked(coll.createIndex({x: 1}, {name: "x_1"}));
+assert.commandWorked(coll.createIndex({[timeField]: 1, x: 1}, {name: "time_1_x_1"}));
+assert.commandWorked(
+ coll.createIndex({[timeField]: 1, [metaField]: 1},
+ {name: "time_1_meta_1_partial_metric", partialFilterExpression: {x: 1}}));
+
rst.stopSet();
-}());
\ No newline at end of file
+}());
diff --git a/jstests/noPassthrough/timeseries_measurement_indexes_downgrade.js b/jstests/noPassthrough/timeseries_measurement_indexes_downgrade.js
index 4889af415e6..8ada752669a 100644
--- a/jstests/noPassthrough/timeseries_measurement_indexes_downgrade.js
+++ b/jstests/noPassthrough/timeseries_measurement_indexes_downgrade.js
@@ -117,33 +117,33 @@ checkIndexForDowngrade(lastContinuousFCV, true, false);
// Partial indexes are not supported in versions earlier than v5.2.
assert.commandWorked(
- coll.createIndex({[timeFieldName]: 1}, {partialFilterExpression: {x: {$type: "number"}}}));
+ coll.createIndex({[timeFieldName]: 1}, {partialFilterExpression: {x: {$gt: 5}}}));
checkIndexForDowngrade(lastLTSFCV, false, false);
assert.commandWorked(
- coll.createIndex({[timeFieldName]: 1}, {partialFilterExpression: {x: {$type: "number"}}}));
+ coll.createIndex({[timeFieldName]: 1}, {partialFilterExpression: {x: {$gt: 5}}}));
checkIndexForDowngrade(lastContinuousFCV, true, false);
assert.commandWorked(
- coll.createIndex({[metaFieldName]: 1}, {partialFilterExpression: {x: {$type: "number"}}}));
+ coll.createIndex({[metaFieldName]: 1}, {partialFilterExpression: {x: {$gt: 5}}}));
checkIndexForDowngrade(lastLTSFCV, false, false);
assert.commandWorked(
- coll.createIndex({[metaFieldName]: 1}, {partialFilterExpression: {x: {$type: "number"}}}));
+ coll.createIndex({[metaFieldName]: 1}, {partialFilterExpression: {x: {$gt: 5}}}));
checkIndexForDowngrade(lastContinuousFCV, true, false);
-assert.commandWorked(coll.createIndex({x: 1}, {partialFilterExpression: {x: {$type: "number"}}}));
+assert.commandWorked(coll.createIndex({x: 1}, {partialFilterExpression: {x: {$gt: 5}}}));
checkIndexForDowngrade(lastLTSFCV, false, false);
-assert.commandWorked(coll.createIndex({x: 1}, {partialFilterExpression: {x: {$type: "number"}}}));
+assert.commandWorked(coll.createIndex({x: 1}, {partialFilterExpression: {x: {$gt: 5}}}));
checkIndexForDowngrade(lastContinuousFCV, true, false);
-assert.commandWorked(coll.createIndex({[metaFieldName]: 1, x: 1},
- {partialFilterExpression: {x: {$type: "number"}}}));
+assert.commandWorked(
+ coll.createIndex({[metaFieldName]: 1, x: 1}, {partialFilterExpression: {x: {$gt: 5}}}));
checkIndexForDowngrade(lastLTSFCV, false, false);
-assert.commandWorked(coll.createIndex({x: 1, [metaFieldName]: 1},
- {partialFilterExpression: {x: {$type: "number"}}}));
+assert.commandWorked(
+ coll.createIndex({x: 1, [metaFieldName]: 1}, {partialFilterExpression: {x: {$gt: 5}}}));
checkIndexForDowngrade(lastContinuousFCV, true, false);
MongoRunner.stopMongod(conn);
diff --git a/jstests/sharding/timeseries_query.js b/jstests/sharding/timeseries_query.js
index f350522cf6f..4cf3b1c28c5 100644
--- a/jstests/sharding/timeseries_query.js
+++ b/jstests/sharding/timeseries_query.js
@@ -291,14 +291,13 @@ function runQuery(
expectedShards: [otherShard.shardName]
});
- // OR queries are currently not re-written. So expect the query to be sent to both the shards,
- // and do a full collection scan on each shard.
+ // OR queries are rewritten if all their arguments are.
runQuery({
query: {$or: [{time: ISODate("2019-11-11")}, {time: ISODate("2019-11-12")}]},
expectedDocs: 2,
- expectedShards: [primaryShard.shardName, otherShard.shardName],
- expectQueryRewrite: false,
- expectCollScan: true,
+ expectedShards: [primaryShard.shardName],
+ expectQueryRewrite: true,
+ expectCollScan: false,
});
assert(coll.drop());
diff --git a/src/mongo/db/catalog/collection_impl.cpp b/src/mongo/db/catalog/collection_impl.cpp
index eb18ed1212c..8ed4e53ae99 100644
--- a/src/mongo/db/catalog/collection_impl.cpp
+++ b/src/mongo/db/catalog/collection_impl.cpp
@@ -2017,8 +2017,8 @@ Status CollectionImpl::prepareForIndexBuild(OperationContext* opCtx,
if (getTimeseriesOptions() && feature_flags::gTimeseriesMetricIndexes.isEnabledAndIgnoreFCV() &&
serverGlobalParams.featureCompatibility.isFCVUpgradingToOrAlreadyLatest() &&
- timeseries::doesBucketsIndexIncludeKeyOnMeasurement(*getTimeseriesOptions(),
- spec->infoObj())) {
+ timeseries::doesBucketsIndexIncludeMeasurement(
+ opCtx, ns(), *getTimeseriesOptions(), spec->infoObj())) {
invariant(_metadata->timeseriesBucketsMayHaveMixedSchemaData);
if (*_metadata->timeseriesBucketsMayHaveMixedSchemaData) {
LOGV2(6057502,
diff --git a/src/mongo/db/catalog/index_catalog_impl.cpp b/src/mongo/db/catalog/index_catalog_impl.cpp
index 09ae09ed303..093a1055e76 100644
--- a/src/mongo/db/catalog/index_catalog_impl.cpp
+++ b/src/mongo/db/catalog/index_catalog_impl.cpp
@@ -582,19 +582,12 @@ Status _checkValidFilterExpressions(const MatchExpression* expression,
}
return Status::OK();
case MatchExpression::GEO:
- if (timeseriesMetricIndexesFeatureFlagEnabled) {
- return Status::OK();
- }
- return Status(ErrorCodes::CannotCreateIndex,
- str::stream() << "Expression not supported in partial index: "
- << expression->debugString());
case MatchExpression::INTERNAL_BUCKET_GEO_WITHIN:
- if (timeseriesMetricIndexesFeatureFlagEnabled) {
- return Status::OK();
- }
- return Status(ErrorCodes::CannotCreateIndex,
- str::stream() << "Expression not supported in partial index: "
- << expression->debugString());
+ case MatchExpression::INTERNAL_EXPR_EQ:
+ case MatchExpression::INTERNAL_EXPR_LT:
+ case MatchExpression::INTERNAL_EXPR_LTE:
+ case MatchExpression::INTERNAL_EXPR_GT:
+ case MatchExpression::INTERNAL_EXPR_GTE:
case MatchExpression::MATCH_IN:
if (timeseriesMetricIndexesFeatureFlagEnabled) {
return Status::OK();
diff --git a/src/mongo/db/catalog/multi_index_block.cpp b/src/mongo/db/catalog/multi_index_block.cpp
index 91d8f1224e3..2ffc1ad92b0 100644
--- a/src/mongo/db/catalog/multi_index_block.cpp
+++ b/src/mongo/db/catalog/multi_index_block.cpp
@@ -275,7 +275,8 @@ StatusWith<std::vector<BSONObj>> MultiIndexBlock::init(
boost::optional<TimeseriesOptions> options = collection->getTimeseriesOptions();
if (options &&
serverGlobalParams.featureCompatibility.isFCVUpgradingToOrAlreadyLatest() &&
- timeseries::doesBucketsIndexIncludeKeyOnMeasurement(*options, info)) {
+ timeseries::doesBucketsIndexIncludeMeasurement(
+ opCtx, collection->ns(), *options, info)) {
invariant(collection->getTimeseriesBucketsMayHaveMixedSchemaData());
_containsIndexBuildOnTimeseriesMeasurement = true;
}
diff --git a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
index 7b3a65167de..9b59a40ee76 100644
--- a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
+++ b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
@@ -609,18 +609,20 @@ private:
// in 5.2 and up. If the user tries to downgrade the cluster to an
// earlier version, they must first remove all incompatible secondary
// indexes on time-series measurements.
- uassert(ErrorCodes::CannotDowngrade,
- str::stream()
- << "Cannot downgrade the cluster when there are secondary "
- "indexes on time-series measurements present. Drop all "
- "secondary indexes on time-series measurements before "
- "downgrading. First detected incompatible index name: '"
- << indexEntry->descriptor()->indexName()
- << "' on collection: '"
- << collection->ns().getTimeseriesViewNamespace() << "'",
- timeseries::isBucketsIndexSpecCompatibleForDowngrade(
- *collection->getTimeseriesOptions(),
- indexEntry->descriptor()->infoObj()));
+ uassert(
+ ErrorCodes::CannotDowngrade,
+ str::stream()
+ << "Cannot downgrade the cluster when there are secondary "
+ "indexes on time-series measurements present, or when there "
+ "are partial indexes on a time-series collection. Drop all "
+ "secondary indexes on time-series measurements, and all "
+ "partial indexes on time-series collections, before "
+ "downgrading. First detected incompatible index name: '"
+ << indexEntry->descriptor()->indexName() << "' on collection: '"
+ << collection->ns().getTimeseriesViewNamespace() << "'",
+ timeseries::isBucketsIndexSpecCompatibleForDowngrade(
+ *collection->getTimeseriesOptions(),
+ indexEntry->descriptor()->infoObj()));
if (auto filter = indexEntry->getFilterExpression()) {
auto status = IndexCatalogImpl::checkValidFilterExpressions(
diff --git a/src/mongo/db/exec/SConscript b/src/mongo/db/exec/SConscript
index 1a920757dad..89c4d304015 100644
--- a/src/mongo/db/exec/SConscript
+++ b/src/mongo/db/exec/SConscript
@@ -58,6 +58,7 @@ env.Library(
],
LIBDEPS = [
"$BUILD_DIR/mongo/db/matcher/expressions",
+ "$BUILD_DIR/mongo/db/timeseries/timeseries_options",
"document_value/document_value",
],
LIBDEPS_PRIVATE = [
diff --git a/src/mongo/db/exec/bucket_unpacker.cpp b/src/mongo/db/exec/bucket_unpacker.cpp
index f663b49f46d..2627b680e3c 100644
--- a/src/mongo/db/exec/bucket_unpacker.cpp
+++ b/src/mongo/db/exec/bucket_unpacker.cpp
@@ -36,12 +36,17 @@
#include "mongo/db/matcher/expression_geo.h"
#include "mongo/db/matcher/expression_internal_bucket_geo_within.h"
#include "mongo/db/matcher/expression_internal_expr_comparison.h"
+#include "mongo/db/matcher/expression_parser.h"
#include "mongo/db/matcher/expression_tree.h"
+#include "mongo/db/matcher/extensions_callback_noop.h"
#include "mongo/db/pipeline/expression.h"
#include "mongo/db/timeseries/timeseries_constants.h"
+#include "mongo/db/timeseries/timeseries_options.h"
namespace mongo {
+using IneligiblePredicatePolicy = BucketSpec::IneligiblePredicatePolicy;
+
bool BucketSpec::fieldIsComputed(StringData field) const {
return std::any_of(computedMetaProjFields.begin(), computedMetaProjFields.end(), [&](auto& s) {
return s == field || expression::isPathPrefixOf(field, s) ||
@@ -102,6 +107,42 @@ auto constructObjectIdValue(const BSONElement& rhs, int bucketMaxSpanSeconds) {
MONGO_UNREACHABLE_TASSERT(5756800);
}
+/**
+ * Makes a disjunction of the given predicates.
+ *
+ * - The result is non-null; it may be an OrMatchExpression with zero children.
+ * - Any trivially-false arguments are omitted.
+ * - If only one argument is nontrivial, returns that argument rather than adding an extra
+ * OrMatchExpression around it.
+ */
+std::unique_ptr<MatchExpression> makeOr(std::vector<std::unique_ptr<MatchExpression>> predicates) {
+ std::vector<std::unique_ptr<MatchExpression>> nontrivial;
+ for (auto&& p : predicates) {
+ if (!p->isTriviallyFalse())
+ nontrivial.push_back(std::move(p));
+ }
+
+ if (nontrivial.size() == 1)
+ return std::move(nontrivial[0]);
+
+ return std::make_unique<OrMatchExpression>(std::move(nontrivial));
+}
+
+std::unique_ptr<MatchExpression> handleIneligible(IneligiblePredicatePolicy policy,
+ const MatchExpression* matchExpr,
+ StringData message) {
+ switch (policy) {
+ case IneligiblePredicatePolicy::kError:
+ uasserted(
+ 5916301,
+ "Error translating non-metadata time-series predicate to operate on buckets: " +
+ message + ": " + matchExpr->serialize().toString());
+ case IneligiblePredicatePolicy::kIgnore:
+ return nullptr;
+ }
+ MONGO_UNREACHABLE_TASSERT(5916307);
+}
+
/*
* Creates a predicate that ensures that if there exists a subpath of matchExprPath such that the
* type of `control.min.subpath` is not the same as `control.max.subpath` then we will match that
@@ -118,7 +159,7 @@ std::unique_ptr<MatchExpression> createTypeEqualityPredicate(
std::vector<std::unique_ptr<MatchExpression>> typeEqualityPredicates;
if (assumeNoMixedSchemaData)
- return std::make_unique<OrMatchExpression>(std::move(typeEqualityPredicates));
+ return makeOr(std::move(typeEqualityPredicates));
FieldPath matchExprField(matchExprPath);
using namespace timeseries;
@@ -158,7 +199,7 @@ std::unique_ptr<MatchExpression> createTypeEqualityPredicate(
pExpCtx.get(), maxPath, pExpCtx->variablesParseState))))),
pExpCtx));
}
- return std::make_unique<OrMatchExpression>(std::move(typeEqualityPredicates));
+ return makeOr(std::move(typeEqualityPredicates));
}
std::unique_ptr<MatchExpression> createComparisonPredicate(
@@ -167,7 +208,9 @@ std::unique_ptr<MatchExpression> createComparisonPredicate(
int bucketMaxSpanSeconds,
ExpressionContext::CollationMatchesDefault collationMatchesDefault,
boost::intrusive_ptr<ExpressionContext> pExpCtx,
- bool assumeNoMixedSchemaData) {
+ bool haveComputedMetaField,
+ bool assumeNoMixedSchemaData,
+ IneligiblePredicatePolicy policy) {
using namespace timeseries;
const auto matchExprPath = matchExpr->path();
const auto matchExprData = matchExpr->getData();
@@ -176,39 +219,59 @@ std::unique_ptr<MatchExpression> createComparisonPredicate(
// MatchExpressions use a comparator that treats field-order as significant. Because of this we
// will not perform this optimization on queries with operands of compound types.
if (matchExprData.type() == BSONType::Object || matchExprData.type() == BSONType::Array)
- return nullptr;
+ return handleIneligible(policy, matchExpr, "operand can't be an object or array"_sd);
// MatchExpressions have special comparison semantics regarding null, in that {$eq: null} will
// match all documents where the field is either null or missing. Because this is different
// from both the comparison semantics that InternalExprComparison expressions and the control's
// min and max fields use, we will not perform this optimization on queries with null operands.
if (matchExprData.type() == BSONType::jstNULL)
- return nullptr;
+ return handleIneligible(policy, matchExpr, "can't handle {$eq: null}"_sd);
// The control field's min and max are chosen based on the collation of the collection. If the
// query's collation does not match the collection's collation and the query operand is a
// string or compound type (skipped above) we will not perform this optimization.
if (collationMatchesDefault == ExpressionContext::CollationMatchesDefault::kNo &&
matchExprData.type() == BSONType::String) {
- return nullptr;
+ return handleIneligible(
+ policy, matchExpr, "can't handle string comparison with a non-default collation"_sd);
}
- // We must avoid mapping predicates on the meta field onto the control field.
+ // We must avoid mapping predicates on the meta field onto the control field. These should be
+ // mapped to the meta field instead.
+ //
+ // You might think these were handled earlier, by splitting the match expression into a
+ // metadata-only part, and measurement/time-only part. However, splitting a $match into two
+ // sequential $matches only works when splitting a conjunction. A predicate like
+ // {$or: [ {a: 5}, {meta.b: 5} ]} cannot be split, and can't be metadata-only, so we have to
+ // handle it here.
if (bucketSpec.metaField() &&
(matchExprPath == bucketSpec.metaField().get() ||
- expression::isPathPrefixOf(bucketSpec.metaField().get(), matchExprPath)))
- return nullptr;
+ expression::isPathPrefixOf(bucketSpec.metaField().get(), matchExprPath))) {
+
+ if (haveComputedMetaField)
+ return handleIneligible(policy, matchExpr, "can't handle a computed meta field");
+
+ auto result = matchExpr->shallowClone();
+ expression::applyRenamesToExpression(
+ result.get(),
+ {{bucketSpec.metaField().get(), timeseries::kBucketMetaFieldName.toString()}});
+ return result;
+ }
// We must avoid mapping predicates on fields computed via $addFields or a computed $project.
if (bucketSpec.fieldIsComputed(matchExprPath.toString())) {
- return nullptr;
+ return handleIneligible(policy, matchExpr, "can't handle a computed field");
}
const auto isTimeField = (matchExprPath == bucketSpec.timeField());
if (isTimeField && matchExprData.type() != BSONType::Date) {
// Users are not allowed to insert non-date measurements into time field. So this query
// would not match anything. We do not need to optimize for this case.
- return nullptr;
+ return handleIneligible(
+ policy,
+ matchExpr,
+ "This predicate will never be true, because the time field always contains a Date");
}
BSONObj minTime;
@@ -255,7 +318,7 @@ std::unique_ptr<MatchExpression> createComparisonPredicate(
kBucketIdFieldName,
constructObjectIdValue<GTEMatchExpression>(matchExprData,
bucketMaxSpanSeconds)))
- : std::make_unique<OrMatchExpression>(makeVector<std::unique_ptr<MatchExpression>>(
+ : makeOr(makeVector<std::unique_ptr<MatchExpression>>(
makePredicate(MatchExprPredicate<InternalExprLTEMatchExpression>(
minPath, matchExprData),
MatchExprPredicate<InternalExprGTEMatchExpression>(
@@ -284,7 +347,7 @@ std::unique_ptr<MatchExpression> createComparisonPredicate(
kBucketIdFieldName,
constructObjectIdValue<GTMatchExpression>(matchExprData,
bucketMaxSpanSeconds)))
- : std::make_unique<OrMatchExpression>(makeVector<std::unique_ptr<MatchExpression>>(
+ : makeOr(makeVector<std::unique_ptr<MatchExpression>>(
std::make_unique<InternalExprGTMatchExpression>(maxPath, matchExprData),
createTypeEqualityPredicate(
pExpCtx, matchExprPath, assumeNoMixedSchemaData)));
@@ -310,7 +373,7 @@ std::unique_ptr<MatchExpression> createComparisonPredicate(
kBucketIdFieldName,
constructObjectIdValue<GTEMatchExpression>(matchExprData,
bucketMaxSpanSeconds)))
- : std::make_unique<OrMatchExpression>(makeVector<std::unique_ptr<MatchExpression>>(
+ : makeOr(makeVector<std::unique_ptr<MatchExpression>>(
std::make_unique<InternalExprGTEMatchExpression>(maxPath, matchExprData),
createTypeEqualityPredicate(
pExpCtx, matchExprPath, assumeNoMixedSchemaData)));
@@ -335,7 +398,7 @@ std::unique_ptr<MatchExpression> createComparisonPredicate(
kBucketIdFieldName,
constructObjectIdValue<LTMatchExpression>(matchExprData,
bucketMaxSpanSeconds)))
- : std::make_unique<OrMatchExpression>(makeVector<std::unique_ptr<MatchExpression>>(
+ : makeOr(makeVector<std::unique_ptr<MatchExpression>>(
std::make_unique<InternalExprLTMatchExpression>(minPath, matchExprData),
createTypeEqualityPredicate(
pExpCtx, matchExprPath, assumeNoMixedSchemaData)));
@@ -360,7 +423,7 @@ std::unique_ptr<MatchExpression> createComparisonPredicate(
kBucketIdFieldName,
constructObjectIdValue<LTEMatchExpression>(matchExprData,
bucketMaxSpanSeconds)))
- : std::make_unique<OrMatchExpression>(makeVector<std::unique_ptr<MatchExpression>>(
+ : makeOr(makeVector<std::unique_ptr<MatchExpression>>(
std::make_unique<InternalExprLTEMatchExpression>(minPath, matchExprData),
createTypeEqualityPredicate(
pExpCtx, matchExprPath, assumeNoMixedSchemaData)));
@@ -380,19 +443,25 @@ std::unique_ptr<MatchExpression> BucketSpec::createPredicatesOnBucketLevelField(
int bucketMaxSpanSeconds,
ExpressionContext::CollationMatchesDefault collationMatchesDefault,
const boost::intrusive_ptr<ExpressionContext>& pExpCtx,
- bool assumeNoMixedSchemaData) {
+ bool haveComputedMetaField,
+ bool assumeNoMixedSchemaData,
+ IneligiblePredicatePolicy policy) {
+
+ tassert(5916304, "BucketSpec::createPredicatesOnBucketLevelField nullptr", matchExpr);
+
if (matchExpr->matchType() == MatchExpression::AND) {
auto nextAnd = static_cast<const AndMatchExpression*>(matchExpr);
auto andMatchExpr = std::make_unique<AndMatchExpression>();
for (size_t i = 0; i < nextAnd->numChildren(); i++) {
- if (auto child = createPredicatesOnBucketLevelField(
- nextAnd->getChild(i),
- bucketSpec,
- bucketMaxSpanSeconds,
- collationMatchesDefault,
- pExpCtx,
- assumeNoMixedSchemaData)) {
+ if (auto child = createPredicatesOnBucketLevelField(nextAnd->getChild(i),
+ bucketSpec,
+ bucketMaxSpanSeconds,
+ collationMatchesDefault,
+ pExpCtx,
+ haveComputedMetaField,
+ assumeNoMixedSchemaData,
+ policy)) {
andMatchExpr->add(std::move(child));
}
}
@@ -402,15 +471,55 @@ std::unique_ptr<MatchExpression> BucketSpec::createPredicatesOnBucketLevelField(
if (andMatchExpr->numChildren() > 0) {
return andMatchExpr;
}
+
+ // No error message here: an empty AND is valid.
+ return nullptr;
+ } else if (matchExpr->matchType() == MatchExpression::OR) {
+ // Given {$or: [A, B]}, suppose A, B can be pushed down as A', B'.
+ // If an event matches {$or: [A, B]} then either:
+ // - it matches A, which means any bucket containing it matches A'
+ // - it matches B, which means any bucket containing it matches B'
+ // So {$or: [A', B']} will capture all the buckets we need to satisfy {$or: [A, B]}.
+ auto nextOr = static_cast<const OrMatchExpression*>(matchExpr);
+ auto result = std::make_unique<OrMatchExpression>();
+
+ bool alwaysTrue = false;
+ for (size_t i = 0; i < nextOr->numChildren(); i++) {
+ auto child = createPredicatesOnBucketLevelField(nextOr->getChild(i),
+ bucketSpec,
+ bucketMaxSpanSeconds,
+ collationMatchesDefault,
+ pExpCtx,
+ haveComputedMetaField,
+ assumeNoMixedSchemaData,
+ policy);
+ if (child) {
+ result->add(std::move(child));
+ } else {
+ // A nullptr child is an always-true predicate, which makes the
+ // entire $or always-true.
+ alwaysTrue = true;
+
+ // Only short-circuit if we're not interested in reporting errors.
+ if (policy == IneligiblePredicatePolicy::kIgnore)
+ break;
+ }
+ }
+ if (alwaysTrue)
+ return nullptr;
+
+ // No special case for an empty $or: returning nullptr would be incorrect here,
+ // because nullptr means 'always-true'.
+ return result;
} else if (ComparisonMatchExpression::isComparisonMatchExpression(matchExpr) ||
ComparisonMatchExpressionBase::isInternalExprComparison(matchExpr->matchType())) {
- return createComparisonPredicate(
- static_cast<const ComparisonMatchExpressionBase*>(matchExpr),
- bucketSpec,
- bucketMaxSpanSeconds,
- pExpCtx->collationMatchesDefault,
- pExpCtx,
- assumeNoMixedSchemaData);
+ return createComparisonPredicate(static_cast<const ComparisonMatchExpressionBase*>(matchExpr),
+ bucketSpec,
+ bucketMaxSpanSeconds,
+ collationMatchesDefault,
+ pExpCtx,
+ haveComputedMetaField,
+ assumeNoMixedSchemaData,
+ policy);
} else if (matchExpr->matchType() == MatchExpression::GEO) {
auto& geoExpr = static_cast<const GeoMatchExpression*>(matchExpr)->getGeoExpression();
if (geoExpr.getPred() == GeoExpression::WITHIN ||
@@ -418,9 +527,115 @@ std::unique_ptr<MatchExpression> BucketSpec::createPredicatesOnBucketLevelField(
return std::make_unique<InternalBucketGeoWithinMatchExpression>(
geoExpr.getGeometryPtr(), geoExpr.getField());
}
+ } else if (matchExpr->matchType() == MatchExpression::EXISTS) {
+ if (assumeNoMixedSchemaData) {
+ // We know that every field that appears in an event will also appear in the min/max.
+ auto result = std::make_unique<AndMatchExpression>();
+ result->add(std::make_unique<ExistsMatchExpression>(
+ std::string{timeseries::kControlMinFieldNamePrefix} + matchExpr->path()));
+ result->add(std::make_unique<ExistsMatchExpression>(
+ std::string{timeseries::kControlMaxFieldNamePrefix} + matchExpr->path()));
+ return result;
+ } else {
+ // At time of writing, we only pass 'kError' when creating a partial index, and
+ // we know the collection will have no mixed-schema buckets by the time the index is
+ // done building.
+ tassert(5916305,
+ "Can't push down {$exists: true} when the collection may have mixed-schema "
+ "buckets.",
+ policy != IneligiblePredicatePolicy::kError);
+ return nullptr;
+ }
+ } else if (matchExpr->matchType() == MatchExpression::MATCH_IN) {
+ // {a: {$in: [X, Y]}} is equivalent to {$or: [ {a: X}, {a: Y} ]}.
+ // {$in: [/a/]} is interpreted as a regex query.
+ // {$in: [null]} matches any nullish value.
+ const auto* inExpr = static_cast<const InMatchExpression*>(matchExpr);
+ if (inExpr->hasRegex())
+ return handleIneligible(
+ policy, matchExpr, "can't handle $regex predicate (inside $in predicate)");
+ if (inExpr->hasNull())
+ return handleIneligible(
+ policy, matchExpr, "can't handle {$eq: null} predicate (inside $in predicate)");
+
+ auto result = std::make_unique<OrMatchExpression>();
+ for (auto&& elem : inExpr->getEqualities()) {
+ // If inExpr is {$in: [X, Y]} then the elems are '0: X' and '1: Y'.
+ auto eq = std::make_unique<EqualityMatchExpression>(
+ inExpr->path(), elem, nullptr /*annotation*/, inExpr->getCollator());
+ result->add(createComparisonPredicate(eq.get(),
+ bucketSpec,
+ bucketMaxSpanSeconds,
+ collationMatchesDefault,
+ pExpCtx,
+ haveComputedMetaField,
+ assumeNoMixedSchemaData,
+ policy));
+ }
+ return result;
}
- return nullptr;
+ return handleIneligible(policy, matchExpr, "can't handle this predicate");
+}
+
+BSONObj BucketSpec::pushdownPredicate(
+ const boost::intrusive_ptr<ExpressionContext>& expCtx,
+ const TimeseriesOptions& tsOptions,
+ ExpressionContext::CollationMatchesDefault collationMatchesDefault,
+ const BSONObj& predicate,
+ bool haveComputedMetaField,
+ bool assumeNoMixedSchemaData,
+ IneligiblePredicatePolicy policy) {
+
+ auto allowedFeatures = MatchExpressionParser::kDefaultSpecialFeatures;
+ auto matchExpr = uassertStatusOK(
+ MatchExpressionParser::parse(predicate, expCtx, ExtensionsCallbackNoop(), allowedFeatures));
+
+ auto metaField = haveComputedMetaField ? boost::none : tsOptions.getMetaField();
+ auto [metaOnlyPredicate, metricPredicate] = [&] {
+ if (!metaField) {
+ // If there's no metadata field, then none of the predicates are metadata-only
+ // predicates.
+ return std::make_pair(std::unique_ptr<MatchExpression>(nullptr), std::move(matchExpr));
+ }
+ return expression::splitMatchExpressionBy(
+ std::move(matchExpr),
+ {metaField->toString()},
+ {{metaField->toString(), timeseries::kBucketMetaFieldName.toString()}},
+ expression::isOnlyDependentOn);
+ }();
+
+ int maxSpanSeconds = tsOptions.getBucketMaxSpanSeconds()
+ ? *tsOptions.getBucketMaxSpanSeconds()
+ : timeseries::getMaxSpanSecondsFromGranularity(tsOptions.getGranularity());
+
+ std::unique_ptr<MatchExpression> bucketMetricPredicate = metricPredicate
+ ? createPredicatesOnBucketLevelField(
+ metricPredicate.get(),
+ BucketSpec{
+ tsOptions.getTimeField().toString(),
+ metaField.map([](StringData s) { return s.toString(); }),
+ // Since we are operating on a collection, not a query-result, there are no
+ // inclusion/exclusion projections we need to apply to the buckets before
+ // unpacking.
+ {},
+ // And there are no computed projections.
+ {},
+ },
+ maxSpanSeconds,
+ collationMatchesDefault,
+ expCtx,
+ haveComputedMetaField,
+ assumeNoMixedSchemaData,
+ policy)
+ : nullptr;
+
+ BSONObjBuilder result;
+ if (metaOnlyPredicate)
+ metaOnlyPredicate->serialize(&result);
+ if (bucketMetricPredicate)
+ bucketMetricPredicate->serialize(&result);
+ return result.obj();
}
class BucketUnpacker::UnpackingImpl {
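The $or mapping above is sound because the bucket-level rewrite is an over-approximation: each rewritten child must match every bucket that could contain an event matching the original child. A minimal standalone sketch of that argument, using toy [min, max] bucket summaries rather than the real control fields:

// Illustrative only; Bucket is a toy stand-in for a time-series bucket document.
#include <cassert>
#include <vector>

struct Bucket {
    double min, max;             // analogous to control.min.a / control.max.a
    std::vector<double> events;  // the measurements packed into the bucket
};

// Event-level predicates: A = (a > 1), B = (a < -5).
bool A(double a) { return a > 1; }
bool B(double a) { return a < -5; }

// Bucket-level rewrites: A' = (max > 1), B' = (min < -5).
bool Ap(const Bucket& b) { return b.max > 1; }
bool Bp(const Bucket& b) { return b.min < -5; }

int main() {
    Bucket b{-10, 3, {-10, 0, 3}};
    for (double a : b.events) {
        // An event matching {$or: [A, B]} can only live in a bucket matching
        // {$or: [A', B']}, so filtering buckets by the rewrite loses no events.
        if (A(a) || B(a))
            assert(Ap(b) || Bp(b));
    }
    return 0;
}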
diff --git a/src/mongo/db/exec/bucket_unpacker.h b/src/mongo/db/exec/bucket_unpacker.h
index 05223ace00d..bd1894c2ac1 100644
--- a/src/mongo/db/exec/bucket_unpacker.h
+++ b/src/mongo/db/exec/bucket_unpacker.h
@@ -69,6 +69,17 @@ public:
// Returns whether 'field' depends on a pushed down $addFields or computed $project.
bool fieldIsComputed(StringData field) const;
+ // Says what to do when an event-level predicate cannot be mapped to a bucket-level predicate.
+ enum class IneligiblePredicatePolicy {
+ // When optimizing a query, it's fine if some predicates can't be pushed down. We'll still
+ // run the predicate after unpacking, so the results will be correct.
+ kIgnore,
+ // When creating a partial index, it's misleading if we can't handle a predicate: the user
+ // expects every predicate in the partialFilterExpression to contribute, somehow, to making
+ // the index smaller.
+ kError,
+ };
+
/**
* Takes a predicate after $_internalUnpackBucket on a bucketed field as an argument and
* attempts to map it to a new predicate on the 'control' field. For example, the predicate
@@ -84,6 +95,10 @@ public:
* ]}
*
* If the provided predicate is ineligible for this mapping, the function will return a nullptr.
+ * This should be interpreted as an always-true predicate.
+ *
+ * When using IneligiblePredicatePolicy::kIgnore, if the predicate can't be pushed down, it
+ * returns null. When using IneligiblePredicatePolicy::kError it raises a user error.
*/
static std::unique_ptr<MatchExpression> createPredicatesOnBucketLevelField(
const MatchExpression* matchExpr,
@@ -91,7 +106,37 @@ public:
int bucketMaxSpanSeconds,
ExpressionContext::CollationMatchesDefault collationMatchesDefault,
const boost::intrusive_ptr<ExpressionContext>& pExpCtx,
- bool assumeNoMixedSchemaData);
+ bool haveComputedMetaField,
+ bool assumeNoMixedSchemaData,
+ IneligiblePredicatePolicy policy);
+
+ /**
+ * Converts an event-level predicate to a bucket-level predicate, such that
+ *
+ * {$unpackBucket ...} {$match: <event-level predicate>}
+ *
+ * gives the same result as
+ *
+ * {$match: <bucket-level predicate>} {$unpackBucket ...} {$match: <event-level predicate>}
+ *
+ * This means the bucket-level predicate must include every bucket that might contain an event
+ * matching the event-level predicate.
+ *
+ * This helper is used when creating a partial index on a time-series collection: logically,
+ * we index only events that match the event-level partialFilterExpression, but physically we
+ * index any bucket that matches the bucket-level partialFilterExpression.
+ *
+ * When using IneligiblePredicatePolicy::kIgnore, if the predicate can't be pushed down, it
+ * returns null. When using IneligiblePredicatePolicy::kError it raises a user error.
+ */
+ static BSONObj pushdownPredicate(
+ const boost::intrusive_ptr<ExpressionContext>& expCtx,
+ const TimeseriesOptions& tsOptions,
+ ExpressionContext::CollationMatchesDefault collationMatchesDefault,
+ const BSONObj& predicate,
+ bool haveComputedMetaField,
+ bool assumeNoMixedSchemaData,
+ IneligiblePredicatePolicy policy);
// The set of field names in the data region that should be included or excluded.
std::set<std::string> fieldSet;
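The two policies differ only in how an unmappable predicate is reported. A hypothetical simplification of the handleIneligible helper the implementation calls (toy types; the real server raises a uassert with a proper error code rather than throwing a std::runtime_error):

#include <memory>
#include <stdexcept>
#include <string>

enum class IneligiblePredicatePolicy { kIgnore, kError };

struct MatchExpressionStub {};  // toy stand-in for MatchExpression

std::unique_ptr<MatchExpressionStub> handleIneligible(IneligiblePredicatePolicy policy,
                                                      const std::string& reason) {
    if (policy == IneligiblePredicatePolicy::kError)
        throw std::runtime_error("Unsupported partial index predicate: " + reason);
    // kIgnore: nullptr means 'always-true'. Query optimization can afford this
    // because the original predicate is re-applied after unpacking.
    return nullptr;
}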
diff --git a/src/mongo/db/matcher/expression_algo.cpp b/src/mongo/db/matcher/expression_algo.cpp
index c0595c7f825..c3d0e777c1f 100644
--- a/src/mongo/db/matcher/expression_algo.cpp
+++ b/src/mongo/db/matcher/expression_algo.cpp
@@ -27,6 +27,8 @@
* it in the license file.
*/
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery
+
#include "mongo/platform/basic.h"
#include "mongo/base/checked_cast.h"
@@ -42,6 +44,7 @@
#include "mongo/db/pipeline/dependencies.h"
#include "mongo/db/query/collation/collation_index_key.h"
#include "mongo/db/query/collation/collator_interface.h"
+#include "mongo/logv2/log.h"
namespace mongo {
@@ -133,9 +136,88 @@ bool _isSubsetOf(const ComparisonMatchExpression* lhs, const ComparisonMatchExpr
}
}
+bool _isSubsetOfInternalExpr(const ComparisonMatchExpressionBase* lhs,
+ const ComparisonMatchExpressionBase* rhs) {
+ // An expression can only match a subset of the documents matched by another if they are
+ // comparing the same field.
+ if (lhs->path() != rhs->path()) {
+ return false;
+ }
+
+ const BSONElement lhsData = lhs->getData();
+ const BSONElement rhsData = rhs->getData();
+
+ if (!CollatorInterface::collatorsMatch(lhs->getCollator(), rhs->getCollator()) &&
+ CollationIndexKey::isCollatableType(lhsData.type())) {
+ return false;
+ }
+
+ int cmp = lhsData.woCompare(
+ rhsData, BSONElement::ComparisonRules::kConsiderFieldName, rhs->getCollator());
+
+ // Check whether the two expressions are equivalent.
+ if (lhs->matchType() == rhs->matchType() && cmp == 0) {
+ return true;
+ }
+
+ switch (rhs->matchType()) {
+ case MatchExpression::INTERNAL_EXPR_LT:
+ case MatchExpression::INTERNAL_EXPR_LTE:
+ switch (lhs->matchType()) {
+ case MatchExpression::INTERNAL_EXPR_LT:
+ case MatchExpression::INTERNAL_EXPR_LTE:
+ case MatchExpression::INTERNAL_EXPR_EQ:
+ if (rhs->matchType() == MatchExpression::INTERNAL_EXPR_LTE) {
+ return cmp <= 0;
+ }
+ return cmp < 0;
+ default:
+ return false;
+ }
+ case MatchExpression::INTERNAL_EXPR_GT:
+ case MatchExpression::INTERNAL_EXPR_GTE:
+ switch (lhs->matchType()) {
+ case MatchExpression::INTERNAL_EXPR_GT:
+ case MatchExpression::INTERNAL_EXPR_GTE:
+ case MatchExpression::INTERNAL_EXPR_EQ:
+ if (rhs->matchType() == MatchExpression::INTERNAL_EXPR_GTE) {
+ return cmp >= 0;
+ }
+ return cmp > 0;
+ default:
+ return false;
+ }
+ default:
+ return false;
+ }
+}
+
/**
* Returns true if the documents matched by 'lhs' are a subset of the documents matched by
* 'rhs', i.e. a document matched by 'lhs' must also be matched by 'rhs', and false otherwise.
+ *
+ * This overload handles the $_internalExpr family of comparisons.
+ */
+bool _isSubsetOfInternalExpr(const MatchExpression* lhs, const ComparisonMatchExpressionBase* rhs) {
+ // An expression can only match a subset of the documents matched by another if they are
+ // comparing the same field.
+ if (lhs->path() != rhs->path()) {
+ return false;
+ }
+
+ if (ComparisonMatchExpressionBase::isInternalExprComparison(lhs->matchType())) {
+ return _isSubsetOfInternalExpr(static_cast<const ComparisonMatchExpressionBase*>(lhs), rhs);
+ }
+
+ return false;
+}
+
+/**
+ * Returns true if the documents matched by 'lhs' are a subset of the documents matched by
+ * 'rhs', i.e. a document matched by 'lhs' must also be matched by 'rhs', and false otherwise.
+ *
+ * This overload handles comparisons such as $lt, $eq, $gte, but not $_internalExprLt, etc.
*/
bool _isSubsetOf(const MatchExpression* lhs, const ComparisonMatchExpression* rhs) {
// An expression can only match a subset of the documents matched by another if they are
@@ -508,6 +590,10 @@ bool isSubsetOf(const MatchExpression* lhs, const MatchExpression* rhs) {
return _isSubsetOf(lhs, static_cast<const ComparisonMatchExpression*>(rhs));
}
+ if (ComparisonMatchExpressionBase::isInternalExprComparison(rhs->matchType())) {
+ return _isSubsetOfInternalExpr(lhs, static_cast<const ComparisonMatchExpressionBase*>(rhs));
+ }
+
if (rhs->matchType() == MatchExpression::EXISTS) {
return _isSubsetOf(lhs, static_cast<const ExistsMatchExpression*>(rhs));
}
@@ -555,7 +641,7 @@ bool isIndependentOf(const MatchExpression& expr, const std::set<std::string>& p
});
}
-bool isOnlyDependentOn(const MatchExpression& expr, const std::set<std::string>& roots) {
+bool isOnlyDependentOn(const MatchExpression& expr, const std::set<std::string>& pathSet) {
// Any expression types that do not have renaming implemented cannot have their independence
// evaluated here. See applyRenamesToExpression().
if (!hasOnlyRenameableMatchExpressionChildren(expr)) {
@@ -564,11 +650,11 @@ bool isOnlyDependentOn(const MatchExpression& expr, const std::set<std::string>&
auto depsTracker = DepsTracker{};
expr.addDependencies(&depsTracker);
- return std::all_of(
- depsTracker.fields.begin(), depsTracker.fields.end(), [&roots](auto&& field) {
- auto fieldRef = FieldRef{field};
- return !fieldRef.empty() && roots.find(fieldRef.getPart(0).toString()) != roots.end();
+ return std::all_of(depsTracker.fields.begin(), depsTracker.fields.end(), [&](auto&& field) {
+ return std::any_of(pathSet.begin(), pathSet.end(), [&](auto&& path) {
+ return path == field || isPathPrefixOf(path, field);
});
+ });
}
std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitMatchExpressionBy(
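For the upper-bound cases ($_internalExprLt / $_internalExprLte on the same path), the subset test above reduces to comparing the two constants. A standalone sketch of just that arithmetic (toy enum; the real code also checks paths and collations, and compares BSONElements with woCompare):

#include <cassert>

enum class Op { Lt, Lte, Eq };

// Does {x <lhsOp> lhsVal} match a subset of {x <rhsOp> rhsVal}, where rhsOp is
// an upper bound (Lt or Lte)?
bool isSubsetUpperBound(Op lhsOp, double lhsVal, Op rhsOp, double rhsVal) {
    if (lhsOp == rhsOp && lhsVal == rhsVal)
        return true;  // equivalent expressions
    if (rhsOp == Op::Lte)
        return lhsVal <= rhsVal;
    return lhsVal < rhsVal;  // rhsOp == Op::Lt
}

int main() {
    assert(isSubsetUpperBound(Op::Eq, 4, Op::Lt, 5));    // {x: 4}   =>  {x < 5}
    assert(isSubsetUpperBound(Op::Lt, 5, Op::Lte, 5));   // {x < 5}  =>  {x <= 5}
    assert(!isSubsetUpperBound(Op::Lte, 5, Op::Lt, 5));  // {x <= 5} !=> {x < 5}
    return 0;
}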
diff --git a/src/mongo/db/matcher/expression_algo.h b/src/mongo/db/matcher/expression_algo.h
index fb703066206..31bc8f9e227 100644
--- a/src/mongo/db/matcher/expression_algo.h
+++ b/src/mongo/db/matcher/expression_algo.h
@@ -93,9 +93,9 @@ bool isSplittableBy(const MatchExpression& expr, const std::set<std::string>& pa
bool isIndependentOf(const MatchExpression& expr, const std::set<std::string>& pathSet);
/**
- * Determine if 'expr' is reliant only upon paths rooted in 'roots'.
+ * Determine if 'expr' is reliant only upon paths from 'pathSet'.
*/
-bool isOnlyDependentOn(const MatchExpression& expr, const std::set<std::string>& roots);
+bool isOnlyDependentOn(const MatchExpression& expr, const std::set<std::string>& pathSet);
/**
* Returns whether the path represented by 'first' is a prefix of the path represented by 'second'.
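The switch from 'roots' to 'pathSet' tightens the check: a dependency is now covered only when some path in the set equals it or is a dotted prefix of it, instead of matching on the first path component alone. A self-contained sketch of the covering test (simplified; the real code uses FieldRef and isPathPrefixOf):

#include <cassert>
#include <set>
#include <string>

// "meta" is a dotted prefix of "meta.a" but not of "metadata".
bool isDottedPrefix(const std::string& prefix, const std::string& path) {
    return path.size() > prefix.size() &&
        path.compare(0, prefix.size(), prefix) == 0 && path[prefix.size()] == '.';
}

bool coveredBy(const std::set<std::string>& pathSet, const std::string& field) {
    for (const auto& path : pathSet)
        if (path == field || isDottedPrefix(path, field))
            return true;
    return false;
}

int main() {
    assert(coveredBy({"meta"}, "meta.a"));
    assert(coveredBy({"control.min.time"}, "control.min.time"));
    assert(!coveredBy({"meta"}, "metadata"));  // component-wise prefix, not string prefix
    return 0;
}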
diff --git a/src/mongo/db/matcher/expression_leaf.h b/src/mongo/db/matcher/expression_leaf.h
index fdcc14e10b0..015dae13010 100644
--- a/src/mongo/db/matcher/expression_leaf.h
+++ b/src/mongo/db/matcher/expression_leaf.h
@@ -702,6 +702,10 @@ public:
return _hasNull;
}
+ bool hasRegex() const {
+ return !_regexes.empty();
+ }
+
bool hasEmptyArray() const {
return _hasEmptyArray;
}
diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript
index fd3b9ecbb49..0492feff232 100644
--- a/src/mongo/db/pipeline/SConscript
+++ b/src/mongo/db/pipeline/SConscript
@@ -356,6 +356,7 @@ pipelineEnv.Library(
'$BUILD_DIR/mongo/db/storage/encryption_hooks',
'$BUILD_DIR/mongo/db/storage/index_entry_comparison',
'$BUILD_DIR/mongo/db/storage/storage_options',
+ '$BUILD_DIR/mongo/db/timeseries/timeseries_options',
'$BUILD_DIR/mongo/db/update/update_document_diff',
'$BUILD_DIR/mongo/db/views/resolved_view',
'$BUILD_DIR/mongo/s/is_mongos',
@@ -375,7 +376,6 @@ pipelineEnv.Library(
'$BUILD_DIR/mongo/db/repl/image_collection_entry',
'$BUILD_DIR/mongo/db/sorter/sorter_idl',
'$BUILD_DIR/mongo/db/timeseries/timeseries_conversion_util',
- '$BUILD_DIR/mongo/db/timeseries/timeseries_options',
'$BUILD_DIR/mongo/rpc/command_status',
]
)
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
index 055b7be6fa6..1fa5ff224dd 100644
--- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
@@ -58,6 +58,7 @@
#include "mongo/db/pipeline/lite_parsed_document_source.h"
#include "mongo/db/query/util/make_data_structure.h"
#include "mongo/db/timeseries/timeseries_constants.h"
+#include "mongo/db/timeseries/timeseries_options.h"
#include "mongo/logv2/log.h"
#include "mongo/util/duration.h"
#include "mongo/util/time_support.h"
@@ -511,12 +512,15 @@ std::pair<BSONObj, bool> DocumentSourceInternalUnpackBucket::extractOrBuildProje
std::unique_ptr<MatchExpression>
DocumentSourceInternalUnpackBucket::createPredicatesOnBucketLevelField(
const MatchExpression* matchExpr) const {
- return BucketSpec::createPredicatesOnBucketLevelField(matchExpr,
- _bucketUnpacker.bucketSpec(),
- _bucketMaxSpanSeconds,
- pExpCtx->collationMatchesDefault,
- pExpCtx,
- _assumeNoMixedSchemaData);
+ return BucketSpec::createPredicatesOnBucketLevelField(
+ matchExpr,
+ _bucketUnpacker.bucketSpec(),
+ _bucketMaxSpanSeconds,
+ pExpCtx->collationMatchesDefault,
+ pExpCtx,
+ haveComputedMetaField(),
+ _assumeNoMixedSchemaData,
+ BucketSpec::IneligiblePredicatePolicy::kIgnore);
}
std::pair<boost::intrusive_ptr<DocumentSourceMatch>, boost::intrusive_ptr<DocumentSourceMatch>>
@@ -645,6 +649,12 @@ DocumentSourceInternalUnpackBucket::rewriteGroupByMinMax(Pipeline::SourceContain
return {};
}
+bool DocumentSourceInternalUnpackBucket::haveComputedMetaField() const {
+ return _bucketUnpacker.bucketSpec().metaField() &&
+ _bucketUnpacker.bucketSpec().fieldIsComputed(
+ _bucketUnpacker.bucketSpec().metaField().get());
+}
+
Pipeline::SourceContainer::iterator DocumentSourceInternalUnpackBucket::doOptimizeAt(
Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
invariant(*itr == this);
@@ -655,8 +665,7 @@ Pipeline::SourceContainer::iterator DocumentSourceInternalUnpackBucket::doOptimi
// Some optimizations may not be safe to do if we have computed the metaField via an $addFields
// or a computed $project. We won't do those optimizations if 'haveComputedMetaField' is true.
- bool haveComputedMetaField = _bucketUnpacker.bucketSpec().metaField() &&
- _bucketUnpacker.bucketSpec().fieldIsComputed(_bucketUnpacker.bucketSpec().metaField().get());
+ bool haveComputedMetaField = this->haveComputedMetaField();
// Before any other rewrites for the current stage, consider reordering with $sort.
if (auto sortPtr = dynamic_cast<DocumentSourceSort*>(std::next(itr)->get())) {
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h
index 0c5a17b4e23..bebe67b4a20 100644
--- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h
@@ -190,6 +190,7 @@ public:
private:
GetNextResult doGetNext() final;
+ bool haveComputedMetaField() const;
// If buckets contained a mixed type schema along some path, we have to push down special
// predicates in order to ensure correctness.
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp
index d3a3030be8d..ffe291dcbd0 100644
--- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp
@@ -57,8 +57,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
ASSERT_BSONOBJ_EQ(predicate->serialize(true),
fromjson("{$or: [ {'control.max.a': {$_internalExprGt: 1}},"
- "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
- "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}"));
+ "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
+ "{$type: [ \"$control.max.a\" ]} ]}} ]}"));
}
TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
@@ -78,8 +78,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
ASSERT_BSONOBJ_EQ(predicate->serialize(true),
fromjson("{$or: [ {'control.max.a': {$_internalExprGte: 1}},"
- "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
- "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}"));
+ "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
+ "{$type: [ \"$control.max.a\" ]} ]}} ]}"));
}
TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
@@ -99,8 +99,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
ASSERT_BSONOBJ_EQ(predicate->serialize(true),
fromjson("{$or: [ {'control.min.a': {$_internalExprLt: 1}},"
- "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
- "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}"));
+ "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
+ "{$type: [ \"$control.max.a\" ]} ]}} ]}"));
}
TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
@@ -120,8 +120,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
ASSERT_BSONOBJ_EQ(predicate->serialize(true),
fromjson("{$or: [ {'control.min.a': {$_internalExprLte: 1}},"
- "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
- "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}"));
+ "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
+ "{$type: [ \"$control.max.a\" ]} ]}} ]}"));
}
TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
@@ -142,8 +142,50 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
ASSERT_BSONOBJ_EQ(predicate->serialize(true),
fromjson("{$or: [ {$and:[{'control.min.a': {$_internalExprLte: 1}},"
"{'control.max.a': {$_internalExprGte: 1}}]},"
- "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
- "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}"));
+ "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
+ "{$type: [ \"$control.max.a\" ]} ]}} ]}"));
+}
+
+TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
+ OptimizeMapsINPredicatesOnControlField) {
+ auto pipeline =
+ Pipeline::parse(makeVector(fromjson("{$_internalUnpackBucket: {exclude: [], timeField: "
+ "'time', bucketMaxSpanSeconds: 3600}}"),
+ fromjson("{$match: {a: {$in: [1, 2]}}}")),
+ getExpCtx());
+ auto& container = pipeline->getSources();
+
+ ASSERT_EQ(pipeline->getSources().size(), 2U);
+
+ auto original = dynamic_cast<DocumentSourceMatch*>(container.back().get());
+ auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get())
+ ->createPredicatesOnBucketLevelField(original->getMatchExpression());
+
+ ASSERT(predicate);
+ auto expected = fromjson(
+ "{$or: ["
+ " {$or: ["
+ " {$and: ["
+ " {'control.min.a': {$_internalExprLte: 1}},"
+ " {'control.max.a': {$_internalExprGte: 1}}"
+ " ]},"
+ " {$expr: {$ne: ["
+ " {$type: [ \"$control.min.a\" ]},"
+ " {$type: [ \"$control.max.a\" ]}"
+ " ]}}"
+ " ]},"
+ " {$or: ["
+ " {$and: ["
+ " {'control.min.a': {$_internalExprLte: 2}},"
+ " {'control.max.a': {$_internalExprGte: 2}}"
+ " ]},"
+ " {$expr: {$ne: ["
+ " {$type: [ \"$control.min.a\" ]},"
+ " {$type: [ \"$control.max.a\" ]}"
+ " ]}}"
+ " ]}"
+ "]}");
+ ASSERT_BSONOBJ_EQ(predicate->serialize(true), expected);
}
TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
@@ -168,8 +210,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
ASSERT_BSONOBJ_EQ(predicate->serialize(true),
fromjson("{$or: [ {'control.max.a': {$_internalExprGt: 1}},"
- "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
- "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}"));
+ "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
+ "{$type: [ \"$control.max.a\" ]} ]}} ]}"));
}
TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
@@ -194,8 +236,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
ASSERT_BSONOBJ_EQ(predicate->serialize(true),
fromjson("{$or: [ {'control.max.a': {$_internalExprGte: 1}},"
- "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
- "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}"));
+ "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
+ "{$type: [ \"$control.max.a\" ]} ]}} ]}"));
}
TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
@@ -220,8 +262,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
ASSERT_BSONOBJ_EQ(predicate->serialize(true),
fromjson("{$or: [ {'control.min.a': {$_internalExprLt: 1}},"
- "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
- "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}"));
+ "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
+ "{$type: [ \"$control.max.a\" ]} ]}} ]}"));
}
TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
@@ -246,8 +288,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
ASSERT_BSONOBJ_EQ(predicate->serialize(true),
fromjson("{$or: [ {'control.min.a': {$_internalExprLte: 1}},"
- "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
- "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}"));
+ "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
+ "{$type: [ \"$control.max.a\" ]} ]}} ]}"));
}
TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
@@ -273,8 +315,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
ASSERT_BSONOBJ_EQ(predicate->serialize(true),
fromjson("{$or: [ {$and:[{'control.min.a': {$_internalExprLte: 1}},"
"{'control.max.a': {$_internalExprGte: 1}}]},"
- "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
- "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}"));
+ "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
+ "{$type: [ \"$control.max.a\" ]} ]}} ]}"));
}
TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
@@ -294,11 +336,11 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
ASSERT_BSONOBJ_EQ(predicate->serialize(true),
fromjson("{$and: [ {$or: [ {'control.max.b': {$_internalExprGt: 1}},"
- "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.b\" ]},"
- "{$type: [ \"$control.max.b\" ]} ]}} ]} ]},"
+ "{$expr: {$ne: [ {$type: [ \"$control.min.b\" ]},"
+ "{$type: [ \"$control.max.b\" ]} ]}} ]},"
"{$or: [ {'control.min.a': {$_internalExprLt: 5}},"
- "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
- "{$type: [ \"$control.max.a\" ]} ]}} ]} ]} ]}"));
+ "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
+ "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}"));
}
TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
@@ -335,9 +377,13 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
->createPredicatesOnBucketLevelField(original->getMatchExpression());
ASSERT_BSONOBJ_EQ(predicate->serialize(true),
- fromjson("{$or: [ {'control.max.b': {$_internalExprGt: 1}},"
- "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.b\" ]},"
- "{$type: [ \"$control.max.b\" ]} ]}} ]} ]}"));
+ fromjson("{$or: ["
+ " {'control.max.b': {$_internalExprGt: 1}},"
+ " {$expr: {$ne: ["
+ " {$type: [ \"$control.min.b\" ]},"
+ " {$type: [ \"$control.max.b\" ]}"
+ " ]}}"
+ "]}"));
}
TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
@@ -358,14 +404,132 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
ASSERT_BSONOBJ_EQ(predicate->serialize(true),
fromjson("{$and: [ {$or: [ {'control.max.b': {$_internalExprGte: 2}},"
- "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.b\" ]},"
- "{$type: [ \"$control.max.b\" ]} ]}} ]} ]},"
+ "{$expr: {$ne: [ {$type: [ \"$control.min.b\" ]},"
+ "{$type: [ \"$control.max.b\" ]} ]}} ]},"
"{$and: [ {$or: [ {'control.max.b': {$_internalExprGt: 1}},"
- "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.b\" ]},"
- "{$type: [ \"$control.max.b\" ]} ]}} ]} ]},"
+ "{$expr: {$ne: [ {$type: [ \"$control.min.b\" ]},"
+ "{$type: [ \"$control.max.b\" ]} ]}} ]},"
"{$or: [ {'control.min.a': {$_internalExprLt: 5}},"
- "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
- "{$type: [ \"$control.max.a\" ]} ]}} ]} ]} ]} ]}"));
+ "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
+ "{$type: [ \"$control.max.a\" ]} ]}} ]} ]} ]}"));
+}
+
+TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
+ OptimizeMapsOrWithPushableChildrenOnControlField) {
+ auto pipeline =
+ Pipeline::parse(makeVector(fromjson("{$_internalUnpackBucket: {exclude: [], timeField: "
+ "'time', bucketMaxSpanSeconds: 3600}}"),
+ fromjson("{$match: {$or: [{b: {$gt: 1}}, {a: {$lt: 5}}]}}")),
+ getExpCtx());
+ auto& container = pipeline->getSources();
+
+ ASSERT_EQ(pipeline->getSources().size(), 2U);
+
+ auto original = dynamic_cast<DocumentSourceMatch*>(container.back().get());
+ auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get())
+ ->createPredicatesOnBucketLevelField(original->getMatchExpression());
+
+ ASSERT(predicate);
+ ASSERT_BSONOBJ_EQ(predicate->serialize(true),
+ fromjson("{$or: ["
+ " {$or: ["
+ " {'control.max.b': {$_internalExprGt: 1}},"
+ " {$expr: {$ne: ["
+ " {$type: [ \"$control.min.b\" ]},"
+ " {$type: [ \"$control.max.b\" ]}"
+ " ]}}"
+ " ]},"
+ " {$or: ["
+ " {'control.min.a': {$_internalExprLt: 5}},"
+ " {$expr: {$ne: ["
+ " {$type: [ \"$control.min.a\" ]},"
+ " {$type: [ \"$control.max.a\" ]}"
+ " ]}}"
+ " ]}"
+ "]}"));
+}
+
+TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
+ OptimizeDoesNotMapOrWithUnpushableChildrenOnControlField) {
+ auto pipeline =
+ Pipeline::parse(makeVector(fromjson("{$_internalUnpackBucket: {exclude: [], timeField: "
+ "'time', bucketMaxSpanSeconds: 3600}}"),
+ fromjson("{$match: {$or: [{b: {$ne: 1}}, {a: {$ne: 5}}]}}")),
+ getExpCtx());
+ auto& container = pipeline->getSources();
+
+ ASSERT_EQ(pipeline->getSources().size(), 2U);
+
+ auto original = dynamic_cast<DocumentSourceMatch*>(container.back().get());
+ auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get())
+ ->createPredicatesOnBucketLevelField(original->getMatchExpression());
+
+ ASSERT(predicate == nullptr);
+}
+
+TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
+ OptimizeDoesNotMapOrWithPushableAndUnpushableChildrenOnControlField) {
+ auto pipeline =
+ Pipeline::parse(makeVector(fromjson("{$_internalUnpackBucket: {exclude: [], timeField: "
+ "'time', bucketMaxSpanSeconds: 3600}}"),
+ fromjson("{$match: {$or: [{b: {$gt: 1}}, {a: {$ne: 5}}]}}")),
+ getExpCtx());
+ auto& container = pipeline->getSources();
+
+ ASSERT_EQ(pipeline->getSources().size(), 2U);
+
+ auto original = dynamic_cast<DocumentSourceMatch*>(container.back().get());
+ auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get())
+ ->createPredicatesOnBucketLevelField(original->getMatchExpression());
+
+ // When a predicate can't be pushed down, it's the same as pushing down a trivially-true
+ // predicate. So when any child of an $or can't be pushed down, we could generate something like
+ // {$or: [ ... {$alwaysTrue: {}}, ... ]}, but then we might as well not push down the whole $or.
+ ASSERT(predicate == nullptr);
+}
+
+TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
+ OptimizeMapsNestedOrWithPushableChildrenOnControlField) {
+ auto pipeline = Pipeline::parse(
+ makeVector(
+ fromjson("{$_internalUnpackBucket: {exclude: [], timeField: 'time', "
+ "bucketMaxSpanSeconds: 3600}}"),
+ fromjson("{$match: {$or: [{b: {$gte: 2}}, {$or: [{b: {$gt: 1}}, {a: {$lt: 5}}]}]}}")),
+ getExpCtx());
+ auto& container = pipeline->getSources();
+
+ ASSERT_EQ(pipeline->getSources().size(), 2U);
+
+ auto original = dynamic_cast<DocumentSourceMatch*>(container.back().get());
+ auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get())
+ ->createPredicatesOnBucketLevelField(original->getMatchExpression());
+
+ ASSERT_BSONOBJ_EQ(predicate->serialize(true),
+ fromjson("{$or: ["
+ " {$or: ["
+ " {'control.max.b': {$_internalExprGte: 2}},"
+ " {$expr: {$ne: ["
+ " {$type: [ \"$control.min.b\" ]},"
+ " {$type: [ \"$control.max.b\" ]}"
+ " ]}}"
+ " ]},"
+ " {$or: ["
+ " {$or: ["
+ " {'control.max.b': {$_internalExprGt: 1}},"
+ " {$expr: {$ne: ["
+ " {$type: [ \"$control.min.b\" ]},"
+ " {$type: [ \"$control.max.b\" ]}"
+ " ]}}"
+ " ]},"
+ " {$or: ["
+ " {'control.min.a': {$_internalExprLt: 5}},"
+ " {$expr: {$ne: ["
+ " {$type: [ \"$control.min.a\" ]},"
+ " {$type: [ \"$control.max.a\" ]}"
+ " ]}}"
+ " ]}"
+ " ]}"
+ "]}"));
}
TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
@@ -410,14 +574,14 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
ASSERT_BSONOBJ_EQ(
stages[0],
fromjson("{$match: {$and: [ {$or: [ {'control.max.b': {$_internalExprGte: 2}},"
- "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.b\" ]},"
- "{$type: [ \"$control.max.b\" ]} ]}} ]} ]},"
+ "{$expr: {$ne: [ {$type: [ \"$control.min.b\" ]},"
+ "{$type: [ \"$control.max.b\" ]} ]}} ]},"
"{$or: [ {'control.max.c': {$_internalExprGt: 1}},"
- "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.c\" ]},"
- "{$type: [ \"$control.max.c\" ]} ]}} ]} ]},"
+ "{$expr: {$ne: [ {$type: [ \"$control.min.c\" ]},"
+ "{$type: [ \"$control.max.c\" ]} ]}} ]},"
"{$or: [ {'control.min.a': {$_internalExprLt: 5}},"
- "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
- "{$type: [ \"$control.max.a\" ]} ]}} ]} ]} ]}}"));
+ "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
+ "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}}"));
ASSERT_BSONOBJ_EQ(stages[1], unpackBucketObj);
ASSERT_BSONOBJ_EQ(stages[2], matchObj);
}
@@ -491,7 +655,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get())
->createPredicatesOnBucketLevelField(original->getMatchExpression());
- ASSERT(predicate == nullptr);
+ // Meta predicates are mapped to the meta field, not the control min/max fields.
+ ASSERT_BSONOBJ_EQ(predicate->serialize(true), fromjson("{meta: {$gt: 5}}"));
}
TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
@@ -509,7 +674,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get())
->createPredicatesOnBucketLevelField(original->getMatchExpression());
- ASSERT(predicate == nullptr);
+ // Meta predicates are mapped to the meta field, not the control min/max fields.
+ ASSERT_BSONOBJ_EQ(predicate->serialize(true), fromjson("{'meta.foo': {$gt: 5}}"));
}
TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
@@ -528,9 +694,16 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
->createPredicatesOnBucketLevelField(original->getMatchExpression());
ASSERT_BSONOBJ_EQ(predicate->serialize(true),
- fromjson("{$or: [ {'control.max.a': {$_internalExprGt: 1}},"
- "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
- "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}"));
+ fromjson("{$and: ["
+ " {$or: ["
+ " {'control.max.a': {$_internalExprGt: 1}},"
+ " {$expr: {$ne: ["
+ " {$type: [ \"$control.min.a\" ]},"
+ " {$type: [ \"$control.max.a\" ]}"
+ " ]}}"
+ " ]},"
+ " {meta: {$eq: 5}}"
+ "]}"));
}
TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, OptimizeMapsTimePredicatesOnId) {
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/optimize_pipeline_test.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/optimize_pipeline_test.cpp
index 7bf4bde0eb0..cfe92c09f5f 100644
--- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/optimize_pipeline_test.cpp
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/optimize_pipeline_test.cpp
@@ -84,26 +84,45 @@ TEST_F(OptimizePipeline, MetaMatchPushedDown) {
ASSERT_BSONOBJ_EQ(unpack, serialized[1]);
}
-TEST_F(OptimizePipeline, MixedMatchOnlyControlPredicatesPushedDown) {
+TEST_F(OptimizePipeline, MixedMatchOr) {
auto unpack = fromjson(
"{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta', "
"bucketMaxSpanSeconds: 3600}}");
auto match = fromjson(
- "{$match: {$and: [{x: {$lte: 1}}, {$or: [{'myMeta.a': "
- "{$gt: 1}}, {y: {$lt: 1}}]}]}}");
+ "{$match: {$and: ["
+ " {x: {$lte: 1}},"
+ " {$or: ["
+ // This $or is mixed: it contains both metadata and metric predicates.
+ " {'myMeta.a': {$gt: 1}},"
+ " {y: {$lt: 1}}"
+ " ]}"
+ "]}}");
auto pipeline = Pipeline::parse(makeVector(unpack, match), getExpCtx());
ASSERT_EQ(2u, pipeline->getSources().size());
pipeline->optimizePipeline();
- // We should fail to push down the $match on meta because of the $or clause. We should still be
- // able to map the predicate on 'x' to a predicate on the control field.
auto stages = pipeline->writeExplainOps(ExplainOptions::Verbosity::kQueryPlanner);
ASSERT_EQ(3u, stages.size());
- ASSERT_BSONOBJ_EQ(fromjson("{$match: {$or: [ {'control.min.x': {$_internalExprLte: 1}},"
- "{$expr: {$ne: [ {$type: [ \"$control.min.x\" ]},"
- "{$type: [ \"$control.max.x\" ] } ] } } ] }}"),
- stages[0].getDocument().toBson());
+ auto expected = fromjson(
+ "{$match: {$and: ["
+ // Result of pushing down {x: {$lte: 1}}.
+ " {$or: ["
+ " {'control.min.x': {$_internalExprLte: 1}},"
+ " {$expr: {$ne: [ {$type: [ \"$control.min.x\" ]},"
+ " {$type: [ \"$control.max.x\" ]}"
+ " ]}}"
+ " ]},"
+ // Result of pushing down the $or predicate.
+ " {$or: ["
+ " {'meta.a': {$gt: 1}},"
+ " {'control.min.y': {$_internalExprLt: 1}},"
+ " {$expr: {$ne: [ {$type: [ \"$control.min.y\" ]},"
+ " {$type: [ \"$control.max.y\" ]}"
+ " ]}}"
+ " ]}"
+ "]}}");
+ ASSERT_BSONOBJ_EQ(expected, stages[0].getDocument().toBson());
ASSERT_BSONOBJ_EQ(unpack, stages[1].getDocument().toBson());
ASSERT_BSONOBJ_EQ(match, stages[2].getDocument().toBson());
}
@@ -113,20 +132,21 @@ TEST_F(OptimizePipeline, MixedMatchOnlyMetaMatchPushedDown) {
"{$_internalUnpackBucket: { exclude: [], timeField: 'time', metaField: 'myMeta', "
"bucketMaxSpanSeconds: 3600}}");
auto pipeline = Pipeline::parse(
- makeVector(unpack, fromjson("{$match: {myMeta: {$gte: 0, $lte: 5}, a: {$in: [1, 2, 3]}}}")),
+ makeVector(unpack,
+ fromjson("{$match: {myMeta: {$gte: 0, $lte: 5}, a: {$type: \"string\"}}}")),
getExpCtx());
ASSERT_EQ(2u, pipeline->getSources().size());
pipeline->optimizePipeline();
// We should push down the $match on the metaField but not the predicate on '$a', which is
- // ineligible because of the $in.
+ // ineligible because of the $type.
auto serialized = pipeline->serializeToBson();
ASSERT_EQ(3u, serialized.size());
ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [{meta: {$gte: 0}}, {meta: {$lte: 5}}]}}"),
serialized[0]);
ASSERT_BSONOBJ_EQ(unpack, serialized[1]);
- ASSERT_BSONOBJ_EQ(fromjson("{$match: {a: {$in: [1, 2, 3]}}}"), serialized[2]);
+ ASSERT_BSONOBJ_EQ(fromjson("{$match: {a: {$type: [ 2 ]}}}"), serialized[2]);
}
TEST_F(OptimizePipeline, MultipleMatchesPushedDown) {
@@ -242,11 +262,17 @@ TEST_F(OptimizePipeline, SortThenMixedMatchPushedDown) {
// We should push down both the $sort and parts of the $match.
auto serialized = pipeline->serializeToBson();
ASSERT_EQ(4u, serialized.size());
- ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [{meta: {$eq: 'abc'}},"
- "{$or: [ {'control.max.a': {$_internalExprGte: 5}},"
- "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
- "{$type: [ \"$control.max.a\" ]} ]}} ]} ]} ]}}"),
- serialized[0]);
+ auto expected = fromjson(
+ "{$match: {$and: ["
+ " {meta: {$eq: 'abc'}},"
+ " {$or: ["
+ " {'control.max.a': {$_internalExprGte: 5}},"
+ " {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
+ " {$type: [ \"$control.max.a\" ]}"
+ " ]}}"
+ " ]}"
+ "]}}");
+ ASSERT_BSONOBJ_EQ(expected, serialized[0]);
ASSERT_BSONOBJ_EQ(fromjson("{$sort: {meta: -1}}"), serialized[1]);
ASSERT_BSONOBJ_EQ(unpack, serialized[2]);
ASSERT_BSONOBJ_EQ(fromjson("{$match: {a: {$gte: 5}}}"), serialized[3]);
@@ -437,7 +463,7 @@ TEST_F(OptimizePipeline, ComputedProjectThenMetaMatchNotPushedDown) {
pipeline->optimizePipeline();
- // We should push down both the project and internalize the remaining project, but we can't
+ // We should both push down the project and internalize the remaining project, but we can't
// push down the meta match due to the (now invalid) renaming.
auto serialized = pipeline->serializeToBson();
ASSERT_EQ(3u, serialized.size());
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp
index 5e492dba424..8f31d177a53 100644
--- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp
@@ -261,10 +261,16 @@ TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, OptimizeSplitsMatchAndMaps
// $_internalUnpackBucket and merged.
auto serialized = pipeline->serializeToBson();
ASSERT_EQ(3u, serialized.size());
- ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [{meta: {$gte: 0}}, {meta: {$lte: 5}}, "
- "{$or: [ {'control.min.a': {$_internalExprLte: 4}},"
- "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
- "{$type: [ \"$control.max.a\" ]} ]}} ]} ]} ]}}"),
+ ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: ["
+ " {meta: {$gte: 0}},"
+ " {meta: {$lte: 5}},"
+ " {$or: ["
+ " {'control.min.a': {$_internalExprLte: 4}},"
+ " {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
+ " {$type: [ \"$control.max.a\" ]}"
+ " ]}}"
+ " ]}"
+ "]}}"),
serialized[0]);
ASSERT_BSONOBJ_EQ(unpack, serialized[1]);
ASSERT_BSONOBJ_EQ(fromjson("{$match: {a: {$lte: 4}}}"), serialized[2]);
@@ -293,8 +299,13 @@ TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename,
"{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta', "
"bucketMaxSpanSeconds: 3600}}");
auto match = fromjson(
- "{$match: {$and: [{x: {$lte: 1}}, {$or: [{'myMeta.a': "
- "{$gt: 1}}, {y: {$lt: 1}}]}]}}");
+ "{$match: {$and: ["
+ " {x: {$lte: 1}},"
+ " {$or: ["
+ " {'myMeta.a': {$gt: 1}},"
+ " {y: {$lt: 1}}"
+ " ]}"
+ "]}}");
auto pipeline = Pipeline::parse(makeVector(unpack, match), getExpCtx());
ASSERT_EQ(2u, pipeline->getSources().size());
@@ -304,10 +315,25 @@ TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename,
// map the predicate on 'x' to a predicate on the control field.
auto serialized = pipeline->serializeToBson();
ASSERT_EQ(3u, serialized.size());
- ASSERT_BSONOBJ_EQ(fromjson("{$match: {$or: [ {'control.min.x': {$_internalExprLte: 1}},"
- "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.x\" ]},"
- "{$type: [ \"$control.max.x\" ]} ]}} ]} ]}}"),
- serialized[0]);
+ auto expected = fromjson(
+ "{$match: {$and: ["
+ // Result of pushing down {x: {$lte: 1}}.
+ " {$or: ["
+ " {'control.min.x': {$_internalExprLte: 1}},"
+ " {$expr: {$ne: [ {$type: [ \"$control.min.x\" ]},"
+ " {$type: [ \"$control.max.x\" ]} ]}}"
+ " ]},"
+ // Result of pushing down {$or ... myMeta.a ... y ...}.
+ " {$or: ["
+ " {'meta.a': {$gt: 1}},"
+ " {$or: ["
+ " {'control.min.y': {$_internalExprLt: 1}},"
+ " {$expr: {$ne: [ {$type: [ \"$control.min.y\" ]},"
+ " {$type: [ \"$control.max.y\" ]} ]}}"
+ " ]}"
+ " ]}"
+ "]}}");
+ ASSERT_BSONOBJ_EQ(expected, serialized[0]);
ASSERT_BSONOBJ_EQ(unpack, serialized[1]);
ASSERT_BSONOBJ_EQ(match, serialized[2]);
}
diff --git a/src/mongo/db/timeseries/SConscript b/src/mongo/db/timeseries/SConscript
index a9cc40ef985..84745946b65 100644
--- a/src/mongo/db/timeseries/SConscript
+++ b/src/mongo/db/timeseries/SConscript
@@ -56,6 +56,7 @@ env.Library(
],
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/base',
+ '$BUILD_DIR/mongo/db/pipeline/pipeline',
],
)
diff --git a/src/mongo/db/timeseries/timeseries_commands_conversion_helper.cpp b/src/mongo/db/timeseries/timeseries_commands_conversion_helper.cpp
index e857ab8a3a8..dc6714f9ab1 100644
--- a/src/mongo/db/timeseries/timeseries_commands_conversion_helper.cpp
+++ b/src/mongo/db/timeseries/timeseries_commands_conversion_helper.cpp
@@ -33,6 +33,9 @@
#include "mongo/db/index/index_descriptor.h"
#include "mongo/db/index_names.h"
+#include "mongo/db/pipeline/document_source_internal_unpack_bucket.h"
+#include "mongo/db/query/collation/collator_factory_interface.h"
+#include "mongo/db/query/collation/collator_interface.h"
#include "mongo/db/storage/storage_parameters_gen.h"
#include "mongo/db/timeseries/timeseries_constants.h"
#include "mongo/db/timeseries/timeseries_index_schema_conversion_functions.h"
@@ -83,12 +86,59 @@ CreateIndexesCommand makeTimeseriesCreateIndexesCommand(OperationContext* opCtx,
if (elem.fieldNameStringData() == IndexDescriptor::kPartialFilterExprFieldName) {
if (feature_flags::gTimeseriesMetricIndexes.isEnabledAndIgnoreFCV() &&
serverGlobalParams.featureCompatibility.isFCVUpgradingToOrAlreadyLatest()) {
- // Partial indexes are not supported in FCV < 5.2.
isBucketsIndexSpecCompatibleForDowngrade = false;
} else {
uasserted(ErrorCodes::InvalidOptions,
"Partial indexes are not supported on time-series collections");
}
+
+ uassert(ErrorCodes::CannotCreateIndex,
+ "Partial indexes on time-series collections require FCV 5.3",
+ feature_flags::gTimeseriesMetricIndexes.isEnabled(
+ serverGlobalParams.featureCompatibility));
+ BSONObj pred = elem.Obj();
+
+ // If the createIndexes command specifies a collation for this index, then that
+ // collation affects how we should interpret expressions in the partial filter
+ // ($gt, $lt, etc).
+ if (auto collatorSpec = origIndex[NewIndexSpec::kCollationFieldName]) {
+ uasserted(
+ 5916300,
+ std::string{"On a time-series collection, partialFilterExpression and "} +
+ NewIndexSpec::kCollationFieldName + " arguments are incompatible"_sd);
+ }
+ // Since no collation was specified in the command, we know the index collation will
+ // match the collection's collation.
+ auto collationMatchesDefault = ExpressionContext::CollationMatchesDefault::kYes;
+
+ // Even though the index collation will match the collection's collation, we don't
+ // know whether or not that collation is simple. However, I think we can correctly
+ // rewrite the filter expression without knowing this... Looking up the correct
+ // value would require handling mongos and mongod separately.
+ std::unique_ptr<CollatorInterface> collator{nullptr};
+
+ auto expCtx = make_intrusive<ExpressionContext>(opCtx, std::move(collator), origNs);
+ expCtx->collationMatchesDefault = collationMatchesDefault;
+
+ // partialFilterExpression is evaluated against a collection, so there are no
+ // computed fields.
+ bool haveComputedMetaField = false;
+
+ // As part of building the index, we verify that the collection does not contain
+ // any mixed-schema buckets. So by the time the index is visible to the query
+ // planner, this will be true.
+ bool assumeNoMixedSchemaData = true;
+
+ BSONObj bucketPred =
+ BucketSpec::pushdownPredicate(expCtx,
+ options,
+ collationMatchesDefault,
+ pred,
+ haveComputedMetaField,
+ assumeNoMixedSchemaData,
+ BucketSpec::IneligiblePredicatePolicy::kError);
+ builder.append(IndexDescriptor::kPartialFilterExprFieldName, bucketPred);
+ continue;
}
if (elem.fieldNameStringData() == IndexDescriptor::kSparseFieldName) {
@@ -149,6 +199,9 @@ CreateIndexesCommand makeTimeseriesCreateIndexesCommand(OperationContext* opCtx,
std::move(bucketsIndexSpecWithStatus.getValue()));
continue;
}
+
+ // Any index option that is not explicitly banned and not handled specially is
+ // passed through unchanged.
builder.append(elem);
}
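pushdownPredicate first splits the user's partialFilterExpression into a metadata-only part (renamed onto the buckets' 'meta' field and kept verbatim) and a metric part (rewritten against control.min/control.max). A toy sketch of that partition over a flat list of conjuncts (illustrative only; the real splitMatchExpressionBy operates on a MatchExpression tree and performs the rename):

#include <string>
#include <utility>
#include <vector>

using Conjunct = std::pair<std::string /*path*/, std::string /*predicate*/>;

// Partition conjuncts by whether they depend only on the metaField.
std::pair<std::vector<Conjunct>, std::vector<Conjunct>> splitByMeta(
    const std::vector<Conjunct>& conjuncts, const std::string& metaField) {
    std::vector<Conjunct> metaOnly, metric;
    for (const auto& c : conjuncts) {
        bool onMeta =
            c.first == metaField || c.first.rfind(metaField + ".", 0) == 0;
        (onMeta ? metaOnly : metric).push_back(c);
    }
    return {metaOnly, metric};
}

int main() {
    // {'m.tag': {$eq: ...}} goes to the meta side; {temp: {$gt: ...}} would be
    // rewritten as a bucket-level (control-field) predicate.
    auto [metaOnly, metric] = splitByMeta({{"m.tag", "$eq"}, {"temp", "$gt"}}, "m");
    return metaOnly.size() == 1 && metric.size() == 1 ? 0 : 1;
}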
diff --git a/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.cpp b/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.cpp
index 570ab781281..1dff8c06341 100644
--- a/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.cpp
+++ b/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.cpp
@@ -32,6 +32,8 @@
#include "mongo/db/timeseries/timeseries_index_schema_conversion_functions.h"
#include "mongo/db/index_names.h"
+#include "mongo/db/matcher/expression_algo.h"
+#include "mongo/db/matcher/expression_parser.h"
#include "mongo/db/storage/storage_parameters_gen.h"
#include "mongo/db/timeseries/timeseries_constants.h"
#include "mongo/db/timeseries/timeseries_gen.h"
@@ -400,11 +402,13 @@ bool isBucketsIndexSpecCompatibleForDowngrade(const TimeseriesOptions& timeserie
/*timeseriesMetricIndexesFeatureFlagEnabled=*/false) != boost::none;
}
-bool doesBucketsIndexIncludeKeyOnMeasurement(const TimeseriesOptions& timeseriesOptions,
- const BSONObj& bucketsIndex) {
- if (!bucketsIndex.hasField(kKeyFieldName)) {
- return false;
- }
+bool doesBucketsIndexIncludeMeasurement(OperationContext* opCtx,
+ const NamespaceString& bucketNs,
+ const TimeseriesOptions& timeseriesOptions,
+ const BSONObj& bucketsIndex) {
+ tassert(5916306,
+ str::stream() << "Index spec has no 'key': " << bucketsIndex.toString(),
+ bucketsIndex.hasField(kKeyFieldName));
auto timeField = timeseriesOptions.getTimeField();
auto metaField = timeseriesOptions.getMetaField();
@@ -413,22 +417,58 @@ bool doesBucketsIndexIncludeKeyOnMeasurement(const TimeseriesOptions& timeseries
<< timeseries::kControlMinFieldNamePrefix << timeField;
const std::string controlMaxTimeField = str::stream()
<< timeseries::kControlMaxFieldNamePrefix << timeField;
+ static const std::string idField = "_id";
- const BSONObj keyObj = bucketsIndex.getField(kKeyFieldName).Obj();
- for (const auto& elem : keyObj) {
- if (elem.fieldNameStringData() == controlMinTimeField ||
- elem.fieldNameStringData() == controlMaxTimeField) {
- continue;
+ auto isMeasurementField = [&](StringData name) -> bool {
+ if (name == controlMinTimeField || name == controlMaxTimeField) {
+ return false;
}
if (metaField) {
- if (elem.fieldNameStringData() == timeseries::kBucketMetaFieldName ||
- elem.fieldNameStringData().startsWith(timeseries::kBucketMetaFieldName + ".")) {
- continue;
+ if (name == timeseries::kBucketMetaFieldName ||
+ name.startsWith(timeseries::kBucketMetaFieldName + ".")) {
+ return false;
}
}
return true;
+ };
+
+ // Check index key.
+ const BSONObj keyObj = bucketsIndex.getField(kKeyFieldName).Obj();
+ for (const auto& elem : keyObj) {
+ if (isMeasurementField(elem.fieldNameStringData()))
+ return true;
+ }
+
+ // Check partial filter expression.
+ if (auto filterElem = bucketsIndex[kPartialFilterExpressionFieldName]) {
+ tassert(5916302,
+ str::stream() << "Partial filter expression is not an object: " << filterElem,
+ filterElem.type() == BSONType::Object);
+
+ auto expCtx = make_intrusive<ExpressionContext>(opCtx, nullptr /* collator */, bucketNs);
+
+ MatchExpressionParser::AllowedFeatureSet allowedFeatures =
+ MatchExpressionParser::kDefaultSpecialFeatures;
+
+ // TODO SERVER-53380 convert to tassertStatusOK.
+ auto statusWithFilter = MatchExpressionParser::parse(
+ filterElem.Obj(), expCtx, ExtensionsCallbackNoop{}, allowedFeatures);
+ tassert(5916303,
+ str::stream() << "Partial filter expression failed to parse: "
+ << statusWithFilter.getStatus(),
+ statusWithFilter.isOK());
+ auto filter = std::move(statusWithFilter.getValue());
+
+ if (!expression::isOnlyDependentOn(*filter,
+ {std::string{timeseries::kBucketMetaFieldName},
+ controlMinTimeField,
+ controlMaxTimeField,
+ idField})) {
+ // Partial filter expression depends on a non-time, non-metadata field.
+ return true;
+ }
}
return false;
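The classification above can be read as: an index uses a measurement if either its key or its partial filter mentions anything other than the meta fields, the control time bounds, or _id. A standalone sketch of the isMeasurementField test for a collection with timeField 'time' and a metaField (simplified; the real check also parses the partial filter and calls isOnlyDependentOn):

#include <cassert>
#include <string>

bool isMeasurementField(const std::string& name) {
    if (name == "control.min.time" || name == "control.max.time")
        return false;
    if (name == "meta" || name.rfind("meta.", 0) == 0)
        return false;
    return true;
}

int main() {
    assert(!isMeasurementField("control.min.time"));  // time bound: downgrade-safe
    assert(!isMeasurementField("meta.tag"));          // metadata: downgrade-safe
    assert(isMeasurementField("control.min.temp"));   // measurement field
    return 0;
}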
diff --git a/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.h b/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.h
index f51e52db0bc..ca9fc2e1a77 100644
--- a/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.h
+++ b/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.h
@@ -77,11 +77,15 @@ bool isBucketsIndexSpecCompatibleForDowngrade(const TimeseriesOptions& timeserie
const BSONObj& bucketsIndex);
/**
- * Returns true if 'bucketsIndex' contains a key on a measurement field, excluding the time field.
- * This is helpful to detect if the 'bucketsIndex' contains a key on a field that was allowed to
- * have mixed-schema data in MongoDB versions < 5.2.
+ * Returns true if 'bucketsIndex' uses a measurement field, excluding the time field. Checks both
+ * the index key and the partialFilterExpression, if present.
+ *
+ * This is helpful to detect if the 'bucketsIndex' relies on a field that was allowed to have
+ * mixed-schema data in MongoDB versions < 5.2.
*/
-bool doesBucketsIndexIncludeKeyOnMeasurement(const TimeseriesOptions& timeseriesOptions,
- const BSONObj& bucketsIndex);
+bool doesBucketsIndexIncludeMeasurement(OperationContext* opCtx,
+ const NamespaceString& bucketNs,
+ const TimeseriesOptions& timeseriesOptions,
+ const BSONObj& bucketsIndex);
} // namespace mongo::timeseries
diff --git a/src/mongo/shell/types.js b/src/mongo/shell/types.js
index 571f40d8af6..6a0f78dad6a 100644
--- a/src/mongo/shell/types.js
+++ b/src/mongo/shell/types.js
@@ -276,6 +276,8 @@ Object.extend = function(dst, src, deep) {
eval("v = " + tojson(v));
} else if ("floatApprox" in v) { // convert NumberLong properly
eval("v = " + tojson(v));
+ } else if (v.constructor === Date) { // convert Date properly
+ eval("v = " + tojson(v));
} else {
v = Object.extend(typeof (v.length) == "number" ? [] : {}, v, true);
}