author     Rui Liu <rui.liu@mongodb.com>                     2022-11-02 21:06:09 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2022-11-02 22:14:44 +0000
commit     660a4274bf88a46734619a6606c320da30e868ab (patch)
tree       8fe77dd4470c892a53b57ed9be979ca4a74d1724
parent     a29be1cf33df71c62ebbcc55ec1c1d870c46e297 (diff)
download   mongo-660a4274bf88a46734619a6606c320da30e868ab.tar.gz
SERVER-70269 Push match predicates into unpack stage
10 files changed, 1184 insertions, 249 deletions
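
In brief: when a $match immediately follows $_internalUnpackBucket, the commit pushes the predicate into the unpack stage as an eventFilter (evaluated per measurement) and, where possible, derives a "tight" wholeBucketFilter on the bucket's control.min/control.max fields so per-event checks can be skipped when an entire bucket matches; a "loose" control-field $match is still inserted before the stage to skip non-matching buckets. A minimal shell sketch of the observable behavior, assuming only a scratch time-series collection (the collection name below is illustrative, not from the patch):

    // Any time-series collection will do; 'example_pushdown' is a made-up name.
    const coll = db.example_pushdown;
    coll.drop();
    assert.commandWorked(db.createCollection(coll.getName(), {timeseries: {timeField: 'time'}}));
    assert.commandWorked(coll.insert({time: ISODate('2022-01-01T00:00:01'), a: 1}));

    // After this change, explain() should show the predicate inside $_internalUnpackBucket:
    //   eventFilter:       {time: {$gt: ...}}                (checked per measurement)
    //   wholeBucketFilter: {'control.min.time': {$gt: ...}}  (when the whole bucket matches,
    //                                                         per-event checks are skipped)
    const explain = coll.explain().aggregate([{$match: {time: {$gt: ISODate('2022-01-01')}}}]);
    printjson(explain);
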
diff --git a/jstests/core/timeseries/nondefault_collation.js b/jstests/core/timeseries/nondefault_collation.js
index 307d40cbfc3..4e791a81dd3 100644
--- a/jstests/core/timeseries/nondefault_collation.js
+++ b/jstests/core/timeseries/nondefault_collation.js
@@ -44,7 +44,7 @@ const englishCollation = {
 };
 
 const simpleCollation = {
-    collation: {locale: "simple"}
+    locale: "simple"
 };
 
 assert.commandWorked(db.createCollection(coll.getName(), {
@@ -127,17 +127,17 @@ assert.commandWorked(coll.insert(
 // its metadata using simple collation. These tests confirm that queries on the indexed field using
 // nondefault (simple) collation use the index. They also confirm that queries that don't involve
 // strings but do use default collation, on indexed fields, also use the index.
-const nonDefaultCollationQuery = coll.find({meta: 2}, {collation: englishCollation}).explain();
+const nonDefaultCollationQuery = coll.find({meta: 2}).collation(englishCollation).explain();
 assert(aggPlanHasStage(nonDefaultCollationQuery, "IXSCAN"), nonDefaultCollationQuery);
 
-const simpleNonDefaultCollationQuery = coll.find({meta: 2}, simpleCollation).explain();
+const simpleNonDefaultCollationQuery = coll.find({meta: 2}).collation(simpleCollation).explain();
 assert(aggPlanHasStage(simpleNonDefaultCollationQuery, "IXSCAN"), simpleNonDefaultCollationQuery);
 
-const defaultCollationQuery = coll.find({meta: 1}, {collation: defaultCollation}).explain();
+const defaultCollationQuery = coll.find({meta: 1}).collation(defaultCollation).explain();
 assert(aggPlanHasStage(defaultCollationQuery, "IXSCAN"), defaultCollationQuery);
 
 // This test guarantees that the bucket's min/max matches the query's min/max regardless of
 // collation.
-results = coll.find({value: {$gt: "4"}}, simpleCollation);
-assert.eq(4, results.itcount());
+results = coll.find({value: {$gt: "4"}}).collation(simpleCollation);
+assert.eq(1, results.itcount());
 }());
diff --git a/jstests/core/timeseries/timeseries_match_pushdown.js b/jstests/core/timeseries/timeseries_match_pushdown.js
new file mode 100644
index 00000000000..7fe13e878a6
--- /dev/null
+++ b/jstests/core/timeseries/timeseries_match_pushdown.js
@@ -0,0 +1,411 @@
+/**
+ * Tests that the $match stage followed by unpacking stage has been pushed down with correct
+ * predicates.
+ *
+ * @tags: [
+ *   requires_timeseries,
+ *   requires_fcv_62,
+ *   does_not_support_stepdowns,
+ *   directly_against_shardsvrs_incompatible,
+ * ]
+ */
+(function() {
+"use strict";
+
+load("jstests/libs/analyze_plan.js");  // For getAggPlanStages
+
+const coll = db.timeseries_match_pushdown;
+coll.drop();
+
+const timeField = 'time';
+const metaField = 'meta';
+const measureField = 'a';
+assert.commandWorked(db.createCollection(coll.getName(), {timeseries: {timeField, metaField}}));
+
+// Insert documents into the collection. The bucketing is designed so that some buckets match the
+// query entirely, some buckets match the query partially, and some with no matches.
+assert.commandWorked(coll.insert([
+    {[timeField]: ISODate('2022-01-01T00:00:01'), [measureField]: 1, [metaField]: 0},
+    {[timeField]: ISODate('2022-01-01T00:00:02'), [measureField]: 2, [metaField]: 1},
+    {[timeField]: ISODate('2022-01-01T00:00:03'), [measureField]: 3, [metaField]: 1},
+    {[timeField]: ISODate('2022-01-01T00:00:04'), [measureField]: 4, [metaField]: 1},
+    {[timeField]: ISODate('2022-01-01T00:00:05'), [measureField]: 5, [metaField]: 2},
+    {[timeField]: ISODate('2022-01-01T00:00:06'), [measureField]: 6, [metaField]: 3},
+    {[timeField]: ISODate('2022-01-01T00:00:07'), [measureField]: 7, [metaField]: 3},
+    {[timeField]: ISODate('2022-01-01T00:00:08'), [measureField]: 8, [metaField]: 3},
+    {[timeField]: ISODate('2022-01-01T00:00:09'), [measureField]: 9, [metaField]: 4},
+]));
+const aTime = ISODate('2022-01-01T00:00:03');
+const bTime = ISODate('2022-01-01T00:00:07');
+const bMeta = 3;
+const aMeasure = 3;
+
+/**
+ * Runs a $match query with the specified 'eventFilter' or a 'pipeline'.
+ * Assert the 'wholeBucketFilter' is attached correctly to the unpacking stage, and has the expected
+ * result 'expectedDocs'.
+ */
+const runTest = function({pipeline, eventFilter, wholeBucketFilter, expectedDocs}) {
+    if (!pipeline) {
+        pipeline = [{$match: eventFilter}];
+    }
+    const explain = assert.commandWorked(coll.explain().aggregate(pipeline));
+    const unpackStages = getAggPlanStages(explain, '$_internalUnpackBucket');
+    assert.eq(1,
+              unpackStages.length,
+              "Should only have a single $_internalUnpackBucket stage: " + tojson(explain));
+    const unpackStage = unpackStages[0].$_internalUnpackBucket;
+    assert.docEq(unpackStage.eventFilter, eventFilter, "Incorrect eventFilter: " + tojson(explain));
+    if (wholeBucketFilter) {
+        assert.docEq(unpackStage.wholeBucketFilter,
+                     wholeBucketFilter,
+                     "Incorrect wholeBucketFilter: " + tojson(explain));
+    } else {
+        assert(!unpackStage.wholeBucketFilter, "Incorrect wholeBucketFilter: " + tojson(explain));
+    }
+
+    const docs = coll.aggregate([...pipeline, {$sort: {time: 1}}]).toArray();
+    assert.eq(docs.length, expectedDocs.length, "Incorrect docs: " + tojson(docs));
+    expectedDocs.forEach((doc, i) => {
+        assert.docEq(doc, expectedDocs[i], "Incorrect docs: " + tojson(docs));
+    });
+};
+
+const minTimeField = `control.min.${timeField}`;
+const maxTimeField = `control.max.${timeField}`;
+
+// $gt on time
+runTest({
+    eventFilter: {[timeField]: {$gt: aTime}},
+    wholeBucketFilter: {[minTimeField]: {$gt: aTime}},
+    expectedDocs: [
+        {[timeField]: ISODate('2022-01-01T00:00:04'), [measureField]: 4, [metaField]: 1},
+        {[timeField]: ISODate('2022-01-01T00:00:05'), [measureField]: 5, [metaField]: 2},
+        {[timeField]: ISODate('2022-01-01T00:00:06'), [measureField]: 6, [metaField]: 3},
+        {[timeField]: ISODate('2022-01-01T00:00:07'), [measureField]: 7, [metaField]: 3},
+        {[timeField]: ISODate('2022-01-01T00:00:08'), [measureField]: 8, [metaField]: 3},
+        {[timeField]: ISODate('2022-01-01T00:00:09'), [measureField]: 9, [metaField]: 4},
+    ],
+});
+
+// $gt on measurement
+runTest({
+    eventFilter: {[measureField]: {$gt: aMeasure}},
+    expectedDocs: [
+        {[timeField]: ISODate('2022-01-01T00:00:04'), [measureField]: 4, [metaField]: 1},
+        {[timeField]: ISODate('2022-01-01T00:00:05'), [measureField]: 5, [metaField]: 2},
+        {[timeField]: ISODate('2022-01-01T00:00:06'), [measureField]: 6, [metaField]: 3},
+        {[timeField]: ISODate('2022-01-01T00:00:07'), [measureField]: 7, [metaField]: 3},
+        {[timeField]: ISODate('2022-01-01T00:00:08'), [measureField]: 8, [metaField]: 3},
+        {[timeField]: ISODate('2022-01-01T00:00:09'), [measureField]: 9, [metaField]: 4},
+    ],
+});
+
+// $gt in $expr on time
+runTest({
+    pipeline: [{$match: {$expr: {$gt: [`$${timeField}`, {$const: aTime}]}}}],
+    eventFilter: {
+        $and: [
+            {[timeField]: {$_internalExprGt: aTime}},
+            {$expr: {$gt: [`$${timeField}`, {$const: aTime}]}},
+        ]
+    },
+    wholeBucketFilter: {
+        $and: [
+            {[minTimeField]: {$_internalExprGt: aTime}},
+            {[minTimeField]: {$_internalExprGt: aTime}},
+        ]
+    },
+    expectedDocs: [
+        {[timeField]: ISODate('2022-01-01T00:00:04'), [measureField]: 4, [metaField]: 1},
+        {[timeField]: ISODate('2022-01-01T00:00:05'), [measureField]: 5, [metaField]: 2},
+        {[timeField]: ISODate('2022-01-01T00:00:06'), [measureField]: 6, [metaField]: 3},
+        {[timeField]: ISODate('2022-01-01T00:00:07'), [measureField]: 7, [metaField]: 3},
+        {[timeField]: ISODate('2022-01-01T00:00:08'), [measureField]: 8, [metaField]: 3},
+        {[timeField]: ISODate('2022-01-01T00:00:09'), [measureField]: 9, [metaField]: 4},
+    ],
+});
+
+// $gte on time
+runTest({
+    eventFilter: {[timeField]: {$gte: aTime}},
+    wholeBucketFilter: {[minTimeField]: {$gte: aTime}},
+    expectedDocs: [
+        {[timeField]: ISODate('2022-01-01T00:00:03'), [measureField]: 3, [metaField]: 1},
+        {[timeField]: ISODate('2022-01-01T00:00:04'), [measureField]: 4, [metaField]: 1},
+        {[timeField]: ISODate('2022-01-01T00:00:05'), [measureField]: 5, [metaField]: 2},
+        {[timeField]: ISODate('2022-01-01T00:00:06'), [measureField]: 6, [metaField]: 3},
+        {[timeField]: ISODate('2022-01-01T00:00:07'), [measureField]: 7, [metaField]: 3},
+        {[timeField]: ISODate('2022-01-01T00:00:08'), [measureField]: 8, [metaField]: 3},
+        {[timeField]: ISODate('2022-01-01T00:00:09'), [measureField]: 9, [metaField]: 4},
+    ],
+});
+
+// $gte on measurement
+runTest({
+    eventFilter: {[measureField]: {$gte: aMeasure}},
+    expectedDocs: [
+        {[timeField]: ISODate('2022-01-01T00:00:03'), [measureField]: 3, [metaField]: 1},
+        {[timeField]: ISODate('2022-01-01T00:00:04'), [measureField]: 4, [metaField]: 1},
+        {[timeField]: ISODate('2022-01-01T00:00:05'), [measureField]: 5, [metaField]: 2},
+        {[timeField]: ISODate('2022-01-01T00:00:06'), [measureField]: 6, [metaField]: 3},
+        {[timeField]: ISODate('2022-01-01T00:00:07'), [measureField]: 7, [metaField]: 3},
+        {[timeField]: ISODate('2022-01-01T00:00:08'), [measureField]: 8, [metaField]: 3},
+        {[timeField]: ISODate('2022-01-01T00:00:09'), [measureField]: 9, [metaField]: 4},
+    ],
+});
+
+// $gte in $expr on time
+runTest({
+    pipeline: [{$match: {$expr: {$gte: [`$${timeField}`, {$const: aTime}]}}}],
+    eventFilter: {
+        $and: [
+            {[timeField]: {$_internalExprGte: aTime}},
+            {$expr: {$gte: [`$${timeField}`, {$const: aTime}]}},
+        ]
+    },
+    wholeBucketFilter: {
+        $and: [
+            {[minTimeField]: {$_internalExprGte: aTime}},
+            {[minTimeField]: {$_internalExprGte: aTime}},
+        ]
+    },
+    expectedDocs: [
+        {[timeField]: ISODate('2022-01-01T00:00:03'), [measureField]: 3, [metaField]: 1},
+        {[timeField]: ISODate('2022-01-01T00:00:04'), [measureField]: 4, [metaField]: 1},
+        {[timeField]: ISODate('2022-01-01T00:00:05'), [measureField]: 5, [metaField]: 2},
+        {[timeField]: ISODate('2022-01-01T00:00:06'), [measureField]: 6, [metaField]: 3},
+        {[timeField]: ISODate('2022-01-01T00:00:07'), [measureField]: 7, [metaField]: 3},
+        {[timeField]: ISODate('2022-01-01T00:00:08'), [measureField]: 8, [metaField]: 3},
+        {[timeField]: ISODate('2022-01-01T00:00:09'), [measureField]: 9, [metaField]: 4},
+    ],
+});
+
+// $lt on time
+runTest({
+    eventFilter: {[timeField]: {$lt: aTime}},
+    wholeBucketFilter: {[maxTimeField]: {$lt: aTime}},
+    expectedDocs: [
+        {[timeField]: ISODate('2022-01-01T00:00:01'), [measureField]: 1, [metaField]: 0},
+        {[timeField]: ISODate('2022-01-01T00:00:02'), [measureField]: 2, [metaField]: 1},
+    ],
+});
+
+// $lt on measurement
+runTest({
+    eventFilter: {[measureField]: {$lt: aMeasure}},
+    expectedDocs: [
+        {[timeField]: ISODate('2022-01-01T00:00:01'), [measureField]: 1, [metaField]: 0},
+        {[timeField]: ISODate('2022-01-01T00:00:02'), [measureField]: 2, [metaField]: 1},
+    ],
+});
+
+// $lt in $expr on time
+runTest({
+    pipeline: [{$match: {$expr: {$lt: [`$${timeField}`, {$const: aTime}]}}}],
+    eventFilter: {
+        $and: [
+            {[timeField]: {$_internalExprLt: aTime}},
+            {$expr: {$lt: [`$${timeField}`, {$const: aTime}]}},
+        ]
+    },
+    wholeBucketFilter: {
+        $and: [
+            {[maxTimeField]: {$_internalExprLt: aTime}},
+            {[maxTimeField]: {$_internalExprLt: aTime}},
+        ]
+    },
+    expectedDocs: [
+        {[timeField]: ISODate('2022-01-01T00:00:01'), [measureField]: 1, [metaField]: 0},
+        {[timeField]: ISODate('2022-01-01T00:00:02'), [measureField]: 2, [metaField]: 1},
+    ],
+});
+
+// $lte on time
+runTest({
+    eventFilter: {[timeField]: {$lte: aTime}},
+    wholeBucketFilter: {[maxTimeField]: {$lte: aTime}},
+    expectedDocs: [
+        {[timeField]: ISODate('2022-01-01T00:00:01'), [measureField]: 1, [metaField]: 0},
+        {[timeField]: ISODate('2022-01-01T00:00:02'), [measureField]: 2, [metaField]: 1},
+        {[timeField]: ISODate('2022-01-01T00:00:03'), [measureField]: 3, [metaField]: 1},
+    ],
+});
+
+// $lte in $expr on time
+runTest({
+    pipeline: [{$match: {$expr: {$lte: [`$${timeField}`, {$const: aTime}]}}}],
+    eventFilter: {
+        $and: [
+            {[timeField]: {$_internalExprLte: aTime}},
+            {$expr: {$lte: [`$${timeField}`, {$const: aTime}]}},
+        ]
+    },
+    wholeBucketFilter: {
+        $and: [
+            {[maxTimeField]: {$_internalExprLte: aTime}},
+            {[maxTimeField]: {$_internalExprLte: aTime}},
+        ]
+    },
+    expectedDocs: [
+        {[timeField]: ISODate('2022-01-01T00:00:01'), [measureField]: 1, [metaField]: 0},
+        {[timeField]: ISODate('2022-01-01T00:00:02'), [measureField]: 2, [metaField]: 1},
+        {[timeField]: ISODate('2022-01-01T00:00:03'), [measureField]: 3, [metaField]: 1},
+    ],
+});
+
+// $lte on measurement
+runTest({
+    eventFilter: {[measureField]: {$lte: aMeasure}},
+    expectedDocs: [
+        {[timeField]: ISODate('2022-01-01T00:00:01'), [measureField]: 1, [metaField]: 0},
+        {[timeField]: ISODate('2022-01-01T00:00:02'), [measureField]: 2, [metaField]: 1},
+        {[timeField]: ISODate('2022-01-01T00:00:03'), [measureField]: 3, [metaField]: 1},
+    ],
+});
+
+// $eq on time
+runTest({
+    eventFilter: {[timeField]: {$eq: aTime}},
+    wholeBucketFilter: {$and: [{[minTimeField]: {$eq: aTime}}, {[maxTimeField]: {$eq: aTime}}]},
+    expectedDocs: [
+        {[timeField]: ISODate('2022-01-01T00:00:03'), [measureField]: 3, [metaField]: 1},
+    ],
+});
+
+// $eq in $expr on time
+runTest({
+    pipeline: [{$match: {$expr: {$eq: [`$${timeField}`, {$const: aTime}]}}}],
+    eventFilter: {
+        $and: [
+            {[timeField]: {$_internalExprEq: aTime}},
+            {$expr: {$eq: [`$${timeField}`, {$const: aTime}]}},
+        ]
+    },
+    wholeBucketFilter: {
+        $and: [
+            {[minTimeField]: {$_internalExprEq: aTime}},
+            {[maxTimeField]: {$_internalExprEq: aTime}},
+            {[minTimeField]: {$_internalExprEq: aTime}},
+            {[maxTimeField]: {$_internalExprEq: aTime}},
+        ]
+    },
+    expectedDocs: [
+        {[timeField]: ISODate('2022-01-01T00:00:03'), [measureField]: 3, [metaField]: 1},
+    ],
+});
+
+// $eq on measurement
+runTest({
+    eventFilter: {[measureField]: {$eq: aMeasure}},
+    expectedDocs: [
+        {[timeField]: ISODate('2022-01-01T00:00:03'), [measureField]: 3, [metaField]: 1},
+    ],
+});
+
+// $and on time
+runTest({
+    eventFilter: {$and: [{[timeField]: {$gt: aTime}}, {[timeField]: {$lt: bTime}}]},
+    wholeBucketFilter: {
+        $and: [
+            {[minTimeField]: {$gt: aTime}},
+            {[maxTimeField]: {$lt: bTime}},
+        ]
+    },
+    expectedDocs: [
+        {[timeField]: ISODate('2022-01-01T00:00:04'), [measureField]: 4, [metaField]: 1},
+        {[timeField]: ISODate('2022-01-01T00:00:05'), [measureField]: 5, [metaField]: 2},
+        {[timeField]: ISODate('2022-01-01T00:00:06'), [measureField]: 6, [metaField]: 3},
+    ],
+});
+
+// $or on time
+runTest({
+    eventFilter: {$or: [{[timeField]: {$lte: aTime}}, {[timeField]: {$gte: bTime}}]},
+    wholeBucketFilter: {
+        $or: [
+            {[maxTimeField]: {$lte: aTime}},
+            {[minTimeField]: {$gte: bTime}},
+        ]
+    },
+    expectedDocs: [
+        {[timeField]: ISODate('2022-01-01T00:00:01'), [measureField]: 1, [metaField]: 0},
+        {[timeField]: ISODate('2022-01-01T00:00:02'), [measureField]: 2, [metaField]: 1},
+        {[timeField]: ISODate('2022-01-01T00:00:03'), [measureField]: 3, [metaField]: 1},
+        {[timeField]: ISODate('2022-01-01T00:00:07'), [measureField]: 7, [metaField]: 3},
+        {[timeField]: ISODate('2022-01-01T00:00:08'), [measureField]: 8, [metaField]: 3},
+        {[timeField]: ISODate('2022-01-01T00:00:09'), [measureField]: 9, [metaField]: 4},
+    ],
+});
+
+// $match on time and meta
+runTest({
+    pipeline: [{$match: {$and: [{[timeField]: {$gt: aTime}}, {[metaField]: {$lte: bMeta}}]}}],
+    eventFilter: {[timeField]: {$gt: aTime}},
+    wholeBucketFilter: {
+        [minTimeField]: {$gt: aTime},
+    },
+    expectedDocs: [
+        {[timeField]: ISODate('2022-01-01T00:00:04'), [measureField]: 4, [metaField]: 1},
+        {[timeField]: ISODate('2022-01-01T00:00:05'), [measureField]: 5, [metaField]: 2},
+        {[timeField]: ISODate('2022-01-01T00:00:06'), [measureField]: 6, [metaField]: 3},
+        {[timeField]: ISODate('2022-01-01T00:00:07'), [measureField]: 7, [metaField]: 3},
+        {[timeField]: ISODate('2022-01-01T00:00:08'), [measureField]: 8, [metaField]: 3},
+    ],
+});
+
+// $match on time or meta
+runTest({
+    eventFilter: {$or: [{[timeField]: {$lte: aTime}}, {[metaField]: {$gt: bMeta}}]},
+    wholeBucketFilter: {
+        $or: [
+            {[maxTimeField]: {$lte: aTime}},
+            {[metaField]: {$gt: bMeta}},
+        ]
+    },
+    expectedDocs: [
+        {[timeField]: ISODate('2022-01-01T00:00:01'), [measureField]: 1, [metaField]: 0},
+        {[timeField]: ISODate('2022-01-01T00:00:02'), [measureField]: 2, [metaField]: 1},
+        {[timeField]: ISODate('2022-01-01T00:00:03'), [measureField]: 3, [metaField]: 1},
+        {[timeField]: ISODate('2022-01-01T00:00:09'), [measureField]: 9, [metaField]: 4},
+    ],
+});
+
+// double $match
+runTest({
+    pipeline: [{$match: {[timeField]: {$gt: aTime}}}, {$match: {[timeField]: {$lt: bTime}}}],
+    eventFilter: {$and: [{[timeField]: {$gt: aTime}}, {[timeField]: {$lt: bTime}}]},
+    wholeBucketFilter: {
+        $and: [
+            {[minTimeField]: {$gt: aTime}},
+            {[maxTimeField]: {$lt: bTime}},
+        ]
+    },
+    expectedDocs: [
+        {[timeField]: ISODate('2022-01-01T00:00:04'), [measureField]: 4, [metaField]: 1},
+        {[timeField]: ISODate('2022-01-01T00:00:05'), [measureField]: 5, [metaField]: 2},
+        {[timeField]: ISODate('2022-01-01T00:00:06'), [measureField]: 6, [metaField]: 3},
+    ],
+});
+
+// triple $match
+runTest({
+    pipeline: [
+        {$match: {[timeField]: {$gt: aTime}}},
+        {$match: {[timeField]: {$lt: bTime}}},
+        {$match: {[timeField]: {$lt: aTime}}},
+    ],
+    eventFilter: {
+        $and:
+            [{[timeField]: {$gt: aTime}}, {[timeField]: {$lt: bTime}}, {[timeField]: {$lt: aTime}}]
+    },
+    wholeBucketFilter: {
+        $and: [
+            {[minTimeField]: {$gt: aTime}},
+            {[maxTimeField]: {$lt: bTime}},
+            {[maxTimeField]: {$lt: aTime}},
+        ]
+    },
+    expectedDocs: [],
+});
+})();
diff --git a/jstests/core/timeseries/timeseries_match_pushdown_with_project.js b/jstests/core/timeseries/timeseries_match_pushdown_with_project.js
new file mode 100644
index 00000000000..f63a4be214d
--- /dev/null
+++ b/jstests/core/timeseries/timeseries_match_pushdown_with_project.js
@@ -0,0 +1,130 @@
+/**
+ * Tests that the unpacking stage has correct unpacking behaviour when $match is pushed into it.
+ *
+ * @tags: [
+ *   requires_timeseries,
+ *   requires_fcv_62,
+ *   does_not_support_stepdowns,
+ *   directly_against_shardsvrs_incompatible,
+ * ]
+ */
+(function() {
+"use strict";
+
+load("jstests/libs/analyze_plan.js");  // For getAggPlanStages
+
+const coll = db.timeseries_match_pushdown_with_project;
+coll.drop();
+
+const timeField = 'time';
+const metaField = 'meta';
+assert.commandWorked(db.createCollection(coll.getName(), {timeseries: {timeField, metaField}}));
+
+const aTime = ISODate('2022-01-01T00:00:00');
+assert.commandWorked(coll.insert([
+    {[timeField]: aTime, a: 1, b: 1, _id: 1},
+    {[timeField]: aTime, a: 2, b: 2, _id: 2},
+    {[timeField]: aTime, a: 3, b: 3, _id: 3},
+    {[timeField]: aTime, a: 4, b: 4, _id: 4},
+    {[timeField]: aTime, a: 5, b: 5, _id: 5},
+    {[timeField]: aTime, a: 6, b: 6, _id: 6},
+    {[timeField]: aTime, a: 7, b: 7, _id: 7},
+    {[timeField]: aTime, a: 8, b: 8, _id: 8},
+    {[timeField]: aTime, a: 9, b: 9, _id: 9},
+]));
+
+/**
+ * Runs a 'pipeline', asserts the bucket unpacking 'behaviour' (either include or exclude) is
+ * expected.
+ */
+const runTest = function({pipeline, behaviour, expectedDocs}) {
+    const explain = assert.commandWorked(coll.explain().aggregate(pipeline));
+    const unpackStages = getAggPlanStages(explain, '$_internalUnpackBucket');
+    assert.eq(1,
+              unpackStages.length,
+              "Should only have a single $_internalUnpackBucket stage: " + tojson(explain));
+    const unpackStage = unpackStages[0].$_internalUnpackBucket;
+    if (behaviour.include) {
+        assert(unpackStage.include,
+               "Unpacking stage must have 'include' behaviour: " + tojson(explain));
+        assert.sameMembers(behaviour.include, unpackStage.include);
+    }
+    if (behaviour.exclude) {
+        assert(unpackStage.exclude,
+               "Unpacking stage must have 'exclude' behaviour: " + tojson(explain));
+        assert.sameMembers(behaviour.exclude, unpackStage.exclude);
+    }
+
+    const docs = coll.aggregate([...pipeline, {$sort: {a: 1, b: 1, _id: 1}}]).toArray();
+    assert.eq(docs.length, expectedDocs.length, "Incorrect docs: " + tojson(docs));
+    expectedDocs.forEach((doc, i) => {
+        assert.docEq(doc, expectedDocs[i], "Incorrect docs: " + tojson(docs));
+    });
+};
+
+runTest({
+    pipeline: [{$match: {a: {$gt: 5}}}, {$project: {b: 1}}],
+    behaviour: {include: ['_id', 'a', 'b']},
+    expectedDocs: [
+        {a: 6, b: 6, _id: 6},
+        {a: 7, b: 7, _id: 7},
+        {a: 8, b: 8, _id: 8},
+        {a: 9, b: 9, _id: 9},
+    ],
+});
+
+runTest({
+    pipeline: [{$match: {a: {$gt: 5}}}, {$project: {_id: 0, b: 1}}],
+    behaviour: {include: ['a', 'b']},
+    expectedDocs: [
+        {a: 6, b: 6},
+        {a: 7, b: 7},
+        {a: 8, b: 8},
+        {a: 9, b: 9},
+    ],
+});
+
+runTest({
+    pipeline: [{$match: {a: {$gt: 5}}}, {$project: {a: 1}}],
+    behaviour: {include: ['_id', 'a']},
+    expectedDocs: [
+        {a: 6, _id: 6},
+        {a: 7, _id: 7},
+        {a: 8, _id: 8},
+        {a: 9, _id: 9},
+    ],
+});
+
+runTest({
+    pipeline: [{$match: {a: {$gt: 5}}}, {$project: {_id: 0, a: 1}}],
+    behaviour: {include: ['a']},
+    expectedDocs: [
+        {a: 6},
+        {a: 7},
+        {a: 8},
+        {a: 9},
+    ],
+});
+
+runTest({
+    pipeline: [{$match: {a: {$gt: 5}}}, {$project: {a: 0}}],
+    behaviour: {exclude: []},
+    expectedDocs: [
+        {[timeField]: aTime, b: 6, _id: 6},
+        {[timeField]: aTime, b: 7, _id: 7},
+        {[timeField]: aTime, b: 8, _id: 8},
+        {[timeField]: aTime, b: 9, _id: 9},
+    ],
+});
+
+runTest({
+    pipeline: [{$match: {a: {$gt: 5}}}, {$project: {b: 0}}],
+    behaviour: {exclude: []},
+    expectedDocs: [
+        {[timeField]: aTime, a: 6, _id: 6},
+        {[timeField]: aTime, a: 7, _id: 7},
+        {[timeField]: aTime, a: 8, _id: 8},
+        {[timeField]: aTime, a: 9, _id: 9},
+    ],
+});
+})();
diff --git a/src/mongo/db/exec/bucket_unpacker.cpp b/src/mongo/db/exec/bucket_unpacker.cpp
index d2ad79f0e03..6a819181df9 100644
--- a/src/mongo/db/exec/bucket_unpacker.cpp
+++ b/src/mongo/db/exec/bucket_unpacker.cpp
@@ -41,6 +41,7 @@
 #include "mongo/db/matcher/expression_parser.h"
 #include "mongo/db/matcher/expression_tree.h"
 #include "mongo/db/matcher/extensions_callback_noop.h"
+#include "mongo/db/matcher/rewrite_expr.h"
 #include "mongo/db/pipeline/expression.h"
 #include "mongo/db/timeseries/timeseries_options.h"
 
@@ -130,9 +131,9 @@ std::unique_ptr<MatchExpression> makeOr(std::vector<std::unique_ptr<MatchExpress
     return std::make_unique<OrMatchExpression>(std::move(nontrivial));
 }
 
-std::unique_ptr<MatchExpression> handleIneligible(IneligiblePredicatePolicy policy,
-                                                  const MatchExpression* matchExpr,
-                                                  StringData message) {
+BucketSpec::BucketPredicate handleIneligible(IneligiblePredicatePolicy policy,
+                                             const MatchExpression* matchExpr,
+                                             StringData message) {
     switch (policy) {
         case IneligiblePredicatePolicy::kError:
             uasserted(
@@ -140,7 +141,7 @@ std::unique_ptr<MatchExpression> handleIneligible(IneligiblePredicatePoli
                 "Error translating non-metadata time-series predicate to operate on buckets: " +
                     message + ": " + matchExpr->serialize().toString());
         case IneligiblePredicatePolicy::kIgnore:
-            return nullptr;
+            return {};
     }
     MONGO_UNREACHABLE_TASSERT(5916307);
 }
@@ -204,40 +205,32 @@ std::unique_ptr<MatchExpression> createTypeEqualityPredicate(
     return makeOr(std::move(typeEqualityPredicates));
 }
 
-std::unique_ptr<MatchExpression> createComparisonPredicate(
-    const ComparisonMatchExpressionBase* matchExpr,
+boost::optional<StringData> checkComparisonPredicateErrors(
+    const MatchExpression* matchExpr,
+    const StringData matchExprPath,
+    const BSONElement& matchExprData,
     const BucketSpec& bucketSpec,
-    int bucketMaxSpanSeconds,
-    ExpressionContext::CollationMatchesDefault collationMatchesDefault,
-    boost::intrusive_ptr<ExpressionContext> pExpCtx,
-    bool haveComputedMetaField,
-    bool includeMetaField,
-    bool assumeNoMixedSchemaData,
-    IneligiblePredicatePolicy policy) {
+    ExpressionContext::CollationMatchesDefault collationMatchesDefault) {
     using namespace timeseries;
 
-    const auto matchExprPath = matchExpr->path();
-    const auto matchExprData = matchExpr->getData();
-
     // The control field's min and max are chosen using a field-order insensitive comparator, while
     // MatchExpressions use a comparator that treats field-order as significant. Because of this we
     // will not perform this optimization on queries with operands of compound types.
     if (matchExprData.type() == BSONType::Object || matchExprData.type() == BSONType::Array)
-        return handleIneligible(policy, matchExpr, "operand can't be an object or array"_sd);
+        return "operand can't be an object or array"_sd;
 
     // MatchExpressions have special comparison semantics regarding null, in that {$eq: null} will
     // match all documents where the field is either null or missing. Because this is different
     // from both the comparison semantics that InternalExprComparison expressions and the control's
     // min and max fields use, we will not perform this optimization on queries with null operands.
     if (matchExprData.type() == BSONType::jstNULL)
-        return handleIneligible(policy, matchExpr, "can't handle {$eq: null}"_sd);
+        return "can't handle {$eq: null}"_sd;
 
     // The control field's min and max are chosen based on the collation of the collection. If the
     // query's collation does not match the collection's collation and the query operand is a
     // string or compound type (skipped above) we will not perform this optimization.
     if (collationMatchesDefault == ExpressionContext::CollationMatchesDefault::kNo &&
         matchExprData.type() == BSONType::String) {
-        return handleIneligible(
-            policy, matchExpr, "can't handle string comparison with a non-default collation"_sd);
+        return "can't handle string comparison with a non-default collation"_sd;
     }
 
     // This function only handles time and measurement predicates--not metadata.
@@ -252,19 +245,45 @@ std::unique_ptr<MatchExpression> createComparisonPredicate(
 
     // We must avoid mapping predicates on fields computed via $addFields or a computed $project.
     if (bucketSpec.fieldIsComputed(matchExprPath.toString())) {
-        return handleIneligible(policy, matchExpr, "can't handle a computed field");
+        return "can't handle a computed field"_sd;
     }
 
     const auto isTimeField = (matchExprPath == bucketSpec.timeField());
     if (isTimeField && matchExprData.type() != BSONType::Date) {
         // Users are not allowed to insert non-date measurements into time field. So this query
         // would not match anything. We do not need to optimize for this case.
-        return handleIneligible(
-            policy,
-            matchExpr,
-            "This predicate will never be true, because the time field always contains a Date");
+        return "This predicate will never be true, because the time field always contains a Date"_sd;
+    }
+
+    return boost::none;
+}
+
+std::unique_ptr<MatchExpression> createComparisonPredicate(
+    const ComparisonMatchExpressionBase* matchExpr,
+    const BucketSpec& bucketSpec,
+    int bucketMaxSpanSeconds,
+    ExpressionContext::CollationMatchesDefault collationMatchesDefault,
+    boost::intrusive_ptr<ExpressionContext> pExpCtx,
+    bool haveComputedMetaField,
+    bool includeMetaField,
+    bool assumeNoMixedSchemaData,
+    IneligiblePredicatePolicy policy) {
+    using namespace timeseries;
+    const auto matchExprPath = matchExpr->path();
+    const auto matchExprData = matchExpr->getData();
+
+    const auto error = checkComparisonPredicateErrors(
+        matchExpr, matchExprPath, matchExprData, bucketSpec, collationMatchesDefault);
+    if (error) {
+        return handleIneligible(policy, matchExpr, *error).loosePredicate;
     }
 
+    const auto isTimeField = (matchExprPath == bucketSpec.timeField());
+    auto minPath = std::string{kControlMinFieldNamePrefix} + matchExprPath;
+    const StringData minPathStringData(minPath);
+    auto maxPath = std::string{kControlMaxFieldNamePrefix} + matchExprPath;
+    const StringData maxPathStringData(maxPath);
+
     BSONObj minTime;
     BSONObj maxTime;
     if (isTimeField) {
@@ -273,11 +292,6 @@
         maxTime = BSON("" << timeField + Seconds(bucketMaxSpanSeconds));
     }
 
-    const auto minPath = std::string{kControlMinFieldNamePrefix} + matchExprPath;
-    const StringData minPathStringData(minPath);
-    const auto maxPath = std::string{kControlMaxFieldNamePrefix} + matchExprPath;
-    const StringData maxPathStringData(maxPath);
-
     switch (matchExpr->matchType()) {
         case MatchExpression::EQ:
         case MatchExpression::INTERNAL_EXPR_EQ:
@@ -481,9 +495,108 @@ std::unique_ptr<MatchExpression> createComparisonPredicate(
     MONGO_UNREACHABLE_TASSERT(5348303);
 }
 
+std::unique_ptr<MatchExpression> createTightComparisonPredicate(
+    const ComparisonMatchExpressionBase* matchExpr,
+    const BucketSpec& bucketSpec,
+    ExpressionContext::CollationMatchesDefault collationMatchesDefault) {
+    using namespace timeseries;
+    const auto matchExprPath = matchExpr->path();
+    const auto matchExprData = matchExpr->getData();
+
+    const auto error = checkComparisonPredicateErrors(
+        matchExpr, matchExprPath, matchExprData, bucketSpec, collationMatchesDefault);
+    if (error) {
+        return handleIneligible(BucketSpec::IneligiblePredicatePolicy::kIgnore, matchExpr, *error)
+            .loosePredicate;
+    }
+
+    // We have to disable the tight predicate for the measurement field. There might be missing
+    // values in the measurements and the control fields ignore them on insertion. So we cannot use
+    // bucket min and max to determine the property of all events in the bucket. For measurement
+    // fields, there's a further problem that if the control field is an array, we cannot generate
+    // the tight predicate because the predicate will be implicitly mapped over the array elements.
+    if (matchExprPath != bucketSpec.timeField()) {
+        return handleIneligible(BucketSpec::IneligiblePredicatePolicy::kIgnore,
+                                matchExpr,
+                                "can't create tight predicate on non-time field")
+            .tightPredicate;
+    }
+
+    auto minPath = std::string{kControlMinFieldNamePrefix} + matchExprPath;
+    const StringData minPathStringData(minPath);
+    auto maxPath = std::string{kControlMaxFieldNamePrefix} + matchExprPath;
+    const StringData maxPathStringData(maxPath);
+
+    switch (matchExpr->matchType()) {
+        // All events satisfy $eq if bucket min and max both satisfy $eq.
+        case MatchExpression::EQ:
+            return makePredicate(
+                MatchExprPredicate<EqualityMatchExpression>(minPathStringData, matchExprData),
+                MatchExprPredicate<EqualityMatchExpression>(maxPathStringData, matchExprData));
+        case MatchExpression::INTERNAL_EXPR_EQ:
+            return makePredicate(
+                MatchExprPredicate<InternalExprEqMatchExpression>(minPathStringData, matchExprData),
+                MatchExprPredicate<InternalExprEqMatchExpression>(maxPathStringData,
+                                                                  matchExprData));
+
+        // All events satisfy $gt if bucket min satisfy $gt.
+        case MatchExpression::GT:
+            return std::make_unique<GTMatchExpression>(minPathStringData, matchExprData);
+        case MatchExpression::INTERNAL_EXPR_GT:
+            return std::make_unique<InternalExprGTMatchExpression>(minPathStringData,
+                                                                   matchExprData);
+
+        // All events satisfy $gte if bucket min satisfy $gte.
+        case MatchExpression::GTE:
+            return std::make_unique<GTEMatchExpression>(minPathStringData, matchExprData);
+        case MatchExpression::INTERNAL_EXPR_GTE:
+            return std::make_unique<InternalExprGTEMatchExpression>(minPathStringData,
+                                                                    matchExprData);
+
+        // All events satisfy $lt if bucket max satisfy $lt.
+        case MatchExpression::LT:
+            return std::make_unique<LTMatchExpression>(maxPathStringData, matchExprData);
+        case MatchExpression::INTERNAL_EXPR_LT:
+            return std::make_unique<InternalExprLTMatchExpression>(maxPathStringData,
+                                                                   matchExprData);
+
+        // All events satisfy $lte if bucket max satisfy $lte.
+        case MatchExpression::LTE:
+            return std::make_unique<LTEMatchExpression>(maxPathStringData, matchExprData);
+        case MatchExpression::INTERNAL_EXPR_LTE:
+            return std::make_unique<InternalExprLTEMatchExpression>(maxPathStringData,
+                                                                    matchExprData);
+
+        default:
+            MONGO_UNREACHABLE_TASSERT(7026901);
+    }
+}
+
+std::unique_ptr<MatchExpression> createTightExprComparisonPredicate(
+    const ExprMatchExpression* matchExpr,
+    const BucketSpec& bucketSpec,
+    ExpressionContext::CollationMatchesDefault collationMatchesDefault,
+    boost::intrusive_ptr<ExpressionContext> pExpCtx) {
+    using namespace timeseries;
+    auto rewriteMatchExpr = RewriteExpr::rewrite(matchExpr->getExpression(), pExpCtx->getCollator())
+                                .releaseMatchExpression();
+    if (rewriteMatchExpr &&
+        ComparisonMatchExpressionBase::isInternalExprComparison(rewriteMatchExpr->matchType())) {
+        auto compareMatchExpr =
+            checked_cast<const ComparisonMatchExpressionBase*>(rewriteMatchExpr.get());
+        return createTightComparisonPredicate(
+            compareMatchExpr, bucketSpec, collationMatchesDefault);
+    }
+
+    return handleIneligible(BucketSpec::IneligiblePredicatePolicy::kIgnore,
                             matchExpr,
+                            "can't handle non-comparison $expr match expression")
+        .tightPredicate;
+}
+
 }  // namespace
 
-std::unique_ptr<MatchExpression> BucketSpec::createPredicatesOnBucketLevelField(
+BucketSpec::BucketPredicate BucketSpec::createPredicatesOnBucketLevelField(
     const MatchExpression* matchExpr,
     const BucketSpec& bucketSpec,
     int bucketMaxSpanSeconds,
@@ -516,39 +629,61 @@ std::unique_ptr<MatchExpression> BucketSpec::createPredicatesOnBucketLevelField(
         if (!includeMetaField)
            return handleIneligible(policy, matchExpr, "cannot handle an excluded meta field");
 
-        auto result = matchExpr->shallowClone();
+        auto looseResult = matchExpr->shallowClone();
         expression::applyRenamesToExpression(
-            result.get(),
+            looseResult.get(),
             {{bucketSpec.metaField().value(), timeseries::kBucketMetaFieldName.toString()}});
-        return result;
+        auto tightResult = looseResult->shallowClone();
+        return {std::move(looseResult), std::move(tightResult)};
     }
 
     if (matchExpr->matchType() == MatchExpression::AND) {
         auto nextAnd = static_cast<const AndMatchExpression*>(matchExpr);
-        auto andMatchExpr = std::make_unique<AndMatchExpression>();
-
+        auto looseAndExpression = std::make_unique<AndMatchExpression>();
+        auto tightAndExpression = std::make_unique<AndMatchExpression>();
        for (size_t i = 0; i < nextAnd->numChildren(); i++) {
-            if (auto child = createPredicatesOnBucketLevelField(nextAnd->getChild(i),
-                                                                bucketSpec,
-                                                                bucketMaxSpanSeconds,
-                                                                collationMatchesDefault,
-                                                                pExpCtx,
-                                                                haveComputedMetaField,
-                                                                includeMetaField,
-                                                                assumeNoMixedSchemaData,
-                                                                policy)) {
-                andMatchExpr->add(std::move(child));
+            auto child = createPredicatesOnBucketLevelField(nextAnd->getChild(i),
+                                                            bucketSpec,
+                                                            bucketMaxSpanSeconds,
+                                                            collationMatchesDefault,
+                                                            pExpCtx,
+                                                            haveComputedMetaField,
+                                                            includeMetaField,
+                                                            assumeNoMixedSchemaData,
+                                                            policy);
+            if (child.loosePredicate) {
+                looseAndExpression->add(std::move(child.loosePredicate));
+            }
+
+            if (tightAndExpression && child.tightPredicate) {
+                tightAndExpression->add(std::move(child.tightPredicate));
+            } else {
+                // For tight expression, null means always false, we can short circuit here.
+                tightAndExpression = nullptr;
             }
         }
-        if (andMatchExpr->numChildren() == 1) {
-            return andMatchExpr->releaseChild(0);
+
+        // For a loose predicate, if we are unable to generate an expression we can just treat it as
+        // always true or an empty AND. This is because we are trying to generate a predicate that
+        // will match the superset of our actual results.
+        std::unique_ptr<MatchExpression> looseExpression = nullptr;
+        if (looseAndExpression->numChildren() == 1) {
+            looseExpression = looseAndExpression->releaseChild(0);
+        } else if (looseAndExpression->numChildren() > 1) {
+            looseExpression = std::move(looseAndExpression);
         }
-        if (andMatchExpr->numChildren() > 0) {
-            return andMatchExpr;
+
+        // For a tight predicate, if we are unable to generate an expression we can just treat it as
+        // always false. This is because we are trying to generate a predicate that will match the
+        // subset of our actual results.
+        std::unique_ptr<MatchExpression> tightExpression = nullptr;
+        if (tightAndExpression && tightAndExpression->numChildren() == 1) {
+            tightExpression = tightAndExpression->releaseChild(0);
+        } else {
+            tightExpression = std::move(tightAndExpression);
        }
-        // No error message here: an empty AND is valid.
-        return nullptr;
+
+        return {std::move(looseExpression), std::move(tightExpression)};
     } else if (matchExpr->matchType() == MatchExpression::OR) {
         // Given {$or: [A, B]}, suppose A, B can be pushed down as A', B'.
         // If an event matches {$or: [A, B]} then either:
@@ -556,9 +691,9 @@ std::unique_ptr<MatchExpression> BucketSpec::createPredicatesOnBucketLevelField(
         //     - it matches A, which means any bucket containing it matches A'
         //     - it matches B, which means any bucket containing it matches B'
         // So {$or: [A', B']} will capture all the buckets we need to satisfy {$or: [A, B]}.
         auto nextOr = static_cast<const OrMatchExpression*>(matchExpr);
-        auto result = std::make_unique<OrMatchExpression>();
+        auto looseOrExpression = std::make_unique<OrMatchExpression>();
+        auto tightOrExpression = std::make_unique<OrMatchExpression>();
 
-        bool alwaysTrue = false;
         for (size_t i = 0; i < nextOr->numChildren(); i++) {
             auto child = createPredicatesOnBucketLevelField(nextOr->getChild(i),
                                                             bucketSpec,
@@ -569,41 +704,76 @@ std::unique_ptr<MatchExpression> BucketSpec::createPredicatesOnBucketLevelField(
                                                             includeMetaField,
                                                             assumeNoMixedSchemaData,
                                                             policy);
-            if (child) {
-                result->add(std::move(child));
+            if (looseOrExpression && child.loosePredicate) {
+                looseOrExpression->add(std::move(child.loosePredicate));
             } else {
-                // Since this argument is always-true, the entire OR is always-true.
-                alwaysTrue = true;
+                // For loose expression, null means always true, we can short circuit here.
+                looseOrExpression = nullptr;
+            }
 
-                // Only short circuit if we're uninterested in reporting errors.
-                if (policy == IneligiblePredicatePolicy::kIgnore)
-                    break;
+            // For tight predicate, we give a tighter bound so that all events in the bucket
+            // either all matches A or all matches B.
+            if (child.tightPredicate) {
+                tightOrExpression->add(std::move(child.tightPredicate));
             }
         }
-        if (alwaysTrue)
-            return nullptr;
 
-        // No special case for an empty OR: returning nullptr would be incorrect because it
-        // means 'always-true', here.
-        return result;
+        // For a loose predicate, if we are unable to generate an expression we can just treat it as
+        // always true. This is because we are trying to generate a predicate that will match the
+        // superset of our actual results.
+        std::unique_ptr<MatchExpression> looseExpression = nullptr;
+        if (looseOrExpression && looseOrExpression->numChildren() == 1) {
+            looseExpression = looseOrExpression->releaseChild(0);
+        } else {
+            looseExpression = std::move(looseOrExpression);
+        }
+
+        // For a tight predicate, if we are unable to generate an expression we can just treat it as
+        // always false or an empty OR. This is because we are trying to generate a predicate that
+        // will match the subset of our actual results.
+        std::unique_ptr<MatchExpression> tightExpression = nullptr;
+        if (tightOrExpression->numChildren() == 1) {
+            tightExpression = tightOrExpression->releaseChild(0);
+        } else if (tightOrExpression->numChildren() > 1) {
+            tightExpression = std::move(tightOrExpression);
+        }
+
+        return {std::move(looseExpression), std::move(tightExpression)};
     } else if (ComparisonMatchExpression::isComparisonMatchExpression(matchExpr) ||
                ComparisonMatchExpressionBase::isInternalExprComparison(matchExpr->matchType())) {
-        return createComparisonPredicate(
-            checked_cast<const ComparisonMatchExpressionBase*>(matchExpr),
-            bucketSpec,
-            bucketMaxSpanSeconds,
-            collationMatchesDefault,
-            pExpCtx,
-            haveComputedMetaField,
-            includeMetaField,
-            assumeNoMixedSchemaData,
-            policy);
+        return {
+            createComparisonPredicate(checked_cast<const ComparisonMatchExpressionBase*>(matchExpr),
                                      bucketSpec,
+                                      bucketMaxSpanSeconds,
+                                      collationMatchesDefault,
+                                      pExpCtx,
+                                      haveComputedMetaField,
+                                      includeMetaField,
+                                      assumeNoMixedSchemaData,
+                                      policy),
+            createTightComparisonPredicate(
+                checked_cast<const ComparisonMatchExpressionBase*>(matchExpr),
+                bucketSpec,
+                collationMatchesDefault)};
+    } else if (matchExpr->matchType() == MatchExpression::EXPRESSION) {
+        return {
+            // The loose predicate will be pushed before the unpacking which will be inspected by
+            // the query planner. Since the classic planner doesn't handle the $expr expression, we
+            // don't generate the loose predicate.
+            nullptr,
+            createTightExprComparisonPredicate(checked_cast<const ExprMatchExpression*>(matchExpr),
+                                               bucketSpec,
+                                               collationMatchesDefault,
+                                               pExpCtx)};
     } else if (matchExpr->matchType() == MatchExpression::GEO) {
         auto& geoExpr = static_cast<const GeoMatchExpression*>(matchExpr)->getGeoExpression();
         if (geoExpr.getPred() == GeoExpression::WITHIN ||
             geoExpr.getPred() == GeoExpression::INTERSECT) {
-            return std::make_unique<InternalBucketGeoWithinMatchExpression>(
-                geoExpr.getGeometryPtr(), geoExpr.getField());
+            return {std::make_unique<InternalBucketGeoWithinMatchExpression>(
                        geoExpr.getGeometryPtr(), geoExpr.getField()),
+                    nullptr};
         }
     } else if (matchExpr->matchType() == MatchExpression::EXISTS) {
         if (assumeNoMixedSchemaData) {
@@ -613,7 +783,7 @@ std::unique_ptr<MatchExpression> BucketSpec::createPredicatesOnBucketLevelField(
                 std::string{timeseries::kControlMinFieldNamePrefix} + matchExpr->path())));
             result->add(std::make_unique<ExistsMatchExpression>(StringData(
                 std::string{timeseries::kControlMaxFieldNamePrefix} + matchExpr->path())));
-            return result;
+            return {std::move(result), nullptr};
         } else {
             // At time of writing, we only pass 'kError' when creating a partial index, and
             // we know the collection will have no mixed-schema buckets by the time the index is
@@ -622,7 +792,7 @@ std::unique_ptr<MatchExpression> BucketSpec::createPredicatesOnBucketLevelField(
                     "Can't push down {$exists: true} when the collection may have mixed-schema "
                     "buckets.",
                     policy != IneligiblePredicatePolicy::kError);
-            return nullptr;
+            return {};
         }
     } else if (matchExpr->matchType() == MatchExpression::MATCH_IN) {
         // {a: {$in: [X, Y]}} is equivalent to {$or: [ {a: X}, {a: Y} ]}.
@@ -664,11 +834,11 @@ std::unique_ptr<MatchExpression> BucketSpec::createPredicatesOnBucketLevelField(
             }
         }
         if (alwaysTrue)
-            return nullptr;
+            return {};
 
         // As above, no special case for an empty IN: returning nullptr would be incorrect because
         // it means 'always-true', here.
-        return result;
+        return {std::move(result), nullptr};
     }
     return handleIneligible(policy, matchExpr, "can't handle this predicate");
 }
@@ -713,9 +883,9 @@ std::pair<bool, BSONObj> BucketSpec::pushdownPredicate(
               BucketSpec{
                   tsOptions.getTimeField().toString(),
                   metaField.map([](StringData s) { return s.toString(); }),
-                  // Since we are operating on a collection, not a query-result, there are no
-                  // inclusion/exclusion projections we need to apply to the buckets before
-                  // unpacking.
+                  // Since we are operating on a collection, not a query-result,
+                  // there are no inclusion/exclusion projections we need to apply
+                  // to the buckets before unpacking.
                  {},
                   // And there are no computed projections.
                   {},
@@ -727,6 +897,7 @@ std::pair<bool, BSONObj> BucketSpec::pushdownPredicate(
               includeMetaField,
              assumeNoMixedSchemaData,
               policy)
+              .loosePredicate
        : nullptr;
 
     BSONObjBuilder result;
@@ -1230,9 +1401,10 @@ Document BucketUnpacker::extractSingleMeasurement(int j) {
     return measurement.freeze();
 }
 
-void BucketUnpacker::reset(BSONObj&& bucket) {
+void BucketUnpacker::reset(BSONObj&& bucket, bool bucketMatchedQuery) {
     _unpackingImpl.reset();
     _bucket = std::move(bucket);
+    _bucketMatchedQuery = bucketMatchedQuery;
     uassert(5346510, "An empty bucket cannot be unpacked", !_bucket.isEmpty());
 
     auto&& dataRegion = _bucket.getField(timeseries::kBucketDataFieldName).Obj();
diff --git a/src/mongo/db/exec/bucket_unpacker.h b/src/mongo/db/exec/bucket_unpacker.h
index 8f7f8210618..3a9813eb87a 100644
--- a/src/mongo/db/exec/bucket_unpacker.h
+++ b/src/mongo/db/exec/bucket_unpacker.h
@@ -127,14 +127,29 @@ public:
         kError,
     };
 
+    struct BucketPredicate {
+        // A loose predicate is a predicate which returns true when any measures of a bucket
+        // matches.
+        std::unique_ptr<MatchExpression> loosePredicate;
+
+        // A tight predicate is a predicate which returns true when all measures of a bucket
+        // matches.
+        std::unique_ptr<MatchExpression> tightPredicate;
+    };
+
     /**
      * Takes a predicate after $_internalUnpackBucket on a bucketed field as an argument and
-     * attempts to map it to a new predicate on the 'control' field. For example, the predicate
-     * {a: {$gt: 5}} will generate the predicate {control.max.a: {$_internalExprGt: 5}}, which will
-     * be added before the $_internalUnpackBucket stage.
+     * attempts to map it to new predicates on the 'control' field. There will be a 'loose'
+     * predicate that will match if some of the event field matches, also a 'tight' predicate that
+     * will match if all of the event field matches. For example, the event level predicate {a:
+     * {$gt: 5}} will generate the loose predicate {control.max.a: {$_internalExprGt: 5}}, and the
+     * tight predicate {control.min.a: {$_internalExprGt: 5}}. The loose predicate will be added
+     * before the $_internalUnpackBucket stage to filter out buckets with no match. The tight
+     * predicate will be used to evaluate predicate on bucket level to avoid unnecessary event
+     * level evaluation.
      *
-     * If the original predicate is on the bucket's timeField we may also create a new predicate
-     * on the '_id' field to assist in index utilization. For example, the predicate
+     * If the original predicate is on the bucket's timeField we may also create a new loose
+     * predicate on the '_id' field to assist in index utilization. For example, the predicate
      * {time: {$lt: new Date(...)}} will generate the following predicate:
      * {$and: [
      *      {_id: {$lt: ObjectId(...)}},
@@ -147,7 +162,7 @@ public:
      * When using IneligiblePredicatePolicy::kIgnore, if the predicate can't be pushed down, it
     * returns null. When using IneligiblePredicatePolicy::kError it raises a user error.
      */
-    static std::unique_ptr<MatchExpression> createPredicatesOnBucketLevelField(
+    static BucketPredicate createPredicatesOnBucketLevelField(
         const MatchExpression* matchExpr,
         const BucketSpec& bucketSpec,
         int bucketMaxSpanSeconds,
@@ -269,7 +284,7 @@ public:
     /**
      * This resets the unpacker to prepare to unpack a new bucket described by the given document.
     */
-    void reset(BSONObj&& bucket);
+    void reset(BSONObj&& bucket, bool bucketMatchedQuery = false);
 
     Behavior behavior() const {
         return _unpackerBehavior;
@@ -283,6 +298,10 @@ public:
         return _bucket;
     }
 
+    bool bucketMatchedQuery() const {
+        return _bucketMatchedQuery;
+    }
+
     bool includeMetaField() const {
         return _includeMetaField;
     }
@@ -350,6 +369,9 @@ private:
 
     bool _hasNext = false;
 
+    // A flag used to mark that the entire bucket matches the following $match predicate.
+    bool _bucketMatchedQuery = false;
+
     // A flag used to mark that the timestamp value should be materialized in measurements.
     bool _includeTimeField{false};
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
index 9ef6d892a51..89959bb0b49 100644
--- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
@@ -46,6 +46,7 @@
 #include "mongo/db/matcher/expression_geo.h"
 #include "mongo/db/matcher/expression_internal_bucket_geo_within.h"
 #include "mongo/db/matcher/expression_internal_expr_comparison.h"
+#include "mongo/db/matcher/match_expression_dependencies.h"
 #include "mongo/db/pipeline/accumulator_multi.h"
 #include "mongo/db/pipeline/document_source_add_fields.h"
 #include "mongo/db/pipeline/document_source_geo_near.h"
@@ -250,6 +251,35 @@ DocumentSourceInternalUnpackBucket::DocumentSourceInternalUnpackBucket(
       _bucketUnpacker(std::move(bucketUnpacker)),
       _bucketMaxSpanSeconds{bucketMaxSpanSeconds} {}
 
+DocumentSourceInternalUnpackBucket::DocumentSourceInternalUnpackBucket(
+    const boost::intrusive_ptr<ExpressionContext>& expCtx,
+    BucketUnpacker bucketUnpacker,
+    int bucketMaxSpanSeconds,
+    const boost::optional<BSONObj>& eventFilterBson,
+    const boost::optional<BSONObj>& wholeBucketFilterBson,
+    bool assumeNoMixedSchemaData)
+    : DocumentSourceInternalUnpackBucket(
+          expCtx, std::move(bucketUnpacker), bucketMaxSpanSeconds, assumeNoMixedSchemaData) {
+    if (eventFilterBson) {
+        _eventFilterBson = eventFilterBson->getOwned();
+        _eventFilter =
+            uassertStatusOK(MatchExpressionParser::parse(_eventFilterBson,
+                                                         pExpCtx,
+                                                         ExtensionsCallbackNoop(),
+                                                         Pipeline::kAllowedMatcherFeatures));
+        _eventFilterDeps = {};
+        match_expression::addDependencies(_eventFilter.get(), &_eventFilterDeps);
+    }
+    if (wholeBucketFilterBson) {
+        _wholeBucketFilterBson = wholeBucketFilterBson->getOwned();
+        _wholeBucketFilter =
+            uassertStatusOK(MatchExpressionParser::parse(_wholeBucketFilterBson,
+                                                         pExpCtx,
+                                                         ExtensionsCallbackNoop(),
+                                                         Pipeline::kAllowedMatcherFeatures));
+    }
+}
+
 boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createFromBsonInternal(
     BSONElement specElem, const boost::intrusive_ptr<ExpressionContext>& expCtx) {
     uassert(5346500,
@@ -267,6 +297,8 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createF
     auto bucketMaxSpanSeconds = 0;
     auto assumeClean = false;
     std::vector<std::string> computedMetaProjFields;
+    boost::optional<BSONObj> eventFilterBson;
+    boost::optional<BSONObj> wholeBucketFilterBson;
     for (auto&& elem : specElem.embeddedObject()) {
         auto fieldName = elem.fieldNameStringData();
         if (fieldName == kInclude || fieldName == kExclude) {
@@ -360,6 +392,18 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createF
                               << " field must be a bool, got: " << elem.type(),
                     elem.type() == BSONType::Bool);
             bucketSpec.setUsesExtendedRange(elem.boolean());
+        } else if (fieldName == kEventFilter) {
+            uassert(7026902,
+                    str::stream() << kEventFilter
+                                  << " field must be an object, got: " << elem.type(),
+                    elem.type() == BSONType::Object);
+            eventFilterBson = elem.Obj();
+        } else if (fieldName == kWholeBucketFilter) {
+            uassert(7026903,
+                    str::stream() << kWholeBucketFilter
+                                  << " field must be an object, got: " << elem.type(),
+                    elem.type() == BSONType::Object);
+            wholeBucketFilterBson = elem.Obj();
         } else {
             uasserted(5346506,
                       str::stream()
@@ -378,6 +422,8 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createF
         expCtx,
         BucketUnpacker{std::move(bucketSpec), unpackerBehavior},
         bucketMaxSpanSeconds,
+        eventFilterBson,
+        wholeBucketFilterBson,
         assumeClean);
 }
 
@@ -476,6 +522,13 @@ void DocumentSourceInternalUnpackBucket::serializeToArray(
         out.addField(kIncludeMaxTimeAsMetadata, Value{_bucketUnpacker.includeMaxTimeAsMetadata()});
     }
 
+    if (_wholeBucketFilter) {
+        out.addField(kWholeBucketFilter, Value{_wholeBucketFilter->serialize()});
+    }
+    if (_eventFilter) {
+        out.addField(kEventFilter, Value{_eventFilter->serialize()});
+    }
+
     if (!explain) {
         array.push_back(Value(DOC(getSourceName() << out.freeze())));
         if (_sampleSize) {
@@ -491,25 +544,49 @@ void DocumentSourceInternalUnpackBucket::serializeToArray(
     }
 }
 
+boost::optional<Document> DocumentSourceInternalUnpackBucket::getNextMatchingMeasure() {
+    while (_bucketUnpacker.hasNext()) {
+        auto measure = _bucketUnpacker.getNext();
+        if (_eventFilter) {
+            // MatchExpression only takes BSON documents, so we have to make one. As an
+            // optimization, only serialize the fields we need to do the match.
+            BSONObj measureBson = _eventFilterDeps.needWholeDocument
+                ? measure.toBson()
+                : document_path_support::documentToBsonWithPaths(measure, _eventFilterDeps.fields);
+            if (_bucketUnpacker.bucketMatchedQuery() || _eventFilter->matchesBSON(measureBson)) {
+                return measure;
+            }
+        } else {
+            return measure;
+        }
+    }
+    return {};
+}
+
 DocumentSource::GetNextResult DocumentSourceInternalUnpackBucket::doGetNext() {
     tassert(5521502, "calling doGetNext() when '_sampleSize' is set is disallowed", !_sampleSize);
 
     // Otherwise, fallback to unpacking every measurement in all buckets until the child stage is
     // exhausted.
-    if (_bucketUnpacker.hasNext()) {
-        return _bucketUnpacker.getNext();
+    if (auto measure = getNextMatchingMeasure()) {
+        return GetNextResult(std::move(*measure));
     }
 
     auto nextResult = pSource->getNext();
-    if (nextResult.isAdvanced()) {
+    while (nextResult.isAdvanced()) {
         auto bucket = nextResult.getDocument().toBson();
-        _bucketUnpacker.reset(std::move(bucket));
+        auto bucketMatchedQuery = _wholeBucketFilter && _wholeBucketFilter->matchesBSON(bucket);
+        _bucketUnpacker.reset(std::move(bucket), bucketMatchedQuery);
+
         uassert(5346509,
                 str::stream() << "A bucket with _id "
                               << _bucketUnpacker.bucket()[timeseries::kBucketIdFieldName].toString()
                              << " contains an empty data region",
                 _bucketUnpacker.hasNext());
-        return _bucketUnpacker.getNext();
+        if (auto measure = getNextMatchingMeasure()) {
+            return GetNextResult(std::move(*measure));
+        }
+        nextResult = pSource->getNext();
     }
 
     return nextResult;
@@ -587,7 +664,8 @@ std::pair<BSONObj, bool> DocumentSourceInternalUnpackBucket::extractOrBuildProje
     // Check for a viable inclusion $project after the $_internalUnpackBucket.
     auto [existingProj, isInclusion] = getIncludeExcludeProjectAndType(std::next(itr)->get());
-    if (isInclusion && !existingProj.isEmpty() && canInternalizeProjectObj(existingProj)) {
+    if (!_eventFilter && isInclusion && !existingProj.isEmpty() &&
+        canInternalizeProjectObj(existingProj)) {
         container->erase(std::next(itr));
         return {existingProj, isInclusion};
     }
@@ -595,8 +673,7 @@ std::pair<BSONObj, bool> DocumentSourceInternalUnpackBucket::extractOrBuildProje
     // Attempt to get an inclusion $project representing the root-level dependencies of the pipeline
     // after the $_internalUnpackBucket. If this $project is not empty, then the dependency set was
     // finite.
-    Pipeline::SourceContainer restOfPipeline(std::next(itr), container->end());
-    auto deps = Pipeline::getDependenciesForContainer(pExpCtx, restOfPipeline, boost::none);
+    auto deps = getRestPipelineDependencies(itr, container);
     if (auto dependencyProj =
             deps.toProjectionWithoutMetadata(DepsTracker::TruncateToRootLevel::yes);
         !dependencyProj.isEmpty()) {
@@ -604,7 +681,7 @@ std::pair<BSONObj, bool> DocumentSourceInternalUnpackBucket::extractOrBuildProje
     }
 
     // Check for a viable exclusion $project after the $_internalUnpackBucket.
-    if (!existingProj.isEmpty() && canInternalizeProjectObj(existingProj)) {
+    if (!_eventFilter && !existingProj.isEmpty() && canInternalizeProjectObj(existingProj)) {
         container->erase(std::next(itr));
         return {existingProj, isInclusion};
     }
@@ -612,8 +689,7 @@ std::pair<BSONObj, bool> DocumentSourceInternalUnpackBucket::extractOrBuildProje
     return {BSONObj{}, false};
 }
 
-std::unique_ptr<MatchExpression>
-DocumentSourceInternalUnpackBucket::createPredicatesOnBucketLevelField(
+BucketSpec::BucketPredicate DocumentSourceInternalUnpackBucket::createPredicatesOnBucketLevelField(
     const MatchExpression* matchExpr) const {
     return BucketSpec::createPredicatesOnBucketLevelField(
         matchExpr,
@@ -1005,6 +1081,16 @@ bool findSequentialDocumentCache(Pipeline::SourceContainer::iterator start,
     return start != end;
 }
 
+DepsTracker DocumentSourceInternalUnpackBucket::getRestPipelineDependencies(
+    Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) const {
+    auto deps = Pipeline::getDependenciesForContainer(
+        pExpCtx, Pipeline::SourceContainer{std::next(itr), container->end()}, boost::none);
+    if (_eventFilter) {
+        match_expression::addDependencies(_eventFilter.get(), &deps);
+    }
+    return deps;
+}
+
 Pipeline::SourceContainer::iterator DocumentSourceInternalUnpackBucket::doOptimizeAt(
     Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
     invariant(*itr == this);
@@ -1018,7 +1104,8 @@ Pipeline::SourceContainer::iterator DocumentSourceInternalUnpackBucket::doOptimi
     bool haveComputedMetaField = this->haveComputedMetaField();
 
     // Before any other rewrites for the current stage, consider reordering with $sort.
-    if (auto sortPtr = dynamic_cast<DocumentSourceSort*>(std::next(itr)->get())) {
+    if (auto sortPtr = dynamic_cast<DocumentSourceSort*>(std::next(itr)->get());
+        sortPtr && !_eventFilter) {
         if (auto metaField = _bucketUnpacker.bucketSpec().metaField();
             metaField && !haveComputedMetaField) {
             if (checkMetadataSortReorder(sortPtr->getSortKeyPattern(), metaField.value())) {
@@ -1049,7 +1136,8 @@ Pipeline::SourceContainer::iterator DocumentSourceInternalUnpackBucket::doOptimi
     }
 
     // Attempt to push geoNear on the metaField past $_internalUnpackBucket.
-    if (auto nextNear = dynamic_cast<DocumentSourceGeoNear*>(std::next(itr)->get())) {
+    if (auto nextNear = dynamic_cast<DocumentSourceGeoNear*>(std::next(itr)->get());
+        nextNear && !_eventFilter) {
         // Currently we only support geo indexes on the meta field, and we enforce this by
         // requiring the key field to be set so we can check before we try to look up indexes.
         auto keyField = nextNear->getKeyField();
@@ -1130,8 +1218,7 @@ Pipeline::SourceContainer::iterator DocumentSourceInternalUnpackBucket::doOptimi
     {
         // Check if the rest of the pipeline needs any fields. For example we might only be
        // interested in $count.
-        auto deps = Pipeline::getDependenciesForContainer(
-            pExpCtx, Pipeline::SourceContainer{std::next(itr), container->end()}, boost::none);
+        auto deps = getRestPipelineDependencies(itr, container);
         if (deps.hasNoRequirements()) {
             _bucketUnpacker.setBucketSpecAndBehavior({_bucketUnpacker.bucketSpec().timeField(),
                                                       _bucketUnpacker.bucketSpec().metaField(),
@@ -1151,31 +1238,65 @@ Pipeline::SourceContainer::iterator DocumentSourceInternalUnpackBucket::doOptimi
     }
 
     // Attempt to optimize last-point type queries.
-    if (!_triedLastpointRewrite && optimizeLastpoint(itr, container)) {
+    if (!_triedLastpointRewrite && !_eventFilter && optimizeLastpoint(itr, container)) {
         _triedLastpointRewrite = true;
 
         // If we are able to rewrite the aggregation, give the resulting pipeline a chance to
         // perform further optimizations.
         return container->begin();
     };
 
-    // Attempt to map predicates on bucketed fields to predicates on the control field.
-    if (auto nextMatch = dynamic_cast<DocumentSourceMatch*>(std::next(itr)->get());
-        nextMatch && !_triedBucketLevelFieldsPredicatesPushdown) {
-        _triedBucketLevelFieldsPredicatesPushdown = true;
+    // Attempt to map predicates on bucketed fields to the predicates on the control field.
+    if (auto nextMatch = dynamic_cast<DocumentSourceMatch*>(std::next(itr)->get())) {
 
-        if (auto match = createPredicatesOnBucketLevelField(nextMatch->getMatchExpression())) {
+        // Merge multiple following $match stages.
+        auto itrToMatch = std::next(itr);
+        while (std::next(itrToMatch) != container->end() &&
+               dynamic_cast<DocumentSourceMatch*>(std::next(itrToMatch)->get())) {
+            nextMatch->doOptimizeAt(itrToMatch, container);
+        }
+
+        auto predicates = createPredicatesOnBucketLevelField(nextMatch->getMatchExpression());
+
+        // Try to create a tight bucket predicate to perform bucket level matching.
+        if (predicates.tightPredicate) {
+            _wholeBucketFilterBson = predicates.tightPredicate->serialize();
+            _wholeBucketFilter =
+                uassertStatusOK(MatchExpressionParser::parse(_wholeBucketFilterBson,
+                                                             pExpCtx,
+                                                             ExtensionsCallbackNoop(),
+                                                             Pipeline::kAllowedMatcherFeatures));
+            _wholeBucketFilter = MatchExpression::optimize(std::move(_wholeBucketFilter));
+        }
+
+        // Push the original event predicate into the unpacking stage.
+        _eventFilterBson = nextMatch->getQuery().getOwned();
+        _eventFilter =
+            uassertStatusOK(MatchExpressionParser::parse(_eventFilterBson,
+                                                         pExpCtx,
+                                                         ExtensionsCallbackNoop(),
+                                                         Pipeline::kAllowedMatcherFeatures));
+        _eventFilter = MatchExpression::optimize(std::move(_eventFilter));
+        _eventFilterDeps = {};
+        match_expression::addDependencies(_eventFilter.get(), &_eventFilterDeps);
+        container->erase(std::next(itr));
+
+        // Create a loose bucket predicate and push it before the unpacking stage.
+        if (predicates.loosePredicate) {
             BSONObjBuilder bob;
-            match->serialize(&bob);
+            predicates.loosePredicate->serialize(&bob);
             container->insert(itr, DocumentSourceMatch::create(bob.obj(), pExpCtx));
 
             // Give other stages a chance to optimize with the new $match.
             return std::prev(itr) == container->begin() ? std::prev(itr)
                                                         : std::prev(std::prev(itr));
        }
+
+        // We have removed a $match after this stage, so we try to optimize this stage again.
+        return itr;
     }
 
     // Attempt to push down a $project on the metaField past $_internalUnpackBucket.
-    if (!haveComputedMetaField) {
+    if (!_eventFilter && !haveComputedMetaField) {
         if (auto [metaProject, deleteRemainder] = extractProjectForPushDown(std::next(itr)->get());
             !metaProject.isEmpty()) {
             container->insert(itr,
@@ -1194,7 +1315,7 @@ Pipeline::SourceContainer::iterator DocumentSourceInternalUnpackBucket::doOptimi
 
     // Attempt to extract computed meta projections from subsequent $project, $addFields, or $set
     // and push them before the $_internalunpackBucket.
-    if (pushDownComputedMetaProjection(itr, container)) {
+    if (!_eventFilter && pushDownComputedMetaProjection(itr, container)) {
         // We've pushed down and removed a stage after this one. Try to optimize the new stage.
         return std::prev(itr) == container->begin() ? std::prev(itr)
                                                     : std::prev(std::prev(itr));
     }
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h
index 4dd93046533..a5dab5462ad 100644
--- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h
@@ -51,6 +51,8 @@ public:
     static constexpr StringData kBucketMaxSpanSeconds = "bucketMaxSpanSeconds"_sd;
     static constexpr StringData kIncludeMinTimeAsMetadata = "includeMinTimeAsMetadata"_sd;
     static constexpr StringData kIncludeMaxTimeAsMetadata = "includeMaxTimeAsMetadata"_sd;
+    static constexpr StringData kWholeBucketFilter = "wholeBucketFilter"_sd;
+    static constexpr StringData kEventFilter = "eventFilter"_sd;
 
     static boost::intrusive_ptr<DocumentSource> createFromBsonInternal(
         BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& expCtx);
@@ -62,6 +64,13 @@ public:
                                        int bucketMaxSpanSeconds,
                                        bool assumeNoMixedSchemaData = false);
 
+    DocumentSourceInternalUnpackBucket(const boost::intrusive_ptr<ExpressionContext>& expCtx,
+                                       BucketUnpacker bucketUnpacker,
+                                       int bucketMaxSpanSeconds,
+                                       const boost::optional<BSONObj>& eventFilterBson,
+                                       const boost::optional<BSONObj>& wholeBucketFilterBson,
+                                       bool assumeNoMixedSchemaData = false);
+
     const char* getSourceName() const override {
         return kStageNameInternal.rawData();
     }
@@ -158,7 +167,7 @@ public:
     /**
      * Convenience wrapper around BucketSpec::createPredicatesOnBucketLevelField().
     */
-    std::unique_ptr<MatchExpression> createPredicatesOnBucketLevelField(
+    BucketSpec::BucketPredicate createPredicatesOnBucketLevelField(
         const MatchExpression* matchExpr) const;
 
     /**
@@ -243,8 +252,14 @@ public:
 
     GetModPathsReturn getModifiedPaths() const final override;
 
+    DepsTracker getRestPipelineDependencies(Pipeline::SourceContainer::iterator itr,
+                                            Pipeline::SourceContainer* container) const;
+
 private:
     GetNextResult doGetNext() final;
+
+    boost::optional<Document> getNextMatchingMeasure();
+
     bool haveComputedMetaField() const;
 
     // If buckets contained a mixed type schema along some path, we have to push down special
@@ -261,9 +276,13 @@ private:
     int _bucketMaxCount = 0;
     boost::optional<long long> _sampleSize;
 
-    // Used to avoid infinite loops after we step backwards to optimize a $match on bucket level
-    // fields, otherwise we may do an infinite number of $match pushdowns.
-    bool _triedBucketLevelFieldsPredicatesPushdown = false;
+    // Filters pushed from the later $match stages
+    std::unique_ptr<MatchExpression> _eventFilter;
+    BSONObj _eventFilterBson;
+    DepsTracker _eventFilterDeps;
+    std::unique_ptr<MatchExpression> _wholeBucketFilter;
+    BSONObj _wholeBucketFilterBson;
+
     bool _optimizedEndOfPipeline = false;
     bool _triedInternalizeProject = false;
     bool _triedLastpointRewrite = false;
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp
index ffe291dcbd0..63d3b4a0b23 100644
--- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp
@@ -55,10 +55,11 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
     auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get())
                          ->createPredicatesOnBucketLevelField(original->getMatchExpression());
 
-    ASSERT_BSONOBJ_EQ(predicate->serialize(true),
+    ASSERT_BSONOBJ_EQ(predicate.loosePredicate->serialize(true),
                       fromjson("{$or: [ {'control.max.a': {$_internalExprGt: 1}},"
                                "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
                                "{$type: [ \"$control.max.a\" ]} ]}} ]}"));
+    ASSERT_FALSE(predicate.tightPredicate);
 }
 
 TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
@@ -76,10 +77,11 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
     auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get())
                          ->createPredicatesOnBucketLevelField(original->getMatchExpression());
 
-    ASSERT_BSONOBJ_EQ(predicate->serialize(true),
+    ASSERT_BSONOBJ_EQ(predicate.loosePredicate->serialize(true),
                       fromjson("{$or: [ {'control.max.a': {$_internalExprGte: 1}},"
                                "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
                                "{$type: [ \"$control.max.a\" ]} ]}} ]}"));
+    ASSERT_FALSE(predicate.tightPredicate);
 }
 
 TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
@@ -97,10 +99,11 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
     auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get())
                          ->createPredicatesOnBucketLevelField(original->getMatchExpression());
 
-    ASSERT_BSONOBJ_EQ(predicate->serialize(true),
+    ASSERT_BSONOBJ_EQ(predicate.loosePredicate->serialize(true),
                       fromjson("{$or: [ {'control.min.a': {$_internalExprLt: 1}},"
                                "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]},"
                                "{$type: [
\"$control.max.a\" ]} ]}} ]}")); + ASSERT_FALSE(predicate.tightPredicate); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -118,10 +121,11 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - ASSERT_BSONOBJ_EQ(predicate->serialize(true), + ASSERT_BSONOBJ_EQ(predicate.loosePredicate->serialize(true), fromjson("{$or: [ {'control.min.a': {$_internalExprLte: 1}}," "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," "{$type: [ \"$control.max.a\" ]} ]}} ]}")); + ASSERT_FALSE(predicate.tightPredicate); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -139,11 +143,12 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - ASSERT_BSONOBJ_EQ(predicate->serialize(true), + ASSERT_BSONOBJ_EQ(predicate.loosePredicate->serialize(true), fromjson("{$or: [ {$and:[{'control.min.a': {$_internalExprLte: 1}}," "{'control.max.a': {$_internalExprGte: 1}}]}," "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," "{$type: [ \"$control.max.a\" ]} ]}} ]}")); + ASSERT_FALSE(predicate.tightPredicate); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -161,7 +166,7 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - ASSERT(predicate); + ASSERT(predicate.loosePredicate); auto expected = fromjson( "{$or: [" " {$or: [" @@ -185,7 +190,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, " ]}}" " ]}" "]}"); - ASSERT_BSONOBJ_EQ(predicate->serialize(true), expected); + ASSERT_BSONOBJ_EQ(predicate.loosePredicate->serialize(true), expected); + ASSERT_FALSE(predicate.tightPredicate); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -208,10 +214,11 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - ASSERT_BSONOBJ_EQ(predicate->serialize(true), + ASSERT_BSONOBJ_EQ(predicate.loosePredicate->serialize(true), fromjson("{$or: [ {'control.max.a': {$_internalExprGt: 1}}," "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," "{$type: [ \"$control.max.a\" ]} ]}} ]}")); + ASSERT_FALSE(predicate.tightPredicate); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -234,10 +241,11 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - ASSERT_BSONOBJ_EQ(predicate->serialize(true), + ASSERT_BSONOBJ_EQ(predicate.loosePredicate->serialize(true), fromjson("{$or: [ {'control.max.a': {$_internalExprGte: 1}}," "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," "{$type: [ \"$control.max.a\" ]} ]}} ]}")); + ASSERT_FALSE(predicate.tightPredicate); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -260,10 +268,11 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) 
->createPredicatesOnBucketLevelField(original->getMatchExpression()); - ASSERT_BSONOBJ_EQ(predicate->serialize(true), + ASSERT_BSONOBJ_EQ(predicate.loosePredicate->serialize(true), fromjson("{$or: [ {'control.min.a': {$_internalExprLt: 1}}," "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," "{$type: [ \"$control.max.a\" ]} ]}} ]}")); + ASSERT_FALSE(predicate.tightPredicate); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -286,10 +295,11 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - ASSERT_BSONOBJ_EQ(predicate->serialize(true), + ASSERT_BSONOBJ_EQ(predicate.loosePredicate->serialize(true), fromjson("{$or: [ {'control.min.a': {$_internalExprLte: 1}}," "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," "{$type: [ \"$control.max.a\" ]} ]}} ]}")); + ASSERT_FALSE(predicate.tightPredicate); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -312,11 +322,12 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - ASSERT_BSONOBJ_EQ(predicate->serialize(true), + ASSERT_BSONOBJ_EQ(predicate.loosePredicate->serialize(true), fromjson("{$or: [ {$and:[{'control.min.a': {$_internalExprLte: 1}}," "{'control.max.a': {$_internalExprGte: 1}}]}," "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," "{$type: [ \"$control.max.a\" ]} ]}} ]}")); + ASSERT_FALSE(predicate.tightPredicate); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -334,13 +345,14 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - ASSERT_BSONOBJ_EQ(predicate->serialize(true), + ASSERT_BSONOBJ_EQ(predicate.loosePredicate->serialize(true), fromjson("{$and: [ {$or: [ {'control.max.b': {$_internalExprGt: 1}}," "{$expr: {$ne: [ {$type: [ \"$control.min.b\" ]}," "{$type: [ \"$control.max.b\" ]} ]}} ]}," "{$or: [ {'control.min.a': {$_internalExprLt: 5}}," "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}")); + ASSERT_FALSE(predicate.tightPredicate); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -358,7 +370,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - ASSERT(predicate == nullptr); + ASSERT(predicate.loosePredicate == nullptr); + ASSERT(predicate.tightPredicate == nullptr); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -376,7 +389,7 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - ASSERT_BSONOBJ_EQ(predicate->serialize(true), + ASSERT_BSONOBJ_EQ(predicate.loosePredicate->serialize(true), fromjson("{$or: [" " {'control.max.b': {$_internalExprGt: 1}}," " {$expr: {$ne: [" @@ -384,6 +397,7 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, " {$type: [ \"$control.max.b\" ]}" " ]}}" "]}")); + ASSERT_FALSE(predicate.tightPredicate); } 
TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -402,7 +416,7 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - ASSERT_BSONOBJ_EQ(predicate->serialize(true), + ASSERT_BSONOBJ_EQ(predicate.loosePredicate->serialize(true), fromjson("{$and: [ {$or: [ {'control.max.b': {$_internalExprGte: 2}}," "{$expr: {$ne: [ {$type: [ \"$control.min.b\" ]}," "{$type: [ \"$control.max.b\" ]} ]}} ]}," @@ -412,6 +426,7 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, "{$or: [ {'control.min.a': {$_internalExprLt: 5}}," "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," "{$type: [ \"$control.max.a\" ]} ]}} ]} ]} ]}")); + ASSERT_FALSE(predicate.tightPredicate); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -429,8 +444,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - ASSERT(predicate); - ASSERT_BSONOBJ_EQ(predicate->serialize(true), + ASSERT(predicate.loosePredicate); + ASSERT_BSONOBJ_EQ(predicate.loosePredicate->serialize(true), fromjson("{$or: [" " {$or: [" " {'control.max.b': {$_internalExprGt: 1}}," @@ -447,6 +462,7 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, " ]}}" " ]}" "]}")); + ASSERT_FALSE(predicate.tightPredicate); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -464,7 +480,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - ASSERT(predicate == nullptr); + ASSERT(predicate.loosePredicate == nullptr); + ASSERT(predicate.tightPredicate == nullptr); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -485,7 +502,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, // When a predicate can't be pushed down, it's the same as pushing down a trivially-true // predicate. So when any child of an $or can't be pushed down, we could generate something like // {$or: [ ... {$alwaysTrue: {}}, ... ]}, but then we might as well not push down the whole $or. 
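The comment above is worth a concrete illustration: under OR semantics, replacing one unmappable child with an always-true predicate makes the whole disjunction always true, so a partial pushdown would filter nothing. A sketch of that all-or-nothing rule, with the strings standing in for already-mapped control-field predicates; this is illustrative code, not the real MatchExpression machinery:

    #include <iostream>
    #include <optional>
    #include <string>
    #include <vector>

    // Each $or child maps to an optional control-field predicate; nullopt
    // means "no mapping exists". One unmappable child is equivalent to an
    // always-true child, which makes the whole OR trivially true, so we give
    // up on pushing down the entire OR.
    std::optional<std::string> mapOr(const std::vector<std::optional<std::string>>& children) {
        std::string out = "{$or: [";
        for (size_t i = 0; i < children.size(); ++i) {
            if (!children[i])
                return std::nullopt;  // one unmappable child poisons the whole $or
            out += (i ? ", " : "") + *children[i];
        }
        return out + "]}";
    }

    int main() {
        auto ok = mapOr({std::string("{'control.max.a': {$gt: 1}}"),
                         std::string("{'control.min.b': {$lt: 2}}")});
        auto bad = mapOr({std::string("{'control.max.a': {$gt: 1}}"), std::nullopt});
        std::cout << (ok ? *ok : "<none>") << "\n" << (bad ? *bad : "<none>") << "\n";
    }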
- ASSERT(predicate == nullptr); + ASSERT(predicate.loosePredicate == nullptr); + ASSERT(predicate.tightPredicate == nullptr); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -504,7 +522,7 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - ASSERT_BSONOBJ_EQ(predicate->serialize(true), + ASSERT_BSONOBJ_EQ(predicate.loosePredicate->serialize(true), fromjson("{$or: [" " {$or: [" " {'control.max.b': {$_internalExprGte: 2}}," @@ -530,6 +548,7 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, " ]}" " ]}" "]}")); + ASSERT_FALSE(predicate.tightPredicate); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -541,19 +560,21 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ASSERT_EQ(pipeline->getSources().size(), 2U); pipeline->optimizePipeline(); - ASSERT_EQ(pipeline->getSources().size(), 3U); + ASSERT_EQ(pipeline->getSources().size(), 2U); // To get the optimized $match from the pipeline, we have to serialize with explain. auto stages = pipeline->writeExplainOps(ExplainOptions::Verbosity::kQueryPlanner); - ASSERT_EQ(stages.size(), 3U); + ASSERT_EQ(stages.size(), 2U); ASSERT_BSONOBJ_EQ(stages[0].getDocument().toBson(), fromjson("{$match: {$or: [ {'control.max.b': {$_internalExprGt: 1}}," "{$expr: {$ne: [ {$type: [ \"$control.min.b\" ]}," "{$type: [ \"$control.max.b\" ]} ]}} ]}}")); - ASSERT_BSONOBJ_EQ(stages[1].getDocument().toBson(), unpackBucketObj); - ASSERT_BSONOBJ_EQ(stages[2].getDocument().toBson(), - fromjson("{$match: {$and: [{b: {$gt: 1}}, {a: {$not: {$eq: 5}}}]}}")); + ASSERT_BSONOBJ_EQ( + stages[1].getDocument().toBson(), + fromjson( + "{$_internalUnpackBucket: {exclude: [], timeField: 'time', bucketMaxSpanSeconds: 3600, " + "eventFilter: { $and: [ { b: { $gt: 1 } }, { a: { $not: { $eq: 5 } } } ] }}}")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -566,10 +587,10 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ASSERT_EQ(pipeline->getSources().size(), 2U); pipeline->optimizePipeline(); - ASSERT_EQ(pipeline->getSources().size(), 3U); + ASSERT_EQ(pipeline->getSources().size(), 2U); auto stages = pipeline->serializeToBson(); - ASSERT_EQ(stages.size(), 3U); + ASSERT_EQ(stages.size(), 2U); ASSERT_BSONOBJ_EQ( stages[0], @@ -582,8 +603,11 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, "{$or: [ {'control.min.a': {$_internalExprLt: 5}}," "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}}")); - ASSERT_BSONOBJ_EQ(stages[1], unpackBucketObj); - ASSERT_BSONOBJ_EQ(stages[2], matchObj); + ASSERT_BSONOBJ_EQ(stages[1], + fromjson("{ $_internalUnpackBucket: { exclude: [], timeField: \"time\", " + "bucketMaxSpanSeconds: 3600," + "eventFilter: { $and: [ { b: { $gte: 2 } }, { c: { $gt: 1 } }, { a: " + "{ $lt: 5 } } ] } } }")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -601,7 +625,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - ASSERT(predicate == nullptr); + ASSERT(predicate.loosePredicate == nullptr); + ASSERT(predicate.tightPredicate == nullptr); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -619,7 +644,8 @@ 
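The tests around this point all exercise the same split: conjuncts that touch only the metaField can move in front of the unpack stage, since all events in a bucket share one meta value, while conjuncts on measurement fields stay behind as the eventFilter. A minimal sketch of that partition; Pred and splitOnMeta are illustrative names:

    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    // Illustrative sketch of splitting a conjunctive $match into the part
    // that can move before the unpack stage (predicates purely on the meta
    // field) and the residual part that becomes the eventFilter.
    struct Pred {
        std::string field;  // e.g. "myMeta" or "a"
        std::string text;   // rendered predicate, for display only
    };

    std::pair<std::vector<Pred>, std::vector<Pred>> splitOnMeta(
        const std::vector<Pred>& conjuncts, const std::string& metaField) {
        std::vector<Pred> pushable, residual;
        for (const auto& p : conjuncts)
            (p.field == metaField ? pushable : residual).push_back(p);
        return {pushable, residual};
    }

    int main() {
        std::vector<Pred> match{{"myMeta", "{myMeta: {$eq: 'abc'}}"}, {"a", "{a: {$lte: 4}}"}};
        auto [before, eventFilter] = splitOnMeta(match, "myMeta");
        std::cout << "before unpack: " << before[0].text << "\n";
        std::cout << "eventFilter:   " << eventFilter[0].text << "\n";
    }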
TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - ASSERT(predicate == nullptr); + ASSERT(predicate.loosePredicate == nullptr); + ASSERT(predicate.tightPredicate == nullptr); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -637,7 +663,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - ASSERT(predicate == nullptr); + ASSERT(predicate.loosePredicate == nullptr); + ASSERT(predicate.tightPredicate == nullptr); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -656,7 +683,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ->createPredicatesOnBucketLevelField(original->getMatchExpression()); // Meta predicates are mapped to the meta field, not the control min/max fields. - ASSERT_BSONOBJ_EQ(predicate->serialize(true), fromjson("{meta: {$gt: 5}}")); + ASSERT_BSONOBJ_EQ(predicate.loosePredicate->serialize(true), fromjson("{meta: {$gt: 5}}")); + ASSERT_BSONOBJ_EQ(predicate.tightPredicate->serialize(true), fromjson("{meta: {$gt: 5}}")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -675,7 +703,10 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ->createPredicatesOnBucketLevelField(original->getMatchExpression()); // Meta predicates are mapped to the meta field, not the control min/max fields. - ASSERT_BSONOBJ_EQ(predicate->serialize(true), fromjson("{'meta.foo': {$gt: 5}}")); + ASSERT_BSONOBJ_EQ(predicate.loosePredicate->serialize(true), + fromjson("{'meta.foo': {$gt: 5}}")); + ASSERT_BSONOBJ_EQ(predicate.tightPredicate->serialize(true), + fromjson("{'meta.foo': {$gt: 5}}")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -693,7 +724,7 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - ASSERT_BSONOBJ_EQ(predicate->serialize(true), + ASSERT_BSONOBJ_EQ(predicate.loosePredicate->serialize(true), fromjson("{$and: [" " {$or: [" " {'control.max.a': {$_internalExprGt: 1}}," @@ -704,6 +735,7 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, " ]}," " {meta: {$eq: 5}}" "]}")); + ASSERT(predicate.tightPredicate == nullptr); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, OptimizeMapsTimePredicatesOnId) { @@ -740,7 +772,7 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, OptimizeMapsTimePre dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - auto andExpr = dynamic_cast<AndMatchExpression*>(predicate.get()); + auto andExpr = dynamic_cast<AndMatchExpression*>(predicate.loosePredicate.get()); auto children = andExpr->getChildVector(); ASSERT_EQ(children->size(), 3); @@ -797,7 +829,7 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, OptimizeMapsTimePre auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - auto andExpr = dynamic_cast<AndMatchExpression*>(predicate.get()); + auto andExpr = 
dynamic_cast<AndMatchExpression*>(predicate.loosePredicate.get()); auto children = andExpr->getChildVector(); ASSERT_EQ(children->size(), 3); @@ -846,7 +878,7 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, OptimizeMapsTimePre auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - auto andExpr = dynamic_cast<AndMatchExpression*>(predicate.get()); + auto andExpr = dynamic_cast<AndMatchExpression*>(predicate.loosePredicate.get()); auto children = andExpr->getChildVector(); ASSERT_EQ(children->size(), 6); @@ -908,7 +940,7 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, OptimizeMapsTimePre dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - auto andExpr = dynamic_cast<AndMatchExpression*>(predicate.get()); + auto andExpr = dynamic_cast<AndMatchExpression*>(predicate.loosePredicate.get()); auto children = andExpr->getChildVector(); ASSERT_EQ(children->size(), 3); @@ -957,7 +989,7 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, OptimizeMapsTimePre auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - auto andExpr = dynamic_cast<AndMatchExpression*>(predicate.get()); + auto andExpr = dynamic_cast<AndMatchExpression*>(predicate.loosePredicate.get()); auto children = andExpr->getChildVector(); ASSERT_EQ(children->size(), 3); @@ -1000,7 +1032,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - ASSERT_FALSE(predicate); + ASSERT_FALSE(predicate.loosePredicate); + ASSERT_FALSE(predicate.tightPredicate); } } { @@ -1021,7 +1054,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - ASSERT_FALSE(predicate); + ASSERT_FALSE(predicate.loosePredicate); + ASSERT_FALSE(predicate.tightPredicate); } } { @@ -1042,7 +1076,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - ASSERT_FALSE(predicate); + ASSERT_FALSE(predicate.loosePredicate); + ASSERT_FALSE(predicate.tightPredicate); } } { @@ -1065,7 +1100,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - ASSERT_FALSE(predicate); + ASSERT_FALSE(predicate.loosePredicate); + ASSERT_FALSE(predicate.tightPredicate); } } { @@ -1086,7 +1122,8 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) ->createPredicatesOnBucketLevelField(original->getMatchExpression()); - ASSERT_FALSE(predicate); + ASSERT_FALSE(predicate.loosePredicate); + ASSERT_FALSE(predicate.tightPredicate); } } } @@ -1107,10 +1144,11 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get()) 
->createPredicatesOnBucketLevelField(original->getMatchExpression()); - ASSERT_BSONOBJ_EQ(predicate->serialize(true), + ASSERT_BSONOBJ_EQ(predicate.loosePredicate->serialize(true), fromjson("{$_internalBucketGeoWithin: { withinRegion: { $geometry: { type : " "\"Polygon\" ,coordinates: [ [ [ 0, 0 ], [ 3, 6 ], [ 6, 1 ], [ 0, 0 " "] ] ]}},field: \"loc\"}}")); + ASSERT_FALSE(predicate.tightPredicate); } } // namespace } // namespace mongo diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/optimize_pipeline_test.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/optimize_pipeline_test.cpp index 2001cb2cab3..aeb7ac8ec8c 100644 --- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/optimize_pipeline_test.cpp +++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/optimize_pipeline_test.cpp @@ -54,7 +54,7 @@ TEST_F(OptimizePipeline, MixedMatchPushedDown) { // To get the optimized $match from the pipeline, we have to serialize with explain. auto stages = pipeline->writeExplainOps(ExplainOptions::Verbosity::kQueryPlanner); - ASSERT_EQ(3u, stages.size()); + ASSERT_EQ(2u, stages.size()); // We should push down the $match on the metaField and the predicates on the control field. // The created $match stages should be added before $_internalUnpackBucket and merged. @@ -63,8 +63,10 @@ TEST_F(OptimizePipeline, MixedMatchPushedDown) { "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," "{$type: [ \"$control.max.a\" ] } ] } } ] }]}}"), stages[0].getDocument().toBson()); - ASSERT_BSONOBJ_EQ(unpack, stages[1].getDocument().toBson()); - ASSERT_BSONOBJ_EQ(fromjson("{$match: {a: {$lte: 4}}}"), stages[2].getDocument().toBson()); + ASSERT_BSONOBJ_EQ(fromjson("{ $_internalUnpackBucket: { exclude: [], timeField: \"time\", " + "metaField: \"myMeta\", bucketMaxSpanSeconds: 3600, " + "eventFilter: { a: { $lte: 4 } } } }"), + stages[1].getDocument().toBson()); } TEST_F(OptimizePipeline, MetaMatchPushedDown) { @@ -103,7 +105,7 @@ TEST_F(OptimizePipeline, MixedMatchOr) { pipeline->optimizePipeline(); auto stages = pipeline->writeExplainOps(ExplainOptions::Verbosity::kQueryPlanner); - ASSERT_EQ(3u, stages.size()); + ASSERT_EQ(2u, stages.size()); auto expected = fromjson( "{$match: {$and: [" // Result of pushing down {x: {$lte: 1}}. @@ -123,8 +125,11 @@ TEST_F(OptimizePipeline, MixedMatchOr) { " ]}" "]}}"); ASSERT_BSONOBJ_EQ(expected, stages[0].getDocument().toBson()); - ASSERT_BSONOBJ_EQ(unpack, stages[1].getDocument().toBson()); - ASSERT_BSONOBJ_EQ(match, stages[2].getDocument().toBson()); + ASSERT_BSONOBJ_EQ(fromjson("{ $_internalUnpackBucket: { exclude: [], timeField: \"foo\", " + "metaField: \"myMeta\", bucketMaxSpanSeconds: 3600, " + "eventFilter: { $and: [ { x: { $lte: 1 } }, { $or: [ { " + "\"myMeta.a\": { $gt: 1 } }, { y: { $lt: 1 } } ] } ] } } }"), + stages[1].getDocument().toBson()); } TEST_F(OptimizePipeline, MixedMatchOnlyMetaMatchPushedDown) { @@ -142,11 +147,13 @@ TEST_F(OptimizePipeline, MixedMatchOnlyMetaMatchPushedDown) { // We should push down the $match on the metaField but not the predicate on '$a', which is // ineligible because of the $type. 
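Only comparisons against constants have a sound rewrite onto control.min/control.max; an operator like $type does not, since the control bounds summarize values rather than guaranteeing anything about per-event types, so such predicates survive only in the eventFilter. A sketch of that eligibility test; mapComparison is an illustrative stand-in, and the real rewrite also appends the mixed-type escape clause seen in the expected BSON above:

    #include <iostream>
    #include <optional>
    #include <string>

    // Map {path: {$gt: v}} onto the bucket's max, and {path: {$lt: v}} onto
    // its min (loose form). Operators with no value-bound semantics, such as
    // $type or $regex, return nullopt and are handled only by the eventFilter.
    std::optional<std::string> mapComparison(const std::string& op,
                                             const std::string& path,
                                             double v) {
        if (op == "$gt" || op == "$gte")
            return "{'control.max." + path + "': {" + op + ": " + std::to_string(v) + "}}";
        if (op == "$lt" || op == "$lte")
            return "{'control.min." + path + "': {" + op + ": " + std::to_string(v) + "}}";
        return std::nullopt;  // $type, $regex, ...: no bucket-level mapping
    }

    int main() {
        auto m1 = mapComparison("$lte", "a", 4);
        auto m2 = mapComparison("$type", "a", 2);
        std::cout << (m1 ? *m1 : "<eventFilter only>") << "\n"
                  << (m2 ? *m2 : "<eventFilter only>") << "\n";
    }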
auto serialized = pipeline->serializeToBson(); - ASSERT_EQ(3u, serialized.size()); + ASSERT_EQ(2u, serialized.size()); ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [{meta: {$gte: 0}}, {meta: {$lte: 5}}]}}"), serialized[0]); - ASSERT_BSONOBJ_EQ(unpack, serialized[1]); - ASSERT_BSONOBJ_EQ(fromjson("{$match: {a: {$type: [ 2 ]}}}"), serialized[2]); + ASSERT_BSONOBJ_EQ(fromjson("{ $_internalUnpackBucket: { exclude: [], timeField: \"time\", " + "metaField: \"myMeta\", bucketMaxSpanSeconds: 3600, " + "eventFilter: { a: { $type: [ 2 ] } } } }"), + serialized[1]); } TEST_F(OptimizePipeline, MultipleMatchesPushedDown) { @@ -164,15 +171,17 @@ TEST_F(OptimizePipeline, MultipleMatchesPushedDown) { // We should push down both the $match on the metaField and the predicates on the control field. // The created $match stages should be added before $_internalUnpackBucket and merged. auto stages = pipeline->writeExplainOps(ExplainOptions::Verbosity::kQueryPlanner); - ASSERT_EQ(3u, stages.size()); + ASSERT_EQ(2u, stages.size()); ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [ {meta: {$gte: 0}}," "{meta: {$lte: 5}}," "{$or: [ {'control.min.a': {$_internalExprLte: 4}}," "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}}"), stages[0].getDocument().toBson()); - ASSERT_BSONOBJ_EQ(unpack, stages[1].getDocument().toBson()); - ASSERT_BSONOBJ_EQ(fromjson("{$match: {a: {$lte: 4}}}"), stages[2].getDocument().toBson()); + ASSERT_BSONOBJ_EQ(fromjson("{ $_internalUnpackBucket: { exclude: [], timeField: \"time\", " + "metaField: \"myMeta\", bucketMaxSpanSeconds: 3600, " + "eventFilter: { a: { $lte: 4 } } } }"), + stages[1].getDocument().toBson()); } TEST_F(OptimizePipeline, MultipleMatchesPushedDownWithSort) { @@ -191,16 +200,18 @@ TEST_F(OptimizePipeline, MultipleMatchesPushedDownWithSort) { // We should push down both the $match on the metaField and the predicates on the control field. // The created $match stages should be added before $_internalUnpackBucket and merged. auto stages = pipeline->writeExplainOps(ExplainOptions::Verbosity::kQueryPlanner); - ASSERT_EQ(4u, stages.size()); + ASSERT_EQ(3u, stages.size()); ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [ { meta: { $gte: 0 } }," "{meta: { $lte: 5 } }," "{$or: [ { 'control.min.a': { $_internalExprLte: 4 } }," "{$expr: { $ne: [ {$type: [ \"$control.min.a\" ] }," "{$type: [ \"$control.max.a\" ] } ] } } ] }]}}"), stages[0].getDocument().toBson()); - ASSERT_BSONOBJ_EQ(unpack, stages[1].getDocument().toBson()); - ASSERT_BSONOBJ_EQ(fromjson("{$match: {a: {$lte: 4}}}"), stages[2].getDocument().toBson()); - ASSERT_BSONOBJ_EQ(fromjson("{$sort: {sortKey: {a: 1}}}"), stages[3].getDocument().toBson()); + ASSERT_BSONOBJ_EQ(fromjson("{ $_internalUnpackBucket: { exclude: [], timeField: \"time\", " + "metaField: \"myMeta\", bucketMaxSpanSeconds: 3600, " + "eventFilter: { a: { $lte: 4 } } } }"), + stages[1].getDocument().toBson()); + ASSERT_BSONOBJ_EQ(fromjson("{$sort: {sortKey: {a: 1}}}"), stages[2].getDocument().toBson()); } TEST_F(OptimizePipeline, MetaMatchThenCountPushedDown) { @@ -261,7 +272,7 @@ TEST_F(OptimizePipeline, SortThenMixedMatchPushedDown) { // We should push down both the $sort and parts of the $match. 
auto serialized = pipeline->serializeToBson(); - ASSERT_EQ(4u, serialized.size()); + ASSERT_EQ(3u, serialized.size()); auto expected = fromjson( "{$match: {$and: [" " {meta: {$eq: 'abc'}}," @@ -274,8 +285,10 @@ TEST_F(OptimizePipeline, SortThenMixedMatchPushedDown) { "]}}"); ASSERT_BSONOBJ_EQ(expected, serialized[0]); ASSERT_BSONOBJ_EQ(fromjson("{$sort: {meta: -1}}"), serialized[1]); - ASSERT_BSONOBJ_EQ(unpack, serialized[2]); - ASSERT_BSONOBJ_EQ(fromjson("{$match: {a: {$gte: 5}}}"), serialized[3]); + ASSERT_BSONOBJ_EQ(fromjson("{ $_internalUnpackBucket: { exclude: [], timeField: \"time\", " + "metaField: \"myMeta\", bucketMaxSpanSeconds: 3600, " + "eventFilter: { a: { $gte: 5 } } } }"), + serialized[2]); } TEST_F(OptimizePipeline, MetaMatchThenSortPushedDown) { @@ -331,18 +344,19 @@ TEST_F(OptimizePipeline, MixedMatchThenProjectPushedDown) { // We can push down part of the $match and use dependency analysis on the end of the pipeline. auto stages = pipeline->writeExplainOps(ExplainOptions::Verbosity::kQueryPlanner); - ASSERT_EQ(4u, stages.size()); + ASSERT_EQ(3u, stages.size()); ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [{meta: {$eq: 'abc'}}," "{$or: [ {'control.min.a': { $_internalExprLte: 4 } }," "{$expr: { $ne: [ {$type: [ \"$control.min.a\" ] }," "{$type: [ \"$control.max.a\" ] } ] } } ] } ]}}"), stages[0].getDocument().toBson()); - ASSERT_BSONOBJ_EQ(fromjson("{$_internalUnpackBucket: { include: ['_id', 'a', 'x'], timeField: " - "'time', metaField: 'myMeta', bucketMaxSpanSeconds: 3600}}"), - stages[1].getDocument().toBson()); - ASSERT_BSONOBJ_EQ(fromjson("{$match: {a: {$lte: 4}}}"), stages[2].getDocument().toBson()); + ASSERT_BSONOBJ_EQ( + fromjson("{ $_internalUnpackBucket: { include: [ \"_id\", \"a\", \"x\" ], timeField: " + "\"time\", metaField: \"myMeta\", bucketMaxSpanSeconds: 3600, " + "eventFilter: { a: { $lte: 4 } } } }"), + stages[1].getDocument().toBson()); ASSERT_BSONOBJ_EQ(fromjson("{$project: {_id: true, x: true}}"), - stages[3].getDocument().toBson()); + stages[2].getDocument().toBson()); } @@ -379,21 +393,21 @@ TEST_F(OptimizePipeline, ProjectThenMixedMatchPushedDown) { // We should push down part of the $match and do dependency analysis on the rest. auto stages = pipeline->writeExplainOps(ExplainOptions::Verbosity::kQueryPlanner); - ASSERT_EQ(4u, stages.size()); + ASSERT_EQ(3u, stages.size()); ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [{meta: {$eq: \"abc\"}}," "{$or: [ {'control.min.a': {$_internalExprLte: 4}}," "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ] }," "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}}"), stages[0].getDocument().toBson()); ASSERT_BSONOBJ_EQ( - fromjson("{$_internalUnpackBucket: { include: ['_id', 'a', 'x', 'myMeta'], timeField: " - "'time', metaField: 'myMeta', bucketMaxSpanSeconds: 3600}}"), + fromjson("{ $_internalUnpackBucket: { include: [ \"_id\", \"a\", \"x\", \"myMeta\" ], " + "timeField: \"time\", metaField: \"myMeta\", bucketMaxSpanSeconds: 3600, " + "eventFilter: { a: { $lte: 4 } } } }"), stages[1].getDocument().toBson()); - ASSERT_BSONOBJ_EQ(fromjson("{$match: {a: {$lte: 4}}}"), stages[2].getDocument().toBson()); const UnorderedFieldsBSONObjComparator kComparator; ASSERT_EQ( kComparator.compare(fromjson("{$project: {_id: true, a: true, myMeta: true, x: true}}"), - stages[3].getDocument().toBson()), + stages[2].getDocument().toBson()), 0); } @@ -410,7 +424,7 @@ TEST_F(OptimizePipeline, ProjectWithRenameThenMixedMatchPushedDown) { // We should push down part of the $match and do dependency analysis on the end of the pipeline. 
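Note the include lists in the expected unpack stages above: they contain 'a' even when the trailing $project keeps only _id and x. Because the eventFilter now runs inside unpacking, its dependencies must be unioned into the unpack stage's include set along with the rest of the pipeline's, which is what getRestPipelineDependencies arranges. A sketch of that union, illustrative and using plain field-name sets rather than DepsTracker:

    #include <iostream>
    #include <set>
    #include <string>

    // The unpack stage's include list must cover both the fields the rest of
    // the pipeline uses AND the fields the absorbed eventFilter tests.
    std::set<std::string> unpackIncludes(const std::set<std::string>& pipelineDeps,
                                         const std::set<std::string>& eventFilterDeps) {
        std::set<std::string> all = pipelineDeps;
        all.insert(eventFilterDeps.begin(), eventFilterDeps.end());
        return all;
    }

    int main() {
        auto inc = unpackIncludes({"_id", "x"}, {"a"});
        for (const auto& f : inc) std::cout << f << " ";  // prints: _id a x
        std::cout << "\n";
    }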
auto stages = pipeline->writeExplainOps(ExplainOptions::Verbosity::kQueryPlanner); - ASSERT_EQ(4u, stages.size()); + ASSERT_EQ(3u, stages.size()); ASSERT_BSONOBJ_EQ( fromjson("{$match: {$and: [{$or: [ {'control.max.y': {$_internalExprGte: \"abc\"}}," "{$expr: {$ne: [ {$type: [ \"$control.min.y\" ]}," @@ -419,13 +433,13 @@ TEST_F(OptimizePipeline, ProjectWithRenameThenMixedMatchPushedDown) { "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ] }," "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}}"), stages[0].getDocument().toBson()); - ASSERT_BSONOBJ_EQ(fromjson("{$_internalUnpackBucket: { include: ['_id', 'a', 'y'], timeField: " - "'time', metaField: 'myMeta', bucketMaxSpanSeconds: 3600}}"), - stages[1].getDocument().toBson()); - ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [{y: {$gte: 'abc'}}, {a: {$lte: 4}}]}}"), - stages[2].getDocument().toBson()); + ASSERT_BSONOBJ_EQ( + fromjson("{ $_internalUnpackBucket: { include: [ \"_id\", \"a\", \"y\" ], timeField: " + "\"time\", metaField: \"myMeta\", bucketMaxSpanSeconds: 3600, " + "eventFilter: { $and: [ { y: { $gte: \"abc\" } }, { a: { $lte: 4 } } ] } } }"), + stages[1].getDocument().toBson()); ASSERT_BSONOBJ_EQ(fromjson("{$project: {_id: true, a: true, myMeta: '$y'}}"), - stages[3].getDocument().toBson()); + stages[2].getDocument().toBson()); } TEST_F(OptimizePipeline, ComputedProjectThenMetaMatchPushedDown) { @@ -466,15 +480,15 @@ TEST_F(OptimizePipeline, ComputedProjectThenMetaMatchNotPushedDown) { // We should both push down the project and internalize the remaining project, but we can't // push down the meta match due to the (now invalid) renaming. auto serialized = pipeline->serializeToBson(); - ASSERT_EQ(3u, serialized.size()); + ASSERT_EQ(2u, serialized.size()); ASSERT_BSONOBJ_EQ(fromjson("{$addFields: {myMeta: {$sum: ['$meta.a', '$meta.b']}}}"), serialized[0]); ASSERT_BSONOBJ_EQ( - fromjson( - "{$_internalUnpackBucket: { include: ['_id', 'myMeta'], timeField: 'time', metaField: " - "'myMeta', bucketMaxSpanSeconds: 3600, computedMetaProjFields: ['myMeta']}}"), + fromjson("{ $_internalUnpackBucket: { include: [ \"_id\", \"myMeta\" ], timeField: " + "\"time\", metaField: \"myMeta\", " + "bucketMaxSpanSeconds: 3600, computedMetaProjFields: [ \"myMeta\" ], " + "eventFilter: { myMeta: { $gte: \"abc\" } } } }"), serialized[1]); - ASSERT_BSONOBJ_EQ(fromjson("{$match: {myMeta: {$gte: 'abc'}}}"), serialized[2]); } // namespace TEST_F(OptimizePipeline, ComputedProjectThenMatchNotPushedDown) { @@ -491,13 +505,13 @@ TEST_F(OptimizePipeline, ComputedProjectThenMatchNotPushedDown) { // We should push down the computed project but not the match, because it depends on the newly // computed values. 
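Two different $project interactions show up in these tests. A pure rename such as {myMeta: '$y'} is invertible, so a later predicate can be rewritten onto the source path and pushed up; a computed field has no inverse, so its predicate cannot move above the computation and instead lands in the eventFilter, as the serialized stages above show. A sketch of that distinction; pushThroughProject is an illustrative name:

    #include <iostream>
    #include <map>
    #include <optional>
    #include <string>

    // Output-field name -> source path, or nullopt when the field is computed
    // from an expression and therefore has no inverse mapping.
    std::optional<std::string> pushThroughProject(
        const std::map<std::string, std::optional<std::string>>& fields,
        const std::string& predicatePath) {
        auto it = fields.find(predicatePath);
        if (it == fields.end() || !it->second)
            return std::nullopt;  // unknown or computed: predicate stays after the $project
        return *it->second;       // pure rename: rewrite the path and move the predicate up
    }

    int main() {
        std::map<std::string, std::optional<std::string>> project{
            {"myMeta", std::string("y")},  // {$project: {myMeta: '$y'}}: a rename
            {"z", std::nullopt}};          // a computed field, e.g. {$sum: [...]}
        auto a = pushThroughProject(project, "myMeta");
        auto b = pushThroughProject(project, "z");
        std::cout << (a ? *a : "<stays below>") << "\n" << (b ? *b : "<stays below>") << "\n";
    }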
auto serialized = pipeline->serializeToBson(); - ASSERT_EQ(3u, serialized.size()); + ASSERT_EQ(2u, serialized.size()); ASSERT_BSONOBJ_EQ(fromjson("{$addFields: {y: {$sum: ['$meta.a', '$meta.b']}}}"), serialized[0]); - ASSERT_BSONOBJ_EQ( - fromjson("{$_internalUnpackBucket: { include: ['_id', 'y'], timeField: 'time', metaField: " - "'myMeta', bucketMaxSpanSeconds: 3600, computedMetaProjFields: ['y']}}"), - serialized[1]); - ASSERT_BSONOBJ_EQ(fromjson("{$match: {y: {$gt: 'abc'}}}"), serialized[2]); + ASSERT_BSONOBJ_EQ(fromjson("{ $_internalUnpackBucket: { include: [ \"_id\", \"y\" ], " + "timeField: \"time\", metaField: \"myMeta\", " + "bucketMaxSpanSeconds: 3600, computedMetaProjFields: [ \"y\" ], " + "eventFilter: { y: { $gt: \"abc\" } } } }"), + serialized[1]); } TEST_F(OptimizePipeline, MetaSortThenProjectPushedDown) { @@ -857,7 +871,7 @@ TEST_F(OptimizePipeline, MatchWithGeoWithinOnMeasurementsPushedDownUsingInternal pipeline->optimizePipeline(); auto serialized = pipeline->serializeToBson(); - ASSERT_EQ(serialized.size(), 3U); + ASSERT_EQ(serialized.size(), 2U); // $match with $geoWithin on a non-metadata field is pushed down and $_internalBucketGeoWithin // is used. @@ -866,12 +880,12 @@ TEST_F(OptimizePipeline, MatchWithGeoWithinOnMeasurementsPushedDownUsingInternal "\"Polygon\" ,coordinates: [ [ [ 0, 0 ], [ 3, 6 ], [ 6, 1 ], [ 0, 0 " "] ] ]}},field: \"loc\"}}}"), serialized[0]); - ASSERT_BSONOBJ_EQ(fromjson("{$_internalUnpackBucket: {exclude: [], timeField: " - "'time', bucketMaxSpanSeconds: 3600}}"), - serialized[1]); - ASSERT_BSONOBJ_EQ(fromjson("{$match: {loc: {$geoWithin: {$geometry: {type: \"Polygon\", " - "coordinates: [ [ [ 0, 0 ], [ 3, 6 ], [ 6, 1 ], [ 0, 0 ] ] ]}}}}}"), - serialized[2]); + ASSERT_BSONOBJ_EQ( + fromjson("{ $_internalUnpackBucket: { exclude: [], timeField: \"time\", " + "bucketMaxSpanSeconds: 3600, " + "eventFilter: { loc: { $geoWithin: { $geometry: { type: \"Polygon\", coordinates: " + "[ [ [ 0, 0 ], [ 3, 6 ], [ 6, 1 ], [ 0, 0 ] ] ] } } } } } }"), + serialized[1]); } TEST_F(OptimizePipeline, MatchWithGeoWithinOnMetaFieldIsPushedDown) { @@ -913,7 +927,7 @@ TEST_F(OptimizePipeline, pipeline->optimizePipeline(); auto serialized = pipeline->serializeToBson(); - ASSERT_EQ(serialized.size(), 3U); + ASSERT_EQ(serialized.size(), 2U); // $match with $geoIntersects on a non-metadata field is pushed down and // $_internalBucketGeoWithin is used. 
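The geo tests check the same loose/exact layering as the comparison rewrites: $_internalBucketGeoWithin is a coarse bucket-level test that may admit false positives, so the exact $geoWithin or $geoIntersects predicate still runs as the eventFilter on each unpacked event. A simplified model using bounding boxes in place of polygons, illustrative only:

    #include <iostream>

    // Coarse bucket-level test (boxes overlap) admits false positives, so the
    // exact per-event test still runs inside the unpack stage.
    struct Box { double minX, minY, maxX, maxY; };

    bool boxesOverlap(const Box& a, const Box& b) {
        return a.minX <= b.maxX && b.minX <= a.maxX && a.minY <= b.maxY && b.minY <= a.maxY;
    }
    bool pointInBox(double x, double y, const Box& b) {
        return b.minX <= x && x <= b.maxX && b.minY <= y && y <= b.maxY;
    }

    int main() {
        Box region{0, 0, 6, 6};        // query region (stands in for the polygon)
        Box bucketBounds{5, 5, 9, 9};  // bucket-level summary of event locations
        if (boxesOverlap(bucketBounds, region)) {  // coarse test passes...
            double eventX = 8, eventY = 8;         // ...but this event is outside
            std::cout << pointInBox(eventX, eventY, region) << "\n";  // exact test: 0
        }
    }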
@@ -922,12 +936,12 @@ TEST_F(OptimizePipeline, "\"Polygon\" ,coordinates: [ [ [ 0, 0 ], [ 3, 6 ], [ 6, 1 ], [ 0, 0 " "] ] ]}},field: \"loc\"}}}"), serialized[0]); - ASSERT_BSONOBJ_EQ(fromjson("{$_internalUnpackBucket: {exclude: [], timeField: " - "'time', bucketMaxSpanSeconds: 3600}}"), - serialized[1]); - ASSERT_BSONOBJ_EQ(fromjson("{$match: {loc: {$geoIntersects: {$geometry: {type: \"Polygon\", " - "coordinates: [ [ [ 0, 0 ], [ 3, 6 ], [ 6, 1 ], [ 0, 0 ] ] ]}}}}}"), - serialized[2]); + ASSERT_BSONOBJ_EQ( + fromjson("{ $_internalUnpackBucket: { exclude: [], timeField: \"time\", " + "bucketMaxSpanSeconds: 3600, " + "eventFilter: { loc: { $geoIntersects: { $geometry: { type: \"Polygon\", " + "coordinates: [ [ [ 0, 0 ], [ 3, 6 ], [ 6, 1 ], [ 0, 0 ] ] ] } } } } } }"), + serialized[1]); } TEST_F(OptimizePipeline, MatchWithGeoIntersectsOnMetaFieldIsPushedDown) { diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp index ba4f31adf17..4ce5d558ac4 100644 --- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp +++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp @@ -56,7 +56,7 @@ TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, OptimizeSplitsMatchAndMaps // predicate on 'control.min.a'. These two created $match stages should be added before // $_internalUnpackBucket and merged. auto serialized = pipeline->serializeToBson(); - ASSERT_EQ(3u, serialized.size()); + ASSERT_EQ(2u, serialized.size()); ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [" " {meta: {$gte: 0}}," " {meta: {$lte: 5}}," @@ -68,8 +68,13 @@ TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, OptimizeSplitsMatchAndMaps " ]}" "]}}"), serialized[0]); - ASSERT_BSONOBJ_EQ(unpack, serialized[1]); - ASSERT_BSONOBJ_EQ(fromjson("{$match: {a: {$lte: 4}}}"), serialized[2]); + ASSERT_BSONOBJ_EQ(fromjson("{ $_internalUnpackBucket: { " + "exclude: [], " + "timeField: \"foo\", " + "metaField: \"myMeta\", " + "bucketMaxSpanSeconds: 3600, " + "eventFilter: { a: { $lte: 4 } } } }"), + serialized[1]); } TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, OptimizeMovesMetaMatchBeforeUnpack) { @@ -94,10 +99,6 @@ TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, auto unpack = fromjson( "{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta', " "bucketMaxSpanSeconds: 3600}}"); - auto unpackExcluded = fromjson( - "{$_internalUnpackBucket: { include: ['_id', 'data'], timeField: 'foo', metaField: " - "'myMeta', " - "bucketMaxSpanSeconds: 3600}}"); auto pipeline = Pipeline::parse(makeVector(unpack, fromjson("{$project: {data: 1}}"), fromjson("{$match: {myMeta: {$gte: 0}}}")), @@ -108,9 +109,11 @@ TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, // The $match on meta is not moved before $_internalUnpackBucket since the field is excluded. 
auto serialized = pipeline->serializeToBson(); - ASSERT_EQ(2u, serialized.size()); - ASSERT_BSONOBJ_EQ(unpackExcluded, serialized[0]); - ASSERT_BSONOBJ_EQ(fromjson("{$match: {myMeta: {$gte: 0}}}"), serialized[1]); + ASSERT_EQ(1u, serialized.size()); + ASSERT_BSONOBJ_EQ(fromjson("{ $_internalUnpackBucket: { include: [ \"_id\", \"data\" ], " + "timeField: \"foo\", metaField: \"myMeta\", bucketMaxSpanSeconds: " + "3600, eventFilter: { myMeta: { $gte: 0 } } } }"), + serialized[0]); } TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, @@ -134,7 +137,7 @@ TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, // We should fail to split the match because of the $or clause. We should still be able to // map the predicate on 'x' to a predicate on the control field. auto serialized = pipeline->serializeToBson(); - ASSERT_EQ(3u, serialized.size()); + ASSERT_EQ(2u, serialized.size()); auto expected = fromjson( "{$match: {$and: [" // Result of pushing down {x: {$lte: 1}}. @@ -154,8 +157,13 @@ TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, " ]}" "]}}"); ASSERT_BSONOBJ_EQ(expected, serialized[0]); - ASSERT_BSONOBJ_EQ(unpack, serialized[1]); - ASSERT_BSONOBJ_EQ(match, serialized[2]); + ASSERT_BSONOBJ_EQ( + fromjson( + "{ $_internalUnpackBucket: { " + "exclude: [], timeField: \"foo\", metaField: \"myMeta\", bucketMaxSpanSeconds: 3600, " + "eventFilter: { $and: [ { x: { $lte: 1 } }, { $or: [ { \"myMeta.a\": { $gt: 1 } }, { " + "y: { $lt: 1 } } ] } ] } } }"), + serialized[1]); } } // namespace } // namespace mongo
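Finally, the reason the absorbed filters show up in serialized output throughout these tests: $_internalUnpackBucket must round-trip through BSON, for example when a pipeline is shipped between nodes or rendered by explain, so eventFilter and wholeBucketFilter are emitted as ordinary fields of the stage spec and re-parsed by the new constructor overload declared in the header. A sketch of that round-trip shape; UnpackSpec is illustrative, with strings standing in for BSON:

    #include <iostream>
    #include <optional>
    #include <string>

    // Illustrative stage spec: the absorbed filters are optional parts of the
    // serialized form, present only after the optimization has fired.
    struct UnpackSpec {
        std::string timeField;
        std::optional<std::string> eventFilter;        // per-event predicate
        std::optional<std::string> wholeBucketFilter;  // tight control-field predicate
    };

    std::string serialize(const UnpackSpec& s) {
        std::string out = "{$_internalUnpackBucket: {timeField: '" + s.timeField + "'";
        if (s.eventFilter) out += ", eventFilter: " + *s.eventFilter;
        if (s.wholeBucketFilter) out += ", wholeBucketFilter: " + *s.wholeBucketFilter;
        return out + "}}";
    }

    int main() {
        UnpackSpec spec{"time", std::string("{a: {$lte: 4}}"),
                        std::string("{'control.max.a': {$lte: 4}}")};
        std::cout << serialize(spec) << "\n";
    }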