diff options
9 files changed, 348 insertions, 60 deletions
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml index d9a7d3c47ef..e653da6ee97 100644 --- a/etc/backports_required_for_multiversion_tests.yml +++ b/etc/backports_required_for_multiversion_tests.yml @@ -100,6 +100,8 @@ last-continuous: test_file: jstests/aggregation/range.js - ticket: SERVER-59923 test_file: jstests/sharding/test_resharding_test_fixture_shutdown_retry_needed.js + - ticket: SERVER-59505 + test_file: jstests/core/timeseries/timeseries_find.js # Tests that should only be excluded from particular suites should be listed under that suite. suites: @@ -288,6 +290,8 @@ last-lts: test_file: jstests/replsets/cluster_chaining_override.js - ticket: SERVER-57729 test_file: jstests/core/timeseries/timeseries_bucket_drop.js + - ticket: SERVER-59505 + test_file: jstests/core/timeseries/timeseries_find.js - ticket: SERVER-56800 test_file: jstests/sharding/cwwc_conflict_add_shard.js - ticket: SERVER-57262 diff --git a/jstests/core/timeseries/timeseries_find.js b/jstests/core/timeseries/timeseries_find.js new file mode 100644 index 00000000000..7e907100354 --- /dev/null +++ b/jstests/core/timeseries/timeseries_find.js @@ -0,0 +1,148 @@ +/** + * Test that variable-type fields are found correctly in timeseries collections. + * + * @tags: [ + * does_not_support_transactions, + * does_not_support_stepdowns, + * requires_pipeline_optimization, + * requires_timeseries, + * ] + */ + +(function() { +"use strict"; + +load("jstests/core/timeseries/libs/timeseries.js"); + +if (!TimeseriesTest.timeseriesCollectionsEnabled(db.getMongo())) { + jsTestLog("Skipping test because the time-series collection feature flag is disabled"); + return; +} + +const timeFieldName = "time"; + +/* + * Creates a collection, populates it with `docs`, runs the `query` and ensures that the result set + * is equal to `results`. Also checks that the bounds at `path` formed by the min & max of the + * control values for the path `path` are equal to `bounds`. In order to do this we assert that only + * one bucket was created. + */ +function runTest(docs, query, results, path, bounds) { + // Setup our DB & our collections. + const tsColl = db.getCollection(jsTestName()); + tsColl.drop(); + const bucketColl = db.getCollection('system.buckets.' + tsColl.getName()); + + assert.commandWorked( + db.createCollection(tsColl.getName(), {timeseries: {timeField: timeFieldName}})); + + // Construct our pipelines for later use. + const pipeline = [{$match: query}, {$sort: {_id: 1}}, {$project: {_id: 0, time: 0}}]; + const controlPipeline = + [{$project: {_id: 0, value: [`$control.min.${path}`, `$control.max.${path}`]}}]; + + // Populate the collection with documents. + docs.forEach(d => tsColl.insert(Object.assign({[timeFieldName]: new Date("2021-01-01")}, d))); + + // Check that the result is in the result set. + assert.docEq(tsColl.aggregate(pipeline).toArray(), results); + const buckets = bucketColl.aggregate(controlPipeline).toArray(); + + // Check that we only have one bucket. + assert.eq(buckets.length, 1); + + // Check that the bounds are what we expect. + assert.docEq(buckets[0].value, bounds); +} + +// 'a.b' is missing in the bounds even though it appears in the events. +runTest([{a: 1}, {a: {b: 3}}, {a: new Date("2021-01-01")}], + {"a.b": {$gt: 2}}, + [{a: {b: 3}}], + "a", + [1, new Date("2021-01-01")]); + +// 'a.b' is missing in the bounds even though it appears in the events. The bounds it is missing +// in are arrays on both sides. +runTest([{a: [1]}, {a: [{b: 3}]}, {a: [new Date("2021-01-01")]}], + {"a.b": {$gt: 2}}, + [{a: [{b: 3}]}], + "a", + [[1], [new Date("2021-01-01")]]); + +// 'a.b' appears in the bounds which are arrays. But it doesn't appear not in every pair of bounds. +// And the relevant value of 'a.b' does not appear in the bounds despite being present in the +// events. +runTest([{a: [1]}, {a: [{b: 3}]}, {a: [new Date("2021-01-01"), {b: 1}]}], + {"a.b": {$gt: 2}}, + [{a: [{b: 3}]}], + "a", + [[1, {b: 1}], [new Date("2021-01-01"), {b: 1}]]); + +// 'a.b' appears in the bounds but not the relevant side of the bounds. +runTest( + [ + {a: 1}, + {a: {b: 1}}, + ], + {"a.b": {$lt: 2}}, + [{a: {b: 1}}], + "a", + [1, {b: 1}]); + +// We query the upper bound for 'a.b', but 'a.b' only appears in the lower bound. +runTest( + [ + {a: {b: 3}}, + {a: {b: [1, 2]}}, + ], + {"a.b": {$gte: 3}}, + [{a: {b: 3}}], + "a.b", + [3, [1, 2]]); + +// We query the lower bound for 'a.b', but 'a.b' only appears in the upper bound. +runTest( + [ + {a: {b: 4}}, + {a: {b: [1, 2]}}, + ], + {"a.b": {$lte: 3}}, + [{a: {b: [1, 2]}}], + "a.b", + [4, [1, 2]]); + +// 'a.b' appears in the bounds but the matching values appear in neither side of the bounds. +runTest([{a: {b: 3}}, {a: {b: [1, 2]}}, {a: new Date("2021-01-01")}], + {"a.b": {$eq: 2}}, + [{a: {b: [1, 2]}}], + "a", + [{b: 3}, new Date("2021-01-01")]); + +// 'a.0' doesn't appear in the bounds. +runTest( + [ + {a: 1.7881393632457332e-7}, + { + a: ["ya", {"b": "/lc/"}, 0.9999999701976788, 0.3044235921021081], + }, + {a: true} + ], + {"a.0": {$gte: ""}}, + [{a: ["ya", {"b": "/lc/"}, 0.9999999701976788, 0.3044235921021081]}], + "a", + [1.7881393632457332e-7, true]); + +// We test arrays wrapping objects and objects wrapping arrays as different ways of achieving +// multiple bounds on 'a.b'. +runTest([{a: {b: [3, 4]}}, {a: [{b: 1}, {b: 2}]}], + {"a.b": {$lt: 2}}, + [{a: [{b: 1}, {b: 2}]}], + "a", + [{b: [3, 4]}, [{b: 1}, {b: 2}]]); +runTest([{a: {b: [3, 4]}}, {a: [{b: 1}, {b: 2}]}], + {"a.b": {$gte: 3}}, + [{a: {b: [3, 4]}}], + "a", + [{b: [3, 4]}, [{b: 1}, {b: 2}]]); +})(); diff --git a/src/mongo/db/matcher/expression_internal_expr_comparison_test.cpp b/src/mongo/db/matcher/expression_internal_expr_comparison_test.cpp index 9085008ad75..767ecc4df2a 100644 --- a/src/mongo/db/matcher/expression_internal_expr_comparison_test.cpp +++ b/src/mongo/db/matcher/expression_internal_expr_comparison_test.cpp @@ -182,6 +182,8 @@ TEST(InternalExprComparisonMatchExpression, CorrectlyComparesNaN) { } } +// Note we depend on this for our ability to rewrite predicates on timeseries collections where the +// buckets have mixed types. TEST(InternalExprComparisonMatchExpression, AlwaysReturnsTrueWithLeafArrays) { BSONObj operand = BSON("x" << 2); { diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp index 950a6521fd7..7e8f33c5a6b 100644 --- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp +++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp @@ -38,9 +38,11 @@ #include <type_traits> #include "mongo/bson/bsonobj.h" +#include "mongo/bson/bsontypes.h" #include "mongo/db/exec/document_value/document.h" #include "mongo/db/matcher/expression.h" #include "mongo/db/matcher/expression_algo.h" +#include "mongo/db/matcher/expression_expr.h" #include "mongo/db/matcher/expression_geo.h" #include "mongo/db/matcher/expression_internal_bucket_geo_within.h" #include "mongo/db/matcher/expression_internal_expr_comparison.h" @@ -541,11 +543,61 @@ std::pair<BSONObj, bool> DocumentSourceInternalUnpackBucket::extractOrBuildProje return {BSONObj{}, false}; } +/* + * Creates a predicate that ensures that if there exists a subpath of matchExprPath such that the + * type of `control.min.subpath` is not the same as `control.max.subpath` then we will match that + * document. + */ +std::unique_ptr<MatchExpression> createTypeEqualityPredicate( + boost::intrusive_ptr<ExpressionContext> pExpCtx, const StringData& matchExprPath) { + FieldPath matchExprField(matchExprPath); + using namespace timeseries; + std::vector<std::unique_ptr<MatchExpression>> typeEqualityPredicates; + + // Assume that we're generating a predicate on "a.b" + for (size_t i = 0; i < matchExprField.getPathLength(); i++) { + auto minPath = std::string{kControlMinFieldNamePrefix} + matchExprField.getSubpath(i); + auto maxPath = std::string{kControlMaxFieldNamePrefix} + matchExprField.getSubpath(i); + + // This whole block adds + // {$expr: {$ne: [{$type: "$control.min.a"}, {$type: "$control.max.a"}]}} + // in order to ensure that the type of `control.min.a` and `control.max.a` are the same. + + // This produces {$expr: ... } + typeEqualityPredicates.push_back(std::make_unique<ExprMatchExpression>( + // This produces {$ne: ... } + make_intrusive<ExpressionCompare>( + pExpCtx.get(), + ExpressionCompare::CmpOp::NE, + // This produces [...] + makeVector<boost::intrusive_ptr<Expression>>( + // This produces {$type: ... } + make_intrusive<ExpressionType>( + pExpCtx.get(), + // This produces [...] + makeVector<boost::intrusive_ptr<Expression>>( + // This produces "$control.min.a" + ExpressionFieldPath::createPathFromString( + pExpCtx.get(), minPath, pExpCtx->variablesParseState))), + // This produces {$type: ... } + make_intrusive<ExpressionType>( + pExpCtx.get(), + // This produces [...] + makeVector<boost::intrusive_ptr<Expression>>( + // This produces "$control.max.a" + ExpressionFieldPath::createPathFromString( + pExpCtx.get(), maxPath, pExpCtx->variablesParseState))))), + pExpCtx)); + } + return std::make_unique<OrMatchExpression>(std::move(typeEqualityPredicates)); +} + std::unique_ptr<MatchExpression> createComparisonPredicate( const ComparisonMatchExpression* matchExpr, const BucketSpec& bucketSpec, int bucketMaxSpanSeconds, - ExpressionContext::CollationMatchesDefault collationMatchesDefault) { + ExpressionContext::CollationMatchesDefault collationMatchesDefault, + boost::intrusive_ptr<ExpressionContext> pExpCtx) { using namespace timeseries; const auto matchExprPath = matchExpr->path(); const auto matchExprData = matchExpr->getData(); @@ -625,9 +677,12 @@ std::unique_ptr<MatchExpression> createComparisonPredicate( kBucketIdFieldName, constructObjectIdValue<GTEMatchExpression>(matchExprData, bucketMaxSpanSeconds))) - : makePredicate( - MatchExprPredicate<InternalExprLTEMatchExpression>(minPath, matchExprData), - MatchExprPredicate<InternalExprGTEMatchExpression>(maxPath, matchExprData)); + : std::make_unique<OrMatchExpression>(makeVector<std::unique_ptr<MatchExpression>>( + makePredicate(MatchExprPredicate<InternalExprLTEMatchExpression>( + minPath, matchExprData), + MatchExprPredicate<InternalExprGTEMatchExpression>( + maxPath, matchExprData)), + createTypeEqualityPredicate(pExpCtx, matchExprPath))); case MatchExpression::GT: // For $gt, make a $gt predicate against 'control.max'. In addition, if the comparison @@ -647,8 +702,9 @@ std::unique_ptr<MatchExpression> createComparisonPredicate( kBucketIdFieldName, constructObjectIdValue<GTMatchExpression>(matchExprData, bucketMaxSpanSeconds))) - : makePredicate( - MatchExprPredicate<InternalExprGTMatchExpression>(maxPath, matchExprData)); + : std::make_unique<OrMatchExpression>(makeVector<std::unique_ptr<MatchExpression>>( + std::make_unique<InternalExprGTMatchExpression>(maxPath, matchExprData), + createTypeEqualityPredicate(pExpCtx, matchExprPath))); case MatchExpression::GTE: // For $gte, make a $gte predicate against 'control.max'. In addition, if the comparison @@ -668,8 +724,9 @@ std::unique_ptr<MatchExpression> createComparisonPredicate( kBucketIdFieldName, constructObjectIdValue<GTEMatchExpression>(matchExprData, bucketMaxSpanSeconds))) - : makePredicate( - MatchExprPredicate<InternalExprGTEMatchExpression>(maxPath, matchExprData)); + : std::make_unique<OrMatchExpression>(makeVector<std::unique_ptr<MatchExpression>>( + std::make_unique<InternalExprGTEMatchExpression>(maxPath, matchExprData), + createTypeEqualityPredicate(pExpCtx, matchExprPath))); case MatchExpression::LT: // For $lt, make a $lt predicate against 'control.min'. In addition, if the comparison @@ -685,8 +742,9 @@ std::unique_ptr<MatchExpression> createComparisonPredicate( kBucketIdFieldName, constructObjectIdValue<LTMatchExpression>(matchExprData, bucketMaxSpanSeconds))) - : makePredicate( - MatchExprPredicate<InternalExprLTMatchExpression>(minPath, matchExprData)); + : std::make_unique<OrMatchExpression>(makeVector<std::unique_ptr<MatchExpression>>( + std::make_unique<InternalExprLTMatchExpression>(minPath, matchExprData), + createTypeEqualityPredicate(pExpCtx, matchExprPath))); case MatchExpression::LTE: // For $lte, make a $lte predicate against 'control.min'. In addition, if the comparison @@ -702,8 +760,9 @@ std::unique_ptr<MatchExpression> createComparisonPredicate( kBucketIdFieldName, constructObjectIdValue<LTEMatchExpression>(matchExprData, bucketMaxSpanSeconds))) - : makePredicate( - MatchExprPredicate<InternalExprLTEMatchExpression>(minPath, matchExprData)); + : std::make_unique<OrMatchExpression>(makeVector<std::unique_ptr<MatchExpression>>( + std::make_unique<InternalExprLTEMatchExpression>(minPath, matchExprData), + createTypeEqualityPredicate(pExpCtx, matchExprPath))); default: MONGO_UNREACHABLE_TASSERT(5348302); @@ -731,7 +790,8 @@ DocumentSourceInternalUnpackBucket::createPredicatesOnBucketLevelField( return createComparisonPredicate(static_cast<const ComparisonMatchExpression*>(matchExpr), _bucketUnpacker.bucketSpec(), _bucketMaxSpanSeconds, - pExpCtx->collationMatchesDefault); + pExpCtx->collationMatchesDefault, + pExpCtx); } else if (matchExpr->matchType() == MatchExpression::GEO) { auto& geoExpr = static_cast<const GeoMatchExpression*>(matchExpr)->getGeoExpression(); if (geoExpr.getPred() == GeoExpression::WITHIN) { diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp index c2ad3f07280..2c307863b6c 100644 --- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp +++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp @@ -56,7 +56,9 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ->createPredicatesOnBucketLevelField(original->getMatchExpression()); ASSERT_BSONOBJ_EQ(predicate->serialize(true), - fromjson("{'control.max.a': {$_internalExprGt: 1}}")); + fromjson("{$or: [ {'control.max.a': {$_internalExprGt: 1}}," + "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," + "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -75,7 +77,9 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ->createPredicatesOnBucketLevelField(original->getMatchExpression()); ASSERT_BSONOBJ_EQ(predicate->serialize(true), - fromjson("{'control.max.a': {$_internalExprGte: 1}}")); + fromjson("{$or: [ {'control.max.a': {$_internalExprGte: 1}}," + "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," + "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -94,7 +98,9 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ->createPredicatesOnBucketLevelField(original->getMatchExpression()); ASSERT_BSONOBJ_EQ(predicate->serialize(true), - fromjson("{'control.min.a': {$_internalExprLt: 1}}")); + fromjson("{$or: [ {'control.min.a': {$_internalExprLt: 1}}," + "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," + "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -113,7 +119,9 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ->createPredicatesOnBucketLevelField(original->getMatchExpression()); ASSERT_BSONOBJ_EQ(predicate->serialize(true), - fromjson("{'control.min.a': {$_internalExprLte: 1}}")); + fromjson("{$or: [ {'control.min.a': {$_internalExprLte: 1}}," + "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," + "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -132,8 +140,10 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ->createPredicatesOnBucketLevelField(original->getMatchExpression()); ASSERT_BSONOBJ_EQ(predicate->serialize(true), - fromjson("{$and: [{'control.min.a': {$_internalExprLte: 1}}, " - "{'control.max.a': {$_internalExprGte: 1}}]}")); + fromjson("{$or: [ {$and:[{'control.min.a': {$_internalExprLte: 1}}," + "{'control.max.a': {$_internalExprGte: 1}}]}," + "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," + "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -152,8 +162,12 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ->createPredicatesOnBucketLevelField(original->getMatchExpression()); ASSERT_BSONOBJ_EQ(predicate->serialize(true), - fromjson("{$and: [{'control.max.b': {$_internalExprGt: 1}}, " - "{'control.min.a': {$_internalExprLt: 5}}]}")); + fromjson("{$and: [ {$or: [ {'control.max.b': {$_internalExprGt: 1}}," + "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.b\" ]}," + "{$type: [ \"$control.max.b\" ]} ]}} ]} ]}," + "{$or: [ {'control.min.a': {$_internalExprLt: 5}}," + "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," + "{$type: [ \"$control.max.a\" ]} ]}} ]} ]} ]}")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -190,7 +204,9 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ->createPredicatesOnBucketLevelField(original->getMatchExpression()); ASSERT_BSONOBJ_EQ(predicate->serialize(true), - fromjson("{$and: [{'control.max.b': {$_internalExprGt: 1}}]}")); + fromjson("{$and: [ {$or: [ {'control.max.b': {$_internalExprGt: 1}}," + "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.b\" ]}," + "{$type: [ \"$control.max.b\" ]} ]}} ]} ]} ]}")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -210,9 +226,15 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ->createPredicatesOnBucketLevelField(original->getMatchExpression()); ASSERT_BSONOBJ_EQ(predicate->serialize(true), - fromjson("{$and: [{'control.max.b': {$_internalExprGte: 2}}, {$and: " - "[{'control.max.b': {$_internalExprGt: 1}}, " - "{'control.min.a': {$_internalExprLt: 5}}]}]}")); + fromjson("{$and: [ {$or: [ {'control.max.b': {$_internalExprGte: 2}}," + "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.b\" ]}," + "{$type: [ \"$control.max.b\" ]} ]}} ]} ]}," + "{$and: [ {$or: [ {'control.max.b': {$_internalExprGt: 1}}," + "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.b\" ]}," + "{$type: [ \"$control.max.b\" ]} ]}} ]} ]}," + "{$or: [ {'control.min.a': {$_internalExprLt: 5}}," + "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," + "{$type: [ \"$control.max.a\" ]} ]}} ]} ]} ]} ]}")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, @@ -231,7 +253,9 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ASSERT_EQ(stages.size(), 3U); ASSERT_BSONOBJ_EQ(stages[0].getDocument().toBson(), - fromjson("{$match: {'control.max.b': {$_internalExprGt: 1}}}")); + fromjson("{$match: {$or: [ {'control.max.b': {$_internalExprGt: 1}}," + "{$expr: {$ne: [ {$type: [ \"$control.min.b\" ]}," + "{$type: [ \"$control.max.b\" ]} ]}} ]}}")); ASSERT_BSONOBJ_EQ(stages[1].getDocument().toBson(), unpackBucketObj); ASSERT_BSONOBJ_EQ(stages[2].getDocument().toBson(), fromjson("{$match: {$and: [{b: {$gt: 1}}, {a: {$not: {$eq: 5}}}]}}")); @@ -254,8 +278,15 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ASSERT_BSONOBJ_EQ( stages[0], - fromjson("{$match: {$and: [{'control.max.b': {$_internalExprGte: 2}}, {'control.max.c': " - "{$_internalExprGt: 1}}, {'control.min.a': {$_internalExprLt: 5}}]}}")); + fromjson("{$match: {$and: [ {$or: [ {'control.max.b': {$_internalExprGte: 2}}," + "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.b\" ]}," + "{$type: [ \"$control.max.b\" ]} ]}} ]} ]}," + "{$or: [ {'control.max.c': {$_internalExprGt: 1}}," + "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.c\" ]}," + "{$type: [ \"$control.max.c\" ]} ]}} ]} ]}," + "{$or: [ {'control.min.a': {$_internalExprLt: 5}}," + "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," + "{$type: [ \"$control.max.a\" ]} ]}} ]} ]} ]}}")); ASSERT_BSONOBJ_EQ(stages[1], unpackBucketObj); ASSERT_BSONOBJ_EQ(stages[2], matchObj); } @@ -366,7 +397,9 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, ->createPredicatesOnBucketLevelField(original->getMatchExpression()); ASSERT_BSONOBJ_EQ(predicate->serialize(true), - fromjson("{$and: [{'control.max.a': {$_internalExprGt: 1}}]}")); + fromjson("{$and: [ {$or: [ {'control.max.a': {$_internalExprGt: 1}}," + "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," + "{$type: [ \"$control.max.a\" ]} ]}} ]} ]} ]}")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, OptimizeMapsTimePredicatesOnId) { diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/optimize_pipeline_test.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/optimize_pipeline_test.cpp index e1e46139ff7..de175f2e116 100644 --- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/optimize_pipeline_test.cpp +++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/optimize_pipeline_test.cpp @@ -58,8 +58,10 @@ TEST_F(OptimizePipeline, MixedMatchPushedDown) { // We should push down the $match on the metaField and the predicates on the control field. // The created $match stages should be added before $_internalUnpackBucket and merged. - ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [{meta: {$gte: 0}}, {meta: {$lte: 5}}, " - "{'control.min.a': {$_internalExprLte: 4}}]}}"), + ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [{meta: {$gte: 0}}, {meta: {$lte: 5}}," + "{$or: [ {'control.min.a': {$_internalExprLte: 4}}," + "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," + "{$type: [ \"$control.max.a\" ] } ] } } ] }]}}"), stages[0].getDocument().toBson()); ASSERT_BSONOBJ_EQ(unpack, stages[1].getDocument().toBson()); ASSERT_BSONOBJ_EQ(fromjson("{$match: {a: {$lte: 4}}}"), stages[2].getDocument().toBson()); @@ -98,7 +100,9 @@ TEST_F(OptimizePipeline, MixedMatchOnlyControlPredicatesPushedDown) { // able to map the predicate on 'x' to a predicate on the control field. auto stages = pipeline->writeExplainOps(ExplainOptions::Verbosity::kQueryPlanner); ASSERT_EQ(3u, stages.size()); - ASSERT_BSONOBJ_EQ(fromjson("{$match: {'control.min.x': {$_internalExprLte: 1}}}"), + ASSERT_BSONOBJ_EQ(fromjson("{$match: {$or: [ {'control.min.x': {$_internalExprLte: 1}}," + "{$expr: {$ne: [ {$type: [ \"$control.min.x\" ]}," + "{$type: [ \"$control.max.x\" ] } ] } } ] }}"), stages[0].getDocument().toBson()); ASSERT_BSONOBJ_EQ(unpack, stages[1].getDocument().toBson()); ASSERT_BSONOBJ_EQ(match, stages[2].getDocument().toBson()); @@ -141,8 +145,11 @@ TEST_F(OptimizePipeline, MultipleMatchesPushedDown) { // The created $match stages should be added before $_internalUnpackBucket and merged. auto stages = pipeline->writeExplainOps(ExplainOptions::Verbosity::kQueryPlanner); ASSERT_EQ(3u, stages.size()); - ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [{meta: {$gte: 0}}, {meta: {$lte: 5}}, " - "{'control.min.a': {$_internalExprLte: 4}}]}}"), + ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [ {meta: {$gte: 0}}," + "{meta: {$lte: 5}}," + "{$or: [ {'control.min.a': {$_internalExprLte: 4}}," + "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," + "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}}"), stages[0].getDocument().toBson()); ASSERT_BSONOBJ_EQ(unpack, stages[1].getDocument().toBson()); ASSERT_BSONOBJ_EQ(fromjson("{$match: {a: {$lte: 4}}}"), stages[2].getDocument().toBson()); @@ -165,8 +172,11 @@ TEST_F(OptimizePipeline, MultipleMatchesPushedDownWithSort) { // The created $match stages should be added before $_internalUnpackBucket and merged. auto stages = pipeline->writeExplainOps(ExplainOptions::Verbosity::kQueryPlanner); ASSERT_EQ(4u, stages.size()); - ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [{meta: {$gte: 0}}, {meta: {$lte: 5}}, " - "{'control.min.a': {$_internalExprLte: 4}}]}}"), + ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [ { meta: { $gte: 0 } }," + "{meta: { $lte: 5 } }," + "{$or: [ { 'control.min.a': { $_internalExprLte: 4 } }," + "{$expr: { $ne: [ {$type: [ \"$control.min.a\" ] }," + "{$type: [ \"$control.max.a\" ] } ] } } ] }]}}"), stages[0].getDocument().toBson()); ASSERT_BSONOBJ_EQ(unpack, stages[1].getDocument().toBson()); ASSERT_BSONOBJ_EQ(fromjson("{$match: {a: {$lte: 4}}}"), stages[2].getDocument().toBson()); @@ -232,10 +242,11 @@ TEST_F(OptimizePipeline, SortThenMixedMatchPushedDown) { // We should push down both the $sort and parts of the $match. auto serialized = pipeline->serializeToBson(); ASSERT_EQ(4u, serialized.size()); - ASSERT_BSONOBJ_EQ( - fromjson( - "{$match: {$and: [{meta: {$eq: 'abc'}}, {'control.max.a': {$_internalExprGte: 5}}]}}"), - serialized[0]); + ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [{meta: {$eq: 'abc'}}," + "{$or: [ {'control.max.a': {$_internalExprGte: 5}}," + "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," + "{$type: [ \"$control.max.a\" ]} ]}} ]} ]} ]}}"), + serialized[0]); ASSERT_BSONOBJ_EQ(fromjson("{$sort: {meta: -1}}"), serialized[1]); ASSERT_BSONOBJ_EQ(unpack, serialized[2]); ASSERT_BSONOBJ_EQ(fromjson("{$match: {a: {$gte: 5}}}"), serialized[3]); @@ -295,10 +306,11 @@ TEST_F(OptimizePipeline, MixedMatchThenProjectPushedDown) { // We can push down part of the $match and use dependency analysis on the end of the pipeline. auto stages = pipeline->writeExplainOps(ExplainOptions::Verbosity::kQueryPlanner); ASSERT_EQ(4u, stages.size()); - ASSERT_BSONOBJ_EQ( - fromjson( - "{$match: {$and: [{meta: {$eq: 'abc'}}, {'control.min.a': {$_internalExprLte: 4}}]}}"), - stages[0].getDocument().toBson()); + ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [{meta: {$eq: 'abc'}}," + "{$or: [ {'control.min.a': { $_internalExprLte: 4 } }," + "{$expr: { $ne: [ {$type: [ \"$control.min.a\" ] }," + "{$type: [ \"$control.max.a\" ] } ] } } ] } ]}}"), + stages[0].getDocument().toBson()); ASSERT_BSONOBJ_EQ(fromjson("{$_internalUnpackBucket: { include: ['_id', 'a', 'x'], timeField: " "'time', metaField: 'myMeta', bucketMaxSpanSeconds: 3600}}"), stages[1].getDocument().toBson()); @@ -342,10 +354,11 @@ TEST_F(OptimizePipeline, ProjectThenMixedMatchPushedDown) { // We should push down part of the $match and do dependency analysis on the rest. auto stages = pipeline->writeExplainOps(ExplainOptions::Verbosity::kQueryPlanner); ASSERT_EQ(4u, stages.size()); - ASSERT_BSONOBJ_EQ( - fromjson( - "{$match: {$and: [{meta: {$eq: 'abc'}}, {'control.min.a': {$_internalExprLte: 4}}]}}"), - stages[0].getDocument().toBson()); + ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [{meta: {$eq: \"abc\"}}," + "{$or: [ {'control.min.a': {$_internalExprLte: 4}}," + "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ] }," + "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}}"), + stages[0].getDocument().toBson()); ASSERT_BSONOBJ_EQ( fromjson("{$_internalUnpackBucket: { include: ['_id', 'a', 'x', 'myMeta'], timeField: " "'time', metaField: 'myMeta', bucketMaxSpanSeconds: 3600}}"), @@ -372,9 +385,14 @@ TEST_F(OptimizePipeline, ProjectWithRenameThenMixedMatchPushedDown) { // We should push down part of the $match and do dependency analysis on the end of the pipeline. auto stages = pipeline->writeExplainOps(ExplainOptions::Verbosity::kQueryPlanner); ASSERT_EQ(4u, stages.size()); - ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [{'control.max.y': {$_internalExprGte: 'abc'}}, " - "{'control.min.a': {$_internalExprLte: 4}}]}}"), - stages[0].getDocument().toBson()); + ASSERT_BSONOBJ_EQ( + fromjson("{$match: {$and: [{$or: [ {'control.max.y': {$_internalExprGte: \"abc\"}}," + "{$expr: {$ne: [ {$type: [ \"$control.min.y\" ]}," + "{$type: [ \"$control.max.y\" ]} ]}} ]}," + "{$or: [ {'control.min.a': {$_internalExprLte: 4}}," + "{$expr: {$ne: [ {$type: [ \"$control.min.a\" ] }," + "{$type: [ \"$control.max.a\" ]} ]}} ]} ]}}"), + stages[0].getDocument().toBson()); ASSERT_BSONOBJ_EQ(fromjson("{$_internalUnpackBucket: { include: ['_id', 'a', 'y'], timeField: " "'time', metaField: 'myMeta', bucketMaxSpanSeconds: 3600}}"), stages[1].getDocument().toBson()); diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp index 0dbd5fbbe52..3405518a6a0 100644 --- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp +++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp @@ -262,7 +262,9 @@ TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, OptimizeSplitsMatchAndMaps auto serialized = pipeline->serializeToBson(); ASSERT_EQ(3u, serialized.size()); ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [{meta: {$gte: 0}}, {meta: {$lte: 5}}, " - "{'control.min.a': {$_internalExprLte: 4}}]}}"), + "{$or: [ {'control.min.a': {$_internalExprLte: 4}}," + "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.a\" ]}," + "{$type: [ \"$control.max.a\" ]} ]}} ]} ]} ]}}"), serialized[0]); ASSERT_BSONOBJ_EQ(unpack, serialized[1]); ASSERT_BSONOBJ_EQ(fromjson("{$match: {a: {$lte: 4}}}"), serialized[2]); @@ -302,7 +304,9 @@ TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, // map the predicate on 'x' to a predicate on the control field. auto serialized = pipeline->serializeToBson(); ASSERT_EQ(3u, serialized.size()); - ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [{'control.min.x': {$_internalExprLte: 1}}]}}"), + ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [{$or: [ {'control.min.x': {$_internalExprLte: 1}}," + "{$or: [ {$expr: {$ne: [ {$type: [ \"$control.min.x\" ]}," + "{$type: [ \"$control.max.x\" ]} ]}} ]} ]} ]}}"), serialized[0]); ASSERT_BSONOBJ_EQ(unpack, serialized[1]); ASSERT_BSONOBJ_EQ(match, serialized[2]); diff --git a/src/mongo/db/pipeline/expression.cpp b/src/mongo/db/pipeline/expression.cpp index d94d5c0e095..27319aed5cc 100644 --- a/src/mongo/db/pipeline/expression.cpp +++ b/src/mongo/db/pipeline/expression.cpp @@ -1044,6 +1044,19 @@ boost::intrusive_ptr<Expression> ExpressionCond::optimize() { return this; } +boost::intrusive_ptr<Expression> ExpressionCond::create(ExpressionContext* const expCtx, + boost::intrusive_ptr<Expression> ifExp, + boost::intrusive_ptr<Expression> elseExpr, + boost::intrusive_ptr<Expression> thenExpr) { + intrusive_ptr<ExpressionCond> ret = new ExpressionCond(expCtx); + ret->_children.resize(3); + + ret->_children[0] = ifExp; + ret->_children[1] = elseExpr; + ret->_children[2] = thenExpr; + return ret; +} + intrusive_ptr<Expression> ExpressionCond::parse(ExpressionContext* const expCtx, BSONElement expr, const VariablesParseState& vps) { diff --git a/src/mongo/db/pipeline/expression.h b/src/mongo/db/pipeline/expression.h index 28b51d4495c..c77f8a4076f 100644 --- a/src/mongo/db/pipeline/expression.h +++ b/src/mongo/db/pipeline/expression.h @@ -1325,6 +1325,12 @@ public: const char* getOpName() const final; boost::intrusive_ptr<Expression> optimize() final; + static boost::intrusive_ptr<Expression> create( + ExpressionContext* expCtx, + boost::intrusive_ptr<Expression> ifExp, + boost::intrusive_ptr<Expression> elseExpr, + boost::intrusive_ptr<Expression> thenExpr = nullptr); + static boost::intrusive_ptr<Expression> parse(ExpressionContext* expCtx, BSONElement expr, const VariablesParseState& vps); @@ -2202,6 +2208,13 @@ public: class ExpressionMap final : public Expression { public: + ExpressionMap( + ExpressionContext* expCtx, + const std::string& varName, // name of variable to set + Variables::Id varId, // id of variable to set + boost::intrusive_ptr<Expression> input, // yields array to iterate + boost::intrusive_ptr<Expression> each); // yields results to be added to output array + boost::intrusive_ptr<Expression> optimize() final; Value serialize(bool explain) const final; Value evaluate(const Document& root, Variables* variables) const final; @@ -2225,13 +2238,6 @@ protected: void _doAddDependencies(DepsTracker* deps) const final; private: - ExpressionMap( - ExpressionContext* expCtx, - const std::string& varName, // name of variable to set - Variables::Id varId, // id of variable to set - boost::intrusive_ptr<Expression> input, // yields array to iterate - boost::intrusive_ptr<Expression> each); // yields results to be added to output array - std::string _varName; Variables::Id _varId; boost::intrusive_ptr<Expression>& _input; |