diff options
author | Alya Berciu <alya.berciu@mongodb.com> | 2022-07-29 15:33:09 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-07-29 15:58:09 +0000 |
commit | f5d28a6e731319f7ef6c2745df65e62fdce885f6 (patch) | |
tree | f9749be0baf0c02099c4611cbf9b17d4134bd8a9 | |
parent | f1da0586d6327666d213fbea9b1c1953a7bb0dd8 (diff) | |
download | mongo-f5d28a6e731319f7ef6c2745df65e62fdce885f6.tar.gz |
SERVER-66072 Fix dependency analysis for $match on $expr with $rand
-rw-r--r-- | etc/backports_required_for_multiversion_tests.yml | 4 | ||||
-rw-r--r-- | jstests/aggregation/match_no_swap_rand.js | 161 | ||||
-rw-r--r-- | src/mongo/db/matcher/expression_algo.cpp | 23 | ||||
-rw-r--r-- | src/mongo/db/matcher/expression_algo_test.cpp | 25 |
4 files changed, 203 insertions, 10 deletions
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml index 8bca31e37a7..2938a496e67 100644 --- a/etc/backports_required_for_multiversion_tests.yml +++ b/etc/backports_required_for_multiversion_tests.yml @@ -98,6 +98,8 @@ last-continuous: test_file: jstests/core/internal_apply_oplog_update.js - ticket: SERVER-67532 test_file: jstests/replsets/optime.js + - ticket: SERVER-66072 + test_file: jstests/aggregation/match_no_swap_rand.js # Tests that should only be excluded from particular suites should be listed under that suite. suites: @@ -356,6 +358,8 @@ last-lts: test_file: jstests/core/internal_apply_oplog_update.js - ticket: SERVER-67532 test_file: jstests/replsets/optime.js + - ticket: SERVER-66072 + test_file: jstests/aggregation/match_no_swap_rand.js # Tests that should only be excluded from particular suites should be listed under that suite. suites: diff --git a/jstests/aggregation/match_no_swap_rand.js b/jstests/aggregation/match_no_swap_rand.js new file mode 100644 index 00000000000..a3946c7e56a --- /dev/null +++ b/jstests/aggregation/match_no_swap_rand.js @@ -0,0 +1,161 @@ +/** + * Test that $rand (and by extension, $sampleRate) doesn't get pushed down. + * @tags: [ + * # Tests the 'stages' field of the explain output which is hidden beneath each shard's name when + * # run against sharded collections. + * assumes_unsharded_collection, + * # Tests the explain output, so does not work when wrapped in a facet. + * do_not_wrap_aggregations_in_facets, + * # Explicitly testing optimization. + * requires_pipeline_optimization, + * ] + */ +(function() { +"use strict"; + +load("jstests/libs/analyze_plan.js"); + +function getWinningPlanForPipeline({coll, pipeline}) { + const explain = assert.commandWorked(coll.explain().aggregate(pipeline)); + if ("queryPlanner" in explain) { + return getWinningPlan(explain.queryPlanner); + } + return getWinningPlan(explain.stages[0].$cursor.queryPlanner); +} + +function assertScanFilterEq({coll, pipeline, filter}) { + const winningPlan = getWinningPlanForPipeline({coll, pipeline}); + const collScan = getPlanStage(winningPlan, "COLLSCAN"); + assert(collScan); + // Sometimes explain will have 'filter' set to an empty object, other times there will be no + // 'filter'. If we are expecting there to be no filter on the COLLSCAN, either is acceptable. + if (filter) { + assert.docEq(collScan.filter, filter); + } else { + assert(!collScan.filter || Object.keys(collScan.filter).length == 0); + } +} + +// Test that a $match with a random expression should not be pushed past a $group. +{ + const coll = db[jsTestName()]; + coll.drop(); + + let bulk = coll.initializeUnorderedBulkOp(); + for (var i = 0; i < 100; i++) { + bulk.insert({a: {b: i % 5, c: i}}); + } + assert.commandWorked(bulk.execute()); + + assertScanFilterEq({ + coll, + pipeline: + [{$group: {_id: "$a.b", first: {$first: "$$CURRENT"}}}, {$match: {$sampleRate: 0.25}}] + }); + + assertScanFilterEq({ + coll, + pipeline: [ + {$group: {_id: "$a.b", first: {$first: "$$CURRENT"}}}, + {$replaceRoot: {newRoot: '$first'}}, + {$match: {$sampleRate: 0.25}}, + ] + }); + + assertScanFilterEq({ + coll, + pipeline: [ + {$group: {_id: "$a.b", first: {$first: "$$CURRENT"}}}, + {$match: {$sampleRate: 0.25}}, + {$replaceRoot: {newRoot: '$first'}}, + ] + }); + + assertScanFilterEq({ + coll, + pipeline: [ + {$match: {c: {$gt: 500}}}, + {$group: {_id: "$a.b", first: {$first: "$$CURRENT"}}}, + {$match: {$sampleRate: 0.25}}, + ], + filter: {c: {$gt: 500}} + }); + + assertScanFilterEq({ + coll, + pipeline: [ + {$match: {c: {$gt: 500}}}, + // A $lookup that split $match exprs can push down past. + { + $lookup: { + as: "joinedC", + from: coll.getName(), + localField: "c", + foreignField: "c", + } + }, + { + $match: { + $and: [ + {$expr: {$lt: ["$c", 800]}}, // Should split me out. + {$expr: {$lt: [{$rand: {}}, {$const: 0.25}]}} // Can't split me out. + ] + } + }, + ], + filter: { + $and: [ + {c: {$gt: 500}}, + {$expr: {$lt: ["$c", {$const: 800}]}}, + {c: {$_internalExprLt: 800}}, + ] + } + }); +} + +// Test that a $match with a random expression should not be pushed past $_internalUnpackBucket. +{ + const collName = jsTestName() + "_ts"; + db[collName].drop(); + assert.commandWorked(db.createCollection(collName, { + timeseries: { + timeField: "t", + metaField: "m", + } + })); + const coll = db[collName]; + + let bulk = coll.initializeUnorderedBulkOp(); + for (var i = 0; i < 100; i++) { + bulk.insert({t: new Date(), m: i}); + } + assert.commandWorked(bulk.execute()); + + assertScanFilterEq({ + coll, + pipeline: [ + {$match: {$sampleRate: 0.25}}, + ] + }); + + assertScanFilterEq({ + coll, + pipeline: [ + { + $match: { + $and: [ + {$expr: {$lt: ["$m", 50]}}, // Should split me out. + {$expr: {$lt: [{$rand: {}}, {$const: 0.25}]}} // Can't split me out. + ] + } + }, + ], + filter: { + $and: [ + {$expr: {$lt: ["$meta", {$const: 50}]}}, + {meta: {$_internalExprLt: 50}}, + ] + } + }); +} +}()); diff --git a/src/mongo/db/matcher/expression_algo.cpp b/src/mongo/db/matcher/expression_algo.cpp index 11a280eb9ad..3696c4fc274 100644 --- a/src/mongo/db/matcher/expression_algo.cpp +++ b/src/mongo/db/matcher/expression_algo.cpp @@ -420,14 +420,16 @@ bool isIndependentOf(const MatchExpression& expr, const std::set<std::string>& p auto depsTracker = DepsTracker{}; expr.addDependencies(&depsTracker); - return std::none_of( - depsTracker.fields.begin(), depsTracker.fields.end(), [&pathSet](auto&& field) { - return pathSet.find(field) != pathSet.end() || - std::any_of(pathSet.begin(), pathSet.end(), [&field](auto&& path) { - return expression::isPathPrefixOf(field, path) || - expression::isPathPrefixOf(path, field); - }); - }); + // Match expressions that generate random numbers can't be safely split out and pushed down. + return !depsTracker.needRandomGenerator && + std::none_of( + depsTracker.fields.begin(), depsTracker.fields.end(), [&pathSet](auto&& field) { + return pathSet.find(field) != pathSet.end() || + std::any_of(pathSet.begin(), pathSet.end(), [&field](auto&& path) { + return expression::isPathPrefixOf(field, path) || + expression::isPathPrefixOf(path, field); + }); + }); } bool isOnlyDependentOn(const MatchExpression& expr, const std::set<std::string>& roots) { @@ -439,8 +441,9 @@ bool isOnlyDependentOn(const MatchExpression& expr, const std::set<std::string>& auto depsTracker = DepsTracker{}; expr.addDependencies(&depsTracker); - return std::all_of( - depsTracker.fields.begin(), depsTracker.fields.end(), [&roots](auto&& field) { + // Match expressions that generate random numbers can't be safely split out and pushed down. + return !depsTracker.needRandomGenerator && + std::all_of(depsTracker.fields.begin(), depsTracker.fields.end(), [&roots](auto&& field) { auto fieldRef = FieldRef{field}; return !fieldRef.empty() && roots.find(fieldRef.getPart(0).toString()) != roots.end(); }); diff --git a/src/mongo/db/matcher/expression_algo_test.cpp b/src/mongo/db/matcher/expression_algo_test.cpp index 685500716b4..f9e965e46a5 100644 --- a/src/mongo/db/matcher/expression_algo_test.cpp +++ b/src/mongo/db/matcher/expression_algo_test.cpp @@ -1274,6 +1274,31 @@ TEST(SplitMatchExpression, ShouldMoveIndependentPredicateWhenThereAreMultipleRen ASSERT_FALSE(splitExpr.second.get()); } +TEST(SplitMatchExpression, ShouldNotSplitWhenRand) { + const auto randExpr = "{$expr: {$lt: [{$rand: {}}, {$const: 0.25}]}}"; + const auto assertMatchDoesNotSplit = [&](const std::string& exprString) { + BSONObj matchPredicate = fromjson(exprString); + boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest()); + auto matcher = MatchExpressionParser::parse(matchPredicate, std::move(expCtx)); + ASSERT_OK(matcher.getStatus()); + + auto&& [split, residual] = + expression::splitMatchExpressionBy(std::move(matcher.getValue()), {}, {}); + ASSERT_FALSE(split.get()); + ASSERT_TRUE(residual.get()); + + BSONObjBuilder oldBob; + residual->serialize(&oldBob, true); + ASSERT_BSONOBJ_EQ(oldBob.obj(), fromjson(randExpr)); + }; + + // We should not push down a $match with a $rand expression. + assertMatchDoesNotSplit(randExpr); + + // This is equivalent to 'randExpr'. + assertMatchDoesNotSplit("{$sampleRate: 0.25}"); +} + TEST(ApplyRenamesToExpression, ShouldApplyBasicRenamesForAMatchWithExpr) { BSONObj matchPredicate = fromjson("{$expr: {$eq: ['$a.b', '$c']}}"); boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest()); |