summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlya Berciu <alya.berciu@mongodb.com>2022-07-29 15:33:09 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-07-29 15:58:09 +0000
commitf5d28a6e731319f7ef6c2745df65e62fdce885f6 (patch)
treef9749be0baf0c02099c4611cbf9b17d4134bd8a9
parentf1da0586d6327666d213fbea9b1c1953a7bb0dd8 (diff)
downloadmongo-f5d28a6e731319f7ef6c2745df65e62fdce885f6.tar.gz
SERVER-66072 Fix dependency analysis for $match on $expr with $rand
-rw-r--r--etc/backports_required_for_multiversion_tests.yml4
-rw-r--r--jstests/aggregation/match_no_swap_rand.js161
-rw-r--r--src/mongo/db/matcher/expression_algo.cpp23
-rw-r--r--src/mongo/db/matcher/expression_algo_test.cpp25
4 files changed, 203 insertions, 10 deletions
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml
index 8bca31e37a7..2938a496e67 100644
--- a/etc/backports_required_for_multiversion_tests.yml
+++ b/etc/backports_required_for_multiversion_tests.yml
@@ -98,6 +98,8 @@ last-continuous:
test_file: jstests/core/internal_apply_oplog_update.js
- ticket: SERVER-67532
test_file: jstests/replsets/optime.js
+ - ticket: SERVER-66072
+ test_file: jstests/aggregation/match_no_swap_rand.js
# Tests that should only be excluded from particular suites should be listed under that suite.
suites:
@@ -356,6 +358,8 @@ last-lts:
test_file: jstests/core/internal_apply_oplog_update.js
- ticket: SERVER-67532
test_file: jstests/replsets/optime.js
+ - ticket: SERVER-66072
+ test_file: jstests/aggregation/match_no_swap_rand.js
# Tests that should only be excluded from particular suites should be listed under that suite.
suites:
diff --git a/jstests/aggregation/match_no_swap_rand.js b/jstests/aggregation/match_no_swap_rand.js
new file mode 100644
index 00000000000..a3946c7e56a
--- /dev/null
+++ b/jstests/aggregation/match_no_swap_rand.js
@@ -0,0 +1,161 @@
+/**
+ * Test that $rand (and by extension, $sampleRate) doesn't get pushed down.
+ * @tags: [
+ * # Tests the 'stages' field of the explain output which is hidden beneath each shard's name when
+ * # run against sharded collections.
+ * assumes_unsharded_collection,
+ * # Tests the explain output, so does not work when wrapped in a facet.
+ * do_not_wrap_aggregations_in_facets,
+ * # Explicitly testing optimization.
+ * requires_pipeline_optimization,
+ * ]
+ */
+(function() {
+"use strict";
+
+load("jstests/libs/analyze_plan.js");
+
+function getWinningPlanForPipeline({coll, pipeline}) {
+ const explain = assert.commandWorked(coll.explain().aggregate(pipeline));
+ if ("queryPlanner" in explain) {
+ return getWinningPlan(explain.queryPlanner);
+ }
+ return getWinningPlan(explain.stages[0].$cursor.queryPlanner);
+}
+
+function assertScanFilterEq({coll, pipeline, filter}) {
+ const winningPlan = getWinningPlanForPipeline({coll, pipeline});
+ const collScan = getPlanStage(winningPlan, "COLLSCAN");
+ assert(collScan);
+ // Sometimes explain will have 'filter' set to an empty object, other times there will be no
+ // 'filter'. If we are expecting there to be no filter on the COLLSCAN, either is acceptable.
+ if (filter) {
+ assert.docEq(collScan.filter, filter);
+ } else {
+ assert(!collScan.filter || Object.keys(collScan.filter).length == 0);
+ }
+}
+
+// Test that a $match with a random expression should not be pushed past a $group.
+{
+ const coll = db[jsTestName()];
+ coll.drop();
+
+ let bulk = coll.initializeUnorderedBulkOp();
+ for (var i = 0; i < 100; i++) {
+ bulk.insert({a: {b: i % 5, c: i}});
+ }
+ assert.commandWorked(bulk.execute());
+
+ assertScanFilterEq({
+ coll,
+ pipeline:
+ [{$group: {_id: "$a.b", first: {$first: "$$CURRENT"}}}, {$match: {$sampleRate: 0.25}}]
+ });
+
+ assertScanFilterEq({
+ coll,
+ pipeline: [
+ {$group: {_id: "$a.b", first: {$first: "$$CURRENT"}}},
+ {$replaceRoot: {newRoot: '$first'}},
+ {$match: {$sampleRate: 0.25}},
+ ]
+ });
+
+ assertScanFilterEq({
+ coll,
+ pipeline: [
+ {$group: {_id: "$a.b", first: {$first: "$$CURRENT"}}},
+ {$match: {$sampleRate: 0.25}},
+ {$replaceRoot: {newRoot: '$first'}},
+ ]
+ });
+
+ assertScanFilterEq({
+ coll,
+ pipeline: [
+ {$match: {c: {$gt: 500}}},
+ {$group: {_id: "$a.b", first: {$first: "$$CURRENT"}}},
+ {$match: {$sampleRate: 0.25}},
+ ],
+ filter: {c: {$gt: 500}}
+ });
+
+ assertScanFilterEq({
+ coll,
+ pipeline: [
+ {$match: {c: {$gt: 500}}},
+ // A $lookup that split $match exprs can push down past.
+ {
+ $lookup: {
+ as: "joinedC",
+ from: coll.getName(),
+ localField: "c",
+ foreignField: "c",
+ }
+ },
+ {
+ $match: {
+ $and: [
+ {$expr: {$lt: ["$c", 800]}}, // Should split me out.
+ {$expr: {$lt: [{$rand: {}}, {$const: 0.25}]}} // Can't split me out.
+ ]
+ }
+ },
+ ],
+ filter: {
+ $and: [
+ {c: {$gt: 500}},
+ {$expr: {$lt: ["$c", {$const: 800}]}},
+ {c: {$_internalExprLt: 800}},
+ ]
+ }
+ });
+}
+
+// Test that a $match with a random expression should not be pushed past $_internalUnpackBucket.
+{
+ const collName = jsTestName() + "_ts";
+ db[collName].drop();
+ assert.commandWorked(db.createCollection(collName, {
+ timeseries: {
+ timeField: "t",
+ metaField: "m",
+ }
+ }));
+ const coll = db[collName];
+
+ let bulk = coll.initializeUnorderedBulkOp();
+ for (var i = 0; i < 100; i++) {
+ bulk.insert({t: new Date(), m: i});
+ }
+ assert.commandWorked(bulk.execute());
+
+ assertScanFilterEq({
+ coll,
+ pipeline: [
+ {$match: {$sampleRate: 0.25}},
+ ]
+ });
+
+ assertScanFilterEq({
+ coll,
+ pipeline: [
+ {
+ $match: {
+ $and: [
+ {$expr: {$lt: ["$m", 50]}}, // Should split me out.
+ {$expr: {$lt: [{$rand: {}}, {$const: 0.25}]}} // Can't split me out.
+ ]
+ }
+ },
+ ],
+ filter: {
+ $and: [
+ {$expr: {$lt: ["$meta", {$const: 50}]}},
+ {meta: {$_internalExprLt: 50}},
+ ]
+ }
+ });
+}
+}());
diff --git a/src/mongo/db/matcher/expression_algo.cpp b/src/mongo/db/matcher/expression_algo.cpp
index 11a280eb9ad..3696c4fc274 100644
--- a/src/mongo/db/matcher/expression_algo.cpp
+++ b/src/mongo/db/matcher/expression_algo.cpp
@@ -420,14 +420,16 @@ bool isIndependentOf(const MatchExpression& expr, const std::set<std::string>& p
auto depsTracker = DepsTracker{};
expr.addDependencies(&depsTracker);
- return std::none_of(
- depsTracker.fields.begin(), depsTracker.fields.end(), [&pathSet](auto&& field) {
- return pathSet.find(field) != pathSet.end() ||
- std::any_of(pathSet.begin(), pathSet.end(), [&field](auto&& path) {
- return expression::isPathPrefixOf(field, path) ||
- expression::isPathPrefixOf(path, field);
- });
- });
+ // Match expressions that generate random numbers can't be safely split out and pushed down.
+ return !depsTracker.needRandomGenerator &&
+ std::none_of(
+ depsTracker.fields.begin(), depsTracker.fields.end(), [&pathSet](auto&& field) {
+ return pathSet.find(field) != pathSet.end() ||
+ std::any_of(pathSet.begin(), pathSet.end(), [&field](auto&& path) {
+ return expression::isPathPrefixOf(field, path) ||
+ expression::isPathPrefixOf(path, field);
+ });
+ });
}
bool isOnlyDependentOn(const MatchExpression& expr, const std::set<std::string>& roots) {
@@ -439,8 +441,9 @@ bool isOnlyDependentOn(const MatchExpression& expr, const std::set<std::string>&
auto depsTracker = DepsTracker{};
expr.addDependencies(&depsTracker);
- return std::all_of(
- depsTracker.fields.begin(), depsTracker.fields.end(), [&roots](auto&& field) {
+ // Match expressions that generate random numbers can't be safely split out and pushed down.
+ return !depsTracker.needRandomGenerator &&
+ std::all_of(depsTracker.fields.begin(), depsTracker.fields.end(), [&roots](auto&& field) {
auto fieldRef = FieldRef{field};
return !fieldRef.empty() && roots.find(fieldRef.getPart(0).toString()) != roots.end();
});
diff --git a/src/mongo/db/matcher/expression_algo_test.cpp b/src/mongo/db/matcher/expression_algo_test.cpp
index 685500716b4..f9e965e46a5 100644
--- a/src/mongo/db/matcher/expression_algo_test.cpp
+++ b/src/mongo/db/matcher/expression_algo_test.cpp
@@ -1274,6 +1274,31 @@ TEST(SplitMatchExpression, ShouldMoveIndependentPredicateWhenThereAreMultipleRen
ASSERT_FALSE(splitExpr.second.get());
}
+TEST(SplitMatchExpression, ShouldNotSplitWhenRand) {
+ const auto randExpr = "{$expr: {$lt: [{$rand: {}}, {$const: 0.25}]}}";
+ const auto assertMatchDoesNotSplit = [&](const std::string& exprString) {
+ BSONObj matchPredicate = fromjson(exprString);
+ boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
+ auto matcher = MatchExpressionParser::parse(matchPredicate, std::move(expCtx));
+ ASSERT_OK(matcher.getStatus());
+
+ auto&& [split, residual] =
+ expression::splitMatchExpressionBy(std::move(matcher.getValue()), {}, {});
+ ASSERT_FALSE(split.get());
+ ASSERT_TRUE(residual.get());
+
+ BSONObjBuilder oldBob;
+ residual->serialize(&oldBob, true);
+ ASSERT_BSONOBJ_EQ(oldBob.obj(), fromjson(randExpr));
+ };
+
+ // We should not push down a $match with a $rand expression.
+ assertMatchDoesNotSplit(randExpr);
+
+ // This is equivalent to 'randExpr'.
+ assertMatchDoesNotSplit("{$sampleRate: 0.25}");
+}
+
TEST(ApplyRenamesToExpression, ShouldApplyBasicRenamesForAMatchWithExpr) {
BSONObj matchPredicate = fromjson("{$expr: {$eq: ['$a.b', '$c']}}");
boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());