author     David Percy <david.percy@mongodb.com>  2021-01-29 01:12:21 +0000
committer  David Percy <david.percy@mongodb.com>  2021-06-24 14:54:08 +0000
commit     9a313631ab20d2996811bc7cb3b4ccaec6ba5a82 (patch)
tree       e656270bc9941371087180024a4856de3510ca93
parent     6b14a422616c84e5f8eab8c3bf81618bcf7ebc17 (diff)
download   mongo-9a313631ab20d2996811bc7cb3b4ccaec6ba5a82.tar.gz
SERVER-54128 Don't push down expressions past sort+limit
-rw-r--r--  jstests/aggregation/bugs/expression_swap_limit.js  |  82
-rw-r--r--  jstests/aggregation/bugs/server6192_server6193.js  |  67
-rw-r--r--  jstests/aggregation/optimize_away_pipeline.js      |  41
-rw-r--r--  src/mongo/db/pipeline/pipeline_d.cpp                |  39
4 files changed, 185 insertions(+), 44 deletions(-)
diff --git a/jstests/aggregation/bugs/expression_swap_limit.js b/jstests/aggregation/bugs/expression_swap_limit.js
new file mode 100644
index 00000000000..eef11dea51d
--- /dev/null
+++ b/jstests/aggregation/bugs/expression_swap_limit.js
@@ -0,0 +1,82 @@
+/**
+ * Pushing down computed projection before sort+limit can cause the projection to be evaluated on
+ * invalid documents and throw exceptions. This test checks that we do not push computed projection
+ * before sort+limit, preventing valid queries from failing.
+ *
+ * This test was intended to reproduce a bug, SERVER-54128.
+ */
+(function() {
+
+const coll = db.expression_swap_limit;
+coll.drop();
+
+const NUM_INVALID_DOCS = 10;
+
+const docs = Array.from({length: NUM_INVALID_DOCS}, (_, i) => ({_id: i, a: ""}));
+docs.push({_id: 99, a: 123});
+coll.insert(docs);
+
+coll.createIndex({some_other_field: 1});
+
+const predicate = {
+ $or: [
+ // One branch of the $or needs to have more than one relevant index.
+ {_id: {$lt: 9999}, some_other_field: {$ne: 3}},
+ // The other branch doesn't matter: it's only here to prevent the $or being
+ // optimized out.
+ {this_predicate_matches_nothing: true},
+ ]
+};
+const sortSpec = {
+ _id: -1
+};
+const oppositeSortSpec = {
+ _id: +1
+};
+const projection = {
+ _id: 1,
+ b: {$round: "$a"},
+};
+const pipeline1 = [
+ // We need a rooted $or to trigger the SubplanStage.
+ // Instead of one MultiPlanStage for the whole query, we get one
+ // per branch of the $or.
+ {$match: predicate},
+ // Next we need a $sort+$limit. These two stages unambiguously select _id: 99.
+ // From the user's point of view, the $limit returns a bag of 1 document.
+ {$sort: sortSpec},
+ {$limit: 1},
+ // This projection raises an error if we evaluate it on any document other than
+ // _id: 99, because that is the only document where 'a' is numeric.
+ {$project: projection},
+];
+{
+ // The pipeline should succeed.
+ const aggResult = coll.aggregate(pipeline1).toArray();
+ assert.docEq(aggResult, [{_id: 99, b: 123}]);
+
+ // The pipeline should succeed without pushing down to find.
+ const noOptResult =
+ coll.aggregate([{$_internalInhibitOptimization: {}}].concat(pipeline1)).toArray();
+ assert.docEq(noOptResult, [{_id: 99, b: 123}]);
+}
+
+// Similarly, we can select the 1 valid document by flipping the sort and skipping
+// all but one document.
+const pipeline2 = [
+ {$match: predicate},
+ {$sort: oppositeSortSpec},
+ {$skip: NUM_INVALID_DOCS},
+ {$project: projection},
+];
+{
+ // The pipeline should succeed.
+ const aggResult = coll.aggregate(pipeline2).toArray();
+ assert.docEq(aggResult, [{_id: 99, b: 123}]);
+
+ // The pipeline should succeed without pushing down to find.
+ const noOptResult =
+ coll.aggregate([{$_internalInhibitOptimization: {}}].concat(pipeline2)).toArray();
+ assert.docEq(noOptResult, [{_id: 99, b: 123}]);
+}
+})();
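
For readers who want to reproduce the failure interactively, the following mongo shell sketch mirrors the query shape exercised by the new test above. The collection name is illustrative; on a server without this fix, the aggregate would be expected to throw a $round type error instead of returning the one valid document, because the computed projection could be evaluated on the documents whose 'a' is a string.

// Hypothetical interactive repro of SERVER-54128, mirroring the jstest above.
const repro = db.server54128_repro;        // illustrative collection name
repro.drop();
repro.insert([{_id: 0, a: ""}, {_id: 1, a: ""}, {_id: 99, a: 123}]);
repro.createIndex({some_other_field: 1});  // gives one $or branch a second relevant index

repro.aggregate([
    // Rooted $or so that planning goes through the SubplanStage.
    {$match: {$or: [{_id: {$lt: 9999}, some_other_field: {$ne: 3}}, {matches_nothing: true}]}},
    // $sort + $limit unambiguously select _id: 99, the only document where 'a' is numeric.
    {$sort: {_id: -1}},
    {$limit: 1},
    // $round throws on the string-valued documents if it is evaluated before the sort + limit.
    {$project: {_id: 1, b: {$round: "$a"}}},
]).toArray();  // expected result with this change: [{_id: 99, b: 123}]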
diff --git a/jstests/aggregation/bugs/server6192_server6193.js b/jstests/aggregation/bugs/server6192_server6193.js
index 7a395f28262..30103dfd0a8 100644
--- a/jstests/aggregation/bugs/server6192_server6193.js
+++ b/jstests/aggregation/bugs/server6192_server6193.js
@@ -20,41 +20,56 @@ const t = db.jstests_aggregation_server6192;
t.drop();
assert.commandWorked(t.insert({x: true}));
-function assertOptimized(pipeline, v) {
- const explained = t.runCommand("aggregate", {pipeline: pipeline, explain: true});
- const projectStage = getPlanStage(explained, "PROJECTION_DEFAULT");
- assert.eq(projectStage.transformBy.a["$const"], v, "ensure short-circuiting worked", explained);
+function optimize(expression) {
+ const explained = t.explain().aggregate([
+ // This inhibit optimization prevents the expression being pushed down into the .find()
+ // layer, to make assertions simpler. But it doesn't prevent the $project stage itself
+ // from being optimized.
+ {$_internalInhibitOptimization: {}},
+ {$project: {result: expression}},
+ ]);
+
+ const stage = getAggPlanStage(explained, "$project");
+ assert(stage, explained);
+ assert(stage.$project.result, explained);
+ return stage.$project.result;
+}
+
+function assertOptimized(expression, value) {
+ const optimized = optimize(expression);
+ assert.docEq(optimized, {$const: value}, "ensure short-circuiting worked", optimized);
}
-function assertNotOptimized(pipeline) {
- const explained = t.runCommand("aggregate", {pipeline: pipeline, explain: true});
- const projectStage = getPlanStage(explained, "PROJECTION_DEFAULT");
- assert(!("$const" in projectStage.transformBy.a), "ensure no short-circuiting");
+function assertNotOptimized(expression) {
+ const optimized = optimize(expression);
+ // 'optimized' may be simpler than 'expression', but we assert it did not optimize to a
+ // constant.
+ assert.neq(Object.keys(optimized), ['$const'], "ensure no short-circuiting", optimized);
}
// short-circuiting for $and
-assertOptimized([{$project: {a: {$and: [0, '$x']}}}], false);
-assertOptimized([{$project: {a: {$and: [0, 1, '$x']}}}], false);
-assertOptimized([{$project: {a: {$and: [0, 1, '', '$x']}}}], false);
+assertOptimized({$and: [0, '$x']}, false);
+assertOptimized({$and: [0, 1, '$x']}, false);
+assertOptimized({$and: [0, 1, '', '$x']}, false);
-assertOptimized([{$project: {a: {$and: [1, 0, '$x']}}}], false);
-assertOptimized([{$project: {a: {$and: [1, '', 0, '$x']}}}], false);
-assertOptimized([{$project: {a: {$and: [1, 1, 0, 1]}}}], false);
+assertOptimized({$and: [1, 0, '$x']}, false);
+assertOptimized({$and: [1, '', 0, '$x']}, false);
+assertOptimized({$and: [1, 1, 0, 1]}, false);
// short-circuiting for $or
-assertOptimized([{$project: {a: {$or: [1, '$x']}}}], true);
-assertOptimized([{$project: {a: {$or: [1, 0, '$x']}}}], true);
-assertOptimized([{$project: {a: {$or: [1, '', '$x']}}}], true);
+assertOptimized({$or: [1, '$x']}, true);
+assertOptimized({$or: [1, 0, '$x']}, true);
+assertOptimized({$or: [1, '', '$x']}, true);
-assertOptimized([{$project: {a: {$or: [0, 1, '$x']}}}], true);
-assertOptimized([{$project: {a: {$or: ['', 0, 1, '$x']}}}], true);
-assertOptimized([{$project: {a: {$or: [0, 0, 0, 1]}}}], true);
+assertOptimized({$or: [0, 1, '$x']}, true);
+assertOptimized({$or: ['', 0, 1, '$x']}, true);
+assertOptimized({$or: [0, 0, 0, 1]}, true);
// examples that should not short-circuit
-assertNotOptimized([{$project: {a: {$and: [1, '$x']}}}]);
-assertNotOptimized([{$project: {a: {$or: [0, '$x']}}}]);
-assertNotOptimized([{$project: {a: {$and: ['$x', '$x']}}}]);
-assertNotOptimized([{$project: {a: {$or: ['$x', '$x']}}}]);
-assertNotOptimized([{$project: {a: {$and: ['$x']}}}]);
-assertNotOptimized([{$project: {a: {$or: ['$x']}}}]);
+assertNotOptimized({$and: [1, '$x']});
+assertNotOptimized({$or: [0, '$x']});
+assertNotOptimized({$and: ['$x', '$x']});
+assertNotOptimized({$or: ['$x', '$x']});
+assertNotOptimized({$and: ['$x']});
+assertNotOptimized({$or: ['$x']});
}());
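
As a standalone illustration of what the refactored optimize() helper inspects, the sketch below runs the same kind of explain and prints the serialized $project expression. getAggPlanStage() is assumed to come from jstests/libs/analyze_plan.js, and the collection setup matches the test above.

// Minimal sketch of the explain-based constant-folding check, assuming the same
// collection as the test above (db.jstests_aggregation_server6192 containing {x: true}).
load("jstests/libs/analyze_plan.js");  // for getAggPlanStage()
const c = db.jstests_aggregation_server6192;
const explained = c.explain().aggregate([
    {$_internalInhibitOptimization: {}},      // keep the $project out of the find() layer
    {$project: {result: {$and: [0, '$x']}}},  // short-circuits: 0 is falsy
]);
const stage = getAggPlanStage(explained, "$project");
printjson(stage.$project.result);             // expected to serialize as {$const: false}
// By contrast, {$and: [1, '$x']} depends on '$x' and should not serialize to a $const.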
diff --git a/jstests/aggregation/optimize_away_pipeline.js b/jstests/aggregation/optimize_away_pipeline.js
index 887923b13ce..c783698b1fe 100644
--- a/jstests/aggregation/optimize_away_pipeline.js
+++ b/jstests/aggregation/optimize_away_pipeline.js
@@ -17,7 +17,6 @@
(function() {
"use strict";
-load("jstests/aggregation/extras/utils.js"); // For 'orderedArrayEq' and 'arrayEq'.
load("jstests/concurrency/fsm_workload_helpers/server_types.js"); // For isWiredTiger.
load("jstests/libs/analyze_plan.js"); // For 'aggPlanHasStage' and other explain helpers.
load("jstests/libs/fixture_helpers.js"); // For 'isMongos' and 'isSharded'.
@@ -75,8 +74,11 @@ function assertPipelineUsesAggregation({
if (expectedResult) {
const actualResult = coll.aggregate(pipeline, pipelineOptions).toArray();
- assert(preserveResultOrder ? orderedArrayEq(actualResult, expectedResult)
- : arrayEq(actualResult, expectedResult));
+ if (preserveResultOrder) {
+ assert.docEq(actualResult, expectedResult);
+ } else {
+ assert.sameMembers(actualResult, expectedResult);
+ }
}
return explainOutput;
@@ -116,8 +118,11 @@ function assertPipelineDoesNotUseAggregation({
if (expectedResult) {
const actualResult = coll.aggregate(pipeline, pipelineOptions).toArray();
- assert(preserveResultOrder ? orderedArrayEq(actualResult, expectedResult)
- : arrayEq(actualResult, expectedResult));
+ if (preserveResultOrder) {
+ assert.docEq(actualResult, expectedResult);
+ } else {
+ assert.sameMembers(actualResult, expectedResult);
+ }
}
return explainOutput;
@@ -128,7 +133,7 @@ function testGetMore({command = null, expectedResult = null} = {}) {
const documents =
new DBCommandCursor(db, assert.commandWorked(db.runCommand(command)), 1 /* batchsize */)
.toArray();
- assert(arrayEq(documents, expectedResult));
+ assert.sameMembers(documents, expectedResult);
}
let explainOutput;
@@ -180,6 +185,19 @@ assertPipelineDoesNotUseAggregation({
expectedStages: ["IXSCAN"],
expectedResult: [{x: 20}]
});
+// However, when the $project is computed, pushing it down into the find() layer would sometimes
+// have the effect of reordering it before the $sort and $limit. This can cause a valid query to
+// throw an error, as in SERVER-54128.
+assertPipelineUsesAggregation({
+ pipeline: [
+ {$match: {x: {$gte: 20}}},
+ {$sort: {x: 1}},
+ {$limit: 1},
+ {$project: {x: {$substr: ["$y", 0, 1]}, _id: 0}}
+ ],
+ expectedStages: ["IXSCAN"],
+ expectedResult: [{x: ""}]
+});
assert.commandWorked(coll.dropIndexes());
assert.commandWorked(coll.insert({_id: 4, x: 40, a: {b: "ab1"}}));
@@ -571,10 +589,9 @@ if (!FixtureHelpers.isMongos(db) && isWiredTiger(db)) {
});
db.setProfilingLevel(0);
let profile = db.system.profile.find({}, {op: 1, ns: 1}).sort({ts: 1}).toArray();
- assert(
- arrayEq(profile,
- [{op: "command", ns: coll.getFullName()}, {op: "getmore", ns: coll.getFullName()}]),
- profile);
+ assert.sameMembers(
+ profile,
+ [{op: "command", ns: coll.getFullName()}, {op: "getmore", ns: coll.getFullName()}]);
// Test getMore puts a correct namespace into profile data for a view with an optimized away
// pipeline.
if (!FixtureHelpers.isSharded(coll)) {
@@ -591,9 +608,9 @@ if (!FixtureHelpers.isMongos(db) && isWiredTiger(db)) {
});
db.setProfilingLevel(0);
profile = db.system.profile.find({}, {op: 1, ns: 1}).sort({ts: 1}).toArray();
- assert(arrayEq(
+ assert.sameMembers(
profile,
- [{op: "query", ns: view.getFullName()}, {op: "getmore", ns: view.getFullName()}]));
+ [{op: "query", ns: view.getFullName()}, {op: "getmore", ns: view.getFullName()}]);
}
}
}());
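
The mechanical change in this file swaps the orderedArrayEq/arrayEq helpers for the shell's built-in assert.docEq and assert.sameMembers. A brief sketch of the difference, with illustrative values:

// assert.docEq is an exact deep comparison, so element order matters for arrays:
assert.docEq([{x: 1}, {x: 2}], [{x: 1}, {x: 2}]);         // passes
// assert.docEq([{x: 1}, {x: 2}], [{x: 2}, {x: 1}]) would throw.
// assert.sameMembers only requires the same elements, in any order:
assert.sameMembers([{x: 2}, {x: 1}], [{x: 1}, {x: 2}]);   // passes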
diff --git a/src/mongo/db/pipeline/pipeline_d.cpp b/src/mongo/db/pipeline/pipeline_d.cpp
index 26382290319..afbe455fee4 100644
--- a/src/mongo/db/pipeline/pipeline_d.cpp
+++ b/src/mongo/db/pipeline/pipeline_d.cpp
@@ -27,6 +27,7 @@
* it in the license file.
*/
+#include "mongo/db/query/projection_parser.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery
#include "mongo/platform/basic.h"
@@ -650,11 +651,18 @@ SkipThenLimit extractSkipAndLimitForPushdown(Pipeline* pipeline) {
* 2. If there is no inclusion projection at the front of the pipeline, but there is a finite
* dependency set, a projection representing this dependency set will be pushed down.
* 3. Otherwise, an empty projection is returned and no projection push down will happen.
+ *
+ * If 'allowExpressions' is true, the returned projection may include expressions (which can only
+ * happen in case 1). If 'allowExpressions' is false and the projection we find has expressions,
+ * then we fall through to case 2 and attempt to push down a pure-inclusion projection based on its
+ * dependencies.
*/
-auto buildProjectionForPushdown(const DepsTracker& deps, Pipeline* pipeline) {
+auto buildProjectionForPushdown(const DepsTracker& deps,
+ Pipeline* pipeline,
+ bool allowExpressions) {
auto&& sources = pipeline->getSources();
- // Short-circuit if the pipeline is emtpy, there is no projection and nothing to push down.
+ // Short-circuit if the pipeline is empty: there is no projection and nothing to push down.
if (sources.empty()) {
return BSONObj();
}
@@ -663,17 +671,24 @@ auto buildProjectionForPushdown(const DepsTracker& deps, Pipeline* pipeline) {
exact_pointer_cast<DocumentSourceSingleDocumentTransformation*>(sources.front().get());
projStage) {
if (projStage->getType() == TransformerInterface::TransformerType::kInclusionProjection) {
- // If there is an inclusion projection at the front of the pipeline, we have case 1.
auto projObj =
projStage->getTransformer().serializeTransformation(boost::none).toBson();
- sources.pop_front();
- return projObj;
+ auto projAst = projection_ast::parse(projStage->getContext(),
+ projObj,
+ ProjectionPolicies::aggregateProjectionPolicies());
+ if (!projAst.hasExpressions() || allowExpressions) {
+ // If there is an inclusion projection at the front of the pipeline, we have case 1.
+ sources.pop_front();
+ return projObj;
+ }
}
}
// Depending of whether there is a finite dependency set, either return a projection
// representing this dependency set, or an empty BSON, meaning no projection push down will
// happen. This covers cases 2 and 3.
+ if (deps.getNeedsAnyMetadata())
+ return BSONObj();
return deps.toProjectionWithoutMetadata();
}
} // namespace
@@ -902,7 +917,19 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> PipelineD::prep
// Build a BSONObj representing a projection eligible for pushdown. If there is an inclusion
// projection at the front of the pipeline, it will be removed and handled by the PlanStage
// layer. If a projection cannot be pushed down, an empty BSONObj will be returned.
- projObj = buildProjectionForPushdown(deps, pipeline);
+
+ // In most cases .find() behaves as if it evaluates in a predictable order:
+ // predicate, sort, skip, limit, projection.
+ // But there is at least one case where it runs the projection before the sort/skip/limit:
+ // when the predicate has a rooted $or. (In that case we plan each branch of the $or
+ // separately, using Subplan, and include the projection on each branch.)
+
+ // To work around this behavior, don't allow pushing down expressions if we are also going
+ // to push down a sort, skip or limit. We don't want the expressions to be evaluated on any
+ // documents that the sort/skip/limit would have filtered out. (The sort stage can be a
+ // top-k sort, which both sorts and limits.)
+ bool allowExpressions = !sortStage && !skipThenLimit.getSkip() && !skipThenLimit.getLimit();
+ projObj = buildProjectionForPushdown(deps, pipeline, allowExpressions);
plannerOpts |= QueryPlannerParams::RETURN_OWNED_DATA;
}