diff options
author | David Percy <david.percy@mongodb.com> | 2021-01-29 01:12:21 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-06-23 15:52:33 +0000 |
commit | 93e25299fbf8e0279cd855f4ce19becae230d2a1 (patch) | |
tree | 31d6b248add5e63f96dce60abc1f0479cf14ef82 | |
parent | 2950f483e8cfa0970d18ae873c3f0461f033fe31 (diff) | |
download | mongo-93e25299fbf8e0279cd855f4ce19becae230d2a1.tar.gz |
SERVER-54128 Don't push down expressions past sort+limit
-rw-r--r-- | jstests/aggregation/bugs/expression_swap_limit.js | 82 | ||||
-rw-r--r-- | jstests/aggregation/bugs/server6192_server6193.js | 67 | ||||
-rw-r--r-- | jstests/aggregation/optimize_away_pipeline.js | 41 | ||||
-rw-r--r-- | src/mongo/db/pipeline/pipeline_d.cpp | 39 |
4 files changed, 185 insertions, 44 deletions
diff --git a/jstests/aggregation/bugs/expression_swap_limit.js b/jstests/aggregation/bugs/expression_swap_limit.js new file mode 100644 index 00000000000..eef11dea51d --- /dev/null +++ b/jstests/aggregation/bugs/expression_swap_limit.js @@ -0,0 +1,82 @@ +/** + * Pushing down computed projection before sort+limit can cause the projection to be evaluated on + * invalid documents and throw exceptions. This test checks that we do not push computed projection + * before sort+limit, preventing valid queries from failing. + * + * This test was intended to reproduce a bug, SERVER-54128. + */ +(function() { + +const coll = db.expression_swap_limit; +coll.drop(); + +const NUM_INVALID_DOCS = 10; + +const docs = Array.from({length: NUM_INVALID_DOCS}, (_, i) => ({_id: i, a: ""})); +docs.push({_id: 99, a: 123}); +coll.insert(docs); + +coll.createIndex({some_other_field: 1}); + +const predicate = { + $or: [ + // One branch of the $or needs to have more than one relevant index. + {_id: {$lt: 9999}, some_other_field: {$ne: 3}}, + // The other branch doesn't matter: it's only here to prevent the $or being + // optimized out. + {this_predicate_matches_nothing: true}, + ] +}; +const sortSpec = { + _id: -1 +}; +const oppositeSortSpec = { + _id: +1 +}; +const projection = { + _id: 1, + b: {$round: "$a"}, +}; +const pipeline1 = [ + // We need a rooted $or to trigger the SubplanStage. + // Instead of one MultiPlanStage for the whole query, we get one + // per branch of the $or. + {$match: predicate}, + // Next we need a $sort+$limit. These two stages unambiguously select _id: 99. + // From the user's point of view, the $limit returns a bag of 1 document. + {$sort: sortSpec}, + {$limit: 1}, + // This projection raises an error if we evaluate it on any document other than + // _id: 99, because that is the only document where 'a' is numeric. + {$project: projection}, +]; +{ + // The pipeline should succeed. + const aggResult = coll.aggregate(pipeline1).toArray(); + assert.docEq(aggResult, [{_id: 99, b: 123}]); + + // The pipeline should succeed without pushing down to find. + const noOptResult = + coll.aggregate([{$_internalInhibitOptimization: {}}].concat(pipeline1)).toArray(); + assert.docEq(noOptResult, [{_id: 99, b: 123}]); +} + +// Similarly, we can select the 1 valid document by flipping the sort and skipping +// all but one document. +const pipeline2 = [ + {$match: predicate}, + {$sort: oppositeSortSpec}, + {$skip: NUM_INVALID_DOCS}, + {$project: projection}, +]; +{ + // The pipeline should succeed. + const aggResult = coll.aggregate(pipeline2).toArray(); + assert.docEq(aggResult, [{_id: 99, b: 123}]); + + // The pipeline should succeed without pushing down to find. + const noOptResult = + coll.aggregate([{$_internalInhibitOptimization: {}}].concat(pipeline2)).toArray(); + assert.docEq(noOptResult, [{_id: 99, b: 123}]); +} +})(); diff --git a/jstests/aggregation/bugs/server6192_server6193.js b/jstests/aggregation/bugs/server6192_server6193.js index 7a395f28262..30103dfd0a8 100644 --- a/jstests/aggregation/bugs/server6192_server6193.js +++ b/jstests/aggregation/bugs/server6192_server6193.js @@ -20,41 +20,56 @@ const t = db.jstests_aggregation_server6192; t.drop(); assert.commandWorked(t.insert({x: true})); -function assertOptimized(pipeline, v) { - const explained = t.runCommand("aggregate", {pipeline: pipeline, explain: true}); - const projectStage = getPlanStage(explained, "PROJECTION_DEFAULT"); - assert.eq(projectStage.transformBy.a["$const"], v, "ensure short-circuiting worked", explained); +function optimize(expression) { + const explained = t.explain().aggregate([ + // This inhibit optimization prevents the expression being pushed down into the .find() + // layer, to make assertions simpler. But it doesn't prevent the $project stage itself + // from being optimized. + {$_internalInhibitOptimization: {}}, + {$project: {result: expression}}, + ]); + + const stage = getAggPlanStage(explained, "$project"); + assert(stage, explained); + assert(stage.$project.result, explained); + return stage.$project.result; +} + +function assertOptimized(expression, value) { + const optimized = optimize(expression); + assert.docEq(optimized, {$const: value}, "ensure short-circuiting worked", optimized); } -function assertNotOptimized(pipeline) { - const explained = t.runCommand("aggregate", {pipeline: pipeline, explain: true}); - const projectStage = getPlanStage(explained, "PROJECTION_DEFAULT"); - assert(!("$const" in projectStage.transformBy.a), "ensure no short-circuiting"); +function assertNotOptimized(expression) { + const optimized = optimize(expression); + // 'optimized' may be simpler than 'expression', but we assert it did not optimize to a + // constant. + assert.neq(Object.keys(optimized), ['$const'], "ensure no short-circuiting", optimized); } // short-circuiting for $and -assertOptimized([{$project: {a: {$and: [0, '$x']}}}], false); -assertOptimized([{$project: {a: {$and: [0, 1, '$x']}}}], false); -assertOptimized([{$project: {a: {$and: [0, 1, '', '$x']}}}], false); +assertOptimized({$and: [0, '$x']}, false); +assertOptimized({$and: [0, 1, '$x']}, false); +assertOptimized({$and: [0, 1, '', '$x']}, false); -assertOptimized([{$project: {a: {$and: [1, 0, '$x']}}}], false); -assertOptimized([{$project: {a: {$and: [1, '', 0, '$x']}}}], false); -assertOptimized([{$project: {a: {$and: [1, 1, 0, 1]}}}], false); +assertOptimized({$and: [1, 0, '$x']}, false); +assertOptimized({$and: [1, '', 0, '$x']}, false); +assertOptimized({$and: [1, 1, 0, 1]}, false); // short-circuiting for $or -assertOptimized([{$project: {a: {$or: [1, '$x']}}}], true); -assertOptimized([{$project: {a: {$or: [1, 0, '$x']}}}], true); -assertOptimized([{$project: {a: {$or: [1, '', '$x']}}}], true); +assertOptimized({$or: [1, '$x']}, true); +assertOptimized({$or: [1, 0, '$x']}, true); +assertOptimized({$or: [1, '', '$x']}, true); -assertOptimized([{$project: {a: {$or: [0, 1, '$x']}}}], true); -assertOptimized([{$project: {a: {$or: ['', 0, 1, '$x']}}}], true); -assertOptimized([{$project: {a: {$or: [0, 0, 0, 1]}}}], true); +assertOptimized({$or: [0, 1, '$x']}, true); +assertOptimized({$or: ['', 0, 1, '$x']}, true); +assertOptimized({$or: [0, 0, 0, 1]}, true); // examples that should not short-circuit -assertNotOptimized([{$project: {a: {$and: [1, '$x']}}}]); -assertNotOptimized([{$project: {a: {$or: [0, '$x']}}}]); -assertNotOptimized([{$project: {a: {$and: ['$x', '$x']}}}]); -assertNotOptimized([{$project: {a: {$or: ['$x', '$x']}}}]); -assertNotOptimized([{$project: {a: {$and: ['$x']}}}]); -assertNotOptimized([{$project: {a: {$or: ['$x']}}}]); +assertNotOptimized({$and: [1, '$x']}); +assertNotOptimized({$or: [0, '$x']}); +assertNotOptimized({$and: ['$x', '$x']}); +assertNotOptimized({$or: ['$x', '$x']}); +assertNotOptimized({$and: ['$x']}); +assertNotOptimized({$or: ['$x']}); }()); diff --git a/jstests/aggregation/optimize_away_pipeline.js b/jstests/aggregation/optimize_away_pipeline.js index 887923b13ce..c783698b1fe 100644 --- a/jstests/aggregation/optimize_away_pipeline.js +++ b/jstests/aggregation/optimize_away_pipeline.js @@ -17,7 +17,6 @@ (function() { "use strict"; -load("jstests/aggregation/extras/utils.js"); // For 'orderedArrayEq' and 'arrayEq'. load("jstests/concurrency/fsm_workload_helpers/server_types.js"); // For isWiredTiger. load("jstests/libs/analyze_plan.js"); // For 'aggPlanHasStage' and other explain helpers. load("jstests/libs/fixture_helpers.js"); // For 'isMongos' and 'isSharded'. @@ -75,8 +74,11 @@ function assertPipelineUsesAggregation({ if (expectedResult) { const actualResult = coll.aggregate(pipeline, pipelineOptions).toArray(); - assert(preserveResultOrder ? orderedArrayEq(actualResult, expectedResult) - : arrayEq(actualResult, expectedResult)); + if (preserveResultOrder) { + assert.docEq(actualResult, expectedResult); + } else { + assert.sameMembers(actualResult, expectedResult); + } } return explainOutput; @@ -116,8 +118,11 @@ function assertPipelineDoesNotUseAggregation({ if (expectedResult) { const actualResult = coll.aggregate(pipeline, pipelineOptions).toArray(); - assert(preserveResultOrder ? orderedArrayEq(actualResult, expectedResult) - : arrayEq(actualResult, expectedResult)); + if (preserveResultOrder) { + assert.docEq(actualResult, expectedResult); + } else { + assert.sameMembers(actualResult, expectedResult); + } } return explainOutput; @@ -128,7 +133,7 @@ function testGetMore({command = null, expectedResult = null} = {}) { const documents = new DBCommandCursor(db, assert.commandWorked(db.runCommand(command)), 1 /* batchsize */) .toArray(); - assert(arrayEq(documents, expectedResult)); + assert.sameMembers(documents, expectedResult); } let explainOutput; @@ -180,6 +185,19 @@ assertPipelineDoesNotUseAggregation({ expectedStages: ["IXSCAN"], expectedResult: [{x: 20}] }); +// However, when the $project is computed, pushing it down into the find() layer would sometimes +// have the effect of reordering it before the $sort and $limit. This can cause a valid query to +// throw an error, as in SERVER-54128. +assertPipelineUsesAggregation({ + pipeline: [ + {$match: {x: {$gte: 20}}}, + {$sort: {x: 1}}, + {$limit: 1}, + {$project: {x: {$substr: ["$y", 0, 1]}, _id: 0}} + ], + expectedStages: ["IXSCAN"], + expectedResult: [{x: ""}] +}); assert.commandWorked(coll.dropIndexes()); assert.commandWorked(coll.insert({_id: 4, x: 40, a: {b: "ab1"}})); @@ -571,10 +589,9 @@ if (!FixtureHelpers.isMongos(db) && isWiredTiger(db)) { }); db.setProfilingLevel(0); let profile = db.system.profile.find({}, {op: 1, ns: 1}).sort({ts: 1}).toArray(); - assert( - arrayEq(profile, - [{op: "command", ns: coll.getFullName()}, {op: "getmore", ns: coll.getFullName()}]), - profile); + assert.sameMembers( + profile, + [{op: "command", ns: coll.getFullName()}, {op: "getmore", ns: coll.getFullName()}]); // Test getMore puts a correct namespace into profile data for a view with an optimized away // pipeline. if (!FixtureHelpers.isSharded(coll)) { @@ -591,9 +608,9 @@ if (!FixtureHelpers.isMongos(db) && isWiredTiger(db)) { }); db.setProfilingLevel(0); profile = db.system.profile.find({}, {op: 1, ns: 1}).sort({ts: 1}).toArray(); - assert(arrayEq( + assert.sameMembers( profile, - [{op: "query", ns: view.getFullName()}, {op: "getmore", ns: view.getFullName()}])); + [{op: "query", ns: view.getFullName()}, {op: "getmore", ns: view.getFullName()}]); } } }()); diff --git a/src/mongo/db/pipeline/pipeline_d.cpp b/src/mongo/db/pipeline/pipeline_d.cpp index 26382290319..afbe455fee4 100644 --- a/src/mongo/db/pipeline/pipeline_d.cpp +++ b/src/mongo/db/pipeline/pipeline_d.cpp @@ -27,6 +27,7 @@ * it in the license file. */ +#include "mongo/db/query/projection_parser.h" #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery #include "mongo/platform/basic.h" @@ -650,11 +651,18 @@ SkipThenLimit extractSkipAndLimitForPushdown(Pipeline* pipeline) { * 2. If there is no inclusion projection at the front of the pipeline, but there is a finite * dependency set, a projection representing this dependency set will be pushed down. * 3. Otherwise, an empty projection is returned and no projection push down will happen. + * + * If 'allowExpressions' is true, the returned projection may include expressions (which can only + * happen in case 1). If 'allowExpressions' is false and the projection we find has expressions, + * then we fall through to case 2 and attempt to push down a pure-inclusion projection based on its + * dependencies. */ -auto buildProjectionForPushdown(const DepsTracker& deps, Pipeline* pipeline) { +auto buildProjectionForPushdown(const DepsTracker& deps, + Pipeline* pipeline, + bool allowExpressions) { auto&& sources = pipeline->getSources(); - // Short-circuit if the pipeline is emtpy, there is no projection and nothing to push down. + // Short-circuit if the pipeline is empty: there is no projection and nothing to push down. if (sources.empty()) { return BSONObj(); } @@ -663,17 +671,24 @@ auto buildProjectionForPushdown(const DepsTracker& deps, Pipeline* pipeline) { exact_pointer_cast<DocumentSourceSingleDocumentTransformation*>(sources.front().get()); projStage) { if (projStage->getType() == TransformerInterface::TransformerType::kInclusionProjection) { - // If there is an inclusion projection at the front of the pipeline, we have case 1. auto projObj = projStage->getTransformer().serializeTransformation(boost::none).toBson(); - sources.pop_front(); - return projObj; + auto projAst = projection_ast::parse(projStage->getContext(), + projObj, + ProjectionPolicies::aggregateProjectionPolicies()); + if (!projAst.hasExpressions() || allowExpressions) { + // If there is an inclusion projection at the front of the pipeline, we have case 1. + sources.pop_front(); + return projObj; + } } } // Depending of whether there is a finite dependency set, either return a projection // representing this dependency set, or an empty BSON, meaning no projection push down will // happen. This covers cases 2 and 3. + if (deps.getNeedsAnyMetadata()) + return BSONObj(); return deps.toProjectionWithoutMetadata(); } } // namespace @@ -902,7 +917,19 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> PipelineD::prep // Build a BSONObj representing a projection eligible for pushdown. If there is an inclusion // projection at the front of the pipeline, it will be removed and handled by the PlanStage // layer. If a projection cannot be pushed down, an empty BSONObj will be returned. - projObj = buildProjectionForPushdown(deps, pipeline); + + // In most cases .find() behaves as if it evaluates in a predictable order: + // predicate, sort, skip, limit, projection. + // But there is at least one case where it runs the projection before the sort/skip/limit: + // when the predicate has a rooted $or. (In that case we plan each branch of the $or + // separately, using Subplan, and include the projection on each branch.) + + // To work around this behavior, don't allow pushing down expressions if we are also going + // to push down a sort, skip or limit. We don't want the expressions to be evaluated on any + // documents that the sort/skip/limit would have filtered out. (The sort stage can be a + // top-k sort, which both sorts and limits.) + bool allowExpressions = !sortStage && !skipThenLimit.getSkip() && !skipThenLimit.getLimit(); + projObj = buildProjectionForPushdown(deps, pipeline, allowExpressions); plannerOpts |= QueryPlannerParams::RETURN_OWNED_DATA; } |