diff options
author | Justin Seyster <justin.seyster@mongodb.com> | 2020-03-23 17:07:28 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-03-24 02:05:39 +0000 |
commit | 30bd61a92cd4c8d6dd5588b59b9bdf4a0cd3c7d8 (patch) | |
tree | fef13811af90ac2f9f4f6758d8edf82a82a2a288 | |
parent | 060ddbb225b21e7cb268f0299e5b34abe2e1feaa (diff) | |
download | mongo-30bd61a92cd4c8d6dd5588b59b9bdf4a0cd3c7d8.tar.gz |
SERVER-46233 Relocate second call to optimizePipeline()
The second call to optimizePipeline() can sometimes catch
optimizations that were missed in the first call. The first call
simplifies filter expressions, opening up some optimization
opportunities that were previously blocked.
We want to catch these opportunities before we push down any stages
into the PlanExecutor.
-rw-r--r-- | jstests/aggregation/split_match_and_swap_with_sort.js | 67 | ||||
-rw-r--r-- | src/mongo/db/commands/run_aggregate.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/pipeline/pipeline_d.cpp | 4 |
3 files changed, 71 insertions, 4 deletions
diff --git a/jstests/aggregation/split_match_and_swap_with_sort.js b/jstests/aggregation/split_match_and_swap_with_sort.js new file mode 100644 index 00000000000..1905c7be1b7 --- /dev/null +++ b/jstests/aggregation/split_match_and_swap_with_sort.js @@ -0,0 +1,67 @@ +// When a $sort is followed by a $match, the pipeline optimizer should be able to move the match in +// front of the $sort, so as to reduce the number of documents that need to be sorted. +// +// @tags: [ +// # Wrapping the pipeline in a $facet will prevent its $match from getting absorbed by the query +// # system, which is what we are testing for. +// do_not_wrap_aggregations_in_facets, +// # Likewise, splitting up the pipeline for sharding may also prevent the $match from getting +// # absorbed. +// assumes_unsharded_collection, +// # Don't disable the thing we are specifically testing for! +// requires_pipeline_optimization, +// ] +load('jstests/libs/analyze_plan.js'); + +(function() { +"use strict"; + +const coll = db.getSiblingDB("split_match_and_swap_with_sort")["test"]; +coll.drop(); + +assert.commandWorked( + coll.insert([{_id: 1, a: 1, b: 3}, {_id: 2, a: 2, b: 2}, {_id: 3, a: 3, b: 1}])); + +{ + const pipeline = [{$sort: {b: 1}}, {$match: {a: {$ne: 2}}}]; + + assert.eq([{_id: 3, a: 3, b: 1}, {_id: 1, a: 1, b: 3}], coll.aggregate(pipeline).toArray()); + + const pipelineExplained = coll.explain().aggregate(pipeline); + const collScanStage = getPlanStage(pipelineExplained, "COLLSCAN"); + + // After moving the $match to the front of the pipeline, we expect the pipeline optimizer to + // push it down into the PlanExecutor, so that it becomes a filter on the COLLSCAN. + assert.neq(null, collScanStage, pipelineExplained); + assert.eq({a: {"$not": {"$eq": 2}}}, collScanStage.filter, collScanStage); +} + +{ + // Note that a $expr cannot be moved. However, the optimizer can split the non-$expr part of the + // $match and move that to be in front of the $sort. 
+ const pipeline = + [{$sort: {b: 1}}, {$match: {$and: [{a: {$ne: 2}}, {$expr: {$ne: ["$a", "$b"]}}]}}]; + + assert.eq([{_id: 3, a: 3, b: 1}, {_id: 1, a: 1, b: 3}], coll.aggregate(pipeline).toArray()); + + const pipelineExplained = coll.explain().aggregate(pipeline); + const collScanStage = getAggPlanStage(pipelineExplained, "COLLSCAN"); + assert.neq(null, collScanStage, pipelineExplained); + assert.eq({a: {"$not": {"$eq": 2}}}, collScanStage.filter, collScanStage); +} + +{ + // SERVER-46233: Normally a $or at the root of a $match expression prevents it from getting + // split. However, when the $or has only one child, the optimizer should be able to eliminate + // the $or and then recognize that the simplified result _is_ eligible to be split. + const pipeline = + [{$sort: {b: 1}}, {$match: {$or: [{$and: [{a: {$ne: 2}}, {$expr: {$ne: ["$a", "$b"]}}]}]}}]; + + assert.eq([{_id: 3, a: 3, b: 1}, {_id: 1, a: 1, b: 3}], coll.aggregate(pipeline).toArray()); + + const pipelineExplained = coll.explain().aggregate(pipeline); + const collScanStage = getAggPlanStage(pipelineExplained, "COLLSCAN"); + assert.neq(null, collScanStage, pipelineExplained); + assert.eq({a: {"$not": {"$eq": 2}}}, collScanStage.filter, collScanStage); +} +}());
\ No newline at end of file diff --git a/src/mongo/db/commands/run_aggregate.cpp b/src/mongo/db/commands/run_aggregate.cpp index 104c5da8cba..1a3dd2404c3 100644 --- a/src/mongo/db/commands/run_aggregate.cpp +++ b/src/mongo/db/commands/run_aggregate.cpp @@ -697,10 +697,6 @@ Status runAggregate(OperationContext* opCtx, std::move(attachExecutorCallback.second), pipeline.get()); - // Optimize again, since there may be additional optimizations that can be done after - // adding the initial cursor stage. - pipeline->optimizePipeline(); - auto pipelines = createExchangePipelinesIfNeeded(opCtx, expCtx, request, std::move(pipeline), uuid); for (auto&& pipelineIt : pipelines) { diff --git a/src/mongo/db/pipeline/pipeline_d.cpp b/src/mongo/db/pipeline/pipeline_d.cpp index 0ce4c83f8eb..ef92f11d953 100644 --- a/src/mongo/db/pipeline/pipeline_d.cpp +++ b/src/mongo/db/pipeline/pipeline_d.cpp @@ -488,6 +488,10 @@ PipelineD::buildInnerQueryExecutorGeneric(Collection* collection, const NamespaceString& nss, const AggregationRequest* aggRequest, Pipeline* pipeline) { + // Make a last effort to optimize pipeline stages before potentially detaching them to be pushed + // down into the query executor. + pipeline->optimizePipeline(); + Pipeline::SourceContainer& sources = pipeline->_sources; auto expCtx = pipeline->getContext(); |