summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJustin Seyster <justin.seyster@mongodb.com>2020-03-23 17:07:28 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-03-24 02:05:39 +0000
commit30bd61a92cd4c8d6dd5588b59b9bdf4a0cd3c7d8 (patch)
treefef13811af90ac2f9f4f6758d8edf82a82a2a288
parent060ddbb225b21e7cb268f0299e5b34abe2e1feaa (diff)
downloadmongo-30bd61a92cd4c8d6dd5588b59b9bdf4a0cd3c7d8.tar.gz
SERVER-46233 Relocate second call to optimizePipeline()
The second call to optimizePipeline() can sometimes catch optimizations that were missed in the first call. The first call simplifies filter expressions, opening up some optimization opportunities that were previously blocked. We want to catch these opportunities before we push down any stages into the PlanExecutor.
-rw-r--r--jstests/aggregation/split_match_and_swap_with_sort.js67
-rw-r--r--src/mongo/db/commands/run_aggregate.cpp4
-rw-r--r--src/mongo/db/pipeline/pipeline_d.cpp4
3 files changed, 71 insertions, 4 deletions
diff --git a/jstests/aggregation/split_match_and_swap_with_sort.js b/jstests/aggregation/split_match_and_swap_with_sort.js
new file mode 100644
index 00000000000..1905c7be1b7
--- /dev/null
+++ b/jstests/aggregation/split_match_and_swap_with_sort.js
@@ -0,0 +1,67 @@
+// When a $sort is followed by a $match, the pipeline optimizer should be able to move the match in
+// front of the $sort, so as to reduce the number of documents that need to be sorted.
+//
+// @tags: [
+// # Wrapping the pipeline in a $facet will prevent its $match from getting absorbed by the query
+// # system, which is what we are testing for.
+// do_not_wrap_aggregations_in_facets,
+// # Likewise, splitting up the pipeline for sharding may also prevent the $match from getting
+// # absorbed.
+// assumes_unsharded_collection,
+// # Don't disable the thing we are specifically testing for!
+// requires_pipeline_optimization,
+// ]
+load('jstests/libs/analyze_plan.js');
+
+(function() {
+"use strict";
+
+const coll = db.getSiblingDB("split_match_and_swap_with_sort")["test"];
+coll.drop();
+
+assert.commandWorked(
+ coll.insert([{_id: 1, a: 1, b: 3}, {_id: 2, a: 2, b: 2}, {_id: 3, a: 3, b: 1}]));
+
+{
+ const pipeline = [{$sort: {b: 1}}, {$match: {a: {$ne: 2}}}];
+
+ assert.eq([{_id: 3, a: 3, b: 1}, {_id: 1, a: 1, b: 3}], coll.aggregate(pipeline).toArray());
+
+ const pipelineExplained = coll.explain().aggregate(pipeline);
+ const collScanStage = getPlanStage(pipelineExplained, "COLLSCAN");
+
+ // After moving the $match to the front of the pipeline, we expect the pipeline optimizer to
+ // push it down into the PlanExecutor, so that it becomes a filter on the COLLSCAN.
+ assert.neq(null, collScanStage, pipelineExplained);
+ assert.eq({a: {"$not": {"$eq": 2}}}, collScanStage.filter, collScanStage);
+}
+
+{
+ // Note that a $expr cannot be moved. However, the optimizer can split the non-$expr part of the
+ // $match and move that to be in front of the $sort.
+ const pipeline =
+ [{$sort: {b: 1}}, {$match: {$and: [{a: {$ne: 2}}, {$expr: {$ne: ["$a", "$b"]}}]}}];
+
+ assert.eq([{_id: 3, a: 3, b: 1}, {_id: 1, a: 1, b: 3}], coll.aggregate(pipeline).toArray());
+
+ const pipelineExplained = coll.explain().aggregate(pipeline);
+ const collScanStage = getAggPlanStage(pipelineExplained, "COLLSCAN");
+ assert.neq(null, collScanStage, pipelineExplained);
+ assert.eq({a: {"$not": {"$eq": 2}}}, collScanStage.filter, collScanStage);
+}
+
+{
+ // SERVER-46233: Normally a $or at the root of a $match expression prevents it from getting
+// split. However, when the $or has only one child, the optimizer should be able to eliminate
+ // the $or and then recognize that the simplified result _is_ eligible to be split.
+ const pipeline =
+ [{$sort: {b: 1}}, {$match: {$or: [{$and: [{a: {$ne: 2}}, {$expr: {$ne: ["$a", "$b"]}}]}]}}];
+
+ assert.eq([{_id: 3, a: 3, b: 1}, {_id: 1, a: 1, b: 3}], coll.aggregate(pipeline).toArray());
+
+ const pipelineExplained = coll.explain().aggregate(pipeline);
+ const collScanStage = getAggPlanStage(pipelineExplained, "COLLSCAN");
+ assert.neq(null, collScanStage, pipelineExplained);
+ assert.eq({a: {"$not": {"$eq": 2}}}, collScanStage.filter, collScanStage);
+}
+}()); \ No newline at end of file
diff --git a/src/mongo/db/commands/run_aggregate.cpp b/src/mongo/db/commands/run_aggregate.cpp
index 104c5da8cba..1a3dd2404c3 100644
--- a/src/mongo/db/commands/run_aggregate.cpp
+++ b/src/mongo/db/commands/run_aggregate.cpp
@@ -697,10 +697,6 @@ Status runAggregate(OperationContext* opCtx,
std::move(attachExecutorCallback.second),
pipeline.get());
- // Optimize again, since there may be additional optimizations that can be done after
- // adding the initial cursor stage.
- pipeline->optimizePipeline();
-
auto pipelines =
createExchangePipelinesIfNeeded(opCtx, expCtx, request, std::move(pipeline), uuid);
for (auto&& pipelineIt : pipelines) {
diff --git a/src/mongo/db/pipeline/pipeline_d.cpp b/src/mongo/db/pipeline/pipeline_d.cpp
index 0ce4c83f8eb..ef92f11d953 100644
--- a/src/mongo/db/pipeline/pipeline_d.cpp
+++ b/src/mongo/db/pipeline/pipeline_d.cpp
@@ -488,6 +488,10 @@ PipelineD::buildInnerQueryExecutorGeneric(Collection* collection,
const NamespaceString& nss,
const AggregationRequest* aggRequest,
Pipeline* pipeline) {
+ // Make a last effort to optimize pipeline stages before potentially detaching them to be pushed
+ // down into the query executor.
+ pipeline->optimizePipeline();
+
Pipeline::SourceContainer& sources = pipeline->_sources;
auto expCtx = pipeline->getContext();