summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Caimano <ben.caimano@10gen.com>2020-07-09 15:07:44 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-07-10 19:50:58 +0000
commit96cb86d41e4800b0f359a4d25dad2a6a94501e32 (patch)
tree0b76472d405cb7d06f308aa79243f60e0d139b08
parent801fc9cb186ceb16abc2dd96ecc6f1749f1cde12 (diff)
downloadmongo-96cb86d41e4800b0f359a4d25dad2a6a94501e32.tar.gz
SERVER-33966 Removed Redundant Sort
-rw-r--r--jstests/sharding/query/agg_mongos_merge.js23
-rw-r--r--src/mongo/db/pipeline/document_source_sort.cpp17
-rw-r--r--src/mongo/db/pipeline/pipeline_test.cpp232
3 files changed, 264 insertions, 8 deletions
diff --git a/jstests/sharding/query/agg_mongos_merge.js b/jstests/sharding/query/agg_mongos_merge.js
index f36e81b6005..e917fd2a810 100644
--- a/jstests/sharding/query/agg_mongos_merge.js
+++ b/jstests/sharding/query/agg_mongos_merge.js
@@ -9,6 +9,7 @@
* @tags: [
* requires_sharding,
* requires_profiling,
+ * requires_fcv_46,
* ]
*/
@@ -339,14 +340,6 @@ function runTestCasesWhoseMergeLocationDependsOnAllowDiskUse(allowDiskUse) {
// All test cases should merge on mongoD if allowDiskUse is true, mongoS otherwise.
const assertMergeOnMongoX = (allowDiskUse ? assertMergeOnMongoD : assertMergeOnMongoS);
- // Test that a blocking $sort is only merged on mongoS if 'allowDiskUse' is not set.
- assertMergeOnMongoX({
- testName: "agg_mongos_merge_blocking_sort_no_disk_use",
- pipeline: [{$match: {_id: {$gte: -200, $lte: 200}}}, {$sort: {_id: -1}}, {$sort: {a: 1}}],
- allowDiskUse: allowDiskUse,
- expectedCount: 400
- });
-
// Test that $group is only merged on mongoS if 'allowDiskUse' is not set.
assertMergeOnMongoX({
testName: "agg_mongos_merge_group_allow_disk_use",
@@ -356,6 +349,20 @@ function runTestCasesWhoseMergeLocationDependsOnAllowDiskUse(allowDiskUse) {
expectedCount: 299
});
+ // Adjacent $sort stages will be coalesced and merge sort will occur on anyShard when disk use
+ // is allowed, and on mongos otherwise.
+ assertMergeOnMongoX({
+ testName: "agg_mongos_merge_blocking_sort_allow_disk_use",
+ pipeline: [
+ {$match: {_id: {$gte: -200, $lte: 200}}},
+ {$sort: {_id: 1}},
+ {$_internalSplitPipeline: {}},
+ {$sort: {a: 1}}
+ ],
+ allowDiskUse: allowDiskUse,
+ expectedCount: 400
+ });
+
// Test that a blocking $sample is only merged on mongoS if 'allowDiskUse' is not set.
assertMergeOnMongoX({
testName: "agg_mongos_merge_blocking_sample_allow_disk_use",
diff --git a/src/mongo/db/pipeline/document_source_sort.cpp b/src/mongo/db/pipeline/document_source_sort.cpp
index dcf40ba110b..02290179439 100644
--- a/src/mongo/db/pipeline/document_source_sort.cpp
+++ b/src/mongo/db/pipeline/document_source_sort.cpp
@@ -169,6 +169,23 @@ Pipeline::SourceContainer::iterator DocumentSourceSort::doOptimizeAt(
_sortExecutor->setLimit(*limit);
}
+ if (std::next(itr) == container->end()) {
+ return container->end();
+ }
+
+ if (auto nextSort = dynamic_cast<DocumentSourceSort*>((*std::next(itr)).get())) {
+ // If subsequent $sort stage exists, optimize by erasing the initial one.
+ // Since $sort is not guaranteed to be stable, we can blindly remove the first $sort.
+ auto thisLim = _sortExecutor->getLimit();
+ auto otherLim = nextSort->_sortExecutor->getLimit();
+ // When coalescing subsequent $sort stages, retain the existing/lower limit.
+ if (thisLim && (!otherLim || otherLim > thisLim)) {
+ nextSort->_sortExecutor->setLimit(thisLim);
+ }
+ Pipeline::SourceContainer::iterator ret = std::next(itr);
+ container->erase(itr);
+ return ret;
+ }
return std::next(itr);
}
diff --git a/src/mongo/db/pipeline/pipeline_test.cpp b/src/mongo/db/pipeline/pipeline_test.cpp
index 43915889af4..239924ce42e 100644
--- a/src/mongo/db/pipeline/pipeline_test.cpp
+++ b/src/mongo/db/pipeline/pipeline_test.cpp
@@ -293,6 +293,238 @@ TEST(PipelineOptimizationTest, SortMatchProjSkipLimBecomesMatchTopKSortSkipProj)
assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
}
+TEST(PipelineOptimizationTest, IdenticalSortSortBecomesSort) {
+ std::string inputPipe =
+ "[{$sort: {a: 1}}"
+ ",{$sort: {a: 1}}"
+ "]";
+
+ std::string outputPipe =
+ "[{$sort: {sortKey: {a: 1}}}"
+ "]";
+
+ std::string serializedPipe =
+ "[{$sort: {a: 1}}"
+ "]";
+
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, IdenticalSortSortSortBecomesSort) {
+ std::string inputPipe =
+ "[{$sort: {a: 1}}"
+ ",{$sort: {a: 1}}"
+ ",{$sort: {a: 1}}"
+ "]";
+
+ std::string outputPipe =
+ "[{$sort: {sortKey: {a: 1}}}"
+ "]";
+
+ std::string serializedPipe =
+ "[{$sort: {a: 1}}"
+ "]";
+
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, NonIdenticalSortsOnlySortOnFinalKey) {
+ std::string inputPipe =
+ "[{$sort: {a: -1}}"
+ ",{$sort: {a: 1}}"
+ ",{$sort: {a: -1}}"
+ "]";
+
+ std::string outputPipe =
+ "[{$sort: {sortKey: {a: -1}}}"
+ "]";
+
+ std::string serializedPipe =
+ "[{$sort: {a: -1}}"
+ "]";
+
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, SortSortLimitBecomesFinalKeyTopKSort) {
+ std::string inputPipe =
+ "[{$sort: {a: -1}}"
+ ",{$sort: {a: 1}}"
+ ",{$limit: 5}"
+ "]";
+
+ std::string outputPipe =
+ "[{$sort: {sortKey: {a: 1}, limit: 5}}"
+ "]";
+
+ std::string serializedPipe =
+ "[{$sort: {a: 1}}"
+ ",{$limit: 5}"
+ "]";
+
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, SortSortSkipLimitBecomesTopKSortSkip) {
+ std::string inputPipe =
+ "[{$sort: {b: 1}}"
+ ",{$sort: {a: 1}}"
+ ",{$skip : 3}"
+ ",{$limit: 5}"
+ "]";
+
+ std::string outputPipe =
+ "[{$sort: {sortKey: {a: 1}, limit: 8}}"
+ ",{$skip: 3}"
+ "]";
+
+ std::string serializedPipe =
+ "[{$sort: {a: 1}}"
+ ",{$limit: 8}"
+ ",{$skip : 3}"
+ "]";
+
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, SortLimitSortLimitBecomesTopKSort) {
+ std::string inputPipe =
+ "[{$sort: {a: 1}}"
+ ",{$limit: 12}"
+ ",{$sort: {a: 1}}"
+ ",{$limit: 20}"
+ "]";
+
+ std::string outputPipe =
+ "[{$sort: {sortKey: {a: 1}, limit: 12}}"
+ "]";
+
+ std::string serializedPipe =
+ "[{$sort: {a: 1}}"
+ ",{$limit: 12}"
+ "]";
+
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, SortLimitSortRetainsLimit) {
+ std::string inputPipe =
+ "[{$sort: {a: 1}}"
+ ",{$limit: 12}"
+ ",{$sort: {a: 1}}"
+ "]";
+
+ std::string outputPipe =
+ "[{$sort: {sortKey: {a: 1}, limit: 12}}"
+ "]";
+
+ std::string serializedPipe =
+ "[{$sort: {a: 1}}"
+ ",{$limit: 12}"
+ "]";
+
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, SortSortLimitRetainsLimit) {
+ std::string inputPipe =
+ "[{$sort: {a: 1}}"
+ ",{$sort: {a: 1}}"
+ ",{$limit: 20}"
+ "]";
+
+ std::string outputPipe =
+ "[{$sort: {sortKey: {a: 1}, limit: 20}}"
+ "]";
+
+ std::string serializedPipe =
+ "[{$sort: {a: 1}}"
+ ",{$limit: 20}"
+ "]";
+
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, SortSortSortMatchProjSkipLimBecomesMatchTopKSortSkipProj) {
+ std::string inputPipe =
+ "[{$sort: {a: 1}}"
+ ",{$sort: {a: 1}}"
+ ",{$sort: {a: 1}}"
+ ",{$match: {a: 1}}"
+ ",{$project : {a: 1}}"
+ ",{$skip : 3}"
+ ",{$limit: 5}"
+ "]";
+
+ std::string outputPipe =
+ "[{$match: {a: {$eq: 1}}}"
+ ",{$sort: {sortKey: {a: 1}, limit: 8}}"
+ ",{$skip: 3}"
+ ",{$project: {_id: true, a: true}}"
+ "]";
+
+ std::string serializedPipe =
+ "[{$match: {a: 1}}"
+ ",{$sort: {a: 1}}"
+ ",{$limit: 8}"
+ ",{$skip : 3}"
+ ",{$project : {_id: true, a: true}}"
+ "]";
+
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, NonIdenticalSortsBecomeFinalKeyTopKSort) {
+ std::string inputPipe =
+ "[{$sort: {a: -1}}"
+ ",{$sort: {b: -1}}"
+ ",{$sort: {b: 1}}"
+ ",{$sort: {a: 1}}"
+ ",{$limit: 7}"
+ ",{$project : {a: 1}}"
+ ",{$limit: 5}"
+ "]";
+
+ std::string outputPipe =
+ "[{$sort: {sortKey: {a: 1}, limit: 5}}"
+ ",{$project: {_id: true, a: true}}"
+ "]";
+
+ std::string serializedPipe =
+ "[{$sort: {a: 1}}"
+ ",{$limit: 5}"
+ ",{$project : {_id: true, a: true}}"
+ "]";
+
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, SubsequentSortsMergeAndBecomeTopKSortWithFinalKeyAndLowestLimit) {
+ std::string inputPipe =
+ "[{$sort: {a: 1}}"
+ ",{$sort: {a: -1}}"
+ ",{$limit: 8}"
+ ",{$limit: 7}"
+ ",{$project : {a: 1}}"
+ ",{$unwind: {path: '$a'}}"
+ "]";
+
+ std::string outputPipe =
+ "[{$sort: {sortKey: {a: -1}, limit: 7}}"
+ ",{$project: {_id: true, a: true}}"
+ ",{$unwind: {path: '$a'}}"
+ "]";
+
+ std::string serializedPipe =
+ "[{$sort: {a: -1}}"
+ ",{$limit: 7}"
+ ",{$project : {_id: true, a: true}}"
+ ",{$unwind: {path: '$a'}}"
+ "]";
+
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
TEST(PipelineOptimizationTest, RemoveSkipZero) {
assertPipelineOptimizesTo("[{$skip: 0}]", "[]");
}