diff options
author | Ben Caimano <ben.caimano@10gen.com> | 2020-07-09 15:07:44 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-07-10 19:50:58 +0000 |
commit | 96cb86d41e4800b0f359a4d25dad2a6a94501e32 (patch) | |
tree | 0b76472d405cb7d06f308aa79243f60e0d139b08 | |
parent | 801fc9cb186ceb16abc2dd96ecc6f1749f1cde12 (diff) | |
download | mongo-96cb86d41e4800b0f359a4d25dad2a6a94501e32.tar.gz |
SERVER-33966 Removed Redundant Sort
-rw-r--r-- | jstests/sharding/query/agg_mongos_merge.js | 23 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_sort.cpp | 17 | ||||
-rw-r--r-- | src/mongo/db/pipeline/pipeline_test.cpp | 232 |
3 files changed, 264 insertions, 8 deletions
diff --git a/jstests/sharding/query/agg_mongos_merge.js b/jstests/sharding/query/agg_mongos_merge.js index f36e81b6005..e917fd2a810 100644 --- a/jstests/sharding/query/agg_mongos_merge.js +++ b/jstests/sharding/query/agg_mongos_merge.js @@ -9,6 +9,7 @@ * @tags: [ * requires_sharding, * requires_profiling, + * requires_fcv_46, * ] */ @@ -339,14 +340,6 @@ function runTestCasesWhoseMergeLocationDependsOnAllowDiskUse(allowDiskUse) { // All test cases should merge on mongoD if allowDiskUse is true, mongoS otherwise. const assertMergeOnMongoX = (allowDiskUse ? assertMergeOnMongoD : assertMergeOnMongoS); - // Test that a blocking $sort is only merged on mongoS if 'allowDiskUse' is not set. - assertMergeOnMongoX({ - testName: "agg_mongos_merge_blocking_sort_no_disk_use", - pipeline: [{$match: {_id: {$gte: -200, $lte: 200}}}, {$sort: {_id: -1}}, {$sort: {a: 1}}], - allowDiskUse: allowDiskUse, - expectedCount: 400 - }); - // Test that $group is only merged on mongoS if 'allowDiskUse' is not set. assertMergeOnMongoX({ testName: "agg_mongos_merge_group_allow_disk_use", @@ -356,6 +349,20 @@ function runTestCasesWhoseMergeLocationDependsOnAllowDiskUse(allowDiskUse) { expectedCount: 299 }); + // Adjacent $sort stages will be coalesced and merge sort will occur on anyShard when disk use + // is allowed, and on mongos otherwise. + assertMergeOnMongoX({ + testName: "agg_mongos_merge_blocking_sort_allow_disk_use", + pipeline: [ + {$match: {_id: {$gte: -200, $lte: 200}}}, + {$sort: {_id: 1}}, + {$_internalSplitPipeline: {}}, + {$sort: {a: 1}} + ], + allowDiskUse: allowDiskUse, + expectedCount: 400 + }); + // Test that a blocking $sample is only merged on mongoS if 'allowDiskUse' is not set. assertMergeOnMongoX({ testName: "agg_mongos_merge_blocking_sample_allow_disk_use", diff --git a/src/mongo/db/pipeline/document_source_sort.cpp b/src/mongo/db/pipeline/document_source_sort.cpp index dcf40ba110b..02290179439 100644 --- a/src/mongo/db/pipeline/document_source_sort.cpp +++ b/src/mongo/db/pipeline/document_source_sort.cpp @@ -169,6 +169,23 @@ Pipeline::SourceContainer::iterator DocumentSourceSort::doOptimizeAt( _sortExecutor->setLimit(*limit); } + if (std::next(itr) == container->end()) { + return container->end(); + } + + if (auto nextSort = dynamic_cast<DocumentSourceSort*>((*std::next(itr)).get())) { + // If subsequent $sort stage exists, optimize by erasing the initial one. + // Since $sort is not guaranteed to be stable, we can blindly remove the first $sort. + auto thisLim = _sortExecutor->getLimit(); + auto otherLim = nextSort->_sortExecutor->getLimit(); + // When coalescing subsequent $sort stages, retain the existing/lower limit. + if (thisLim && (!otherLim || otherLim > thisLim)) { + nextSort->_sortExecutor->setLimit(thisLim); + } + Pipeline::SourceContainer::iterator ret = std::next(itr); + container->erase(itr); + return ret; + } return std::next(itr); } diff --git a/src/mongo/db/pipeline/pipeline_test.cpp b/src/mongo/db/pipeline/pipeline_test.cpp index 43915889af4..239924ce42e 100644 --- a/src/mongo/db/pipeline/pipeline_test.cpp +++ b/src/mongo/db/pipeline/pipeline_test.cpp @@ -293,6 +293,238 @@ TEST(PipelineOptimizationTest, SortMatchProjSkipLimBecomesMatchTopKSortSkipProj) assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); } +TEST(PipelineOptimizationTest, IdenticalSortSortBecomesSort) { + std::string inputPipe = + "[{$sort: {a: 1}}" + ",{$sort: {a: 1}}" + "]"; + + std::string outputPipe = + "[{$sort: {sortKey: {a: 1}}}" + "]"; + + std::string serializedPipe = + "[{$sort: {a: 1}}" + "]"; + + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, IdenticalSortSortSortBecomesSort) { + std::string inputPipe = + "[{$sort: {a: 1}}" + ",{$sort: {a: 1}}" + ",{$sort: {a: 1}}" + "]"; + + std::string outputPipe = + "[{$sort: {sortKey: {a: 1}}}" + "]"; + + std::string serializedPipe = + "[{$sort: {a: 1}}" + "]"; + + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, NonIdenticalSortsOnlySortOnFinalKey) { + std::string inputPipe = + "[{$sort: {a: -1}}" + ",{$sort: {a: 1}}" + ",{$sort: {a: -1}}" + "]"; + + std::string outputPipe = + "[{$sort: {sortKey: {a: -1}}}" + "]"; + + std::string serializedPipe = + "[{$sort: {a: -1}}" + "]"; + + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, SortSortLimitBecomesFinalKeyTopKSort) { + std::string inputPipe = + "[{$sort: {a: -1}}" + ",{$sort: {a: 1}}" + ",{$limit: 5}" + "]"; + + std::string outputPipe = + "[{$sort: {sortKey: {a: 1}, limit: 5}}" + "]"; + + std::string serializedPipe = + "[{$sort: {a: 1}}" + ",{$limit: 5}" + "]"; + + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, SortSortSkipLimitBecomesTopKSortSkip) { + std::string inputPipe = + "[{$sort: {b: 1}}" + ",{$sort: {a: 1}}" + ",{$skip : 3}" + ",{$limit: 5}" + "]"; + + std::string outputPipe = + "[{$sort: {sortKey: {a: 1}, limit: 8}}" + ",{$skip: 3}" + "]"; + + std::string serializedPipe = + "[{$sort: {a: 1}}" + ",{$limit: 8}" + ",{$skip : 3}" + "]"; + + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, SortLimitSortLimitBecomesTopKSort) { + std::string inputPipe = + "[{$sort: {a: 1}}" + ",{$limit: 12}" + ",{$sort: {a: 1}}" + ",{$limit: 20}" + "]"; + + std::string outputPipe = + "[{$sort: {sortKey: {a: 1}, limit: 12}}" + "]"; + + std::string serializedPipe = + "[{$sort: {a: 1}}" + ",{$limit: 12}" + "]"; + + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, SortLimitSortRetainsLimit) { + std::string inputPipe = + "[{$sort: {a: 1}}" + ",{$limit: 12}" + ",{$sort: {a: 1}}" + "]"; + + std::string outputPipe = + "[{$sort: {sortKey: {a: 1}, limit: 12}}" + "]"; + + std::string serializedPipe = + "[{$sort: {a: 1}}" + ",{$limit: 12}" + "]"; + + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, SortSortLimitRetainsLimit) { + std::string inputPipe = + "[{$sort: {a: 1}}" + ",{$sort: {a: 1}}" + ",{$limit: 20}" + "]"; + + std::string outputPipe = + "[{$sort: {sortKey: {a: 1}, limit: 20}}" + "]"; + + std::string serializedPipe = + "[{$sort: {a: 1}}" + ",{$limit: 20}" + "]"; + + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, SortSortSortMatchProjSkipLimBecomesMatchTopKSortSkipProj) { + std::string inputPipe = + "[{$sort: {a: 1}}" + ",{$sort: {a: 1}}" + ",{$sort: {a: 1}}" + ",{$match: {a: 1}}" + ",{$project : {a: 1}}" + ",{$skip : 3}" + ",{$limit: 5}" + "]"; + + std::string outputPipe = + "[{$match: {a: {$eq: 1}}}" + ",{$sort: {sortKey: {a: 1}, limit: 8}}" + ",{$skip: 3}" + ",{$project: {_id: true, a: true}}" + "]"; + + std::string serializedPipe = + "[{$match: {a: 1}}" + ",{$sort: {a: 1}}" + ",{$limit: 8}" + ",{$skip : 3}" + ",{$project : {_id: true, a: true}}" + "]"; + + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, NonIdenticalSortsBecomeFinalKeyTopKSort) { + std::string inputPipe = + "[{$sort: {a: -1}}" + ",{$sort: {b: -1}}" + ",{$sort: {b: 1}}" + ",{$sort: {a: 1}}" + ",{$limit: 7}" + ",{$project : {a: 1}}" + ",{$limit: 5}" + "]"; + + std::string outputPipe = + "[{$sort: {sortKey: {a: 1}, limit: 5}}" + ",{$project: {_id: true, a: true}}" + "]"; + + std::string serializedPipe = + "[{$sort: {a: 1}}" + ",{$limit: 5}" + ",{$project : {_id: true, a: true}}" + "]"; + + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, SubsequentSortsMergeAndBecomeTopKSortWithFinalKeyAndLowestLimit) { + std::string inputPipe = + "[{$sort: {a: 1}}" + ",{$sort: {a: -1}}" + ",{$limit: 8}" + ",{$limit: 7}" + ",{$project : {a: 1}}" + ",{$unwind: {path: '$a'}}" + "]"; + + std::string outputPipe = + "[{$sort: {sortKey: {a: -1}, limit: 7}}" + ",{$project: {_id: true, a: true}}" + ",{$unwind: {path: '$a'}}" + "]"; + + std::string serializedPipe = + "[{$sort: {a: -1}}" + ",{$limit: 7}" + ",{$project : {_id: true, a: true}}" + ",{$unwind: {path: '$a'}}" + "]"; + + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + TEST(PipelineOptimizationTest, RemoveSkipZero) { assertPipelineOptimizesTo("[{$skip: 0}]", "[]"); } |