diff options
author | Eric Cox <eric.cox@mongodb.com> | 2020-10-23 15:06:54 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-10-23 21:57:50 +0000 |
commit | 5f66c3fa2af530e5f9242758eef87846d564fe96 (patch) | |
tree | 5b5a08f1e4e9a37ddd5dc9e0b1bb363fd19492a8 | |
parent | 9b2ee74bf481010096207871e20e22e3f0584e0e (diff) | |
download | mongo-5f66c3fa2af530e5f9242758eef87846d564fe96.tar.gz |
SERVER-50897 Fix Sort/Limit/Sort optimization
(cherry picked from commit d54f13fe76a8a9be411966ecdbf840c520d43833)
-rw-r--r-- | src/mongo/db/pipeline/document_source_sort.cpp | 36 | ||||
-rw-r--r-- | src/mongo/db/pipeline/pipeline_test.cpp | 224 | ||||
-rw-r--r-- | src/mongo/db/query/sort_pattern.h | 9 |
3 files changed, 36 insertions, 233 deletions
diff --git a/src/mongo/db/pipeline/document_source_sort.cpp b/src/mongo/db/pipeline/document_source_sort.cpp index 02290179439..61a37f56134 100644 --- a/src/mongo/db/pipeline/document_source_sort.cpp +++ b/src/mongo/db/pipeline/document_source_sort.cpp @@ -35,6 +35,7 @@ #include "mongo/base/exact_cast.h" #include "mongo/db/exec/document_value/document.h" +#include "mongo/db/exec/document_value/document_comparator.h" #include "mongo/db/exec/document_value/value.h" #include "mongo/db/jsobj.h" #include "mongo/db/pipeline/document_source_skip.h" @@ -169,24 +170,33 @@ Pipeline::SourceContainer::iterator DocumentSourceSort::doOptimizeAt( _sortExecutor->setLimit(*limit); } - if (std::next(itr) == container->end()) { + auto nextStage = std::next(itr); + if (nextStage == container->end()) { return container->end(); } - if (auto nextSort = dynamic_cast<DocumentSourceSort*>((*std::next(itr)).get())) { - // If subsequent $sort stage exists, optimize by erasing the initial one. - // Since $sort is not guaranteed to be stable, we can blindly remove the first $sort. - auto thisLim = _sortExecutor->getLimit(); - auto otherLim = nextSort->_sortExecutor->getLimit(); - // When coalescing subsequent $sort stages, retain the existing/lower limit. - if (thisLim && (!otherLim || otherLim > thisLim)) { - nextSort->_sortExecutor->setLimit(thisLim); - } - Pipeline::SourceContainer::iterator ret = std::next(itr); + limit = getLimit(); + + // Since $sort is not guaranteed to be stable, we can blindly remove the first $sort only when + // there's no limit on the current sort. + auto nextSort = dynamic_cast<DocumentSourceSort*>((*nextStage).get()); + if (!limit && nextSort) { container->erase(itr); - return ret; + return nextStage; + } + + if (limit && nextSort) { + // If there's a limit between two adjacent sorts with the same key pattern it's safe to + // merge the two sorts and take the minimum of the limits. + if (dynamic_cast<DocumentSourceSort*>((*itr).get())->getSortKeyPattern() == + nextSort->getSortKeyPattern()) { + // When coalescing subsequent $sort stages, the existing/lower limit is retained in + // 'setLimit'. + nextSort->_sortExecutor->setLimit(*limit); + container->erase(itr); + } } - return std::next(itr); + return nextStage; } DepsTracker::State DocumentSourceSort::getDependencies(DepsTracker* deps) const { diff --git a/src/mongo/db/pipeline/pipeline_test.cpp b/src/mongo/db/pipeline/pipeline_test.cpp index 7ddeeb49a16..2fc38946417 100644 --- a/src/mongo/db/pipeline/pipeline_test.cpp +++ b/src/mongo/db/pipeline/pipeline_test.cpp @@ -293,235 +293,19 @@ TEST(PipelineOptimizationTest, SortMatchProjSkipLimBecomesMatchTopKSortSkipProj) assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); } -TEST(PipelineOptimizationTest, IdenticalSortSortBecomesSort) { +TEST(PipelineOptimizationTest, SortLimitSortWithDifferentSortPatterns) { std::string inputPipe = "[{$sort: {a: 1}}" - ",{$sort: {a: 1}}" - "]"; - - std::string outputPipe = - "[{$sort: {sortKey: {a: 1}}}" - "]"; - - std::string serializedPipe = - "[{$sort: {a: 1}}" - "]"; - - assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); -} - -TEST(PipelineOptimizationTest, IdenticalSortSortSortBecomesSort) { - std::string inputPipe = - "[{$sort: {a: 1}}" - ",{$sort: {a: 1}}" - ",{$sort: {a: 1}}" - "]"; - - std::string outputPipe = - "[{$sort: {sortKey: {a: 1}}}" - "]"; - - std::string serializedPipe = - "[{$sort: {a: 1}}" - "]"; - - assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); -} - -TEST(PipelineOptimizationTest, NonIdenticalSortsOnlySortOnFinalKey) { - std::string inputPipe = - "[{$sort: {a: -1}}" - ",{$sort: {a: 1}}" - ",{$sort: {a: -1}}" - "]"; - - std::string outputPipe = - "[{$sort: {sortKey: {a: -1}}}" - "]"; - - std::string serializedPipe = - "[{$sort: {a: -1}}" - "]"; - - assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); -} - -TEST(PipelineOptimizationTest, SortSortLimitBecomesFinalKeyTopKSort) { - std::string inputPipe = - "[{$sort: {a: -1}}" - ",{$sort: {a: 1}}" - ",{$limit: 5}" - "]"; - - std::string outputPipe = - "[{$sort: {sortKey: {a: 1}, limit: 5}}" - "]"; - - std::string serializedPipe = - "[{$sort: {a: 1}}" - ",{$limit: 5}" - "]"; - - assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); -} - -TEST(PipelineOptimizationTest, SortSortSkipLimitBecomesTopKSortSkip) { - std::string inputPipe = - "[{$sort: {b: 1}}" - ",{$sort: {a: 1}}" - ",{$skip : 3}" - ",{$limit: 5}" - "]"; - - std::string outputPipe = - "[{$sort: {sortKey: {a: 1}, limit: 8}}" - ",{$skip: 3}" - "]"; - - std::string serializedPipe = - "[{$sort: {a: 1}}" - ",{$limit: 8}" - ",{$skip : 3}" - "]"; - - assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); -} - -TEST(PipelineOptimizationTest, SortLimitSortLimitBecomesTopKSort) { - std::string inputPipe = - "[{$sort: {a: 1}}" - ",{$limit: 12}" - ",{$sort: {a: 1}}" - ",{$limit: 20}" - "]"; - - std::string outputPipe = - "[{$sort: {sortKey: {a: 1}, limit: 12}}" - "]"; - - std::string serializedPipe = - "[{$sort: {a: 1}}" ",{$limit: 12}" - "]"; - - assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); -} - -TEST(PipelineOptimizationTest, SortLimitSortRetainsLimit) { - std::string inputPipe = - "[{$sort: {a: 1}}" - ",{$limit: 12}" - ",{$sort: {a: 1}}" - "]"; - - std::string outputPipe = - "[{$sort: {sortKey: {a: 1}, limit: 12}}" - "]"; - - std::string serializedPipe = - "[{$sort: {a: 1}}" - ",{$limit: 12}" - "]"; - - assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); -} - -TEST(PipelineOptimizationTest, SortSortLimitRetainsLimit) { - std::string inputPipe = - "[{$sort: {a: 1}}" - ",{$sort: {a: 1}}" - ",{$limit: 20}" - "]"; - - std::string outputPipe = - "[{$sort: {sortKey: {a: 1}, limit: 20}}" - "]"; - - std::string serializedPipe = - "[{$sort: {a: 1}}" - ",{$limit: 20}" - "]"; - - assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); -} - -TEST(PipelineOptimizationTest, SortSortSortMatchProjSkipLimBecomesMatchTopKSortSkipProj) { - std::string inputPipe = - "[{$sort: {a: 1}}" - ",{$sort: {a: 1}}" - ",{$sort: {a: 1}}" - ",{$match: {a: 1}}" - ",{$project : {a: 1}}" - ",{$skip : 3}" - ",{$limit: 5}" - "]"; - - std::string outputPipe = - "[{$match: {a: {$eq: 1}}}" - ",{$sort: {sortKey: {a: 1}, limit: 8}}" - ",{$skip: 3}" - ",{$project: {_id: true, a: true}}" - "]"; - - std::string serializedPipe = - "[{$match: {a: 1}}" - ",{$sort: {a: 1}}" - ",{$limit: 8}" - ",{$skip : 3}" - ",{$project : {_id: true, a: true}}" - "]"; - - assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); -} - -TEST(PipelineOptimizationTest, NonIdenticalSortsBecomeFinalKeyTopKSort) { - std::string inputPipe = - "[{$sort: {a: -1}}" - ",{$sort: {b: -1}}" ",{$sort: {b: 1}}" - ",{$sort: {a: 1}}" - ",{$limit: 7}" - ",{$project : {a: 1}}" - ",{$limit: 5}" - "]"; - - std::string outputPipe = - "[{$sort: {sortKey: {a: 1}, limit: 5}}" - ",{$project: {_id: true, a: true}}" - "]"; - - std::string serializedPipe = - "[{$sort: {a: 1}}" - ",{$limit: 5}" - ",{$project : {_id: true, a: true}}" - "]"; - - assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); -} - -TEST(PipelineOptimizationTest, SubsequentSortsMergeAndBecomeTopKSortWithFinalKeyAndLowestLimit) { - std::string inputPipe = - "[{$sort: {a: 1}}" - ",{$sort: {a: -1}}" - ",{$limit: 8}" - ",{$limit: 7}" - ",{$project : {a: 1}}" - ",{$unwind: {path: '$a'}}" "]"; std::string outputPipe = - "[{$sort: {sortKey: {a: -1}, limit: 7}}" - ",{$project: {_id: true, a: true}}" - ",{$unwind: {path: '$a'}}" - "]"; - - std::string serializedPipe = - "[{$sort: {a: -1}}" - ",{$limit: 7}" - ",{$project : {_id: true, a: true}}" - ",{$unwind: {path: '$a'}}" + "[{$sort: {sortKey: {a: 1}, limit: 12}}" + ",{$sort: {sortKey: {b: 1}}}" "]"; + std::string serializedPipe = inputPipe; assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); } diff --git a/src/mongo/db/query/sort_pattern.h b/src/mongo/db/query/sort_pattern.h index 6c7103a5dfd..61fe2d2fa77 100644 --- a/src/mongo/db/query/sort_pattern.h +++ b/src/mongo/db/query/sort_pattern.h @@ -50,6 +50,11 @@ public: bool isAscending = true; boost::optional<FieldPath> fieldPath; boost::intrusive_ptr<ExpressionMeta> expression; + + bool operator==(const SortPatternPart& other) const { + return isAscending == other.isAscending && fieldPath == other.fieldPath && + expression == other.expression; + } }; SortPattern(const BSONObj&, const boost::intrusive_ptr<ExpressionContext>&); @@ -88,6 +93,10 @@ public: return _sortPattern[idx]; } + bool operator==(const SortPattern& other) const { + return _sortPattern == other._sortPattern && _paths == other._paths; + } + std::vector<SortPatternPart>::const_iterator begin() const { return _sortPattern.cbegin(); } |