summaryrefslogtreecommitdiff
path: root/src/mongo/db/pipeline/document_source_unwind.cpp
diff options
context:
space:
mode:
authorMatt Boros <matt.boros@mongodb.com>2021-10-25 21:40:15 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-10-27 18:40:40 +0000
commitc034afdd3d2513adb8750263459f0a67302106e4 (patch)
tree7794ad01a9cd94402058b9007b4c45199316007b /src/mongo/db/pipeline/document_source_unwind.cpp
parent706df8b530794bd6326d6127baa509af43fc558a (diff)
downloadmongo-c034afdd3d2513adb8750263459f0a67302106e4.tar.gz
SERVER-27598 Added logic to allow limit to duplicate and swap before unwind. Altered unwind and sort logic to check for top-k sort.
Diffstat (limited to 'src/mongo/db/pipeline/document_source_unwind.cpp')
-rw-r--r--src/mongo/db/pipeline/document_source_unwind.cpp64
1 files changed, 49 insertions, 15 deletions
diff --git a/src/mongo/db/pipeline/document_source_unwind.cpp b/src/mongo/db/pipeline/document_source_unwind.cpp
index 4dfa047bbb1..daf7adcd5ed 100644
--- a/src/mongo/db/pipeline/document_source_unwind.cpp
+++ b/src/mongo/db/pipeline/document_source_unwind.cpp
@@ -27,6 +27,7 @@
* it in the license file.
*/
+#include "mongo/db/pipeline/document_source_limit.h"
#include "mongo/platform/basic.h"
#include "mongo/db/pipeline/document_source_unwind.h"
@@ -226,21 +227,15 @@ DocumentSource::GetModPathsReturn DocumentSourceUnwind::getModifiedPaths() const
return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedFields), {}};
}
-Pipeline::SourceContainer::iterator DocumentSourceUnwind::doOptimizeAt(
- Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
- tassert(5482200, "DocumentSourceUnwind: itr must point to this object", *itr == this);
-
- if (std::next(itr) == container->end()) {
- return container->end();
- }
-
- // If the following stage is $sort (on a different field), push before $unwind.
- auto nextSort = dynamic_cast<DocumentSourceSort*>(std::next(itr)->get());
- if (nextSort) {
+bool DocumentSourceUnwind::canPushSortBack(const DocumentSourceSort* sort) const {
+ // If the sort has a limit, we should also check that _preserveNullAndEmptyArrays is true,
+ // otherwise when we swap the limit and unwind, we could end up providing fewer results to the
+ // user than expected.
+ if (!sort->hasLimit() || _preserveNullAndEmptyArrays) {
auto unwindPath = _unwindPath.fullPath();
// Checks if any of the $sort's paths depend on the unwind path (or vice versa).
- SortPattern sortKeyPattern = nextSort->getSortKeyPattern();
+ SortPattern sortKeyPattern = sort->getSortKeyPattern();
bool sortPathMatchesUnwindPath =
std::any_of(sortKeyPattern.begin(), sortKeyPattern.end(), [&](auto& sortKey) {
// If 'sortKey' is a $meta expression, we can do the swap.
@@ -249,11 +244,50 @@ Pipeline::SourceContainer::iterator DocumentSourceUnwind::doOptimizeAt(
return expression::bidirectionalPathPrefixOf(unwindPath,
sortKey.fieldPath->fullPath());
});
+ return !sortPathMatchesUnwindPath;
+ }
+ return false;
+}
+
+bool DocumentSourceUnwind::canPushLimitBack(const DocumentSourceLimit* limit) const {
+ // If _smallestLimitPushedDown is boost::none, then we have not yet pushed a limit down. So no
+ // matter what the limit is, we should duplicate and push down. Otherwise we should only push
+ // the limit down if it is smaller than the smallest limit we have pushed down so far.
+ return !_smallestLimitPushedDown || limit->getLimit() < _smallestLimitPushedDown.get();
+}
+
+Pipeline::SourceContainer::iterator DocumentSourceUnwind::doOptimizeAt(
+ Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
+ tassert(5482200, "DocumentSourceUnwind: itr must point to this object", *itr == this);
+
+ if (std::next(itr) == container->end()) {
+ return container->end();
+ }
- if (!sortPathMatchesUnwindPath) {
- std::swap(*itr, *std::next(itr));
- return itr == container->begin() ? itr : std::prev(itr);
+ // If the following stage is $sort (on a different field), push before $unwind.
+ auto next = std::next(itr);
+ auto nextSort = dynamic_cast<DocumentSourceSort*>(next->get());
+ if (nextSort && canPushSortBack(nextSort)) {
+ // If this sort is a top-k sort, we should add a limit after the unwind so that we preserve
+ // behavior and not provide more results than requested.
+ if (nextSort->hasLimit()) {
+ container->insert(
+ std::next(next),
+ DocumentSourceLimit::create(nextSort->getContext(), nextSort->getLimit().get()));
}
+ std::swap(*itr, *next);
+ return itr == container->begin() ? itr : std::prev(itr);
+ }
+
+ // If _preserveNullAndEmptyArrays is true, and unwind is followed by a limit, we can add a
+ // duplicate limit before the unwind to prevent sources further down the pipeline from giving us
+ // more than we need.
+ auto nextLimit = dynamic_cast<DocumentSourceLimit*>(next->get());
+ if (nextLimit && _preserveNullAndEmptyArrays && canPushLimitBack(nextLimit)) {
+ _smallestLimitPushedDown = nextLimit->getLimit();
+ auto newStageItr = container->insert(
+ itr, DocumentSourceLimit::create(nextLimit->getContext(), nextLimit->getLimit()));
+ return newStageItr == container->begin() ? newStageItr : std::prev(newStageItr);
}
return std::next(itr);