SERVER-65050 Optimize time-series sorting with point query on metadata

author: David Percy <david.percy@mongodb.com> 2022-04-25 16:52:45 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-05-05 17:38:40 +0000
commit: 2442d4b0d3adade21eb32e688b95a6ba3d98d5f3 (patch)
tree: 21105f0f18839cd2c6ebe3b008fc368a387061a9 /src
parent: c23236c1b63f147f950d921a5411749e637d54ae (diff)
download: mongo-2442d4b0d3adade21eb32e688b95a6ba3d98d5f3.tar.gz
6 files changed, 187 insertions, 69 deletions
diff --git a/src/mongo/db/exec/index_scan.cpp b/src/mongo/db/exec/index_scan.cpp
index 83cca369086..06399c4e33b 100644
--- a/src/mongo/db/exec/index_scan.cpp
+++ b/src/mongo/db/exec/index_scan.cpp
@@ -300,8 +300,4 @@ std::unique_ptr<PlanStageStats> IndexScan::getStats() {
     return ret;
 }
 
-const SpecificStats* IndexScan::getSpecificStats() const {
-    return &_specificStats;
-}
-
 }  // namespace mongo
diff --git a/src/mongo/db/exec/index_scan.h b/src/mongo/db/exec/index_scan.h
index c0cf75bf02d..ae4561108bb 100644
--- a/src/mongo/db/exec/index_scan.h
+++ b/src/mongo/db/exec/index_scan.h
@@ -127,7 +127,9 @@ public:
 
     std::unique_ptr<PlanStageStats> getStats() final;
 
-    const SpecificStats* getSpecificStats() const final;
+    const IndexScanStats* getSpecificStats() const final {
+        return &_specificStats;
+    }
 
     static const char* kStageType;
 
@@ -139,6 +141,10 @@ public:
         return _forward;
     }
 
+    const IndexBounds& getBounds() const {
+        return _bounds;
+    }
+
 protected:
     void doSaveStateRequiresIndex() final;
 
diff --git a/src/mongo/db/pipeline/pipeline_d.cpp b/src/mongo/db/pipeline/pipeline_d.cpp
index dbc96480c10..460b9c75095 100644
--- a/src/mongo/db/pipeline/pipeline_d.cpp
+++ b/src/mongo/db/pipeline/pipeline_d.cpp
@@ -910,6 +910,8 @@ boost::optional<std::pair<PipelineD::IndexSortOrderAgree, PipelineD::IndexOrdere
 PipelineD::supportsSort(const BucketUnpacker& bucketUnpacker,
                         PlanStage* root,
                         const SortPattern& sort) {
+    using SortPatternPart = SortPattern::SortPatternPart;
+
     if (!root)
         return boost::none;
 
@@ -931,72 +933,121 @@ PipelineD::supportsSort(const BucketUnpacker& bucketUnpacker,
         case STAGE_IXSCAN: {
             const IndexScan* scan = static_cast<IndexScan*>(root);
 
-            const auto keyPattern = scan->getKeyPattern();
-            const bool forward = scan->isForward();
+            // Scanning only part of an index means we don't see all the index keys for a
+            // document, which means the representative (first key we encounter, for a
+            // given document) will be different. For simplicity, just check whether the
+            // index is multikey. Mabye we could do better by looking at whether each field
+            // separately is multikey, or by allowing a full index scan.
+            if (scan->getSpecificStats()->isMultiKey)
+                return boost::none;
+
+            const auto& keyPattern = scan->getKeyPattern();
+
+            const auto& time = bucketUnpacker.getTimeField();
+            const auto& controlMinTime = bucketUnpacker.getMinField(time);
+            const auto& controlMaxTime = bucketUnpacker.getMaxField(time);
+
+            auto directionCompatible = [&](const BSONElement& keyPatternComponent,
+                                           const SortPatternPart& sortComponent) -> bool {
+                // The index component must not be special.
+                if (!keyPatternComponent.isNumber() || abs(keyPatternComponent.numberInt()) != 1)
+                    return false;
+                // Is the index (as it is stored) ascending or descending on this field?
+                const bool indexIsAscending = keyPatternComponent.numberInt() == 1;
+                // Does the index scan produce this field in ascending or descending order?
+                // For example: a backwards scan of a descending index produces ascending data.
+                const bool scanIsAscending = scan->isForward() == indexIsAscending;
+                return scanIsAscending == sortComponent.isAscending;
+            };
 
             // Return none if the keyPattern cannot support the sort.
-            // Note We add one to sort size to account for the compounding on min/max for the time
-            // field.
-            if ((sort.size() + 1 > (unsigned int)keyPattern.nFields()) || (sort.size() < 1)) {
-                return boost::none;
-            }
+            // Note We add one to sort size to account for the compounding on min/max for the
+            // time field.
+
+            // Compare the requested 'sort' against the index 'keyPattern' one field at a time.
+            // - If the leading fields are compatible, keep comparing.
+            // - If the leading field of the index has a point predicate, ignore it.
+            // - If we reach the end of the sort first, success!
+            // - if we find a field of the sort that the index can't satisfy, fail.
+
+            auto keyPatternIter = scan->getKeyPattern().begin();
+            auto sortIter = sort.begin();
+            for (;;) {
+                if (sortIter == sort.end()) {
+                    // We never found a 'time' field in the sort.
+                    return boost::none;
+                }
+                if (keyPatternIter == keyPattern.end()) {
+                    // There are still components of the sort, that the index key didn't satisfy.
+                    return boost::none;
+                }
+                if (!sortIter->fieldPath) {
+                    // We don't handle special $meta sort.
+                    return boost::none;
+                }
 
-            if (sort.size() == 1) {
-                auto part = sort[0];
+                // Does the leading sort field match the index?
 
-                // TOOD SERVER-65050: implement here.
+                if (sortAndKeyPatternPartAgreeAndOnMeta(bucketUnpacker,
+                                                        keyPatternIter->fieldNameStringData(),
+                                                        *sortIter->fieldPath)) {
+                    if (!directionCompatible(*keyPatternIter, *sortIter))
+                        return boost::none;
 
-                // Check the sort we're asking for is on time.
-                if (part.fieldPath && *part.fieldPath == bucketUnpacker.getTimeField()) {
-                    auto keyPatternIter = keyPattern.begin();
+                    // No conflict. Continue comparing the index vs the sort.
+                    ++keyPatternIter;
+                    ++sortIter;
+                    continue;
+                }
 
-                    return checkTimeHelper(
-                        bucketUnpacker, keyPatternIter, forward, *part.fieldPath, part.isAscending);
+                // Does this index field have a point predicate?
+                auto hasPointPredicate = [&](StringData fieldName) -> bool {
+                    for (auto&& field : scan->getBounds().fields) {
+                        if (field.name == fieldName)
+                            return field.isPoint();
+                    }
+                    return false;
+                };
+                if (hasPointPredicate(keyPatternIter->fieldNameStringData())) {
+                    ++keyPatternIter;
+                    continue;
                 }
-            } else if (sort.size() >= 2) {
-                size_t i = 0;
-
-                for (auto keyPatternIter = keyPattern.begin();
-                     i < sort.size() && keyPatternIter != keyPattern.end();
-                     (++keyPatternIter)) {
-                    auto part = sort[i];
-                    if (!(part.fieldPath))
-                        return boost::none;
 
-                    if (i < sort.size() - 1) {
-                        // Check the meta field index isn't special.
-                        if (!(*keyPatternIter).isNumber() ||
-                            abs((*keyPatternIter).numberInt()) != 1) {
-                            return boost::none;
-                        }
+                if ((*sortIter->fieldPath) == time) {
+                    // We require the 'time' field to be the last component of the sort.
+                    // (It's fine if the index has additional fields; we just ignore those.)
+                    if (std::next(sortIter) != sort.end())
+                        return boost::none;
 
-                        // True = ascending; false = descending.
-                        bool direction = ((*keyPatternIter).numberInt() == 1);
-                        direction = (forward) ? direction : !direction;
+                    // Now any of the following index fields can satisfy a sort on time:
+                    // - control.min.time
+                    // - control.max.time
+                    // - _id  (like control.min.time but may break ties)
+                    // as long as the direction matches.
+                    // However, it's not possible for users to index the bucket _id (unless they
+                    // bypass the view), so don't bother optimizing that case.
+                    auto&& ixField = keyPatternIter->fieldNameStringData();
+                    if (ixField != controlMinTime && ixField != controlMaxTime)
+                        return boost::none;
 
-                        // Return false if partOne and the first keyPattern part don't agree.
-                        if (!sortAndKeyPatternPartAgreeAndOnMeta(
-                                bucketUnpacker, (*keyPatternIter).fieldName(), *part.fieldPath) ||
-                            part.isAscending != direction)
-                            return boost::none;
-                    } else {
-                        if (!part.fieldPath ||
-                            !(*part.fieldPath == bucketUnpacker.getTimeField())) {
-                            return boost::none;
-                        }
+                    if (!directionCompatible(*keyPatternIter, *sortIter))
+                        return boost::none;
 
-                        return checkTimeHelper(bucketUnpacker,
-                                               keyPatternIter,
-                                               forward,
-                                               *part.fieldPath,
-                                               part.isAscending);
-                    }
+                    // Success! Every field of the sort can be satisfied by a field of the index.
 
-                    // Increment index
-                    ++i;
+                    // Now the caller wants to know:
+                    // 1. Does the field in the index agree with the scan direction?
+                    //    An index on 'control.min.time' or '_id' is better for ascending.
+                    //    An index on 'control.max.time' is better for descending.
+                    // 2. Which field was first? min or max (treating _id the same as min).
+                    const bool isMinFirst = keyPatternIter->fieldNameStringData() != controlMaxTime;
+                    const bool indexOrderAgree = isMinFirst == sortIter->isAscending;
+                    return {{indexOrderAgree, isMinFirst}};
                 }
+
+                // This index field can't satisfy this sort field.
+                return boost::none;
             }
-            return boost::none;
         }
         default:
             return boost::none;
@@ -1109,6 +1160,7 @@ PipelineD::buildInnerQueryExecutorGeneric(const MultipleCollectionAccessor& coll
                 // Scan the pipeline to check if it's compatible with the  optimization.
                 bool badStage = false;
                 bool seenSort = false;
+                bool seenUnpack = false;
                 std::list<boost::intrusive_ptr<DocumentSource>>::iterator iter =
                     pipeline->_sources.begin();
                 std::list<boost::intrusive_ptr<DocumentSource>>::iterator unpackIter =
@@ -1118,26 +1170,78 @@ PipelineD::buildInnerQueryExecutorGeneric(const MultipleCollectionAccessor& coll
                         seenSort = true;
                     } else if (dynamic_cast<const DocumentSourceMatch*>(iter->get())) {
                         // do nothing
-                    } else if (dynamic_cast<const DocumentSourceInternalUnpackBucket*>(
-                                   iter->get())) {
+                    } else if (const auto* unpack =
+                                   dynamic_cast<const DocumentSourceInternalUnpackBucket*>(
+                                       iter->get())) {
                         unpackIter = iter;
+                        uassert(6505001,
+                                str::stream()
+                                    << "Expected at most one "
+                                    << DocumentSourceInternalUnpackBucket::kStageNameInternal
+                                    << " stage in the pipeline",
+                                !seenUnpack);
+                        seenUnpack = true;
+
+                        // Check that the time field is preserved.
+                        if (!unpack->includeTimeField())
+                            badStage = true;
+
+                        // If the sort is compound, check that the entire meta field is preserved.
+                        if (sortPattern.size() > 1) {
+                            // - Is there a meta field?
+                            // - Will it be unpacked?
+                            // - Will it be overwritten by 'computedMetaProjFields'?
+                            auto&& unpacker = unpack->bucketUnpacker();
+                            const boost::optional<std::string>& metaField = unpacker.getMetaField();
+                            if (!metaField || !unpack->includeMetaField() ||
+                                unpacker.bucketSpec().fieldIsComputed(*metaField)) {
+                                badStage = true;
+                            }
+                        }
                     } else if (auto projection =
                                    dynamic_cast<const DocumentSourceSingleDocumentTransformation*>(
                                        iter->get())) {
                         auto modPaths = projection->getModifiedPaths();
 
                         // Check to see if the sort paths are modified.
-                        for (auto sortIter = sortPattern.begin();
-                             !badStage && sortIter != sortPattern.end();
-                             ++sortIter) {
-
-                            auto fieldPath = sortIter->fieldPath;
-                            // If they are then escap the loop & don't optimize.
-                            if (!fieldPath || modPaths.canModify(*fieldPath)) {
-                                badStage = true;
+                        if (seenUnpack) {
+                            // This stage operates on events: check the event-level field names.
+                            for (auto sortIter = sortPattern.begin();
+                                 !badStage && sortIter != sortPattern.end();
+                                 ++sortIter) {
+
+                                auto fieldPath = sortIter->fieldPath;
+                                // If they are then escape the loop & don't optimize.
+                                if (!fieldPath || modPaths.canModify(*fieldPath)) {
+                                    badStage = true;
+                                }
+                            }
+                        } else {
+                            // This stage operates on buckets: check the bucket-level field names.
+
+                            // The time field maps to control.min.[time], control.max.[time], or
+                            // _id, and $_internalUnpackBucket assumes that all of those fields are
+                            // preserved. (We never push down a stage that would overwrite them.)
+
+                            // Each field [meta].a.b.c maps to 'meta.a.b.c'.
+                            auto rename = [&](const FieldPath& eventField) -> FieldPath {
+                                if (eventField.getPathLength() == 1)
+                                    return timeseries::kBucketMetaFieldName;
+                                return FieldPath{timeseries::kBucketMetaFieldName}.concat(
+                                    eventField.tail());
+                            };
+
+                            for (auto sortIter = sortPattern.begin(),
+                                      // Skip the last field, which is time: only check the meta
+                                      // fields.
+                                 end = std::prev(sortPattern.end());
+                                 !badStage && sortIter != end;
+                                 ++sortIter) {
+                                auto bucketFieldPath = rename(*sortIter->fieldPath);
+                                if (modPaths.canModify(bucketFieldPath))
+                                    badStage = true;
                             }
                         }
-
                     } else {
                         badStage = true;
                     }
diff --git a/src/mongo/db/pipeline/pipeline_d.h b/src/mongo/db/pipeline/pipeline_d.h
index 4a88600b114..663f1126e90 100644
--- a/src/mongo/db/pipeline/pipeline_d.h
+++ b/src/mongo/db/pipeline/pipeline_d.h
@@ -306,7 +306,7 @@ Unpacking with Sort Optimization.
     }
 
     static bool sortAndKeyPatternPartAgreeAndOnMeta(const BucketUnpacker& bucketUnpacker,
-                                                    const char* keyPatternFieldName,
+                                                    StringData keyPatternFieldName,
                                                     const FieldPath& sortFieldPath) {
         FieldPath keyPatternFieldPath = FieldPath(keyPatternFieldName);
 
diff --git a/src/mongo/db/query/index_bounds.cpp b/src/mongo/db/query/index_bounds.cpp
index 39bf4f67737..ecb1f21208b 100644
--- a/src/mongo/db/query/index_bounds.cpp
+++ b/src/mongo/db/query/index_bounds.cpp
@@ -244,6 +244,10 @@ bool OrderedIntervalList::isMinToMax() const {
     return intervals.size() == 1 && intervals[0].isMinToMax();
 }
 
+bool OrderedIntervalList::isPoint() const {
+    return intervals.size() == 1 && intervals[0].isPoint();
+}
+
 // static
 void OrderedIntervalList::complement() {
     BSONObjBuilder minBob;
diff --git a/src/mongo/db/query/index_bounds.h b/src/mongo/db/query/index_bounds.h
index f9838065106..d828f275164 100644
--- a/src/mongo/db/query/index_bounds.h
+++ b/src/mongo/db/query/index_bounds.h
@@ -93,6 +93,14 @@ struct OrderedIntervalList {
      * Returns true if this OIL represents a single [MinKey, MaxKey] bound.
      */
     bool isMinToMax() const;
+
+    /**
+     * Returns true if this OIL represents a point predicate: [N, N].
+     *
+     * These predicates are interesting because if you have an index on {a:1, b:1},
+     * and a point predicate on 'a', then the index provides a sort on {b: 1}.
+     */
+    bool isPoint() const;
 };
 
 /**
author	David Percy <david.percy@mongodb.com>	2022-04-25 16:52:45 +0000
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2022-05-05 17:38:40 +0000
commit	2442d4b0d3adade21eb32e688b95a6ba3d98d5f3 (patch)
tree	21105f0f18839cd2c6ebe3b008fc368a387061a9 /src
parent	c23236c1b63f147f950d921a5411749e637d54ae (diff)
download	mongo-2442d4b0d3adade21eb32e688b95a6ba3d98d5f3.tar.gz