SERVER-33005 Fix planner to avoid incorrect OR pushdown through $elemMatch.

The PlanEnumerator now tracks pushdown routes which descend through an $elemMatch object. These routes are pruned when subsequently descending through an OR.
author: David Storch <david.storch@10gen.com> 2018-02-08 14:55:41 -0500
committer: David Storch <david.storch@10gen.com> 2018-02-09 17:04:04 -0500
commit: 17b4094c4d781ffd486b27869f46eea706e490af (patch)
tree: ad5d0bcfa1293ccf13c74f8d2c611fa6f498ba9d /src/mongo
parent: d6ad188cd77fb4fcd155cf2c9165fad9e6f2c589 (diff)
download: mongo-17b4094c4d781ffd486b27869f46eea706e490af.tar.gz
3 files changed, 230 insertions, 24 deletions
diff --git a/src/mongo/db/query/plan_enumerator.cpp b/src/mongo/db/query/plan_enumerator.cpp
index 7016d57b043..04bc0d93ec8 100644
--- a/src/mongo/db/query/plan_enumerator.cpp
+++ b/src/mongo/db/query/plan_enumerator.cpp
@@ -396,8 +396,24 @@ bool PlanEnumerator::prepMemo(MatchExpression* node, PrepMemoContext context) {
 
             // Extend the path through the indexed ORs of each outside predicate.
             auto childContextCopy = childContext;
-            for (auto& pred : childContextCopy.outsidePreds) {
-                pred.second.push_back(i);
+            for (auto it = childContextCopy.outsidePreds.begin();
+                 it != childContextCopy.outsidePreds.end();) {
+                // If the route has already traversed through an $elemMatch object, then we cannot
+                // push down through this OR. Here we remove such routes from our context object.
+                //
+                // For example, suppose we have index {a: 1, "b.c": 1} and the following query:
+                //
+                //   {a: 1, b: {$elemMatch: {$or: [{c: 2}, {c: 3}]}}}
+                //
+                // It is not correct to push the 'a' predicate down such that it is a sibling of
+                // either of the predicates on 'c', since this would change the predicate's meaning
+                // from a==1 to "b.a"==1.
+                if (it->second.traversedThroughElemMatchObj) {
+                    it = childContextCopy.outsidePreds.erase(it);
+                } else {
+                    it->second.route.push_back(i);
+                    ++it;
+                }
             }
 
             if (!prepMemo(node->getChild(i), childContextCopy)) {
@@ -423,6 +439,7 @@ bool PlanEnumerator::prepMemo(MatchExpression* node, PrepMemoContext context) {
 
         if (MatchExpression::ELEM_MATCH_OBJECT == node->matchType()) {
             childContext.elemMatchExpr = node;
+            markTraversedThroughElemMatchObj(&childContext);
         }
 
         // For an OR to be indexed, all its children must be indexed.
@@ -462,23 +479,22 @@ bool PlanEnumerator::prepMemo(MatchExpression* node, PrepMemoContext context) {
         // (e.g. an OR which contains a TEXT child).
         vector<MemoID> mandatorySubnodes;
 
-        // A list of predicates contained in the subtree rooted at 'node'
-        // obtained by traversing deeply through $and and $elemMatch children.
-        vector<MatchExpression*> indexedPreds;
+        // A list of predicates contained in the subtree rooted at 'node' obtained by traversing
+        // deeply through $and and $elemMatch children.
+        std::vector<MatchExpression*> indexedPreds;
 
-        // Partition the childen into the children that aren't predicates which may or may
-        // not be indexed ('subnodes'), children that aren't predicates which must use the
-        // index ('mandatorySubnodes'). and children that are predicates ('indexedPreds').
+        // Partition the childen into the children that aren't predicates which may or may not be
+        // indexed ('subnodes'), children that aren't predicates which must use the index
+        // ('mandatorySubnodes'). and children that are predicates ('indexedPreds').
         //
-        // We have to get the subnodes with mandatory assignments rather than adding the
-        // mandatory preds to 'indexedPreds'. Adding the mandatory preds directly to
-        // 'indexedPreds' would lead to problems such as pulling a predicate beneath an OR
-        // into a set joined by an AND.
+        // We have to get the subnodes with mandatory assignments rather than adding the mandatory
+        // preds to 'indexedPreds'. Adding the mandatory preds directly to 'indexedPreds' would lead
+        // to problems such as pulling a predicate beneath an OR into a set joined by an AND.
         getIndexedPreds(node, childContext, &indexedPreds);
         // Pass in the indexed predicates as outside predicates when prepping the subnodes.
         auto childContextCopy = childContext;
         for (auto pred : indexedPreds) {
-            childContextCopy.outsidePreds[pred] = std::deque<size_t>();
+            childContextCopy.outsidePreds[pred] = OutsidePredRoute{};
         }
         if (!prepSubNodes(node, childContextCopy, &subnodes, &mandatorySubnodes)) {
             return false;
@@ -667,14 +683,14 @@ bool PlanEnumerator::enumerateMandatoryIndex(const IndexToPredMap& idxToFirst,
                     // Assign any predicates on the non-leading index fields to 'indexAssign' that
                     // don't violate the intersecting or compounding rules for multikey indexes.
                     // We do not currently try to assign outside predicates to mandatory indexes.
-                    const unordered_map<MatchExpression*, std::deque<size_t>> outsidePreds{};
+                    const unordered_map<MatchExpression*, OutsidePredRoute> outsidePreds{};
                     assignMultikeySafePredicates(compIt->second, outsidePreds, &indexAssign);
                 }
             } else {
                 // Assign any predicates on the leading index field to 'indexAssign' that don't
                 // violate the intersecting rules for multikey indexes.
                 // We do not currently try to assign outside predicates to mandatory indexes.
-                const unordered_map<MatchExpression*, std::deque<size_t>> outsidePreds{};
+                const unordered_map<MatchExpression*, OutsidePredRoute> outsidePreds{};
                 assignMultikeySafePredicates(predsOverLeadingField, outsidePreds, &indexAssign);
 
                 // Assign the mandatory predicate to 'thisIndex'. Due to how keys are generated for
@@ -778,13 +794,13 @@ bool PlanEnumerator::enumerateMandatoryIndex(const IndexToPredMap& idxToFirst,
 }
 
 void PlanEnumerator::assignPredicate(
-    const unordered_map<MatchExpression*, std::deque<size_t>>& outsidePreds,
+    const unordered_map<MatchExpression*, OutsidePredRoute>& outsidePreds,
     MatchExpression* pred,
     size_t position,
     OneIndexAssignment* indexAssignment) {
     if (outsidePreds.find(pred) != outsidePreds.end()) {
         OrPushdownTag::Destination dest;
-        dest.route = outsidePreds.at(pred);
+        dest.route = outsidePreds.at(pred).route;
 
         // This method should only be called if we can combine bounds.
         const bool canCombineBounds = true;
@@ -797,11 +813,30 @@ void PlanEnumerator::assignPredicate(
     }
 }
 
+void PlanEnumerator::markTraversedThroughElemMatchObj(PrepMemoContext* context) {
+    invariant(context);
+    for (auto&& pred : context->outsidePreds) {
+        auto relevantTag = static_cast<RelevantTag*>(pred.first->getTag());
+        // Only indexed predicates should ever be considered as outside predicates eligible for
+        // pushdown.
+        invariant(relevantTag);
+
+        // Check whether the current $elemMatch through which we are traversing is the same as the
+        // outside predicate's $elemMatch context. If so, then that outside predicate hasn't
+        // actually traversed through an $elemMatch (it has simply been promoted by
+        // getIndexedPreds() into the set of AND-related indexed predicates). If not, then the OR
+        // pushdown route descends through an $elemMatch object node, and must be marked as such.
+        if (relevantTag->elemMatchExpr != context->elemMatchExpr) {
+            pred.second.traversedThroughElemMatchObj = true;
+        }
+    }
+}
+
 void PlanEnumerator::enumerateOneIndex(
     IndexToPredMap idxToFirst,
     IndexToPredMap idxToNotFirst,
     const vector<MemoID>& subnodes,
-    const unordered_map<MatchExpression*, std::deque<size_t>>& outsidePreds,
+    const unordered_map<MatchExpression*, OutsidePredRoute>& outsidePreds,
     AndAssignment* andAssignment) {
     // Each choice in the 'andAssignment' will consist of a single subnode to index (an OR or array
     // operator) or a OneIndexAssignment. When creating a OneIndexAssignment, we ensure that at
@@ -1203,7 +1238,7 @@ void PlanEnumerator::enumerateAndIntersect(const IndexToPredMap& idxToFirst,
 
 void PlanEnumerator::getIndexedPreds(MatchExpression* node,
                                      PrepMemoContext context,
-                                     vector<MatchExpression*>* indexedPreds) {
+                                     std::vector<MatchExpression*>* indexedPreds) {
     if (Indexability::nodeCanUseIndexOnOwnField(node)) {
         RelevantTag* rt = static_cast<RelevantTag*>(node->getTag());
         if (context.elemMatchExpr) {
@@ -1261,6 +1296,7 @@ bool PlanEnumerator::prepSubNodes(MatchExpression* node,
             PrepMemoContext childContext;
             childContext.elemMatchExpr = child;
             childContext.outsidePreds = context.outsidePreds;
+            markTraversedThroughElemMatchObj(&childContext);
             prepSubNodes(child, childContext, subnodesOut, mandatorySubnodes);
         } else if (MatchExpression::AND == child->matchType()) {
             prepSubNodes(child, context, subnodesOut, mandatorySubnodes);
@@ -1351,7 +1387,7 @@ void PlanEnumerator::getMultikeyCompoundablePreds(const vector<MatchExpression*>
 
 void PlanEnumerator::assignMultikeySafePredicates(
     const std::vector<MatchExpression*>& couldAssign,
-    const unordered_map<MatchExpression*, std::deque<size_t>>& outsidePreds,
+    const unordered_map<MatchExpression*, OutsidePredRoute>& outsidePreds,
     OneIndexAssignment* indexAssignment) {
     invariant(indexAssignment);
     invariant(indexAssignment->preds.size() == indexAssignment->positions.size());
diff --git a/src/mongo/db/query/plan_enumerator.h b/src/mongo/db/query/plan_enumerator.h
index 91522d27488..cd9c88b895c 100644
--- a/src/mongo/db/query/plan_enumerator.h
+++ b/src/mongo/db/query/plan_enumerator.h
@@ -124,13 +124,45 @@ private:
     // The position of a field in a possibly compound index.
     typedef size_t IndexPosition;
 
+    /**
+     * Represents the route that an outside predicate has taken during the PlanEnumerator's
+     * recursive descent of the match expression tree.
+     */
+    struct OutsidePredRoute {
+        /**
+         * Whether or not the route has traversed through an $elemMatch object node. This is needed
+         * because it is not correct to push down a predicate through an $elemMatch object.
+         */
+        bool traversedThroughElemMatchObj = false;
+
+        /**
+         * The route of the outside predicate. This starts at the indexed OR sibling of the
+         * predicate.  Each value in 'route' is the index of a child in an indexed OR.
+         *
+         * For example, if the MatchExpression tree is:
+         *         AND
+         *        /    \
+         *   {a: 5}    OR
+         *           /    \
+         *         AND    {e: 9}
+         *       /     \
+         *    {b: 6}   OR
+         *           /    \
+         *       {c: 7}  {d: 8}
+         *
+         * and the predicate is {a: 5}, then the route will be {0, 1} when the recursive descent
+         * reaches {d: 8}.
+         */
+        std::deque<size_t> route;
+    };
+
     struct PrepMemoContext {
         PrepMemoContext() : elemMatchExpr(NULL) {}
         MatchExpression* elemMatchExpr;
 
         // Maps from indexable predicates that can be pushed into the current node to the route
         // through ORs that they have taken to get to this node.
-        unordered_map<MatchExpression*, std::deque<size_t>> outsidePreds;
+        unordered_map<MatchExpression*, OutsidePredRoute> outsidePreds;
     };
 
     /**
@@ -372,7 +404,7 @@ private:
      */
     void assignMultikeySafePredicates(
         const std::vector<MatchExpression*>& couldAssign,
-        const unordered_map<MatchExpression*, std::deque<size_t>>& outsidePreds,
+        const unordered_map<MatchExpression*, OutsidePredRoute>& outsidePreds,
         OneIndexAssignment* indexAssignment);
 
     /**
@@ -419,7 +451,7 @@ private:
     void enumerateOneIndex(IndexToPredMap idxToFirst,
                            IndexToPredMap idxToNotFirst,
                            const std::vector<MemoID>& subnodes,
-                           const unordered_map<MatchExpression*, std::deque<size_t>>& outsidePreds,
+                           const unordered_map<MatchExpression*, OutsidePredRoute>& outsidePreds,
                            AndAssignment* andAssignment);
 
     /**
@@ -469,12 +501,17 @@ private:
      * 'outsidePreds'. 'pred' must be able to use the index and be multikey-safe to add to
      * 'indexAssignment'.
      */
-    void assignPredicate(const unordered_map<MatchExpression*, std::deque<size_t>>& outsidePreds,
+    void assignPredicate(const unordered_map<MatchExpression*, OutsidePredRoute>& outsidePreds,
                          MatchExpression* pred,
                          size_t position,
                          OneIndexAssignment* indexAssignment);
 
     /**
+     * Sets a flag on all outside pred routes that descend through an $elemMatch object node.
+     */
+    void markTraversedThroughElemMatchObj(PrepMemoContext* context);
+
+    /**
      * Return the memo entry for 'node'.  Does some sanity checking to ensure that a memo entry
      * actually exists.
      */
diff --git a/src/mongo/db/query/query_planner_array_test.cpp b/src/mongo/db/query/query_planner_array_test.cpp
index 3d2c3c1aa25..68521862a55 100644
--- a/src/mongo/db/query/query_planner_array_test.cpp
+++ b/src/mongo/db/query/query_planner_array_test.cpp
@@ -2054,6 +2054,139 @@ TEST_F(QueryPlannerTest, ContainedOrPathLevelMultikeyCannotCompoundTrailingOutsi
     assertSolutionExists("{cscan: {dir: 1}}}}");
 }
 
+TEST_F(QueryPlannerTest, ContainedOrCannotPushdownThroughElemMatchObj) {
+    addIndex(BSON("a" << 1 << "b.c" << 1));
+
+    runQuery(fromjson("{a: 1, b: {$elemMatch: {$or: [{c: 2}, {c: 3}]}}}"));
+
+    assertNumSolutions(2U);
+    assertSolutionExists(
+        "{fetch: {filter: {b: {$elemMatch: {$or: [{c: 2}, {c: 3}]}}}, node: "
+        "{ixscan: {filter: null, pattern: {a: 1, 'b.c': 1}, "
+        "bounds: {a: [[1,1,true,true]], 'b.c': [['MinKey','MaxKey',true,true]]}}}}}");
+    assertSolutionExists("{cscan: {dir: 1}}}}");
+}
+
+TEST_F(QueryPlannerTest, ContainedOrCannotPushdownThroughElemMatchObjWithMultikeyPaths) {
+    MultikeyPaths multikeyPaths{{}, {0U}};
+    addIndex(BSON("a" << 1 << "b.c" << 1), multikeyPaths);
+
+    runQuery(fromjson("{a: 1, b: {$elemMatch: {$or: [{c: 2}, {c: 3}]}}}"));
+
+    assertNumSolutions(2U);
+    assertSolutionExists(
+        "{fetch: {filter: {b: {$elemMatch: {$or: [{c: 2}, {c: 3}]}}}, node: "
+        "{ixscan: {filter: null, pattern: {a: 1, 'b.c': 1}, "
+        "bounds: {a: [[1,1,true,true]], 'b.c': [['MinKey','MaxKey',true,true]]}}}}}");
+    assertSolutionExists("{cscan: {dir: 1}}}}");
+}
+
+TEST_F(QueryPlannerTest, ContainedOrCannotPushdownThroughOrElemMatchObjOrPattern) {
+    addIndex(BSON("a" << 1 << "b.c" << 1));
+
+    runQuery(fromjson("{a: 1, $or: [{a: 2}, {b: {$elemMatch: {$or: [{c: 3}, {c: 4}]}}}]}"));
+
+    assertNumSolutions(2U);
+    assertSolutionExists(
+        "{fetch: {filter: {$or: [{a: 2}, {b: {$elemMatch: {$or: [{c: 3}, {c: 4}]}}}]}, node: "
+        "{ixscan: {filter: null, pattern: {a: 1, 'b.c': 1}, "
+        "bounds: {a: [[1,1,true,true]], 'b.c': [['MinKey','MaxKey',true,true]]}}}}}");
+    assertSolutionExists("{cscan: {dir: 1}}}}");
+}
+
+TEST_F(QueryPlannerTest, ContainedOrCannotPushdownThroughOrElemMatchObjOrPatternWithMultikeyPaths) {
+    MultikeyPaths multikeyPaths{{}, {0U}};
+    addIndex(BSON("a" << 1 << "b.c" << 1), multikeyPaths);
+
+    runQuery(fromjson("{a: 1, $or: [{a: 2}, {b: {$elemMatch: {$or: [{c: 3}, {c: 4}]}}}]}"));
+
+    assertNumSolutions(2U);
+    assertSolutionExists(
+        "{fetch: {filter: {$or: [{a: 2}, {b: {$elemMatch: {$or: [{c: 3}, {c: 4}]}}}]}, node: "
+        "{ixscan: {filter: null, pattern: {a: 1, 'b.c': 1}, "
+        "bounds: {a: [[1,1,true,true]], 'b.c': [['MinKey','MaxKey',true,true]]}}}}}");
+    assertSolutionExists("{cscan: {dir: 1}}}}");
+}
+
+// TODO SERVER-30145: Fixing this ticket should allow us to generate tight bounds on "b.c.f" below.
+TEST_F(QueryPlannerTest, ContainedOrInAndInNestedElemMatch) {
+    addIndex(BSON("b.d" << 1 << "b.c.f" << 1));
+    addIndex(BSON("b.e" << 1 << "b.c.f" << 1));
+
+    runQuery(
+        fromjson("{$and: [{a: 5}, {b: {$elemMatch: {$and: ["
+                 "{c: {$elemMatch: {f: 5}}}, {$or: [{d: 6}, {e: 7}]}]}}}]}"));
+    assertNumSolutions(2U);
+    assertSolutionExists(
+        "{fetch: {filter: {$and: [{a: 5}, {b: {$elemMatch: {$and: [{c: {$elemMatch: {f: 5}}}, "
+        "{$or: [{d: 6}, {e: 7}]}]}}}]}, "
+        "node: {or: {nodes: ["
+        "{ixscan: {pattern: {'b.d': 1, 'b.c.f': 1}, bounds: {'b.d': [[6, 6, true, true]], 'b.c.f': "
+        "[['MinKey', 'MaxKey', true, true]]}}},"
+        "{ixscan: {pattern: {'b.e': 1, 'b.c.f': 1}, bounds: {'b.e': [[7, 7, true, true]], 'b.c.f': "
+        "[['MinKey', 'MaxKey', true, true]]}}}"
+        "]}}}}");
+    assertSolutionExists("{cscan: {dir: 1}}}}");
+}
+
+// TODO SERVER-30145: Fixing this ticket should allow us to generate tight bounds on "b.c.f" below.
+TEST_F(QueryPlannerTest, ContainedOrInAndInNestedElemMatchWithMultikeyPaths) {
+    MultikeyPaths multikeyPaths{{0U}, {0U, 1U}};
+    addIndex(BSON("b.d" << 1 << "b.c.f" << 1), multikeyPaths);
+    addIndex(BSON("b.e" << 1 << "b.c.f" << 1), multikeyPaths);
+
+    runQuery(
+        fromjson("{$and: [{a: 5}, {b: {$elemMatch: {$and: ["
+                 "{c: {$elemMatch: {f: 5}}}, {$or: [{d: 6}, {e: 7}]}]}}}]}"));
+    assertNumSolutions(2U);
+    assertSolutionExists(
+        "{fetch: {filter: {$and: [{a: 5}, {b: {$elemMatch: {$and: [{c: {$elemMatch: {f: 5}}}, "
+        "{$or: [{d: 6}, {e: 7}]}]}}}]}, "
+        "node: {or: {nodes: ["
+        "{ixscan: {pattern: {'b.d': 1, 'b.c.f': 1}, bounds: {'b.d': [[6, 6, true, true]], 'b.c.f': "
+        "[['MinKey', 'MaxKey', true, true]]}}},"
+        "{ixscan: {pattern: {'b.e': 1, 'b.c.f': 1}, bounds: {'b.e': [[7, 7, true, true]], 'b.c.f': "
+        "[['MinKey', 'MaxKey', true, true]]}}}"
+        "]}}}}");
+    assertSolutionExists("{cscan: {dir: 1}}}}");
+}
+
+// TODO SERVER-30145: Fixing this ticket should allow us to generate tight bounds on "b.c.f" below.
+TEST_F(QueryPlannerTest, ContainedOrInNestedElemMatchWithMultikeyPaths) {
+    MultikeyPaths multikeyPaths{{0U}, {0U, 1U}};
+    addIndex(BSON("b.d" << 1 << "b.c.f" << 1), multikeyPaths);
+    addIndex(BSON("b.e" << 1 << "b.c.f" << 1), multikeyPaths);
+
+    runQuery(fromjson("{b: {$elemMatch: {c: {$elemMatch: {f: 5}}, $or: [{d: 6}, {e: 7}]}}}"));
+    assertNumSolutions(2U);
+    assertSolutionExists(
+        "{fetch: {filter: {b: {$elemMatch: {c: {$elemMatch: {f: 5}}, $or: [{d: 6}, {e: 7}]}}}, "
+        "node: {or: {nodes: ["
+        "{ixscan: {pattern: {'b.d': 1, 'b.c.f': 1}, bounds: {'b.d': [[6, 6, true, true]], 'b.c.f': "
+        "[['MinKey', 'MaxKey', true, true]]}}},"
+        "{ixscan: {pattern: {'b.e': 1, 'b.c.f': 1}, bounds: {'b.e': [[7, 7, true, true]], 'b.c.f': "
+        "[['MinKey', 'MaxKey', true, true]]}}}"
+        "]}}}}");
+    assertSolutionExists("{cscan: {dir: 1}}}}");
+}
+
+TEST_F(QueryPlannerTest, ContainedOrMoveElemMatchToNestedElemMatchObject) {
+    addIndex(BSON("b.c.d" << 1 << "a.f" << 1), MultikeyPaths{{0U, 1U}, {0U}});
+    addIndex(BSON("e" << 1 << "a.f" << 1), MultikeyPaths{{}, {0U}});
+
+    runQuery(fromjson(
+        "{a: {$elemMatch: {f: 5}}, $or: [{b: {$elemMatch: {c: {$elemMatch: {d: 6}}}}}, {e: 7}]}"));
+    assertNumSolutions(2U);
+    assertSolutionExists(
+        "{fetch: {filter: {a: {$elemMatch: {f: 5}}}, node: {or: {nodes: ["
+        "{fetch: {filter: {b: {$elemMatch: {c: {$elemMatch: {d: 6}}}}}, node: {ixscan: {pattern: "
+        "{'b.c.d': 1, 'a.f': 1}, bounds: {'b.c.d': [[6, 6, true, true]], 'a.f': [[5, 5, true, "
+        "true]]}}}}},"
+        "{ixscan: {pattern: {e: 1, 'a.f': 1}, bounds: {e: [[7, 7, true, true]], 'a.f': [[5, 5, "
+        "true, true]]}}}]}}}}");
+    assertSolutionExists("{cscan: {dir: 1}}}}");
+}
+
 TEST_F(QueryPlannerTest, TypeArrayUsingTypeCodeMustFetchAndFilter) {
     addIndex(BSON("a" << 1));
     runQuery(fromjson("{a: {$type: 4}}"));
author	David Storch <david.storch@10gen.com>	2018-02-08 14:55:41 -0500
committer	David Storch <david.storch@10gen.com>	2018-02-09 17:04:04 -0500
commit	17b4094c4d781ffd486b27869f46eea706e490af (patch)
tree	ad5d0bcfa1293ccf13c74f8d2c611fa6f498ba9d /src/mongo
parent	d6ad188cd77fb4fcd155cf2c9165fad9e6f2c589 (diff)
download	mongo-17b4094c4d781ffd486b27869f46eea706e490af.tar.gz