summaryrefslogtreecommitdiff
path: root/src/mongo
diff options
context:
space:
mode:
authorDavid Storch <david.storch@10gen.com>2018-02-08 14:55:41 -0500
committerDavid Storch <david.storch@10gen.com>2018-02-09 17:04:04 -0500
commit17b4094c4d781ffd486b27869f46eea706e490af (patch)
treead5d0bcfa1293ccf13c74f8d2c611fa6f498ba9d /src/mongo
parentd6ad188cd77fb4fcd155cf2c9165fad9e6f2c589 (diff)
downloadmongo-17b4094c4d781ffd486b27869f46eea706e490af.tar.gz
SERVER-33005 Fix planner to avoid incorrect OR pushdown through $elemMatch.
The PlanEnumerator now tracks pushdown routes which descend through an $elemMatch object. These routes are pruned when subsequently descending through an OR.
Diffstat (limited to 'src/mongo')
-rw-r--r--src/mongo/db/query/plan_enumerator.cpp76
-rw-r--r--src/mongo/db/query/plan_enumerator.h45
-rw-r--r--src/mongo/db/query/query_planner_array_test.cpp133
3 files changed, 230 insertions, 24 deletions
diff --git a/src/mongo/db/query/plan_enumerator.cpp b/src/mongo/db/query/plan_enumerator.cpp
index 7016d57b043..04bc0d93ec8 100644
--- a/src/mongo/db/query/plan_enumerator.cpp
+++ b/src/mongo/db/query/plan_enumerator.cpp
@@ -396,8 +396,24 @@ bool PlanEnumerator::prepMemo(MatchExpression* node, PrepMemoContext context) {
// Extend the path through the indexed ORs of each outside predicate.
auto childContextCopy = childContext;
- for (auto& pred : childContextCopy.outsidePreds) {
- pred.second.push_back(i);
+ for (auto it = childContextCopy.outsidePreds.begin();
+ it != childContextCopy.outsidePreds.end();) {
+ // If the route has already traversed through an $elemMatch object, then we cannot
+ // push down through this OR. Here we remove such routes from our context object.
+ //
+ // For example, suppose we have index {a: 1, "b.c": 1} and the following query:
+ //
+ // {a: 1, b: {$elemMatch: {$or: [{c: 2}, {c: 3}]}}}
+ //
+ // It is not correct to push the 'a' predicate down such that it is a sibling of
+ // either of the predicates on 'c', since this would change the predicate's meaning
+ // from a==1 to "b.a"==1.
+ if (it->second.traversedThroughElemMatchObj) {
+ it = childContextCopy.outsidePreds.erase(it);
+ } else {
+ it->second.route.push_back(i);
+ ++it;
+ }
}
if (!prepMemo(node->getChild(i), childContextCopy)) {
@@ -423,6 +439,7 @@ bool PlanEnumerator::prepMemo(MatchExpression* node, PrepMemoContext context) {
if (MatchExpression::ELEM_MATCH_OBJECT == node->matchType()) {
childContext.elemMatchExpr = node;
+ markTraversedThroughElemMatchObj(&childContext);
}
// For an OR to be indexed, all its children must be indexed.
@@ -462,23 +479,22 @@ bool PlanEnumerator::prepMemo(MatchExpression* node, PrepMemoContext context) {
// (e.g. an OR which contains a TEXT child).
vector<MemoID> mandatorySubnodes;
- // A list of predicates contained in the subtree rooted at 'node'
- // obtained by traversing deeply through $and and $elemMatch children.
- vector<MatchExpression*> indexedPreds;
+ // A list of predicates contained in the subtree rooted at 'node' obtained by traversing
+ // deeply through $and and $elemMatch children.
+ std::vector<MatchExpression*> indexedPreds;
- // Partition the childen into the children that aren't predicates which may or may
- // not be indexed ('subnodes'), children that aren't predicates which must use the
- // index ('mandatorySubnodes'). and children that are predicates ('indexedPreds').
+ // Partition the childen into the children that aren't predicates which may or may not be
+ // indexed ('subnodes'), children that aren't predicates which must use the index
+ // ('mandatorySubnodes'). and children that are predicates ('indexedPreds').
//
- // We have to get the subnodes with mandatory assignments rather than adding the
- // mandatory preds to 'indexedPreds'. Adding the mandatory preds directly to
- // 'indexedPreds' would lead to problems such as pulling a predicate beneath an OR
- // into a set joined by an AND.
+ // We have to get the subnodes with mandatory assignments rather than adding the mandatory
+ // preds to 'indexedPreds'. Adding the mandatory preds directly to 'indexedPreds' would lead
+ // to problems such as pulling a predicate beneath an OR into a set joined by an AND.
getIndexedPreds(node, childContext, &indexedPreds);
// Pass in the indexed predicates as outside predicates when prepping the subnodes.
auto childContextCopy = childContext;
for (auto pred : indexedPreds) {
- childContextCopy.outsidePreds[pred] = std::deque<size_t>();
+ childContextCopy.outsidePreds[pred] = OutsidePredRoute{};
}
if (!prepSubNodes(node, childContextCopy, &subnodes, &mandatorySubnodes)) {
return false;
@@ -667,14 +683,14 @@ bool PlanEnumerator::enumerateMandatoryIndex(const IndexToPredMap& idxToFirst,
// Assign any predicates on the non-leading index fields to 'indexAssign' that
// don't violate the intersecting or compounding rules for multikey indexes.
// We do not currently try to assign outside predicates to mandatory indexes.
- const unordered_map<MatchExpression*, std::deque<size_t>> outsidePreds{};
+ const unordered_map<MatchExpression*, OutsidePredRoute> outsidePreds{};
assignMultikeySafePredicates(compIt->second, outsidePreds, &indexAssign);
}
} else {
// Assign any predicates on the leading index field to 'indexAssign' that don't
// violate the intersecting rules for multikey indexes.
// We do not currently try to assign outside predicates to mandatory indexes.
- const unordered_map<MatchExpression*, std::deque<size_t>> outsidePreds{};
+ const unordered_map<MatchExpression*, OutsidePredRoute> outsidePreds{};
assignMultikeySafePredicates(predsOverLeadingField, outsidePreds, &indexAssign);
// Assign the mandatory predicate to 'thisIndex'. Due to how keys are generated for
@@ -778,13 +794,13 @@ bool PlanEnumerator::enumerateMandatoryIndex(const IndexToPredMap& idxToFirst,
}
void PlanEnumerator::assignPredicate(
- const unordered_map<MatchExpression*, std::deque<size_t>>& outsidePreds,
+ const unordered_map<MatchExpression*, OutsidePredRoute>& outsidePreds,
MatchExpression* pred,
size_t position,
OneIndexAssignment* indexAssignment) {
if (outsidePreds.find(pred) != outsidePreds.end()) {
OrPushdownTag::Destination dest;
- dest.route = outsidePreds.at(pred);
+ dest.route = outsidePreds.at(pred).route;
// This method should only be called if we can combine bounds.
const bool canCombineBounds = true;
@@ -797,11 +813,30 @@ void PlanEnumerator::assignPredicate(
}
}
+void PlanEnumerator::markTraversedThroughElemMatchObj(PrepMemoContext* context) {
+ invariant(context);
+ for (auto&& pred : context->outsidePreds) {
+ auto relevantTag = static_cast<RelevantTag*>(pred.first->getTag());
+ // Only indexed predicates should ever be considered as outside predicates eligible for
+ // pushdown.
+ invariant(relevantTag);
+
+ // Check whether the current $elemMatch through which we are traversing is the same as the
+ // outside predicate's $elemMatch context. If so, then that outside predicate hasn't
+ // actually traversed through an $elemMatch (it has simply been promoted by
+ // getIndexedPreds() into the set of AND-related indexed predicates). If not, then the OR
+ // pushdown route descends through an $elemMatch object node, and must be marked as such.
+ if (relevantTag->elemMatchExpr != context->elemMatchExpr) {
+ pred.second.traversedThroughElemMatchObj = true;
+ }
+ }
+}
+
void PlanEnumerator::enumerateOneIndex(
IndexToPredMap idxToFirst,
IndexToPredMap idxToNotFirst,
const vector<MemoID>& subnodes,
- const unordered_map<MatchExpression*, std::deque<size_t>>& outsidePreds,
+ const unordered_map<MatchExpression*, OutsidePredRoute>& outsidePreds,
AndAssignment* andAssignment) {
// Each choice in the 'andAssignment' will consist of a single subnode to index (an OR or array
// operator) or a OneIndexAssignment. When creating a OneIndexAssignment, we ensure that at
@@ -1203,7 +1238,7 @@ void PlanEnumerator::enumerateAndIntersect(const IndexToPredMap& idxToFirst,
void PlanEnumerator::getIndexedPreds(MatchExpression* node,
PrepMemoContext context,
- vector<MatchExpression*>* indexedPreds) {
+ std::vector<MatchExpression*>* indexedPreds) {
if (Indexability::nodeCanUseIndexOnOwnField(node)) {
RelevantTag* rt = static_cast<RelevantTag*>(node->getTag());
if (context.elemMatchExpr) {
@@ -1261,6 +1296,7 @@ bool PlanEnumerator::prepSubNodes(MatchExpression* node,
PrepMemoContext childContext;
childContext.elemMatchExpr = child;
childContext.outsidePreds = context.outsidePreds;
+ markTraversedThroughElemMatchObj(&childContext);
prepSubNodes(child, childContext, subnodesOut, mandatorySubnodes);
} else if (MatchExpression::AND == child->matchType()) {
prepSubNodes(child, context, subnodesOut, mandatorySubnodes);
@@ -1351,7 +1387,7 @@ void PlanEnumerator::getMultikeyCompoundablePreds(const vector<MatchExpression*>
void PlanEnumerator::assignMultikeySafePredicates(
const std::vector<MatchExpression*>& couldAssign,
- const unordered_map<MatchExpression*, std::deque<size_t>>& outsidePreds,
+ const unordered_map<MatchExpression*, OutsidePredRoute>& outsidePreds,
OneIndexAssignment* indexAssignment) {
invariant(indexAssignment);
invariant(indexAssignment->preds.size() == indexAssignment->positions.size());
diff --git a/src/mongo/db/query/plan_enumerator.h b/src/mongo/db/query/plan_enumerator.h
index 91522d27488..cd9c88b895c 100644
--- a/src/mongo/db/query/plan_enumerator.h
+++ b/src/mongo/db/query/plan_enumerator.h
@@ -124,13 +124,45 @@ private:
// The position of a field in a possibly compound index.
typedef size_t IndexPosition;
+ /**
+ * Represents the route that an outside predicate has taken during the PlanEnumerator's
+ * recursive descent of the match expression tree.
+ */
+ struct OutsidePredRoute {
+ /**
+ * Whether or not the route has traversed through an $elemMatch object node. This is needed
+ * because it is not correct to push down a predicate through an $elemMatch object.
+ */
+ bool traversedThroughElemMatchObj = false;
+
+ /**
+ * The route of the outside predicate. This starts at the indexed OR sibling of the
+ * predicate. Each value in 'route' is the index of a child in an indexed OR.
+ *
+ * For example, if the MatchExpression tree is:
+ * AND
+ * / \
+ * {a: 5} OR
+ * / \
+ * AND {e: 9}
+ * / \
+ * {b: 6} OR
+ * / \
+ * {c: 7} {d: 8}
+ *
+ * and the predicate is {a: 5}, then the route will be {0, 1} when the recursive descent
+ * reaches {d: 8}.
+ */
+ std::deque<size_t> route;
+ };
+
struct PrepMemoContext {
PrepMemoContext() : elemMatchExpr(NULL) {}
MatchExpression* elemMatchExpr;
// Maps from indexable predicates that can be pushed into the current node to the route
// through ORs that they have taken to get to this node.
- unordered_map<MatchExpression*, std::deque<size_t>> outsidePreds;
+ unordered_map<MatchExpression*, OutsidePredRoute> outsidePreds;
};
/**
@@ -372,7 +404,7 @@ private:
*/
void assignMultikeySafePredicates(
const std::vector<MatchExpression*>& couldAssign,
- const unordered_map<MatchExpression*, std::deque<size_t>>& outsidePreds,
+ const unordered_map<MatchExpression*, OutsidePredRoute>& outsidePreds,
OneIndexAssignment* indexAssignment);
/**
@@ -419,7 +451,7 @@ private:
void enumerateOneIndex(IndexToPredMap idxToFirst,
IndexToPredMap idxToNotFirst,
const std::vector<MemoID>& subnodes,
- const unordered_map<MatchExpression*, std::deque<size_t>>& outsidePreds,
+ const unordered_map<MatchExpression*, OutsidePredRoute>& outsidePreds,
AndAssignment* andAssignment);
/**
@@ -469,12 +501,17 @@ private:
* 'outsidePreds'. 'pred' must be able to use the index and be multikey-safe to add to
* 'indexAssignment'.
*/
- void assignPredicate(const unordered_map<MatchExpression*, std::deque<size_t>>& outsidePreds,
+ void assignPredicate(const unordered_map<MatchExpression*, OutsidePredRoute>& outsidePreds,
MatchExpression* pred,
size_t position,
OneIndexAssignment* indexAssignment);
/**
+ * Sets a flag on all outside pred routes that descend through an $elemMatch object node.
+ */
+ void markTraversedThroughElemMatchObj(PrepMemoContext* context);
+
+ /**
* Return the memo entry for 'node'. Does some sanity checking to ensure that a memo entry
* actually exists.
*/
diff --git a/src/mongo/db/query/query_planner_array_test.cpp b/src/mongo/db/query/query_planner_array_test.cpp
index 3d2c3c1aa25..68521862a55 100644
--- a/src/mongo/db/query/query_planner_array_test.cpp
+++ b/src/mongo/db/query/query_planner_array_test.cpp
@@ -2054,6 +2054,139 @@ TEST_F(QueryPlannerTest, ContainedOrPathLevelMultikeyCannotCompoundTrailingOutsi
assertSolutionExists("{cscan: {dir: 1}}}}");
}
+TEST_F(QueryPlannerTest, ContainedOrCannotPushdownThroughElemMatchObj) {
+ addIndex(BSON("a" << 1 << "b.c" << 1));
+
+ runQuery(fromjson("{a: 1, b: {$elemMatch: {$or: [{c: 2}, {c: 3}]}}}"));
+
+ assertNumSolutions(2U);
+ assertSolutionExists(
+ "{fetch: {filter: {b: {$elemMatch: {$or: [{c: 2}, {c: 3}]}}}, node: "
+ "{ixscan: {filter: null, pattern: {a: 1, 'b.c': 1}, "
+ "bounds: {a: [[1,1,true,true]], 'b.c': [['MinKey','MaxKey',true,true]]}}}}}");
+ assertSolutionExists("{cscan: {dir: 1}}}}");
+}
+
+TEST_F(QueryPlannerTest, ContainedOrCannotPushdownThroughElemMatchObjWithMultikeyPaths) {
+ MultikeyPaths multikeyPaths{{}, {0U}};
+ addIndex(BSON("a" << 1 << "b.c" << 1), multikeyPaths);
+
+ runQuery(fromjson("{a: 1, b: {$elemMatch: {$or: [{c: 2}, {c: 3}]}}}"));
+
+ assertNumSolutions(2U);
+ assertSolutionExists(
+ "{fetch: {filter: {b: {$elemMatch: {$or: [{c: 2}, {c: 3}]}}}, node: "
+ "{ixscan: {filter: null, pattern: {a: 1, 'b.c': 1}, "
+ "bounds: {a: [[1,1,true,true]], 'b.c': [['MinKey','MaxKey',true,true]]}}}}}");
+ assertSolutionExists("{cscan: {dir: 1}}}}");
+}
+
+TEST_F(QueryPlannerTest, ContainedOrCannotPushdownThroughOrElemMatchObjOrPattern) {
+ addIndex(BSON("a" << 1 << "b.c" << 1));
+
+ runQuery(fromjson("{a: 1, $or: [{a: 2}, {b: {$elemMatch: {$or: [{c: 3}, {c: 4}]}}}]}"));
+
+ assertNumSolutions(2U);
+ assertSolutionExists(
+ "{fetch: {filter: {$or: [{a: 2}, {b: {$elemMatch: {$or: [{c: 3}, {c: 4}]}}}]}, node: "
+ "{ixscan: {filter: null, pattern: {a: 1, 'b.c': 1}, "
+ "bounds: {a: [[1,1,true,true]], 'b.c': [['MinKey','MaxKey',true,true]]}}}}}");
+ assertSolutionExists("{cscan: {dir: 1}}}}");
+}
+
+TEST_F(QueryPlannerTest, ContainedOrCannotPushdownThroughOrElemMatchObjOrPatternWithMultikeyPaths) {
+ MultikeyPaths multikeyPaths{{}, {0U}};
+ addIndex(BSON("a" << 1 << "b.c" << 1), multikeyPaths);
+
+ runQuery(fromjson("{a: 1, $or: [{a: 2}, {b: {$elemMatch: {$or: [{c: 3}, {c: 4}]}}}]}"));
+
+ assertNumSolutions(2U);
+ assertSolutionExists(
+ "{fetch: {filter: {$or: [{a: 2}, {b: {$elemMatch: {$or: [{c: 3}, {c: 4}]}}}]}, node: "
+ "{ixscan: {filter: null, pattern: {a: 1, 'b.c': 1}, "
+ "bounds: {a: [[1,1,true,true]], 'b.c': [['MinKey','MaxKey',true,true]]}}}}}");
+ assertSolutionExists("{cscan: {dir: 1}}}}");
+}
+
+// TODO SERVER-30145: Fixing this ticket should allow us to generate tight bounds on "b.c.f" below.
+TEST_F(QueryPlannerTest, ContainedOrInAndInNestedElemMatch) {
+ addIndex(BSON("b.d" << 1 << "b.c.f" << 1));
+ addIndex(BSON("b.e" << 1 << "b.c.f" << 1));
+
+ runQuery(
+ fromjson("{$and: [{a: 5}, {b: {$elemMatch: {$and: ["
+ "{c: {$elemMatch: {f: 5}}}, {$or: [{d: 6}, {e: 7}]}]}}}]}"));
+ assertNumSolutions(2U);
+ assertSolutionExists(
+ "{fetch: {filter: {$and: [{a: 5}, {b: {$elemMatch: {$and: [{c: {$elemMatch: {f: 5}}}, "
+ "{$or: [{d: 6}, {e: 7}]}]}}}]}, "
+ "node: {or: {nodes: ["
+ "{ixscan: {pattern: {'b.d': 1, 'b.c.f': 1}, bounds: {'b.d': [[6, 6, true, true]], 'b.c.f': "
+ "[['MinKey', 'MaxKey', true, true]]}}},"
+ "{ixscan: {pattern: {'b.e': 1, 'b.c.f': 1}, bounds: {'b.e': [[7, 7, true, true]], 'b.c.f': "
+ "[['MinKey', 'MaxKey', true, true]]}}}"
+ "]}}}}");
+ assertSolutionExists("{cscan: {dir: 1}}}}");
+}
+
+// TODO SERVER-30145: Fixing this ticket should allow us to generate tight bounds on "b.c.f" below.
+TEST_F(QueryPlannerTest, ContainedOrInAndInNestedElemMatchWithMultikeyPaths) {
+ MultikeyPaths multikeyPaths{{0U}, {0U, 1U}};
+ addIndex(BSON("b.d" << 1 << "b.c.f" << 1), multikeyPaths);
+ addIndex(BSON("b.e" << 1 << "b.c.f" << 1), multikeyPaths);
+
+ runQuery(
+ fromjson("{$and: [{a: 5}, {b: {$elemMatch: {$and: ["
+ "{c: {$elemMatch: {f: 5}}}, {$or: [{d: 6}, {e: 7}]}]}}}]}"));
+ assertNumSolutions(2U);
+ assertSolutionExists(
+ "{fetch: {filter: {$and: [{a: 5}, {b: {$elemMatch: {$and: [{c: {$elemMatch: {f: 5}}}, "
+ "{$or: [{d: 6}, {e: 7}]}]}}}]}, "
+ "node: {or: {nodes: ["
+ "{ixscan: {pattern: {'b.d': 1, 'b.c.f': 1}, bounds: {'b.d': [[6, 6, true, true]], 'b.c.f': "
+ "[['MinKey', 'MaxKey', true, true]]}}},"
+ "{ixscan: {pattern: {'b.e': 1, 'b.c.f': 1}, bounds: {'b.e': [[7, 7, true, true]], 'b.c.f': "
+ "[['MinKey', 'MaxKey', true, true]]}}}"
+ "]}}}}");
+ assertSolutionExists("{cscan: {dir: 1}}}}");
+}
+
+// TODO SERVER-30145: Fixing this ticket should allow us to generate tight bounds on "b.c.f" below.
+TEST_F(QueryPlannerTest, ContainedOrInNestedElemMatchWithMultikeyPaths) {
+ MultikeyPaths multikeyPaths{{0U}, {0U, 1U}};
+ addIndex(BSON("b.d" << 1 << "b.c.f" << 1), multikeyPaths);
+ addIndex(BSON("b.e" << 1 << "b.c.f" << 1), multikeyPaths);
+
+ runQuery(fromjson("{b: {$elemMatch: {c: {$elemMatch: {f: 5}}, $or: [{d: 6}, {e: 7}]}}}"));
+ assertNumSolutions(2U);
+ assertSolutionExists(
+ "{fetch: {filter: {b: {$elemMatch: {c: {$elemMatch: {f: 5}}, $or: [{d: 6}, {e: 7}]}}}, "
+ "node: {or: {nodes: ["
+ "{ixscan: {pattern: {'b.d': 1, 'b.c.f': 1}, bounds: {'b.d': [[6, 6, true, true]], 'b.c.f': "
+ "[['MinKey', 'MaxKey', true, true]]}}},"
+ "{ixscan: {pattern: {'b.e': 1, 'b.c.f': 1}, bounds: {'b.e': [[7, 7, true, true]], 'b.c.f': "
+ "[['MinKey', 'MaxKey', true, true]]}}}"
+ "]}}}}");
+ assertSolutionExists("{cscan: {dir: 1}}}}");
+}
+
+TEST_F(QueryPlannerTest, ContainedOrMoveElemMatchToNestedElemMatchObject) {
+ addIndex(BSON("b.c.d" << 1 << "a.f" << 1), MultikeyPaths{{0U, 1U}, {0U}});
+ addIndex(BSON("e" << 1 << "a.f" << 1), MultikeyPaths{{}, {0U}});
+
+ runQuery(fromjson(
+ "{a: {$elemMatch: {f: 5}}, $or: [{b: {$elemMatch: {c: {$elemMatch: {d: 6}}}}}, {e: 7}]}"));
+ assertNumSolutions(2U);
+ assertSolutionExists(
+ "{fetch: {filter: {a: {$elemMatch: {f: 5}}}, node: {or: {nodes: ["
+ "{fetch: {filter: {b: {$elemMatch: {c: {$elemMatch: {d: 6}}}}}, node: {ixscan: {pattern: "
+ "{'b.c.d': 1, 'a.f': 1}, bounds: {'b.c.d': [[6, 6, true, true]], 'a.f': [[5, 5, true, "
+ "true]]}}}}},"
+ "{ixscan: {pattern: {e: 1, 'a.f': 1}, bounds: {e: [[7, 7, true, true]], 'a.f': [[5, 5, "
+ "true, true]]}}}]}}}}");
+ assertSolutionExists("{cscan: {dir: 1}}}}");
+}
+
TEST_F(QueryPlannerTest, TypeArrayUsingTypeCodeMustFetchAndFilter) {
addIndex(BSON("a" << 1));
runQuery(fromjson("{a: {$type: 4}}"));