diff options
author | Charlie Swanson <charlie.swanson@mongodb.com> | 2020-08-05 09:55:01 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-09-18 15:43:38 +0000 |
commit | 03fb28b42c1b9e36c6ebd5c6aa4b8afc29dde099 (patch) | |
tree | 8a7876fde49b01526a8d2b0e85b0029e285bc13d | |
parent | 5e56df1dfc7c5edd3ba1ccce1b64cc9fa361a988 (diff) | |
download | mongo-03fb28b42c1b9e36c6ebd5c6aa4b8afc29dde099.tar.gz |
SERVER-36393 Add new opt-in $or enumeration order
(cherry picked from commit bffdd28e183d5b720c897b56278821e65762ccd7)
(cherry picked from commit e81be8db078eb541495dee9a47433b92f5140e18)
-rw-r--r-- | src/mongo/db/query/get_executor.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/query/plan_enumerator.cpp | 187 | ||||
-rw-r--r-- | src/mongo/db/query/plan_enumerator.h | 55 | ||||
-rw-r--r-- | src/mongo/db/query/query_knobs.idl | 19 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner.cpp | 12 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_params.h | 17 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_test.cpp | 389 | ||||
-rw-r--r-- | src/mongo/db/query/query_solution.cpp | 1 |
8 files changed, 655 insertions, 29 deletions
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index 32d63cd41f4..642a48fbd5b 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -297,6 +297,10 @@ void fillOutPlannerParams(OperationContext* opCtx, plannerParams->options |= QueryPlannerParams::INDEX_INTERSECTION; } + if (internalQueryEnumerationPreferLockstepOrEnumeration.load()) { + plannerParams->options |= QueryPlannerParams::ENUMERATE_OR_CHILDREN_LOCKSTEP; + } + if (internalQueryPlannerGenerateCoveredWholeIndexScans.load()) { plannerParams->options |= QueryPlannerParams::GENERATE_COVERED_IXSCANS; } diff --git a/src/mongo/db/query/plan_enumerator.cpp b/src/mongo/db/query/plan_enumerator.cpp index 910b306502d..be2bbd4284b 100644 --- a/src/mongo/db/query/plan_enumerator.cpp +++ b/src/mongo/db/query/plan_enumerator.cpp @@ -259,6 +259,7 @@ PlanEnumerator::PlanEnumerator(const PlanEnumeratorParams& params) : _root(params.root), _indices(params.indices), _ixisect(params.intersect), + _enumerateOrChildrenLockstep(params.enumerateOrChildrenLockstep), _orLimit(params.maxSolutionsPerOr), _intersectLimit(params.maxIntersectPerAnd) {} @@ -324,8 +325,7 @@ string PlanEnumerator::NodeAssignment::toString() const { } ss << "]"; return ss; - } else { - verify(NULL != orAssignment); + } else if (nullptr != orAssignment) { str::stream ss; ss << "ALL OF: [ "; for (size_t i = 0; i < orAssignment->subnodes.size(); ++i) { @@ -333,7 +333,26 @@ string PlanEnumerator::NodeAssignment::toString() const { } ss << "]"; return ss; + } else if (nullptr != lockstepOrAssignment) { + str::stream ss; + ss << "ALL OF (lockstep): {"; + ss << "\n\ttotalEnumerated: " << lockstepOrAssignment->totalEnumerated; + ss << "\n\tsubnodes: [ "; + for (auto&& node : lockstepOrAssignment->subnodes) { + ss << "\n\t\t{"; + ss << "memoId: " << node.memoId << ", "; + ss << "iterationCount: " << node.iterationCount << ", "; + if (node.maxIterCount) { + ss << "maxIterCount: " << node.maxIterCount; + } else { + ss << "maxIterCount: none"; + } + ss << "},"; + } + ss << "\n]"; + return ss; } + MONGO_UNREACHABLE; } PlanEnumerator::MemoID PlanEnumerator::memoIDForNode(MatchExpression* node) { @@ -426,11 +445,19 @@ bool PlanEnumerator::prepMemo(MatchExpression* node, PrepMemoContext context) { NodeAssignment* assign; allocateAssignment(node, &assign, &myMemoID); - OrAssignment* orAssignment = new OrAssignment(); - for (size_t i = 0; i < node->numChildren(); ++i) { - orAssignment->subnodes.push_back(memoIDForNode(node->getChild(i))); + if (_enumerateOrChildrenLockstep) { + LockstepOrAssignment* newOrAssign = new LockstepOrAssignment(); + for (size_t i = 0; i < node->numChildren(); ++i) { + newOrAssign->subnodes.push_back({memoIDForNode(node->getChild(i)), 0, boost::none}); + } + assign->lockstepOrAssignment.reset(newOrAssign); + } else { + OrAssignment* orAssignment = new OrAssignment(); + for (size_t i = 0; i < node->numChildren(); ++i) { + orAssignment->subnodes.push_back(memoIDForNode(node->getChild(i))); + } + assign->orAssignment.reset(orAssignment); } - assign->orAssignment.reset(orAssignment); return true; } else if (Indexability::arrayUsesIndexOnChildren(node)) { // Add each of our children as a subnode. We enumerate through each subnode one at a @@ -1577,7 +1604,12 @@ void PlanEnumerator::tagMemo(size_t id) { for (size_t i = 0; i < oa->subnodes.size(); ++i) { tagMemo(oa->subnodes[i]); } - } else if (NULL != assign->arrayAssignment) { + } else if (nullptr != assign->lockstepOrAssignment) { + LockstepOrAssignment* oa = assign->lockstepOrAssignment.get(); + for (auto&& node : oa->subnodes) { + tagMemo(node.memoId); + } + } else if (nullptr != assign->arrayAssignment) { ArrayAssignment* aa = assign->arrayAssignment.get(); tagMemo(aa->subnodes[aa->counter]); } else if (NULL != assign->andAssignment) { @@ -1620,6 +1652,117 @@ void PlanEnumerator::tagMemo(size_t id) { } } +bool PlanEnumerator::LockstepOrAssignment::allIdentical() const { + const auto firstCounter = subnodes[0].iterationCount; + for (auto&& subnode : subnodes) { + if (subnode.iterationCount != firstCounter) { + return false; + } + } + return true; +} + +bool PlanEnumerator::LockstepOrAssignment::shouldResetBeforeProceeding( + size_t totalEnumerated) const { + if (totalEnumerated == 0 || !exhaustedLockstepIteration) { + return false; + } + + size_t totalPossibleEnumerations = 1; + for (auto&& subnode : subnodes) { + if (!subnode.maxIterCount) { + return false; // Haven't yet looped over this child entirely, not ready yet. + } + totalPossibleEnumerations *= subnode.maxIterCount.get(); + } + + // If we're able to compute a total number expected enumerations, we must have already cycled + // through each of the subnodes at least once. So if we've done that and then iterated all + // possible enumerations, we're about to repeat ourselves. + return totalEnumerated % totalPossibleEnumerations == 0; +} + +bool PlanEnumerator::_nextMemoForLockstepOrAssignment( + PlanEnumerator::LockstepOrAssignment* assignment) { + + if (!assignment->exhaustedLockstepIteration) { + // We have not yet finished advancing all children simultaneously, so we'll loop over + // each child and advance it. + + // Because we're doing things in a special order, we have to be careful to not duplicate + // ourselves. If each child has the same number of alternatives, we will eventually + // "carry" or roll over each child back to the beginning. When this happens, we should + // not return that plan again. + bool everyoneRolledOver = true; + + for (auto&& node : assignment->subnodes) { + ++node.iterationCount; + const bool wrappedAround = nextMemo(node.memoId); + if (wrappedAround) { + node.maxIterCount = node.iterationCount; + node.iterationCount = 0; + // We ran out of "lockstep runway" of sorts. At least one of the subnodes was + // exhausted, so this will be our last time advancing all children in lockstep. + assignment->exhaustedLockstepIteration = true; + } else { + everyoneRolledOver = false; + } + } + // Edge case: if every child has only one option available, we are already finished + // enumerating. + if (assignment->shouldResetBeforeProceeding(assignment->totalEnumerated)) { + assignment->exhaustedLockstepIteration = false; + return true; // We're back at the beginning, no need to reset. + } + if (!everyoneRolledOver) { + // Either there's more lockstep iteration to come, or the subnodes have different + // amounts of options. In either case, we are now in a new enumeration state so just + // return. + return false; + } + // Otherwise we just rolled over and went back to the first enumeration state, so we need to + // keep going to avoid duplicating that state. Fall through to below to start "normal", not + // lockstep iteration. + } + + auto advanceOnce = [this, assignment]() { + for (auto&& node : assignment->subnodes) { + ++node.iterationCount; + const bool wrappedAround = nextMemo(node.memoId); + if (!wrappedAround) { + return; + } + node.maxIterCount = node.iterationCount; + node.iterationCount = 0; + } + }; + advanceOnce(); + while (assignment->allIdentical()) { + // All sub-nodes have the same enumeration state, skip this one since we already did + // it above. This is expected to happen pretty often. For example, if we have two subnodes + // each enumerating two states, we'd expect the order to be: 00, 11 (these two iterated + // above), then 00 (skipped by falling through above after finishing lockstep iteration), + // then 10, 11 (skipped here), 00 (skipped here), then finally 01. + advanceOnce(); + } + + // This special ordering is tricky to reset. Because it iterates the sub nodes in such a + // unique order, it can be difficult to know when it has actually finished iterating. Our + // strategy is just to compute a total and go back to the beginning once we hit that total. + if (!assignment->shouldResetBeforeProceeding(assignment->totalEnumerated)) { + return false; + } + // Reset! + for (auto&& subnode : assignment->subnodes) { + while (!nextMemo(subnode.memoId)) { + // Keep advancing till it rolls over. + } + subnode.iterationCount = 0; + } + assignment->exhaustedLockstepIteration = false; + return true; +} + bool PlanEnumerator::nextMemo(size_t id) { NodeAssignment* assign = _memo[id]; verify(NULL != assign); @@ -1627,16 +1770,18 @@ bool PlanEnumerator::nextMemo(size_t id) { if (NULL != assign->orAssignment) { OrAssignment* oa = assign->orAssignment.get(); - // Limit the number of OR enumerations + // Limit the number of OR enumerations. oa->counter++; if (oa->counter >= _orLimit) { + LOG(1) + << "plan enumerator exceeded threshold for OR enumerations. orEnumerationLimit = " + << _orLimit; return true; } - // OR just walks through telling its children to - // move forward. + // OR just walks through telling its children to move forward. for (size_t i = 0; i < oa->subnodes.size(); ++i) { - // If there's no carry, we just stop. If there's a carry, we move the next child + // If there's no carry, we just stop. If there's a carry, we move the next child // forward. if (!nextMemo(oa->subnodes[i])) { return false; @@ -1644,7 +1789,19 @@ bool PlanEnumerator::nextMemo(size_t id) { } // If we're here, the last subnode had a carry, therefore the OR has a carry. return true; - } else if (NULL != assign->arrayAssignment) { + } else if (nullptr != assign->lockstepOrAssignment) { + LockstepOrAssignment* assignment = assign->lockstepOrAssignment.get(); + + // Limit the number of OR enumerations. + ++assignment->totalEnumerated; + if (assignment->totalEnumerated >= _orLimit) { + LOG(1) + << "plan enumerator exceeded threshold for OR enumerations. orEnumerationLimit = " + << _orLimit; + return true; + } + return _nextMemoForLockstepOrAssignment(assignment); + } else if (nullptr != assign->arrayAssignment) { ArrayAssignment* aa = assign->arrayAssignment.get(); // moving to next on current subnode is OK if (!nextMemo(aa->subnodes[aa->counter])) { @@ -1676,11 +1833,9 @@ bool PlanEnumerator::nextMemo(size_t id) { } aa->counter = 0; return true; + } else { + MONGO_UNREACHABLE; } - - // This shouldn't happen. - verify(0); - return false; } } // namespace mongo diff --git a/src/mongo/db/query/plan_enumerator.h b/src/mongo/db/query/plan_enumerator.h index 7bd5d3812cb..90e99a2e262 100644 --- a/src/mongo/db/query/plan_enumerator.h +++ b/src/mongo/db/query/plan_enumerator.h @@ -42,13 +42,16 @@ namespace mongo { struct PlanEnumeratorParams { PlanEnumeratorParams() - : intersect(false), - maxSolutionsPerOr(internalQueryEnumerationMaxOrSolutions.load()), + : maxSolutionsPerOr(internalQueryEnumerationMaxOrSolutions.load()), maxIntersectPerAnd(internalQueryEnumerationMaxIntersectPerAnd.load()) {} // Do we provide solutions that use more indices than the minimum required to provide // an indexed solution? - bool intersect; + bool intersect = false; + + // Do we enumerate children of an $or in a special order to prioritize solutions which have the + // same assignment on each branch? + bool enumerateOrChildrenLockstep = false; // Not owned here. MatchExpression* root; @@ -94,14 +97,13 @@ public: Status init(); /** - * Outputs a possible plan. Leaves in the plan are tagged with an index to use. - * Returns a MatchExpression representing a point in the query tree (which can be - * used to build a QueryAssignment) or nullptr if no more plans will be outputted. - * While owned by the caller, the MatchExpression returned points into data that is - * owned elsewhere. + * Outputs a possible plan. Leaves in the plan are tagged with an index to use. Returns a + * MatchExpression representing a point in the query tree (which can be used to build a + * QuerySolutionNode tree) or nullptr if no more plans will be outputted. While owned by the + * caller, the MatchExpression returned points into data that is owned elsewhere. * - * Nodes in 'tree' are tagged with indices that should be used to answer the tagged nodes. - * Only nodes that have a field name (getCategory() != kLogical) will be tagged. + * Nodes in 'tree' are tagged with indices that should be used to answer the tagged nodes. Only + * nodes that have a field name (getCategory() != kLogical) will be tagged. * * The output tree is a clone identical to that used to initialize the enumerator, with tags * added in order to indicate index usage. @@ -224,6 +226,29 @@ private: size_t counter; }; + struct LockstepOrAssignment { + struct PreferFirstSubNode { + MemoID memoId; + size_t iterationCount; + boost::optional<size_t> maxIterCount; + }; + std::vector<PreferFirstSubNode> subnodes; + + bool exhaustedLockstepIteration = false; + size_t totalEnumerated = 0; + + /** + * Returns true if 'totalEnumerated' matches the total number of expected plans for this + * assignment. + */ + bool shouldResetBeforeProceeding(size_t totalEnumerated) const; + + /** + * Returns true if each sub node is at the same iterationCount. + */ + bool allIdentical() const; + }; + // This is used by AndAssignment and is not an actual assignment. struct OneIndexAssignment { // 'preds[i]' is uses index 'index' at position 'positions[i]' @@ -266,6 +291,7 @@ private: */ struct NodeAssignment { std::unique_ptr<OrAssignment> orAssignment; + std::unique_ptr<LockstepOrAssignment> lockstepOrAssignment; std::unique_ptr<AndAssignment> andAssignment; std::unique_ptr<ArrayAssignment> arrayAssignment; std::string toString() const; @@ -526,6 +552,11 @@ private: */ MemoID memoIDForNode(MatchExpression* node); + /** + * Helper for advancing the enumeration for a LockstepOrAssignment node. + */ + bool _nextMemoForLockstepOrAssignment(LockstepOrAssignment*); + std::string dumpMemo(); // Map from expression to its MemoID. @@ -551,6 +582,10 @@ private: // Do we output >1 index per AND (index intersection)? bool _ixisect; + // Do we enumerate children of an $or in a special order to prioritize solutions which have the + // same assignment on each branch? + bool _enumerateOrChildrenLockstep; + // How many enumerations are we willing to produce from each OR? size_t _orLimit; diff --git a/src/mongo/db/query/query_knobs.idl b/src/mongo/db/query/query_knobs.idl index a74414f5f48..3987713d4f2 100644 --- a/src/mongo/db/query/query_knobs.idl +++ b/src/mongo/db/query/query_knobs.idl @@ -151,6 +151,25 @@ server_parameters: validator: gte: 0 + internalQueryEnumerationPreferLockstepOrEnumeration: + description: "If set to true, instructs the plan enumerator to enumerate contained $ors in a + special order. $or enumeration can generate an exponential number of plans, and is therefore + limited at some arbitrary cutoff controlled by a parameter. When this limit is hit, the order of + enumeration is important. For example, a query like the following has a 'contained $or' (within + an $and): {a: 1, $or: [{b: 1, c: 1}, {b: 2, c: 2}]} For this query if there are indexes + a_b={a: 1, b: 1} and a_c={a: 1, c: 1}, the normal enumeration order would output assignments + [a_b, a_b], [a_c, a_b], [a_b, a_c], then [a_c, a_c]. This flag will instruct the enumerator to + instead prefer a different order. It's hard to summarize, but perhaps the phrases 'lockstep + enumeration', 'simultaneous advancement', or 'parallel iteration' will help the reader. The + effect is to give earlier enumeration to plans which use the same choice across all branches. In + this order, we would get assignments [a_b, a_b], [a_c, a_c], [a_c, a_b], then [a_b, a_c]. This + is thought to be helpful in general, but particularly in cases where all children of the $or use + the same fields and have the same indexes available, as in this example." + set_at: [ startup, runtime ] + cpp_varname: "internalQueryEnumerationPreferLockstepOrEnumeration" + cpp_vartype: AtomicWord<bool> + default: false + internalQueryEnumerationMaxOrSolutions: description: "How many solutions will the enumerator consider at each OR?" set_at: [ startup, runtime ] diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp index 778592b242b..316044294ee 100644 --- a/src/mongo/db/query/query_planner.cpp +++ b/src/mongo/db/query/query_planner.cpp @@ -135,6 +135,9 @@ string optionString(size_t options) { case QueryPlannerParams::STRICT_DISTINCT_ONLY: ss << "STRICT_DISTINCT_ONLY "; break; + case QueryPlannerParams::ENUMERATE_OR_CHILDREN_LOCKSTEP: + ss << "ENUMERATE_OR_CHILDREN_LOCKSTEP "; + break; case QueryPlannerParams::DEFAULT: MONGO_UNREACHABLE; break; @@ -780,12 +783,15 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan( enumParams.intersect = params.options & QueryPlannerParams::INDEX_INTERSECTION; enumParams.root = query.root(); enumParams.indices = &relevantIndices; + enumParams.enumerateOrChildrenLockstep = + params.options & QueryPlannerParams::ENUMERATE_OR_CHILDREN_LOCKSTEP; - PlanEnumerator isp(enumParams); - isp.init().transitional_ignore(); + PlanEnumerator planEnumerator(enumParams); + uassertStatusOKWithContext(planEnumerator.init(), "failed to initialize plan enumerator"); unique_ptr<MatchExpression> nextTaggedTree; - while ((nextTaggedTree = isp.getNext()) && (out.size() < params.maxIndexedSolutions)) { + while ((nextTaggedTree = planEnumerator.getNext()) && + (out.size() < params.maxIndexedSolutions)) { LOG(5) << "About to build solntree from tagged tree:" << endl << redact(nextTaggedTree->debugString()); diff --git a/src/mongo/db/query/query_planner_params.h b/src/mongo/db/query/query_planner_params.h index 9229dbb06ce..f8848b2011e 100644 --- a/src/mongo/db/query/query_planner_params.h +++ b/src/mongo/db/query/query_planner_params.h @@ -98,6 +98,23 @@ struct QueryPlannerParams { // return exactly one document per value of the distinct field. See the comments above the // declaration of getExecutorDistinct() for more detail. STRICT_DISTINCT_ONLY = 1 << 11, + + // Instruct the plan enumerator to enumerate contained $ors in a special order. $or + // enumeration can generate an exponential number of plans, and is therefore limited at some + // arbitrary cutoff controlled by a parameter. When this limit is hit, the order of + // enumeration is important. For example, a query like the following has a "contained $or" + // (within an $and): + // {a: 1, $or: [{b: 1, c: 1}, {b: 2, c: 2}]} + // For this query if there are indexes a_b={a: 1, b: 1} and a_c={a: 1, c: 1}, the normal + // enumeration order would output assignments [a_b, a_b], [a_c, a_b], [a_b, a_c], then [a_c, + // a_c]. This flag will instruct the enumerator to instead prefer a different order. It's + // hard to summarize, but perhaps the phrases "lockstep enumeration", "simultaneous + // advancement", or "parallel iteration" will help the reader. The effect is to give earlier + // enumeration to plans which use the same index of alternative across all branches. In this + // order, we would get assignments [a_b, a_b], [a_c, a_c], [a_c, a_b], then [a_b, a_c]. This + // is thought to be helpful in general, but particularly in cases where all children of the + // $or use the same fields and have the same indexes available, as in this example. + ENUMERATE_OR_CHILDREN_LOCKSTEP = 1 << 12, }; // See Options enum above. diff --git a/src/mongo/db/query/query_planner_test.cpp b/src/mongo/db/query/query_planner_test.cpp index ef5635e5730..dc34029f148 100644 --- a/src/mongo/db/query/query_planner_test.cpp +++ b/src/mongo/db/query/query_planner_test.cpp @@ -6047,4 +6047,393 @@ TEST_F(QueryPlannerTest, SolutionSetStableWhenOrEnumerationLimitIsReached) { "1}}}}}]}}"); } +// Test that we enumerate the expected plans with the special parameter set. In this test we have +// two branches of an $or, each with two possible indexed solutions. +TEST_F(QueryPlannerTest, LockstepOrEnumerationSanityCheckTwoChildrenTwoIndexesEach) { + params.options = + QueryPlannerParams::NO_TABLE_SCAN | QueryPlannerParams::ENUMERATE_OR_CHILDREN_LOCKSTEP; + addIndex(BSON("a" << 1 << "b" << 1)); + addIndex(BSON("a" << 1 << "c" << 1)); + + runQueryAsCommand( + fromjson("{find: 'testns', filter: {a: 1, $or: [{b: 1, c: 1}, {b: 2, c: 2}]}}")); + + assertNumSolutions(6U); + + assertSolutionExists( + "{or: {nodes: [{fetch: {filter: {c: {$eq: 1} }, node: {ixscan: {pattern: {a: 1, b: 1}}}}}, " + "{fetch: {filter: {c: {$eq: 2}}, node: {ixscan: {pattern: {a: 1, b: 1}}}}}]}}"); + assertSolutionExists( + "{or: {nodes: [{fetch: {filter: {b: {$eq: 1} }, node: {ixscan: {pattern: {a: 1, c: 1}}}}}, " + "{fetch: {filter: {b: {$eq: 2}}, node: {ixscan: {pattern: {a: 1, c: 1}}}}}]}}"); + assertSolutionExists( + "{or: {nodes: [{fetch: {filter: {c: {$eq: 1} }, node: {ixscan: {pattern: {a: 1, b: 1}}}}}, " + "{fetch: {filter: {b: {$eq: 2}}, node: {ixscan: {pattern: {a: 1, c: 1}}}}}]}}"); + assertSolutionExists( + "{or: {nodes: [{fetch: {filter: {b: {$eq: 1} }, node: {ixscan: {pattern: {a: 1, c: 1}}}}}, " + "{fetch: {filter: {c: {$eq: 2}}, node: {ixscan: {pattern: {a: 1, b: 1}}}}}]}}"); + assertSolutionExists( + "{fetch: {filter: {$or: [{b: {$eq: 1}, c: {$eq: 1}}, {b: {$eq: 2}, c: {$eq: 2}}]}, node: " + "{ixscan: {pattern: {a: 1, b: 1}}}}}}}"); + assertSolutionExists( + "{fetch: {filter: {$or: [{b: {$eq: 1}, c: {$eq: 1}}, {b: {$eq: 2}, c: {$eq: 2}}]}, node: " + "{ixscan: {pattern: {a: 1, c: 1}}}}}}}"); +} + +// Test that we enumerate the expected plans with the special parameter set. In this test we have +// two branches of an $or, each with one possible indexed solution. +TEST_F(QueryPlannerTest, LockstepOrEnumerationSanityCheckTwoChildrenOneIndexEach) { + params.options = + QueryPlannerParams::NO_TABLE_SCAN | QueryPlannerParams::ENUMERATE_OR_CHILDREN_LOCKSTEP; + addIndex(BSON("a" << 1 << "b" << 1)); + addIndex(BSON("a" << 1 << "c" << 1)); + + runQueryAsCommand(fromjson("{find: 'testns', filter: {a: 1, $or: [{b: 1}, {c: 2}]}}")); + + assertNumSolutions(3U); + + assertSolutionExists( + "{fetch: {filter: null, node: {or: {nodes: [{ixscan: {pattern: {a: 1, b: 1}}}, {ixscan: " + "{pattern: {a: 1, c: 1}}}]}}}}"); + assertSolutionExists( + "{fetch: {filter: {$or: [{b: {$eq: 1}}, {c: {$eq: 2}}]}, node: {ixscan: {pattern: {a: 1, " + "b: 1}}}}}}}"); + assertSolutionExists( + "{fetch: {filter: {$or: [{b: {$eq: 1}}, {c: {$eq: 2}}]}, node: {ixscan: {pattern: {a: 1, " + "c: 1}}}}}}}"); +} + +// Test that we enumerate the expected plans with the special parameter set. In this test we have +// two branches of an $or, one with one possible indexed solution, the other with two possible +// indexed solutions. +TEST_F(QueryPlannerTest, LockstepOrEnumerationSanityCheckTwoChildrenDifferentNumSolutions) { + params.options = + QueryPlannerParams::NO_TABLE_SCAN | QueryPlannerParams::ENUMERATE_OR_CHILDREN_LOCKSTEP; + addIndex(BSON("a" << 1 << "b" << 1)); + addIndex(BSON("a" << 1 << "c" << 1)); + + runQueryAsCommand(fromjson("{find: 'testns', filter: {a: 1, $or: [{b: 1}, {b: 2, c: 2}]}}")); + + assertNumSolutions(4U); + + assertSolutionExists( + "{fetch: {filter: null, node: {or: {nodes: [{ixscan: {pattern: {a: 1, b: 1}}}, {fetch: " + "{filter: {c: {$eq: 2}}, node: {ixscan: {pattern: {a: 1, b: 1}}}}}]}}}}"); + assertSolutionExists( + "{fetch: {filter: null, node: {or: {nodes: [{ixscan: {pattern: {a: 1, b: 1}}}, {fetch: " + "{filter: {b: {$eq: 2}}, node: {ixscan: {pattern: {a: 1, c: 1}}}}}]}}}}"); + assertSolutionExists( + "{fetch: {filter: {$or: [{b: {$eq: 1}}, {b: {$eq: 2}, c: {$eq: 2}}]}, node: {ixscan: " + "{pattern: {a: 1, b: 1}}}}}}}"); + assertSolutionExists( + "{fetch: {filter: {$or: [{b: {$eq: 1}}, {b: {$eq: 2}, c: {$eq: 2}}]}, node: {ixscan: " + "{pattern: {a: 1, c: 1}}}}}}}"); +} + +// Test that the special parameter does in fact impact the order of enumeration. Here we rely on the +// cap of number of or enumerations to prove that the plans we're interested in are enumerated +// before we hit the limit. +TEST_F(QueryPlannerTest, NormalOrEnumerationDoesNotPrioritizeLockstepIteration) { + params.options = QueryPlannerParams::NO_TABLE_SCAN; + ASSERT_EQ(internalQueryEnumerationMaxOrSolutions.load(), 10); + addIndex(BSON("a" << 1 << "b" << 1)); + addIndex(BSON("a" << 1 << "c" << 1)); + addIndex(BSON("a" << 1 << "d" << 1)); + + // For this query and the above indexes, each clause of the $or has three options to choose + // from, for a total of 3 * 3 * 3 = 27 possible enumerations for just that $or sub-branch. + runQueryAsCommand( + fromjson("{find: 'testns', filter: {a: 1, $or: [{b: 1, c: 1, d: 1}, {b: 2, c: 2, d: 2}, " + "{b: 3, c: 3, d: 3}]}}")); + + // The $or enumeration is limited to 10, and then we have three plans where just the {a: 1} + // predicate is indexed. + assertNumSolutions(13U); + + assertSolutionExists( + "{or: {nodes: [" + "{fetch: {filter: {c: {$eq: 1}, d: {$eq: 1}}, node: {ixscan: {pattern: {a: 1, b: 1}}}}}, " + "{fetch: {filter: {c: {$eq: 2}, d: {$eq: 2}}, node: {ixscan: {pattern: {a: 1, b: 1}}}}}, " + "{fetch: {filter: {c: {$eq: 3}, d: {$eq: 3}}, node: {ixscan: {pattern: {a: 1, b: 1}}}}} " + "]}}"); + // Because we did not set the 'ENUMERATE_OR_CHILDREN_LOCKSTEP' flag, we don't expect this + // solution to be generated. This is in contrast to the next test case. + ASSERT_THROWS( + assertSolutionExists( + "{or: {nodes: [" + "{fetch: {filter: {b: {$eq: 1}, c: {$eq: 1}}, node: {ixscan: {pattern: {a: 1, d: " + "1}}}}}, " + "{fetch: {filter: {b: {$eq: 2}, c: {$eq: 2}}, node: {ixscan: {pattern: {a: 1, d: " + "1}}}}}, " + "{fetch: {filter: {b: {$eq: 3}, c: {$eq: 3}}, node: {ixscan: {pattern: {a: 1, d: " + "1}}}}} " + "]}}"), + unittest::TestAssertionFailureException); + + // We still expect to generate the solutions which don't index the $or. + assertSolutionExists( + "{fetch: {filter: {$or: [" + "{b: {$eq: 1}, c: {$eq: 1}, d: {$eq: 1}}, " + "{b: {$eq: 2}, c: {$eq: 2}, d: {$eq: 2}}, " + "{b: {$eq: 3}, c: {$eq: 3}, d: {$eq: 3}} " + "]}, node: {ixscan: {pattern: {a: 1, b: 1}}}}}}"); +} + +TEST_F(QueryPlannerTest, LockstepOrEnumerationDoesPrioritizeLockstepIteration) { + params.options = + QueryPlannerParams::NO_TABLE_SCAN | QueryPlannerParams::ENUMERATE_OR_CHILDREN_LOCKSTEP; + ASSERT_EQ(internalQueryEnumerationMaxOrSolutions.load(), 10); + addIndex(BSON("a" << 1 << "b" << 1)); + addIndex(BSON("a" << 1 << "c" << 1)); + addIndex(BSON("a" << 1 << "d" << 1)); + + // For this query and the above indexes, each clause of the $or has three options to choose + // from, for a total of 3 * 3 * 3 = 27 possible enumerations for just that $or sub-branch. + runQueryAsCommand( + fromjson("{find: 'testns', filter: {a: 1, $or: [{b: 1, c: 1, d: 1}, {b: 2, c: 2, d: 2}, " + "{b: 3, c: 3, d: 3}]}}")); + + // The $or enumeration is limited to 10, and then we have three plans where just the {a: 1} + // predicate is indexed. + assertNumSolutions(13U); + + assertSolutionExists( + "{or: {nodes: [" + "{fetch: {filter: {c: {$eq: 1}, d: {$eq: 1}}, node: {ixscan: {pattern: {a: 1, b: 1}}}}}, " + "{fetch: {filter: {c: {$eq: 2}, d: {$eq: 2}}, node: {ixscan: {pattern: {a: 1, b: 1}}}}}, " + "{fetch: {filter: {c: {$eq: 3}, d: {$eq: 3}}, node: {ixscan: {pattern: {a: 1, b: 1}}}}} " + "]}}"); + assertSolutionExists( + "{or: {nodes: [" + "{fetch: {filter: {b: {$eq: 1}, d: {$eq: 1}}, node: {ixscan: {pattern: {a: 1, c: 1}}}}}, " + "{fetch: {filter: {b: {$eq: 2}, d: {$eq: 2}}, node: {ixscan: {pattern: {a: 1, c: 1}}}}}, " + "{fetch: {filter: {b: {$eq: 3}, d: {$eq: 3}}, node: {ixscan: {pattern: {a: 1, c: 1}}}}} " + "]}}"); + assertSolutionExists( + "{or: {nodes: [" + "{fetch: {filter: {b: {$eq: 1}, c: {$eq: 1}}, node: {ixscan: {pattern: {a: 1, d: 1}}}}}, " + "{fetch: {filter: {b: {$eq: 2}, c: {$eq: 2}}, node: {ixscan: {pattern: {a: 1, d: 1}}}}}, " + "{fetch: {filter: {b: {$eq: 3}, c: {$eq: 3}}, node: {ixscan: {pattern: {a: 1, d: 1}}}}} " + "]}}"); + assertSolutionExists( + "{fetch: {filter: {$or: [" + "{b: {$eq: 1}, c: {$eq: 1}, d: {$eq: 1}}, " + "{b: {$eq: 2}, c: {$eq: 2}, d: {$eq: 2}}, " + "{b: {$eq: 3}, c: {$eq: 3}, d: {$eq: 3}} " + "]}, node: {ixscan: {pattern: {a: 1, b: 1}}}}}}"); +} + +TEST_F(QueryPlannerTest, LockstepOrEnumerationDoesPrioritizeLockstepIterationMixedChildren) { + params.options = + QueryPlannerParams::NO_TABLE_SCAN | QueryPlannerParams::ENUMERATE_OR_CHILDREN_LOCKSTEP; + ASSERT_EQ(internalQueryEnumerationMaxOrSolutions.load(), 10); + addIndex(BSON("a" << 1 << "b" << 1)); + addIndex(BSON("a" << 1 << "c" << 1)); + addIndex(BSON("a" << 1 << "d" << 1)); + addIndex(BSON("a" << 1 << "e" << 1)); + + // For this query and the above indexes, each clause of the $or has a varying number options to + // choose from, for a total of 2 * 3 * 4 * 2 = 48 possible enumerations for just that $or + // sub-branch. + runQueryAsCommand( + fromjson("{find: 'testns', filter: {" + " a: 1," + " $or: [" + " {b: 2.1, c: 2.1}," + " {b: 3, c: 3, d: 3}," + " {b: 4, c: 4, d: 4, e: 4}," + " {b: 2.2, c: 2.2}" + "]}}")); + + // The $or enumeration is limited to 10, and then we have four plans where just the {a: 1} + // predicate is indexed. + assertNumSolutions(14U); + + // Lockstep enumerations. Definitely expected. + assertSolutionExists( + "{or: {nodes: [" + "{fetch: {filter: {c: {$eq: 2.1}}, node: {ixscan: {pattern: {a: 1, b: 1}}}}}, " + "{fetch: {filter: {c: {$eq: 3}, d: {$eq: 3}}, node: {ixscan: {pattern: {a: 1, b: 1}}}}}, " + "{fetch: {filter: {c: {$eq: 4}, d: {$eq: 4}, e: {$eq: 4}}," + " node: {ixscan: {pattern: {a: 1, b: 1}}}}}, " + "{fetch: {filter: {c: {$eq: 2.2}}, node: {ixscan: {pattern: {a: 1, b: 1}}}}}" + "]}}"); + assertSolutionExists( + "{or: {nodes: [" + "{fetch: {filter: {b: {$eq: 2.1}}, node: {ixscan: {pattern: {a: 1, c: 1}}}}}, " + "{fetch: {filter: {b: {$eq: 3}, d: {$eq: 3}}, node: {ixscan: {pattern: {a: 1, c: 1}}}}}, " + "{fetch: {filter: {b: {$eq: 4}, d: {$eq: 4}, e: {$eq: 4}}," + " node: {ixscan: {pattern: {a: 1, c: 1}}}}}, " + "{fetch: {filter: {b: {$eq: 2.2}}, node: {ixscan: {pattern: {a: 1, c: 1}}}}}" + "]}}"); + // Everyone advances one more time, no longer lock step. + assertSolutionExists( + "{or: {nodes: [" + "{fetch: {filter: {c: {$eq: 2.1}}, node: {ixscan: {pattern: {a: 1, b: 1}}}}}, " + "{fetch: {filter: {b: {$eq: 3}, c: {$eq: 3}}, node: {ixscan: {pattern: {a: 1, d: 1}}}}}, " + "{fetch: {filter: {b: {$eq: 4}, c: {$eq: 4}, e: {$eq: 4}}," + " node: {ixscan: {pattern: {a: 1, d: 1}}}}}, " + "{fetch: {filter: {c: {$eq: 2.2}}, node: {ixscan: {pattern: {a: 1, b: 1}}}}}" + "]}}"); + // Normal enumeration. Here we observe an interesting phenomena. Before we get into plan + // enumeration, the query is parsed and "normalized". This process involves putting the query in + // a canonical order, in part so that similar queries can be recognized as such for caching. In + // this case, it orders the $or children by their respective number of children. So our original + // query will be enumerated as if it were typed in this order: + // {a: 1, + // $or: [ + // {b: 2.1, c: 2.1}, + // {b: 2.2, c: 2.2}, + // {b: 3, c: 3, d: 3}, + // {b: 4, c: 4, d: 4, e: 4} + // ] + // } + // Here are the exact plans: + assertSolutionExists( + "{or: {nodes: [" + "{fetch: {filter: {b: {$eq: 2.1}}, node: {ixscan: {pattern: {a: 1, c: 1}}}}}, " + "{fetch: {filter: {c: {$eq: 2.2}}, node: {ixscan: {pattern: {a: 1, b: 1}}}}}, " + "{fetch: {filter: {b: {$eq: 3}, c: {$eq: 3}}, node: {ixscan: {pattern: {a: 1, d: 1}}}}}, " + "{fetch: {filter: {b: {$eq: 4}, c: {$eq: 4}, e: {$eq: 4}}," + " node: {ixscan: {pattern: {a: 1, d: 1}}}}}" + "]}}"); + assertSolutionExists( + "{or: {nodes: [" + "{fetch: {filter: {c: {$eq: 2.1}}, node: {ixscan: {pattern: {a: 1, b: 1}}}}}, " + "{fetch: {filter: {b: {$eq: 2.2}}, node: {ixscan: {pattern: {a: 1, c: 1}}}}}, " + "{fetch: {filter: {b: {$eq: 3}, c: {$eq: 3}}, node: {ixscan: {pattern: {a: 1, d: 1}}}}}, " + "{fetch: {filter: {b: {$eq: 4}, c: {$eq: 4}, e: {$eq: 4}}," + " node: {ixscan: {pattern: {a: 1, d: 1}}}}}" + "]}}"); + assertSolutionExists( + "{or: {nodes: [" + "{fetch: {filter: {b: {$eq: 2.1}}, node: {ixscan: {pattern: {a: 1, c: 1}}}}}, " + "{fetch: {filter: {b: {$eq: 2.2}}, node: {ixscan: {pattern: {a: 1, c: 1}}}}}, " + "{fetch: {filter: {b: {$eq: 3}, c: {$eq: 3}}, node: {ixscan: {pattern: {a: 1, d: 1}}}}}, " + "{fetch: {filter: {b: {$eq: 4}, c: {$eq: 4}, e: {$eq: 4}}," + " node: {ixscan: {pattern: {a: 1, d: 1}}}}}" + "]}}"); + assertSolutionExists( + "{or: {nodes: [" + "{fetch: {filter: {c: {$eq: 2.1}}, node: {ixscan: {pattern: {a: 1, b: 1}}}}}, " + "{fetch: {filter: {c: {$eq: 2.2}}, node: {ixscan: {pattern: {a: 1, b: 1}}}}}, " + "{fetch: {filter: {c: {$eq: 3}, d: {$eq: 3}}, node: {ixscan: {pattern: {a: 1, b: 1}}}}}, " + "{fetch: {filter: {b: {$eq: 4}, c: {$eq: 4}, d: {$eq: 4}}," + " node: {ixscan: {pattern: {a: 1, e: 1}}}}}" + "]}}"); + assertSolutionExists( + "{or: {nodes: [" + "{fetch: {filter: {b: {$eq: 2.1}}, node: {ixscan: {pattern: {a: 1, c: 1}}}}}, " + "{fetch: {filter: {c: {$eq: 2.2}}, node: {ixscan: {pattern: {a: 1, b: 1}}}}}, " + "{fetch: {filter: {c: {$eq: 3}, d: {$eq: 3}}, node: {ixscan: {pattern: {a: 1, b: 1}}}}}, " + "{fetch: {filter: {b: {$eq: 4}, c: {$eq: 4}, d: {$eq: 4}}," + " node: {ixscan: {pattern: {a: 1, e: 1}}}}}" + "]}}"); + assertSolutionExists( + "{or: {nodes: [" + "{fetch: {filter: {c: {$eq: 2.1}}, node: {ixscan: {pattern: {a: 1, b: 1}}}}}, " + "{fetch: {filter: {b: {$eq: 2.2}}, node: {ixscan: {pattern: {a: 1, c: 1}}}}}, " + "{fetch: {filter: {c: {$eq: 3}, d: {$eq: 3}}, node: {ixscan: {pattern: {a: 1, b: 1}}}}}, " + "{fetch: {filter: {b: {$eq: 4}, c: {$eq: 4}, d: {$eq: 4}}," + " node: {ixscan: {pattern: {a: 1, e: 1}}}}}" + "]}}"); + assertSolutionExists( + "{or: {nodes: [" + "{fetch: {filter: {b: {$eq: 2.1}}, node: {ixscan: {pattern: {a: 1, c: 1}}}}}, " + "{fetch: {filter: {b: {$eq: 2.2}}, node: {ixscan: {pattern: {a: 1, c: 1}}}}}, " + "{fetch: {filter: {c: {$eq: 3}, d: {$eq: 3}}, node: {ixscan: {pattern: {a: 1, b: 1}}}}}, " + "{fetch: {filter: {b: {$eq: 4}, c: {$eq: 4}, d: {$eq: 4}}," + " node: {ixscan: {pattern: {a: 1, e: 1}}}}}" + "]}}"); + + // Now to the solutions which don't index the $or. + assertSolutionExists( + "{fetch: {filter: {$or: [" + "{b: {$eq: 2.1}, c: {$eq: 2.1}}, " + "{b: {$eq: 2.2}, c: {$eq: 2.2}}, " + "{b: {$eq: 3}, c: {$eq: 3}, d: {$eq: 3}}, " + "{b: {$eq: 4}, c: {$eq: 4}, d: {$eq: 4}, e: {$eq: 4}} " + "]}, node: {ixscan: {pattern: {a: 1, b: 1}}}}}}"); + assertSolutionExists( + "{fetch: {filter: {$or: [" + "{b: {$eq: 2.1}, c: {$eq: 2.1}}, " + "{b: {$eq: 2.2}, c: {$eq: 2.2}}, " + "{b: {$eq: 3}, c: {$eq: 3}, d: {$eq: 3}}, " + "{b: {$eq: 4}, c: {$eq: 4}, d: {$eq: 4}, e: {$eq: 4}} " + "]}, node: {ixscan: {pattern: {a: 1, c: 1}}}}}}"); + assertSolutionExists( + "{fetch: {filter: {$or: [" + "{b: {$eq: 2.1}, c: {$eq: 2.1}}, " + "{b: {$eq: 2.2}, c: {$eq: 2.2}}, " + "{b: {$eq: 3}, c: {$eq: 3}, d: {$eq: 3}}, " + "{b: {$eq: 4}, c: {$eq: 4}, d: {$eq: 4}, e: {$eq: 4}} " + "]}, node: {ixscan: {pattern: {a: 1, d: 1}}}}}}"); + assertSolutionExists( + "{fetch: {filter: {$or: [" + "{b: {$eq: 2.1}, c: {$eq: 2.1}}, " + "{b: {$eq: 2.2}, c: {$eq: 2.2}}, " + "{b: {$eq: 3}, c: {$eq: 3}, d: {$eq: 3}}, " + "{b: {$eq: 4}, c: {$eq: 4}, d: {$eq: 4}, e: {$eq: 4}} " + "]}, node: {ixscan: {pattern: {a: 1, e: 1}}}}}}"); +} + +TEST_F(QueryPlannerTest, LockstepOrEnumerationApplysToEachOrInTree) { + params.options = + QueryPlannerParams::NO_TABLE_SCAN | QueryPlannerParams::ENUMERATE_OR_CHILDREN_LOCKSTEP; + ASSERT_EQ(internalQueryEnumerationMaxOrSolutions.load(), 10); + addIndex(BSON("a" << 1 << "b" << 1)); + addIndex(BSON("a" << 1 << "c" << 1)); + addIndex(BSON("a" << 1 << "x" << 1)); + addIndex(BSON("a" << 1 << "y" << 1)); + + // For this query and the above indexes, each clause of the $or has 2 indexes to choose from, + // for a total of 2 * 2 * 2 * 2 = 16 possible enumerations for just that $or sub-branch. + runQueryAsCommand( + fromjson("{find: 'testns', filter: {" + " a: 1," + " $or: [" + " {b: 2.1, c: 2.1}," + " {b: 2.2, c: 2.2}," + " {$and: [" + " {unindexed: 'thisPredicateToEnsureNestedOrsAreNotCombined'}," + " {$or: [" + " {x: 3.0, y: 3.0}," + " {x: 3.1, y: 3.1}" + " ]}" + " ]}" + "]}}")); + + // The $or enumeration is limited to 10, and then we have 4 plans where just the {a: 1} + // predicate is indexed. + assertNumSolutions(14U); + + // Both lockstep enumerations should be present. + assertSolutionExists( + "{or: {nodes: [" + " {fetch: {filter: {c: {$eq: 2.1}}, node: {ixscan: {pattern: {a: 1, b: 1}}}}}, " + " {fetch: {filter: {c: {$eq: 2.2}}, node: {ixscan: {pattern: {a: 1, b: 1}}}}}, " + " {fetch: {" + " filter: {unindexed: {$eq: 'thisPredicateToEnsureNestedOrsAreNotCombined'}}," + " node: {" + " or: {nodes: [" + " {fetch: {filter: {y: {$eq: 3.0}}, node: {ixscan: {pattern: {a: 1, x: 1}}}}}," + " {fetch: {filter: {y: {$eq: 3.1}}, node: {ixscan: {pattern: {a: 1, x: 1}}}}}" + " ]}}" + " }}" + "]}}"); + assertSolutionExists( + "{or: {nodes: [" + " {fetch: {filter: {b: {$eq: 2.1}}, node: {ixscan: {pattern: {a: 1, c: 1}}}}}, " + " {fetch: {filter: {b: {$eq: 2.2}}, node: {ixscan: {pattern: {a: 1, c: 1}}}}}, " + " {fetch: {" + " filter: {unindexed: {$eq: 'thisPredicateToEnsureNestedOrsAreNotCombined'}}," + " node: {" + " or: {nodes: [" + " {fetch: {filter: {x: {$eq: 3.0}}, node: {ixscan: {pattern: {a: 1, y: 1}}}}}," + " {fetch: {filter: {x: {$eq: 3.1}}, node: {ixscan: {pattern: {a: 1, y: 1}}}}}" + " ]}}" + " }}" + "]}}"); +} + } // namespace diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp index 20fe9824450..3115496fce1 100644 --- a/src/mongo/db/query/query_solution.cpp +++ b/src/mongo/db/query/query_solution.cpp @@ -528,6 +528,7 @@ void IndexScanNode::appendToString(str::stream* ss, int indent) const { *ss << "IXSCAN\n"; addIndent(ss, indent + 1); *ss << "indexName = " << index.identifier.catalogName << '\n'; + addIndent(ss, indent + 1); *ss << "keyPattern = " << index.keyPattern << '\n'; if (NULL != filter) { addIndent(ss, indent + 1); |