diff options
author    David Storch <david.storch@10gen.com>  2015-05-29 19:15:19 -0400
committer David Storch <david.storch@10gen.com>  2015-07-10 17:11:16 -0400
commit    15c72c8570c63e2e6ba0a3d339a8286d0be604db (patch)
tree      96cc37b3259e9ce7472132b8c414eb614317c037 /src/mongo
parent    9c63b79a8d5c8d19663850f9d668a3581dea77d5 (diff)
download  mongo-15c72c8570c63e2e6ba0a3d339a8286d0be604db.tar.gz
SERVER-13732 rewrite contained $or queries to rooted $or in the SubplanStage
This allows queries with an $or contained within an explicit or implicit $and to be answered with
more efficient plans. It also expands the use of the SubplanStage to include contained $or queries
and therefore may reduce the number of plans considered for these queries.
Diffstat (limited to 'src/mongo')
-rw-r--r--  src/mongo/db/exec/subplan.cpp              | 118
-rw-r--r--  src/mongo/db/exec/subplan.h                |  19
-rw-r--r--  src/mongo/db/query/canonical_query.cpp     |  74
-rw-r--r--  src/mongo/db/query/canonical_query.h       |  10
-rw-r--r--  src/mongo/db/query/canonical_query_test.cpp |  50
-rw-r--r--  src/mongo/db/query/index_bounds.cpp        |  50
-rw-r--r--  src/mongo/db/query/index_bounds.h          |   7
-rw-r--r--  src/mongo/db/query/index_bounds_test.cpp   | 269
-rw-r--r--  src/mongo/db/query/interval.h              |   4
-rw-r--r--  src/mongo/db/query/planner_access.cpp      | 107
-rw-r--r--  src/mongo/db/query/planner_access.h        |  22
-rw-r--r--  src/mongo/db/query/query_planner.cpp       |   4
-rw-r--r--  src/mongo/db/query/query_planner_common.h  |   4
-rw-r--r--  src/mongo/db/query/query_planner_test.cpp  | 122
-rw-r--r--  src/mongo/db/query/query_solution.cpp      |  21
-rw-r--r--  src/mongo/db/query/query_solution.h        |   3
-rw-r--r--  src/mongo/dbtests/query_stage_subplan.cpp  | 270
17 files changed, 1021 insertions, 133 deletions
diff --git a/src/mongo/db/exec/subplan.cpp b/src/mongo/db/exec/subplan.cpp index 352dd5ca36c..b0a9717ba64 100644 --- a/src/mongo/db/exec/subplan.cpp +++ b/src/mongo/db/exec/subplan.cpp @@ -40,6 +40,7 @@ #include "mongo/db/query/planner_analysis.h" #include "mongo/db/query/planner_access.h" #include "mongo/db/query/query_planner.h" +#include "mongo/db/query/query_planner_common.h" #include "mongo/db/query/stage_builder.h" #include "mongo/stdx/memory.h" #include "mongo/util/log.h" @@ -51,7 +52,6 @@ using std::unique_ptr; using std::vector; using stdx::make_unique; -// static const char* SubplanStage::kStageType = "SUBPLAN"; SubplanStage::SubplanStage(OperationContext* txn, @@ -64,21 +64,36 @@ SubplanStage::SubplanStage(OperationContext* txn, _ws(ws), _plannerParams(params), _query(cq), - _child(nullptr), _commonStats(kStageType) { invariant(_collection); } -// static -bool SubplanStage::canUseSubplanning(const CanonicalQuery& query) { - const LiteParsedQuery& lpq = query.getParsed(); - const MatchExpression* expr = query.root(); +namespace { - // Only rooted ORs work with the subplan scheme. - if (MatchExpression::OR != expr->matchType()) { +/** + * Returns true if 'expr' is an AND that contains a single OR child. 
+ */ +bool isContainedOr(const MatchExpression* expr) { + if (MatchExpression::AND != expr->matchType()) { return false; } + size_t numOrs = 0; + for (size_t i = 0; i < expr->numChildren(); ++i) { + if (MatchExpression::OR == expr->getChild(i)->matchType()) { + ++numOrs; + } + } + + return (numOrs == 1U); +} + +} // namespace + +bool SubplanStage::canUseSubplanning(const CanonicalQuery& query) { + const LiteParsedQuery& lpq = query.getParsed(); + const MatchExpression* expr = query.root(); + // Hint provided if (!lpq.getHint().isEmpty()) { return false; @@ -106,7 +121,52 @@ bool SubplanStage::canUseSubplanning(const CanonicalQuery& query) { return false; } - return true; + // If it's a rooted $or and has none of the disqualifications above, then subplanning is + // allowed. + if (MatchExpression::OR == expr->matchType()) { + return true; + } + + // We also allow subplanning if it's a contained OR that does not have a TEXT or GEO_NEAR node. + const bool hasTextOrGeoNear = QueryPlannerCommon::hasNode(expr, MatchExpression::TEXT) || + QueryPlannerCommon::hasNode(expr, MatchExpression::GEO_NEAR); + return isContainedOr(expr) && !hasTextOrGeoNear; +} + +std::unique_ptr<MatchExpression> SubplanStage::rewriteToRootedOr( + std::unique_ptr<MatchExpression> root) { + dassert(isContainedOr(root.get())); + + // Detach the OR from the root. + std::vector<MatchExpression*>& rootChildren = *root->getChildVector(); + std::unique_ptr<MatchExpression> orChild; + for (size_t i = 0; i < rootChildren.size(); ++i) { + if (MatchExpression::OR == rootChildren[i]->matchType()) { + orChild.reset(rootChildren[i]); + rootChildren.erase(rootChildren.begin() + i); + break; + } + } + + // We should have found an OR, and the OR should have at least 2 children. + invariant(orChild); + invariant(orChild->getChildVector()); + invariant(orChild->getChildVector()->size() > 1U); + + // AND the existing root with each OR child. 
+ std::vector<MatchExpression*>& orChildren = *orChild->getChildVector(); + for (size_t i = 0; i < orChildren.size(); ++i) { + std::unique_ptr<AndMatchExpression> ama = stdx::make_unique<AndMatchExpression>(); + ama->add(orChildren[i]); + ama->add(root->shallowClone().release()); + orChildren[i] = ama.release(); + } + + // Normalize and sort the resulting match expression. + orChild = std::unique_ptr<MatchExpression>(CanonicalQuery::normalizeTree(orChild.release())); + CanonicalQuery::sortTree(orChild.get()); + + return orChild; } Status SubplanStage::planSubqueries() { @@ -114,22 +174,26 @@ Status SubplanStage::planSubqueries() { // work that happens here, so this is needed for the time accounting to make sense. ScopedTimer timer(&_commonStats.executionTimeMillis); - MatchExpression* orExpr = _query->root(); + _orExpression = _query->root()->shallowClone(); + if (isContainedOr(_orExpression.get())) { + _orExpression = rewriteToRootedOr(std::move(_orExpression)); + invariant(CanonicalQuery::isValid(_orExpression.get(), _query->getParsed()).isOK()); + } for (size_t i = 0; i < _plannerParams.indices.size(); ++i) { const IndexEntry& ie = _plannerParams.indices[i]; _indexMap[ie.keyPattern] = i; - LOG(5) << "Subplanner: index " << i << " is " << ie.toString() << endl; + LOG(5) << "Subplanner: index " << i << " is " << ie.toString(); } const WhereCallbackReal whereCallback(_txn, _collection->ns().db()); - for (size_t i = 0; i < orExpr->numChildren(); ++i) { + for (size_t i = 0; i < _orExpression->numChildren(); ++i) { // We need a place to shove the results from planning this branch. _branchResults.push_back(new BranchPlanningResult()); BranchPlanningResult* branchResult = _branchResults.back(); - MatchExpression* orChild = orExpr->getChild(i); + MatchExpression* orChild = _orExpression->getChild(i); // Turn the i-th child into its own query. 
auto statusWithCQ = CanonicalQuery::canonicalize(*_query, orChild, whereCallback); @@ -152,12 +216,12 @@ Status SubplanStage::planSubqueries() { .isOK()) { // We have a CachedSolution. Store it for later. LOG(5) << "Subplanner: cached plan found for child " << i << " of " - << orExpr->numChildren(); + << _orExpression->numChildren(); branchResult->cachedSolution.reset(rawCS); } else { // No CachedSolution found. We'll have to plan from scratch. - LOG(5) << "Subplanner: planning child " << i << " of " << orExpr->numChildren(); + LOG(5) << "Subplanner: planning child " << i << " of " << _orExpression->numChildren(); // We don't set NO_TABLE_SCAN because peeking at the cache data will keep us from // considering any plan that's a collscan. @@ -230,15 +294,11 @@ Status tagOrChildAccordingToCache(PlanCacheIndexTree* compositeCacheData, } // namespace Status SubplanStage::choosePlanForSubqueries(PlanYieldPolicy* yieldPolicy) { - // This is what we annotate with the index selections and then turn into a solution. - unique_ptr<OrMatchExpression> orExpr( - static_cast<OrMatchExpression*>(_query->root()->shallowClone().release())); - // This is the skeleton of index selections that is inserted into the cache. - unique_ptr<PlanCacheIndexTree> cacheData(new PlanCacheIndexTree()); + std::unique_ptr<PlanCacheIndexTree> cacheData(new PlanCacheIndexTree()); - for (size_t i = 0; i < orExpr->numChildren(); ++i) { - MatchExpression* orChild = orExpr->getChild(i); + for (size_t i = 0; i < _orExpression->numChildren(); ++i) { + MatchExpression* orChild = _orExpression->getChild(i); BranchPlanningResult* branchResult = _branchResults[i]; if (branchResult->cachedSolution.get()) { @@ -319,11 +379,11 @@ Status SubplanStage::choosePlanForSubqueries(PlanYieldPolicy* yieldPolicy) { } // Must do this before using the planner functionality. - sortUsingTags(orExpr.get()); + sortUsingTags(_orExpression.get()); - // Use the cached index assignments to build solnRoot. Takes ownership of 'orExpr'. 
+ // Use the cached index assignments to build solnRoot. Takes ownership of '_orExpression'. QuerySolutionNode* solnRoot = QueryPlannerAccess::buildIndexedDataAccess( - *_query, orExpr.release(), false, _plannerParams.indices, _plannerParams); + *_query, _orExpression.release(), false, _plannerParams.indices, _plannerParams); if (NULL == solnRoot) { mongoutils::str::stream ss; @@ -343,7 +403,7 @@ Status SubplanStage::choosePlanForSubqueries(PlanYieldPolicy* yieldPolicy) { return Status(ErrorCodes::BadValue, ss); } - LOG(5) << "Subplanner: Composite solution is " << _compositeSolution->toString() << endl; + LOG(5) << "Subplanner: Composite solution is " << _compositeSolution->toString(); // Use the index tags from planning each branch to construct the composite solution, // and set that solution as our child stage. @@ -360,7 +420,7 @@ Status SubplanStage::choosePlanWholeQuery(PlanYieldPolicy* yieldPolicy) { _ws->clear(); // Use the query planning module to plan the whole query. - vector<QuerySolution*> rawSolutions; + std::vector<QuerySolution*> rawSolutions; Status status = QueryPlanner::plan(*_query, _plannerParams, &rawSolutions); if (!status.isOK()) { return Status(ErrorCodes::BadValue, @@ -499,8 +559,8 @@ void SubplanStage::invalidate(OperationContext* txn, const RecordId& dl, Invalid } } -vector<PlanStage*> SubplanStage::getChildren() const { - vector<PlanStage*> children; +std::vector<PlanStage*> SubplanStage::getChildren() const { + std::vector<PlanStage*> children; if (NULL != _child.get()) { children.push_back(_child.get()); } diff --git a/src/mongo/db/exec/subplan.h b/src/mongo/db/exec/subplan.h index fe0d322db56..c2db2c34fee 100644 --- a/src/mongo/db/exec/subplan.h +++ b/src/mongo/db/exec/subplan.h @@ -39,6 +39,7 @@ #include "mongo/db/query/query_planner_params.h" #include "mongo/db/query/query_solution.h" #include "mongo/db/record_id.h" +#include "mongo/stdx/memory.h" namespace mongo { @@ -109,6 +110,19 @@ public: */ Status pickBestPlan(PlanYieldPolicy* 
yieldPolicy); + /** + * Takes a match expression, 'root', which has a single "contained OR". This means that + * 'root' is an AND with exactly one OR child. + * + * Returns a logically equivalent query after rewriting so that the contained OR is at the + * root of the expression tree. + * + * Used internally so that the subplanner can be used for contained OR type queries, but + * exposed for testing. + */ + static std::unique_ptr<MatchExpression> rewriteToRootedOr( + std::unique_ptr<MatchExpression> root); + // // For testing. // @@ -180,6 +194,11 @@ private: // Not owned here. CanonicalQuery* _query; + // The copy of the query that we will annotate with tags and use to construct the composite + // solution. Must be a rooted $or query, or a contained $or that has been rewritten to a + // rooted $or. + std::unique_ptr<MatchExpression> _orExpression; + // If we successfully create a "composite solution" by planning each $or branch // independently, that solution is owned here. std::unique_ptr<QuerySolution> _compositeSolution; diff --git a/src/mongo/db/query/canonical_query.cpp b/src/mongo/db/query/canonical_query.cpp index e9cbd0153bd..26bbd2946ce 100644 --- a/src/mongo/db/query/canonical_query.cpp +++ b/src/mongo/db/query/canonical_query.cpp @@ -235,23 +235,26 @@ StatusWith<std::unique_ptr<CanonicalQuery>> CanonicalQuery::canonicalize( const CanonicalQuery& baseQuery, MatchExpression* root, const MatchExpressionParser::WhereCallback& whereCallback) { + // Obtain filter for our LPQ by serializing the new match expression root. + BSONObjBuilder bob; + root->toBSON(&bob); + BSONObj filter = bob.obj(); + // Pass empty sort and projection. 
BSONObj emptyObj; - // 0, 0, 0 is 'ntoskip', 'ntoreturn', and 'queryoptions' - // false, false is 'snapshot' and 'explain' auto lpqStatus = LiteParsedQuery::makeAsOpQuery(baseQuery.nss(), - 0, - 0, - 0, - baseQuery.getParsed().getFilter(), + 0, // ntoskip + 0, // ntoreturn + 0, // queryOptions + filter, baseQuery.getParsed().getProj(), baseQuery.getParsed().getSort(), - emptyObj, - emptyObj, - emptyObj, - false, - false); + emptyObj, // hint + emptyObj, // min + emptyObj, // max + false, // snapshot + baseQuery.getParsed().isExplain()); if (!lpqStatus.isOK()) { return lpqStatus.getStatus(); } @@ -568,55 +571,6 @@ Status CanonicalQuery::isValid(MatchExpression* root, const LiteParsedQuery& par return Status::OK(); } -// static -// XXX TODO: This does not belong here at all. -MatchExpression* CanonicalQuery::logicalRewrite(MatchExpression* tree) { - // Only thing we do is pull an OR up at the root. - if (MatchExpression::AND != tree->matchType()) { - return tree; - } - - // We want to bail out ASAP if we have nothing to do here. - size_t numOrs = 0; - for (size_t i = 0; i < tree->numChildren(); ++i) { - if (MatchExpression::OR == tree->getChild(i)->matchType()) { - ++numOrs; - } - } - - // Only do this for one OR right now. - if (1 != numOrs) { - return tree; - } - - // Detach the OR from the root. - invariant(NULL != tree->getChildVector()); - std::vector<MatchExpression*>& rootChildren = *tree->getChildVector(); - MatchExpression* orChild = NULL; - for (size_t i = 0; i < rootChildren.size(); ++i) { - if (MatchExpression::OR == rootChildren[i]->matchType()) { - orChild = rootChildren[i]; - rootChildren.erase(rootChildren.begin() + i); - break; - } - } - - // AND the existing root with each or child. 
- invariant(NULL != orChild); - invariant(NULL != orChild->getChildVector()); - std::vector<MatchExpression*>& orChildren = *orChild->getChildVector(); - for (size_t i = 0; i < orChildren.size(); ++i) { - AndMatchExpression* ama = new AndMatchExpression(); - ama->add(orChildren[i]); - ama->add(tree->shallowClone().release()); - orChildren[i] = ama; - } - delete tree; - - // Clean up any consequences from this tomfoolery. - return normalizeTree(orChild); -} - std::string CanonicalQuery::toString() const { str::stream ss; ss << "ns=" << _pq->ns(); diff --git a/src/mongo/db/query/canonical_query.h b/src/mongo/db/query/canonical_query.h index 81df734f1f9..34c93db9ab1 100644 --- a/src/mongo/db/query/canonical_query.h +++ b/src/mongo/db/query/canonical_query.h @@ -208,16 +208,6 @@ public: */ static size_t countNodes(const MatchExpression* root, MatchExpression::MatchType type); - /** - * Takes ownership of 'tree'. Performs some rewriting of the query to a logically - * equivalent but more digestible form. - * - * TODO: This doesn't entirely belong here. Really we'd do this while exploring - * solutions in an enumeration setting but given the current lack of pruning - * while exploring the enumeration space we do it here. - */ - static MatchExpression* logicalRewrite(MatchExpression* tree); - private: // You must go through canonicalize to create a CanonicalQuery. 
CanonicalQuery() {} diff --git a/src/mongo/db/query/canonical_query_test.cpp b/src/mongo/db/query/canonical_query_test.cpp index 35f3976afdf..7d90620e818 100644 --- a/src/mongo/db/query/canonical_query_test.cpp +++ b/src/mongo/db/query/canonical_query_test.cpp @@ -29,6 +29,7 @@ #include "mongo/db/query/canonical_query.h" #include "mongo/db/json.h" +#include "mongo/db/namespace_string.h" #include "mongo/unittest/unittest.h" namespace mongo { @@ -454,10 +455,6 @@ TEST(CanonicalQueryTest, SortTreeNumChildrenComparison) { "{$or: [{a: 1, b: 1}, {a: 1, b: 1, c: 1}]}"); } -// -// Tests for CanonicalQuery::logicalRewrite -// - /** * Utility function to create a CanonicalQuery */ @@ -479,31 +476,6 @@ std::unique_ptr<CanonicalQuery> canonicalize(const char* queryStr, return std::move(statusWithCQ.getValue()); } -// Don't do anything with a double OR. -TEST(CanonicalQueryTest, RewriteNoDoubleOr) { - string queryStr = "{$or:[{a:1}, {b:1}], $or:[{c:1}, {d:1}], e:1}"; - BSONObj queryObj = fromjson(queryStr); - unique_ptr<MatchExpression> base(parseMatchExpression(queryObj)); - unique_ptr<MatchExpression> rewrite( - CanonicalQuery::logicalRewrite(base->shallowClone().release())); - assertEquivalent(queryStr.c_str(), base.get(), rewrite.get()); -} - -// Do something with a single or. -TEST(CanonicalQueryTest, RewriteSingleOr) { - // Rewrite of this... - string queryStr = "{$or:[{a:1}, {b:1}], e:1}"; - BSONObj queryObj = fromjson(queryStr); - unique_ptr<MatchExpression> rewrite( - CanonicalQuery::logicalRewrite(parseMatchExpression(queryObj))); - - // Should look like this. - string rewriteStr = "{$or:[{a:1, e:1}, {b:1, e:1}]}"; - BSONObj rewriteObj = fromjson(rewriteStr); - unique_ptr<MatchExpression> base(parseMatchExpression(rewriteObj)); - assertEquivalent(queryStr.c_str(), base.get(), rewrite.get()); -} - /** * Test function for CanonicalQuery::normalize. 
*/ @@ -537,5 +509,25 @@ TEST(CanonicalQueryTest, NormalizeQueryTree) { "{$and: [{a: 1}, {b: 1}, {c: 1}]}"); } +TEST(CanonicalQueryTest, CanonicalizeFromBaseQuery) { + const bool isExplain = true; + const std::string cmdStr = + "{find:'bogusns', filter:{$or:[{a:1,b:1},{a:1,c:1}]}, projection:{a:1}, sort:{b:1}}"; + auto lpq = assertGet(LiteParsedQuery::makeFromFindCommand(nss, fromjson(cmdStr), isExplain)); + auto baseCq = assertGet(CanonicalQuery::canonicalize(lpq.release())); + + MatchExpression* firstClauseExpr = baseCq->root()->getChild(0); + auto childCq = assertGet(CanonicalQuery::canonicalize(*baseCq, firstClauseExpr)); + + BSONObjBuilder expectedFilterBuilder; + firstClauseExpr->toBSON(&expectedFilterBuilder); + BSONObj expectedFilter = expectedFilterBuilder.obj(); + + ASSERT_EQ(childCq->getParsed().getFilter(), expectedFilter); + ASSERT_EQ(childCq->getParsed().getProj(), baseCq->getParsed().getProj()); + ASSERT_EQ(childCq->getParsed().getSort(), baseCq->getParsed().getSort()); + ASSERT_TRUE(childCq->getParsed().isExplain()); +} + } // namespace } // namespace mongo diff --git a/src/mongo/db/query/index_bounds.cpp b/src/mongo/db/query/index_bounds.cpp index dafd6e7fce8..85708c6c09c 100644 --- a/src/mongo/db/query/index_bounds.cpp +++ b/src/mongo/db/query/index_bounds.cpp @@ -29,6 +29,7 @@ #include "mongo/db/query/index_bounds.h" #include <algorithm> +#include <tuple> #include <utility> namespace mongo { @@ -93,6 +94,33 @@ Interval IndexBounds::getInterval(size_t i, size_t j) const { } } +bool IndexBounds::operator==(const IndexBounds& other) const { + if (this->isSimpleRange != other.isSimpleRange) { + return false; + } + + if (this->isSimpleRange) { + return std::tie(this->startKey, this->endKey, this->endKeyInclusive) == + std::tie(other.startKey, other.endKey, other.endKeyInclusive); + } + + if (this->fields.size() != other.fields.size()) { + return false; + } + + for (size_t i = 0; i < this->fields.size(); ++i) { + if (this->fields[i] != other.fields[i]) 
{ + return false; + } + } + + return true; +} + +bool IndexBounds::operator!=(const IndexBounds& other) const { + return !(*this == other); +} + string OrderedIntervalList::toString() const { mongoutils::str::stream ss; ss << "['" << name << "']: "; @@ -105,6 +133,28 @@ string OrderedIntervalList::toString() const { return ss; } +bool OrderedIntervalList::operator==(const OrderedIntervalList& other) const { + if (this->name != other.name) { + return false; + } + + if (this->intervals.size() != other.intervals.size()) { + return false; + } + + for (size_t i = 0; i < this->intervals.size(); ++i) { + if (this->intervals[i] != other.intervals[i]) { + return false; + } + } + + return true; +} + +bool OrderedIntervalList::operator!=(const OrderedIntervalList& other) const { + return !(*this == other); +} + // static void OrderedIntervalList::complement() { BSONObjBuilder minBob; diff --git a/src/mongo/db/query/index_bounds.h b/src/mongo/db/query/index_bounds.h index f70f75def8b..e3fabde17b7 100644 --- a/src/mongo/db/query/index_bounds.h +++ b/src/mongo/db/query/index_bounds.h @@ -47,7 +47,6 @@ struct OrderedIntervalList { // Must be ordered according to the index order. std::vector<Interval> intervals; - // TODO: We could drop this. Only used in IndexBounds::isValidFor. std::string name; bool isValidFor(int expectedOrientation) const; @@ -65,6 +64,9 @@ struct OrderedIntervalList { * where this OIL has direction==1. */ void complement(); + + bool operator==(const OrderedIntervalList& other) const; + bool operator!=(const OrderedIntervalList& other) const; }; /** @@ -95,6 +97,9 @@ struct IndexBounds { Interval getInterval(size_t i, size_t j) const; std::string toString() const; + bool operator==(const IndexBounds& other) const; + bool operator!=(const IndexBounds& other) const; + /** * BSON format for explain. The format is an array of strings for each field. * Each string represents an interval. 
The strings use "[" and "]" if the interval diff --git a/src/mongo/db/query/index_bounds_test.cpp b/src/mongo/db/query/index_bounds_test.cpp index d1613ca0c94..f04d495cf4d 100644 --- a/src/mongo/db/query/index_bounds_test.cpp +++ b/src/mongo/db/query/index_bounds_test.cpp @@ -249,6 +249,275 @@ TEST(IndexBoundsTest, ComplementRanges2) { } // +// Equality +// + +TEST(IndexBoundsTest, IndexBoundsEqual) { + // Both sets of bounds are {a: [[1, 3), (4, 5]], b: [[1,1]]} + OrderedIntervalList oil1; + oil1.name = "a"; + oil1.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false)); + oil1.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, true)); + + OrderedIntervalList oil2; + oil2.name = "b"; + oil2.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true)); + + IndexBounds bounds1; + bounds1.fields.push_back(oil1); + bounds1.fields.push_back(oil2); + + OrderedIntervalList oil3; + oil3.name = "a"; + oil3.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false)); + oil3.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, true)); + + OrderedIntervalList oil4; + oil4.name = "b"; + oil4.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true)); + + IndexBounds bounds2; + bounds2.fields.push_back(oil3); + bounds2.fields.push_back(oil4); + + ASSERT_TRUE(bounds1 == bounds2); + ASSERT_FALSE(bounds1 != bounds2); +} + +TEST(IndexBoundsTest, EmptyBoundsEqual) { + IndexBounds bounds1; + IndexBounds bounds2; + ASSERT_TRUE(bounds1 == bounds2); + ASSERT_FALSE(bounds1 != bounds2); +} + +TEST(IndexBoundsTest, SimpleRangeBoundsEqual) { + IndexBounds bounds1; + bounds1.isSimpleRange = true; + bounds1.startKey = BSON("" << 1 << "" << 3); + bounds1.endKey = BSON("" << 2 << "" << 4); + bounds1.endKeyInclusive = false; + + IndexBounds bounds2; + bounds2.isSimpleRange = true; + bounds2.startKey = BSON("" << 1 << "" << 3); + bounds2.endKey = BSON("" << 2 << "" << 4); + bounds2.endKeyInclusive = false; + + ASSERT_TRUE(bounds1 == 
bounds2); + ASSERT_FALSE(bounds1 != bounds2); +} + +TEST(IndexBoundsTest, BoundsNotEqualDifferentFieldNames) { + // First set of bounds: {a: [[1, 3), (4, 5]], b: [[1,1]]} + OrderedIntervalList oil1; + oil1.name = "a"; + oil1.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false)); + oil1.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, true)); + + OrderedIntervalList oil2; + oil2.name = "b"; + oil2.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true)); + + IndexBounds bounds1; + bounds1.fields.push_back(oil1); + bounds1.fields.push_back(oil2); + + // Second set of bounds identical except for the second field name: + // {a: [[1, 3), (4, 5]], c: [[1,1]]} + OrderedIntervalList oil3; + oil3.name = "a"; + oil3.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false)); + oil3.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, true)); + + OrderedIntervalList oil4; + oil4.name = "c"; + oil4.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true)); + + IndexBounds bounds2; + bounds2.fields.push_back(oil3); + bounds2.fields.push_back(oil4); + + ASSERT_FALSE(bounds1 == bounds2); + ASSERT_TRUE(bounds1 != bounds2); +} + +TEST(IndexBoundsTest, BoundsNotEqualOneRangeExclusive) { + // First set of bounds: {a: [[1, 3), (4, 5]], b: [[1,1]]} + OrderedIntervalList oil1; + oil1.name = "a"; + oil1.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false)); + oil1.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, true)); + + OrderedIntervalList oil2; + oil2.name = "b"; + oil2.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true)); + + IndexBounds bounds1; + bounds1.fields.push_back(oil1); + bounds1.fields.push_back(oil2); + + // Second set of bounds identical except for (4, 5] changed to (4, 5): + // {a: [[1, 3), (4, 5]], b: [[1,1]]} + OrderedIntervalList oil3; + oil3.name = "a"; + oil3.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false)); + 
oil3.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, false)); + + OrderedIntervalList oil4; + oil4.name = "b"; + oil4.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true)); + + IndexBounds bounds2; + bounds2.fields.push_back(oil3); + bounds2.fields.push_back(oil4); + + ASSERT_FALSE(bounds1 == bounds2); + ASSERT_TRUE(bounds1 != bounds2); +} + +TEST(IndexBoundsTest, BoundsNotEqualExtraInterval) { + // First set of bounds: {a: [[1, 3), (4, 5]], b: [[1,1]]} + OrderedIntervalList oil1; + oil1.name = "a"; + oil1.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false)); + oil1.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, true)); + + OrderedIntervalList oil2; + oil2.name = "b"; + oil2.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true)); + + IndexBounds bounds1; + bounds1.fields.push_back(oil1); + bounds1.fields.push_back(oil2); + + // Second set of bounds has an additional interval for field 'a': + // {a: [[1, 3), (4, 5], (6, 7)], b: [[1,1]]} + OrderedIntervalList oil3; + oil3.name = "a"; + oil3.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false)); + oil3.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, true)); + oil3.intervals.push_back(Interval(BSON("" << 6 << "" << 7), false, false)); + + OrderedIntervalList oil4; + oil4.name = "b"; + oil4.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true)); + + IndexBounds bounds2; + bounds2.fields.push_back(oil3); + bounds2.fields.push_back(oil4); + + ASSERT_FALSE(bounds1 == bounds2); + ASSERT_TRUE(bounds1 != bounds2); +} + +TEST(IndexBoundsTest, BoundsNotEqualExtraField) { + // First set of bounds: {a: [[1, 3), (4, 5]], b: [[1,1]]} + OrderedIntervalList oil1; + oil1.name = "a"; + oil1.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false)); + oil1.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, true)); + + OrderedIntervalList oil2; + oil2.name = "b"; + 
oil2.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true)); + + IndexBounds bounds1; + bounds1.fields.push_back(oil1); + bounds1.fields.push_back(oil2); + + // Second set of bounds has an additional field 'c': + // {a: [[1, 3), (4, 5]], b: [[1,1]], c: [[1]]} + OrderedIntervalList oil3; + oil3.name = "a"; + oil3.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false)); + oil3.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, true)); + + OrderedIntervalList oil4; + oil4.name = "b"; + oil4.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true)); + + OrderedIntervalList oil5; + oil4.name = "c"; + oil4.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true)); + + IndexBounds bounds2; + bounds2.fields.push_back(oil3); + bounds2.fields.push_back(oil4); + bounds2.fields.push_back(oil5); + + ASSERT_FALSE(bounds1 == bounds2); + ASSERT_TRUE(bounds1 != bounds2); +} + +TEST(IndexBoundsTest, SimpleRangeBoundsNotEqualToRegularBounds) { + IndexBounds bounds1; + bounds1.isSimpleRange = true; + bounds1.startKey = BSON("" << 1 << "" << 3); + bounds1.endKey = BSON("" << 2 << "" << 4); + bounds1.endKeyInclusive = false; + + IndexBounds bounds2; + OrderedIntervalList oil; + oil.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true)); + bounds2.fields.push_back(oil); + + ASSERT_FALSE(bounds1 == bounds2); + ASSERT_TRUE(bounds1 != bounds2); +} + +TEST(IndexBoundsTest, SimpleRangeBoundsNotEqualDifferentStartKey) { + IndexBounds bounds1; + bounds1.isSimpleRange = true; + bounds1.startKey = BSON("" << 1 << "" << 3); + bounds1.endKey = BSON("" << 2 << "" << 4); + bounds1.endKeyInclusive = false; + + IndexBounds bounds2; + bounds1.isSimpleRange = true; + bounds1.startKey = BSON("" << 1 << "" << 1); + bounds1.endKey = BSON("" << 2 << "" << 4); + bounds1.endKeyInclusive = false; + + ASSERT_FALSE(bounds1 == bounds2); + ASSERT_TRUE(bounds1 != bounds2); +} + +TEST(IndexBoundsTest, SimpleRangeBoundsNotEqualDifferentEndKey) { + 
IndexBounds bounds1; + bounds1.isSimpleRange = true; + bounds1.startKey = BSON("" << 1 << "" << 3); + bounds1.endKey = BSON("" << 2 << "" << 4); + bounds1.endKeyInclusive = false; + + IndexBounds bounds2; + bounds1.isSimpleRange = true; + bounds1.startKey = BSON("" << 1 << "" << 3); + bounds1.endKey = BSON("" << 2 << "" << 99); + bounds1.endKeyInclusive = false; + + ASSERT_FALSE(bounds1 == bounds2); + ASSERT_TRUE(bounds1 != bounds2); +} + +TEST(IndexBoundsTest, SimpleRangeBoundsNotEqualDifferentEndKeyInclusive) { + IndexBounds bounds1; + bounds1.isSimpleRange = true; + bounds1.startKey = BSON("" << 1 << "" << 3); + bounds1.endKey = BSON("" << 2 << "" << 4); + bounds1.endKeyInclusive = false; + + IndexBounds bounds2; + bounds1.isSimpleRange = true; + bounds1.startKey = BSON("" << 1 << "" << 3); + bounds1.endKey = BSON("" << 2 << "" << 4); + bounds1.endKeyInclusive = true; + + ASSERT_FALSE(bounds1 == bounds2); + ASSERT_TRUE(bounds1 != bounds2); +} + +// // Iteration over // diff --git a/src/mongo/db/query/interval.h b/src/mongo/db/query/interval.h index 88309d33e05..9743a989321 100644 --- a/src/mongo/db/query/interval.h +++ b/src/mongo/db/query/interval.h @@ -190,4 +190,8 @@ inline bool operator==(const Interval& lhs, const Interval& rhs) { return lhs.compare(rhs) == Interval::INTERVAL_EQUALS; } +inline bool operator!=(const Interval& lhs, const Interval& rhs) { + return !(lhs == rhs); +} + } // namespace mongo diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp index c91b93bb785..fc494f255aa 100644 --- a/src/mongo/db/query/planner_access.cpp +++ b/src/mongo/db/query/planner_access.cpp @@ -28,11 +28,14 @@ #define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kQuery +#include "mongo/platform/basic.h" + #include "mongo/db/query/planner_access.h" #include <algorithm> #include <vector> +#include "mongo/base/owned_pointer_vector.h" #include "mongo/db/matcher/expression_array.h" #include 
"mongo/db/matcher/expression_geo.h" #include "mongo/db/matcher/expression_text.h" @@ -56,6 +59,51 @@ bool isTextNode(const QuerySolutionNode* node) { return STAGE_TEXT == node->getType(); } +/** + * Casts 'node' to a FetchNode* if it is a FetchNode, otherwise returns null. + */ +FetchNode* getFetchNode(QuerySolutionNode* node) { + if (STAGE_FETCH != node->getType()) { + return nullptr; + } + + return static_cast<FetchNode*>(node); +} + +/** + * If 'node' is an index scan node, casts it to IndexScanNode*. If 'node' is a FetchNode with an + * IndexScanNode child, then returns a pointer to the child index scan node. Otherwise returns + * null. + */ +const IndexScanNode* getIndexScanNode(const QuerySolutionNode* node) { + if (STAGE_IXSCAN == node->getType()) { + return static_cast<const IndexScanNode*>(node); + } else if (STAGE_FETCH == node->getType()) { + invariant(1U == node->children.size()); + const QuerySolutionNode* child = node->children[0]; + if (STAGE_IXSCAN == child->getType()) { + return static_cast<const IndexScanNode*>(child); + } + } + + return nullptr; +} + +/** + * Takes as input two query solution nodes returned by processIndexScans(). If both are + * IndexScanNode or FetchNode with an IndexScanNode child and the index scan nodes are identical + * (same bounds, same filter, same direction, etc.), then returns true. Otherwise returns false. 
+ */ +bool scansAreEquivalent(const QuerySolutionNode* lhs, const QuerySolutionNode* rhs) { + const IndexScanNode* leftIxscan = getIndexScanNode(lhs); + const IndexScanNode* rightIxscan = getIndexScanNode(rhs); + if (!leftIxscan || !rightIxscan) { + return false; + } + + return *leftIxscan == *rightIxscan; +} + } // namespace namespace mongo { @@ -567,6 +615,61 @@ void QueryPlannerAccess::findElemMatchChildren(const MatchExpression* node, } // static +std::vector<QuerySolutionNode*> QueryPlannerAccess::collapseEquivalentScans( + const std::vector<QuerySolutionNode*> scans) { + OwnedPointerVector<QuerySolutionNode> ownedScans(scans); + invariant(ownedScans.size() > 0); + + // Scans that need to be collapsed will be adjacent to each other in the list due to how we + // sort the query predicate. We step through the list, either merging the current scan into + // the last scan in 'collapsedScans', or adding a new entry to 'collapsedScans' if it can't + // be merged. + OwnedPointerVector<QuerySolutionNode> collapsedScans; + + collapsedScans.push_back(ownedScans.releaseAt(0)); + for (size_t i = 1; i < ownedScans.size(); ++i) { + if (scansAreEquivalent(collapsedScans.back(), ownedScans[i])) { + // We collapse the entry from 'ownedScans' into the back of 'collapsedScans'. + std::unique_ptr<QuerySolutionNode> collapseFrom(ownedScans.releaseAt(i)); + FetchNode* collapseFromFetch = getFetchNode(collapseFrom.get()); + FetchNode* collapseIntoFetch = getFetchNode(collapsedScans.back()); + + // If there's no filter associated with a fetch node on 'collapseFrom', all we have to + // do is clear the filter on the node that we are collapsing into. + if (!collapseFromFetch || !collapseFromFetch->filter.get()) { + if (collapseIntoFetch) { + collapseIntoFetch->filter.reset(); + } + continue; + } + + // If there's no filter associated with a fetch node on the back of the 'collapsedScans' + // list, then there's nothing more to do. 
+ if (!collapseIntoFetch || !collapseIntoFetch->filter.get()) { + continue; + } + + // Both the 'from' and 'into' nodes have filters. We join them with an + // OrMatchExpression. + std::unique_ptr<OrMatchExpression> collapsedFilter = + stdx::make_unique<OrMatchExpression>(); + collapsedFilter->add(collapseFromFetch->filter.release()); + collapsedFilter->add(collapseIntoFetch->filter.release()); + + // Normalize the filter and add it to 'into'. + collapseIntoFetch->filter.reset( + CanonicalQuery::normalizeTree(collapsedFilter.release())); + } else { + // Scans are not equivalent and can't be collapsed. + collapsedScans.push_back(ownedScans.releaseAt(i)); + } + } + + invariant(collapsedScans.size() > 0); + return collapsedScans.release(); +} + +// static bool QueryPlannerAccess::processIndexScans(const CanonicalQuery& query, MatchExpression* root, bool inArrayOperator, @@ -962,6 +1065,10 @@ QuerySolutionNode* QueryPlannerAccess::buildIndexedOr(const CanonicalQuery& quer return NULL; } + // If all index scans are identical, then we collapse them into a single scan. This prevents + // us from creating OR plans where the branches of the OR perform duplicate work. + ixscanNodes = collapseEquivalentScans(ixscanNodes); + QuerySolutionNode* orResult = NULL; // An OR of one node is just that node. diff --git a/src/mongo/db/query/planner_access.h b/src/mongo/db/query/planner_access.h index 8ab6bf9d58a..a5844f4fa58 100644 --- a/src/mongo/db/query/planner_access.h +++ b/src/mongo/db/query/planner_access.h @@ -265,6 +265,28 @@ public: std::vector<MatchExpression*>* subnodesOut); /** + * Given a list of OR-related subtrees returned by processIndexScans(), looks for logically + * equivalent IndexScanNodes and combines them. This is an optimization to avoid creating + * plans that repeat index access work. 
+ * + * Example: + * Suppose processIndexScans() returns a list of the following three query solutions: + * 1) IXSCAN (bounds: {b: [[2,2]]}) + * 2) FETCH (filter: {d:1}) -> IXSCAN (bounds: {c: [[3,3]]}) + * 3) FETCH (filter: {e:1}) -> IXSCAN (bounds: {c: [[3,3]]}) + * This method would collapse scans #2 and #3, resulting in the following output: + * 1) IXSCAN (bounds: {b: [[2,2]]}) + * 2) FETCH (filter: {$or:[{d:1}, {e:1}]}) -> IXSCAN (bounds: {c: [[3,3]]}) + * + * Used as a helper for buildIndexedOr(). + * + * Takes ownership of 'scans'. The caller assumes ownership of the pointers in the returned + * list of QuerySolutionNode*. + */ + static std::vector<QuerySolutionNode*> collapseEquivalentScans( + const std::vector<QuerySolutionNode*> scans); + + /** * Helper used by buildIndexedAnd and buildIndexedOr. * * The children of AND and OR nodes are sorted by the index that the subtree rooted at diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp index 073e60f10fa..8cffd85bd5c 100644 --- a/src/mongo/db/query/query_planner.cpp +++ b/src/mongo/db/query/query_planner.cpp @@ -662,7 +662,7 @@ Status QueryPlanner::plan(const CanonicalQuery& query, << query.root()->toString(); // If there is a GEO_NEAR it must have an index it can use directly. - MatchExpression* gnNode = NULL; + const MatchExpression* gnNode = NULL; if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR, &gnNode)) { // No index for GEO_NEAR? No query. RelevantTag* tag = static_cast<RelevantTag*>(gnNode->getTag()); @@ -677,7 +677,7 @@ Status QueryPlanner::plan(const CanonicalQuery& query, } // Likewise, if there is a TEXT it must have an index it can use directly. 
- MatchExpression* textNode = NULL; + const MatchExpression* textNode = NULL; if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT, &textNode)) { RelevantTag* tag = static_cast<RelevantTag*>(textNode->getTag()); diff --git a/src/mongo/db/query/query_planner_common.h b/src/mongo/db/query/query_planner_common.h index 4fdf8f30815..d62c9c7a3ba 100644 --- a/src/mongo/db/query/query_planner_common.h +++ b/src/mongo/db/query/query_planner_common.h @@ -44,9 +44,9 @@ public: * * If 'out' is not NULL, sets 'out' to the first node of type 'type' encountered. */ - static bool hasNode(MatchExpression* root, + static bool hasNode(const MatchExpression* root, MatchExpression::MatchType type, - MatchExpression** out = NULL) { + const MatchExpression** out = NULL) { if (type == root->matchType()) { if (NULL != out) { *out = root; diff --git a/src/mongo/db/query/query_planner_test.cpp b/src/mongo/db/query/query_planner_test.cpp index 4768eed4312..9950b295cc1 100644 --- a/src/mongo/db/query/query_planner_test.cpp +++ b/src/mongo/db/query/query_planner_test.cpp @@ -706,6 +706,128 @@ TEST_F(QueryPlannerTest, OrOfAnd6) { " b: [[1,1,true,true], [5,5,true,true]]}}}]}}}}"); } +// We do collapse OR of ANDs if branches of the OR plan are using identical index scans. 
+TEST_F(QueryPlannerTest, RootedOrOfAndCollapseIndenticalScans) { + addIndex(BSON("a" << 1 << "b" << 1)); + runQuery(fromjson("{$or: [{a:1, b:2}, {a:1, b:2}]}")); + + assertNumSolutions(2U); + assertSolutionExists("{cscan: {dir: 1}}"); + assertSolutionExists( + "{fetch: {filter: null, node: {ixscan: {pattern: {a: 1, b: 1}}," + "bounds: {a: [[1,1,true,true]], b: [[2,2,true,true]]}," + "filter: null}}}"); +} + +TEST_F(QueryPlannerTest, ContainedOrOfAndCollapseIndenticalScans) { + addIndex(BSON("a" << 1 << "b" << 1)); + runQuery(fromjson("{c: 1, $or: [{a:1, b:2}, {a:1, b:2}]}")); + + assertNumSolutions(2U); + assertSolutionExists("{cscan: {dir: 1}}"); + assertSolutionExists( + "{fetch: {filter: {c: 1}, node: {ixscan: {pattern: {a: 1, b: 1}}," + "bounds: {a: [[1,1,true,true]], b: [[2,2,true,true]]}," + "filter: null}}}"); +} + +TEST_F(QueryPlannerTest, ContainedOrOfAndCollapseIndenticalScansWithFilter) { + addIndex(BSON("a" << 1 << "b" << 1)); + runQuery(fromjson("{c: 1, $or: [{a:1, b:2}, {a:1, b:2, d:3}]}")); + + assertNumSolutions(2U); + assertSolutionExists("{cscan: {dir: 1}}"); + assertSolutionExists( + "{fetch: {filter: {c: 1}, node: {ixscan: {pattern: {a: 1, b: 1}}," + "bounds: {a: [[1,1,true,true]], b: [[2,2,true,true]]}," + "filter: null}}}"); +} + +TEST_F(QueryPlannerTest, ContainedOrOfAndCollapseIndenticalScansWithFilter2) { + addIndex(BSON("a" << 1 << "b" << 1)); + runQuery(fromjson("{c: 1, $or: [{a:{$gte:1,$lte:1}, b:2}, {a:1, b:2, d:3}]}")); + + assertNumSolutions(2U); + assertSolutionExists("{cscan: {dir: 1}}"); + assertSolutionExists( + "{fetch: {filter: {c: 1}, node: {fetch: {filter: null, node: " + "{ixscan: {pattern: {a: 1, b: 1}}," + "bounds: {a: [[1,1,true,true]], b: [[2,2,true,true]]}," + "filter: null}}}}}"); +} + +TEST_F(QueryPlannerTest, ContainedOrOfAndCollapseIdenticalScansTwoFilters) { + addIndex(BSON("a" << 1 << "b" << 1)); + runQuery(fromjson("{c: 1, $or: [{a:1, b:2, d:3}, {a:1, b:2, e:4}]}")); + + assertNumSolutions(2U); + 
assertSolutionExists("{cscan: {dir: 1}}"); + assertSolutionExists( + "{fetch: {filter: {c: 1}, node: {fetch: {filter: {$or:[{e:4},{d:3}]}," + "node: {ixscan: {pattern: {a: 1, b: 1}, filter: null," + "bounds: {a: [[1,1,true,true]], b: [[2,2,true,true]]}}}}}}}"); +} + +TEST_F(QueryPlannerTest, RootedOrOfAndCollapseScansExistingOrFilter) { + addIndex(BSON("a" << 1 << "b" << 1)); + runQuery(fromjson("{$or: [{a:1, b:2, $or: [{c:3}, {d:4}]}, {a:1, b:2, e:5}]}")); + + assertNumSolutions(2U); + assertSolutionExists("{cscan: {dir: 1}}"); + assertSolutionExists( + "{fetch: {filter: {$or: [{e:5},{c:3},{d:4}]}, node: {ixscan: " + "{filter: null, pattern: {a: 1, b: 1}, " + "bounds: {a: [[1,1,true,true]], b: [[2,2,true,true]]}}}}}"); +} + +TEST_F(QueryPlannerTest, RootedOrOfAndCollapseTwoScansButNotThird) { + addIndex(BSON("a" << 1 << "b" << 1)); + addIndex(BSON("c" << 1 << "d" << 1)); + runQuery(fromjson("{$or: [{a: 1, b: 2}, {c: 3, d: 4}, {a: 1, b: 2}]}")); + + assertNumSolutions(2U); + assertSolutionExists("{cscan: {dir: 1}}"); + assertSolutionExists( + "{fetch: {filter: null, node: {or: {nodes: [" + "{ixscan: {pattern: {a: 1, b: 1}, filter: null," + "bounds: {a: [[1,1,true,true]], b: [[2,2,true,true]]}}}," + "{ixscan: {pattern: {c: 1, d: 1}, filter: null," + "bounds: {c: [[3,3,true,true]], d: [[4,4,true,true]]}}}]}}}}"); +} + +TEST_F(QueryPlannerTest, RootedOrOfAndCollapseTwoScansButNotThirdWithFilters) { + addIndex(BSON("a" << 1 << "b" << 1)); + addIndex(BSON("c" << 1 << "d" << 1)); + runQuery(fromjson("{$or: [{a:1, b:2, e:5}, {c:3, d:4}, {a:1, b:2, f:6}]}")); + + assertNumSolutions(2U); + assertSolutionExists("{cscan: {dir: 1}}"); + assertSolutionExists( + "{fetch: {filter: null, node: {or: {nodes: [" + "{fetch: {filter: {$or: [{f:6},{e:5}]}, node: " + "{ixscan: {pattern: {a: 1, b: 1}, filter: null," + "bounds: {a: [[1,1,true,true]], b: [[2,2,true,true]]}}}}}," + "{ixscan: {pattern: {c: 1, d: 1}, filter: null," + "bounds: {c: [[3,3,true,true]], d: 
[[4,4,true,true]]}}}]}}}}"); +} + +TEST_F(QueryPlannerTest, RootedOrOfAndDontCollapseDifferentBounds) { + addIndex(BSON("a" << 1 << "b" << 1)); + addIndex(BSON("c" << 1 << "d" << 1)); + runQuery(fromjson("{$or: [{a: 1, b: 2}, {c: 3, d: 4}, {a: 1, b: 99}]}")); + + assertNumSolutions(2U); + assertSolutionExists("{cscan: {dir: 1}}"); + assertSolutionExists( + "{fetch: {filter: null, node: {or: {nodes: [" + "{ixscan: {pattern: {a: 1, b: 1}, filter: null," + "bounds: {a: [[1,1,true,true]], b: [[2,2,true,true]]}}}," + "{ixscan: {pattern: {a: 1, b: 1}, filter: null," + "bounds: {a: [[1,1,true,true]], b: [[99,99,true,true]]}}}," + "{ixscan: {pattern: {c: 1, d: 1}, filter: null," + "bounds: {c: [[3,3,true,true]], d: [[4,4,true,true]]}}}]}}}}"); +} + // SERVER-13960: properly handle $or with a mix of exact and inexact predicates. TEST_F(QueryPlannerTest, OrInexactWithExact) { addIndex(BSON("name" << 1)); diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp index 85b0cb7933c..5bc7ff5276c 100644 --- a/src/mongo/db/query/query_solution.cpp +++ b/src/mongo/db/query/query_solution.cpp @@ -578,6 +578,27 @@ QuerySolutionNode* IndexScanNode::clone() const { return copy; } +namespace { + +bool filtersAreEquivalent(const MatchExpression* lhs, const MatchExpression* rhs) { + if (!lhs && !rhs) { + return true; + } else if (!lhs || !rhs) { + return false; + } else { + return lhs->equivalent(rhs); + } +} + +} // namespace + +bool IndexScanNode::operator==(const IndexScanNode& other) const { + return filtersAreEquivalent(filter.get(), other.filter.get()) && + indexKeyPattern == other.indexKeyPattern && indexIsMultiKey == other.indexIsMultiKey && + direction == other.direction && maxScan == other.maxScan && + addKeyMetadata == other.addKeyMetadata && bounds == other.bounds; +} + // // ProjectionNode // diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h index 8de5dabb40f..c5c6ca36dd2 100644 --- 
a/src/mongo/db/query/query_solution.h +++ b/src/mongo/db/query/query_solution.h @@ -28,6 +28,7 @@ #pragma once +#include <memory> #include "mongo/db/jsobj.h" #include "mongo/db/matcher/expression.h" @@ -458,6 +459,8 @@ struct IndexScanNode : public QuerySolutionNode { QuerySolutionNode* clone() const; + bool operator==(const IndexScanNode& other) const; + BSONObjSet _sorts; BSONObj indexKeyPattern; diff --git a/src/mongo/dbtests/query_stage_subplan.cpp b/src/mongo/dbtests/query_stage_subplan.cpp index 2231240f2c4..ffe592eba90 100644 --- a/src/mongo/dbtests/query_stage_subplan.cpp +++ b/src/mongo/dbtests/query_stage_subplan.cpp @@ -26,6 +26,10 @@ * then also delete it in the license file. */ +#include "mongo/platform/basic.h" + +#include <memory> + #include "mongo/db/catalog/collection.h" #include "mongo/db/catalog/database.h" #include "mongo/db/db_raii.h" @@ -62,6 +66,21 @@ public: } protected: + /** + * Parses the json string 'findCmd', specifying a find command, to a CanonicalQuery. + */ + std::unique_ptr<CanonicalQuery> cqFromFindCommand(const std::string& findCmd) { + BSONObj cmdObj = fromjson(findCmd); + + const NamespaceString nss("testns.testcoll"); + bool isExplain = false; + auto lpq = + unittest::assertGet(LiteParsedQuery::makeFromFindCommand(nss, cmdObj, isExplain)); + + auto cq = unittest::assertGet(CanonicalQuery::canonicalize(lpq.release())); + return cq; + } + OperationContextImpl _txn; private: @@ -163,6 +182,254 @@ public: } }; +/** + * Unit test the subplan stage's canUseSubplanning() method. + */ +class QueryStageSubplanCanUseSubplanning : public QueryStageSubplanBase { +public: + void run() { + // We won't try and subplan something that doesn't have an $or. + { + std::string findCmd = "{find: 'testns', filter: {$and:[{a:1}, {b:1}]}}"; + std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd); + ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq)); + } + + // Don't try and subplan if there is no filter. 
+ { + std::string findCmd = "{find: 'testns'}"; + std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd); + ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq)); + } + + // We won't try and subplan two contained ORs. + { + std::string findCmd = + "{find: 'testns'," + "filter: {$or:[{a:1}, {b:1}], $or:[{c:1}, {d:1}], e:1}}"; + std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd); + ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq)); + } + + // Can't use subplanning if there is a hint. + { + std::string findCmd = + "{find: 'testns'," + "filter: {$or: [{a:1, b:1}, {c:1, d:1}]}," + "hint: {a:1, b:1}}"; + std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd); + ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq)); + } + + // Can't use subplanning with min. + { + std::string findCmd = + "{find: 'testns'," + "filter: {$or: [{a:1, b:1}, {c:1, d:1}]}," + "min: {a:1, b:1}}"; + std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd); + ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq)); + } + + // Can't use subplanning with max. + { + std::string findCmd = + "{find: 'testns'," + "filter: {$or: [{a:1, b:1}, {c:1, d:1}]}," + "max: {a:2, b:2}}"; + std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd); + ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq)); + } + + // Can't use subplanning with tailable. + { + std::string findCmd = + "{find: 'testns'," + "filter: {$or: [{a:1, b:1}, {c:1, d:1}]}," + "tailable: true}"; + std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd); + ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq)); + } + + // Can't use subplanning with snapshot. + { + std::string findCmd = + "{find: 'testns'," + "filter: {$or: [{a:1, b:1}, {c:1, d:1}]}," + "snapshot: true}"; + std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd); + ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq)); + } + + // Can use subplanning for rooted $or. 
+ { + std::string findCmd = + "{find: 'testns'," + "filter: {$or: [{a:1, b:1}, {c:1, d:1}]}}"; + std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd); + ASSERT_TRUE(SubplanStage::canUseSubplanning(*cq)); + + std::string findCmd2 = + "{find: 'testns'," + "filter: {$or: [{a:1}, {c:1}]}}"; + std::unique_ptr<CanonicalQuery> cq2 = cqFromFindCommand(findCmd2); + ASSERT_TRUE(SubplanStage::canUseSubplanning(*cq2)); + } + + // Can use subplanning for a single contained $or. + { + std::string findCmd = + "{find: 'testns'," + "filter: {e: 1, $or: [{a:1, b:1}, {c:1, d:1}]}}"; + std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd); + ASSERT_TRUE(SubplanStage::canUseSubplanning(*cq)); + } + + // Can use subplanning if the contained $or query has a geo predicate. + { + std::string findCmd = + "{find: 'testns'," + "filter: {loc: {$geoWithin: {$centerSphere: [[0,0], 1]}}," + "e: 1, $or: [{a:1, b:1}, {c:1, d:1}]}}"; + std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd); + ASSERT_TRUE(SubplanStage::canUseSubplanning(*cq)); + } + + // Can't use subplanning if the contained $or query also has a $text predicate. + { + std::string findCmd = + "{find: 'testns'," + "filter: {$text: {$search: 'foo'}," + "e: 1, $or: [{a:1, b:1}, {c:1, d:1}]}}"; + std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd); + ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq)); + } + + // Can't use subplanning if the contained $or query also has a $near predicate. + { + std::string findCmd = + "{find: 'testns'," + "filter: {loc: {$near: [0, 0]}," + "e: 1, $or: [{a:1, b:1}, {c:1, d:1}]}}"; + std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd); + ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq)); + } + } +}; + +/** + * Unit test the subplan stage's rewriteToRootedOr() method. 
+ */ +class QueryStageSubplanRewriteToRootedOr : public QueryStageSubplanBase { +public: + void run() { + // Rewrite (AND (OR a b) e) => (OR (AND a e) (AND b e)) + { + BSONObj queryObj = fromjson("{$or:[{a:1}, {b:1}], e:1}"); + StatusWithMatchExpression expr = MatchExpressionParser::parse(queryObj); + ASSERT_OK(expr.getStatus()); + std::unique_ptr<MatchExpression> rewrittenExpr = + SubplanStage::rewriteToRootedOr(std::move(expr.getValue())); + + std::string findCmdRewritten = + "{find: 'testns'," + "filter: {$or:[{a:1,e:1}, {b:1,e:1}]}}"; + std::unique_ptr<CanonicalQuery> cqRewritten = cqFromFindCommand(findCmdRewritten); + + ASSERT(rewrittenExpr->equivalent(cqRewritten->root())); + } + + // Rewrite (AND (OR a b) e f) => (OR (AND a e f) (AND b e f)) + { + BSONObj queryObj = fromjson("{$or:[{a:1}, {b:1}], e:1, f:1}"); + StatusWithMatchExpression expr = MatchExpressionParser::parse(queryObj); + ASSERT_OK(expr.getStatus()); + std::unique_ptr<MatchExpression> rewrittenExpr = + SubplanStage::rewriteToRootedOr(std::move(expr.getValue())); + + std::string findCmdRewritten = + "{find: 'testns'," + "filter: {$or:[{a:1,e:1,f:1}, {b:1,e:1,f:1}]}}"; + std::unique_ptr<CanonicalQuery> cqRewritten = cqFromFindCommand(findCmdRewritten); + + ASSERT(rewrittenExpr->equivalent(cqRewritten->root())); + } + + // Rewrite (AND (OR (AND a b) (AND c d) e f) => (OR (AND a b e f) (AND c d e f)) + { + BSONObj queryObj = fromjson("{$or:[{a:1,b:1}, {c:1,d:1}], e:1,f:1}"); + StatusWithMatchExpression expr = MatchExpressionParser::parse(queryObj); + ASSERT_OK(expr.getStatus()); + std::unique_ptr<MatchExpression> rewrittenExpr = + SubplanStage::rewriteToRootedOr(std::move(expr.getValue())); + + std::string findCmdRewritten = + "{find: 'testns'," + "filter: {$or:[{a:1,b:1,e:1,f:1}," + "{c:1,d:1,e:1,f:1}]}}"; + std::unique_ptr<CanonicalQuery> cqRewritten = cqFromFindCommand(findCmdRewritten); + + ASSERT(rewrittenExpr->equivalent(cqRewritten->root())); + } + } +}; + +/** + * Test the subplan stage's 
ability to answer a contained $or query. + */ +class QueryStageSubplanPlanRootedOr : public QueryStageSubplanBase { +public: + void run() { + OldClientWriteContext ctx(&_txn, ns()); + addIndex(BSON("b" << 1 << "a" << 1)); + addIndex(BSON("c" << 1 << "a" << 1)); + + BSONObj query = fromjson("{a: 1, $or: [{b: 2}, {c: 3}]}"); + + // Two of these documents match. + insert(BSON("_id" << 1 << "a" << 1 << "b" << 2)); + insert(BSON("_id" << 2 << "a" << 2 << "b" << 2)); + insert(BSON("_id" << 3 << "a" << 1 << "c" << 3)); + insert(BSON("_id" << 4 << "a" << 1 << "c" << 4)); + + auto cq = unittest::assertGet(CanonicalQuery::canonicalize(ns(), query)); + + Collection* collection = ctx.getCollection(); + + // Get planner params. + QueryPlannerParams plannerParams; + fillOutPlannerParams(&_txn, collection, cq.get(), &plannerParams); + + WorkingSet ws; + std::unique_ptr<SubplanStage> subplan( + new SubplanStage(&_txn, collection, &ws, plannerParams, cq.get())); + + // Plan selection should succeed due to falling back on regular planning. + PlanYieldPolicy yieldPolicy(NULL, PlanExecutor::YIELD_MANUAL); + ASSERT_OK(subplan->pickBestPlan(&yieldPolicy)); + + // Work the stage until it produces all results. 
+ size_t numResults = 0; + PlanStage::StageState stageState = PlanStage::NEED_TIME; + while (stageState != PlanStage::IS_EOF) { + WorkingSetID id = WorkingSet::INVALID_ID; + stageState = subplan->work(&id); + ASSERT_NE(stageState, PlanStage::DEAD); + ASSERT_NE(stageState, PlanStage::FAILURE); + + if (stageState == PlanStage::ADVANCED) { + ++numResults; + WorkingSetMember* member = ws.get(id); + ASSERT(member->hasObj()); + ASSERT(member->obj.value() == BSON("_id" << 1 << "a" << 1 << "b" << 2) || + member->obj.value() == BSON("_id" << 3 << "a" << 1 << "c" << 3)); + } + } + + ASSERT_EQ(numResults, 2U); + } +}; + class All : public Suite { public: All() : Suite("query_stage_subplan") {} @@ -170,6 +437,9 @@ public: void setupTests() { add<QueryStageSubplanGeo2dOr>(); add<QueryStageSubplanPlanFromCache>(); + add<QueryStageSubplanCanUseSubplanning>(); + add<QueryStageSubplanRewriteToRootedOr>(); + add<QueryStageSubplanPlanRootedOr>(); } }; |