author     David Storch <david.storch@10gen.com>  2015-05-29 19:15:19 -0400
committer  David Storch <david.storch@10gen.com>  2015-07-10 17:11:16 -0400
commit     15c72c8570c63e2e6ba0a3d339a8286d0be604db (patch)
tree       96cc37b3259e9ce7472132b8c414eb614317c037
parent     9c63b79a8d5c8d19663850f9d668a3581dea77d5 (diff)
download   mongo-15c72c8570c63e2e6ba0a3d339a8286d0be604db.tar.gz
SERVER-13732 rewrite contained $or queries to rooted $or in the SubplanStage
This allows queries with an $or contained within an explicit or implicit $and to be answered with more efficient plans. It also expands the use of the SubplanStage to include contained $or queries and therefore may reduce the number of plans considered for these queries.
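For example (illustrative, mirroring the new unit tests), a contained $or such as

  {e: 1, $or: [{a: 1}, {b: 1}]}

is planned as though it had been rewritten to the rooted $or

  {$or: [{a: 1, e: 1}, {b: 1, e: 1}]}

so that each branch can be planned, and its plan cached, independently.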
-rw-r--r--  src/mongo/db/exec/subplan.cpp                 118
-rw-r--r--  src/mongo/db/exec/subplan.h                    19
-rw-r--r--  src/mongo/db/query/canonical_query.cpp         74
-rw-r--r--  src/mongo/db/query/canonical_query.h           10
-rw-r--r--  src/mongo/db/query/canonical_query_test.cpp    50
-rw-r--r--  src/mongo/db/query/index_bounds.cpp            50
-rw-r--r--  src/mongo/db/query/index_bounds.h               7
-rw-r--r--  src/mongo/db/query/index_bounds_test.cpp      269
-rw-r--r--  src/mongo/db/query/interval.h                   4
-rw-r--r--  src/mongo/db/query/planner_access.cpp         107
-rw-r--r--  src/mongo/db/query/planner_access.h            22
-rw-r--r--  src/mongo/db/query/query_planner.cpp            4
-rw-r--r--  src/mongo/db/query/query_planner_common.h       4
-rw-r--r--  src/mongo/db/query/query_planner_test.cpp     122
-rw-r--r--  src/mongo/db/query/query_solution.cpp          21
-rw-r--r--  src/mongo/db/query/query_solution.h             3
-rw-r--r--  src/mongo/dbtests/query_stage_subplan.cpp     270
17 files changed, 1021 insertions(+), 133 deletions(-)
diff --git a/src/mongo/db/exec/subplan.cpp b/src/mongo/db/exec/subplan.cpp
index 352dd5ca36c..b0a9717ba64 100644
--- a/src/mongo/db/exec/subplan.cpp
+++ b/src/mongo/db/exec/subplan.cpp
@@ -40,6 +40,7 @@
#include "mongo/db/query/planner_analysis.h"
#include "mongo/db/query/planner_access.h"
#include "mongo/db/query/query_planner.h"
+#include "mongo/db/query/query_planner_common.h"
#include "mongo/db/query/stage_builder.h"
#include "mongo/stdx/memory.h"
#include "mongo/util/log.h"
@@ -51,7 +52,6 @@ using std::unique_ptr;
using std::vector;
using stdx::make_unique;
-// static
const char* SubplanStage::kStageType = "SUBPLAN";
SubplanStage::SubplanStage(OperationContext* txn,
@@ -64,21 +64,36 @@ SubplanStage::SubplanStage(OperationContext* txn,
_ws(ws),
_plannerParams(params),
_query(cq),
- _child(nullptr),
_commonStats(kStageType) {
invariant(_collection);
}
-// static
-bool SubplanStage::canUseSubplanning(const CanonicalQuery& query) {
- const LiteParsedQuery& lpq = query.getParsed();
- const MatchExpression* expr = query.root();
+namespace {
- // Only rooted ORs work with the subplan scheme.
- if (MatchExpression::OR != expr->matchType()) {
+/**
+ * Returns true if 'expr' is an AND that contains a single OR child.
+ */
+bool isContainedOr(const MatchExpression* expr) {
+ if (MatchExpression::AND != expr->matchType()) {
return false;
}
+ size_t numOrs = 0;
+ for (size_t i = 0; i < expr->numChildren(); ++i) {
+ if (MatchExpression::OR == expr->getChild(i)->matchType()) {
+ ++numOrs;
+ }
+ }
+
+ return (numOrs == 1U);
+}
+
+} // namespace
+
+bool SubplanStage::canUseSubplanning(const CanonicalQuery& query) {
+ const LiteParsedQuery& lpq = query.getParsed();
+ const MatchExpression* expr = query.root();
+
// Hint provided
if (!lpq.getHint().isEmpty()) {
return false;
@@ -106,7 +121,52 @@ bool SubplanStage::canUseSubplanning(const CanonicalQuery& query) {
return false;
}
- return true;
+ // If it's a rooted $or and has none of the disqualifications above, then subplanning is
+ // allowed.
+ if (MatchExpression::OR == expr->matchType()) {
+ return true;
+ }
+
+ // We also allow subplanning if it's a contained OR that does not have a TEXT or GEO_NEAR node.
+ const bool hasTextOrGeoNear = QueryPlannerCommon::hasNode(expr, MatchExpression::TEXT) ||
+ QueryPlannerCommon::hasNode(expr, MatchExpression::GEO_NEAR);
+ return isContainedOr(expr) && !hasTextOrGeoNear;
+}
+
+std::unique_ptr<MatchExpression> SubplanStage::rewriteToRootedOr(
+ std::unique_ptr<MatchExpression> root) {
+ dassert(isContainedOr(root.get()));
+
+ // Detach the OR from the root.
+ std::vector<MatchExpression*>& rootChildren = *root->getChildVector();
+ std::unique_ptr<MatchExpression> orChild;
+ for (size_t i = 0; i < rootChildren.size(); ++i) {
+ if (MatchExpression::OR == rootChildren[i]->matchType()) {
+ orChild.reset(rootChildren[i]);
+ rootChildren.erase(rootChildren.begin() + i);
+ break;
+ }
+ }
+
+ // We should have found an OR, and the OR should have at least 2 children.
+ invariant(orChild);
+ invariant(orChild->getChildVector());
+ invariant(orChild->getChildVector()->size() > 1U);
+
+ // AND the existing root with each OR child.
+ std::vector<MatchExpression*>& orChildren = *orChild->getChildVector();
+ for (size_t i = 0; i < orChildren.size(); ++i) {
+ std::unique_ptr<AndMatchExpression> ama = stdx::make_unique<AndMatchExpression>();
+ ama->add(orChildren[i]);
+ ama->add(root->shallowClone().release());
+ orChildren[i] = ama.release();
+ }
+
+ // Normalize and sort the resulting match expression.
+ orChild = std::unique_ptr<MatchExpression>(CanonicalQuery::normalizeTree(orChild.release()));
+ CanonicalQuery::sortTree(orChild.get());
+
+ return orChild;
}
Status SubplanStage::planSubqueries() {
@@ -114,22 +174,26 @@ Status SubplanStage::planSubqueries() {
// work that happens here, so this is needed for the time accounting to make sense.
ScopedTimer timer(&_commonStats.executionTimeMillis);
- MatchExpression* orExpr = _query->root();
+ _orExpression = _query->root()->shallowClone();
+ if (isContainedOr(_orExpression.get())) {
+ _orExpression = rewriteToRootedOr(std::move(_orExpression));
+ invariant(CanonicalQuery::isValid(_orExpression.get(), _query->getParsed()).isOK());
+ }
for (size_t i = 0; i < _plannerParams.indices.size(); ++i) {
const IndexEntry& ie = _plannerParams.indices[i];
_indexMap[ie.keyPattern] = i;
- LOG(5) << "Subplanner: index " << i << " is " << ie.toString() << endl;
+ LOG(5) << "Subplanner: index " << i << " is " << ie.toString();
}
const WhereCallbackReal whereCallback(_txn, _collection->ns().db());
- for (size_t i = 0; i < orExpr->numChildren(); ++i) {
+ for (size_t i = 0; i < _orExpression->numChildren(); ++i) {
// We need a place to shove the results from planning this branch.
_branchResults.push_back(new BranchPlanningResult());
BranchPlanningResult* branchResult = _branchResults.back();
- MatchExpression* orChild = orExpr->getChild(i);
+ MatchExpression* orChild = _orExpression->getChild(i);
// Turn the i-th child into its own query.
auto statusWithCQ = CanonicalQuery::canonicalize(*_query, orChild, whereCallback);
@@ -152,12 +216,12 @@ Status SubplanStage::planSubqueries() {
.isOK()) {
// We have a CachedSolution. Store it for later.
LOG(5) << "Subplanner: cached plan found for child " << i << " of "
- << orExpr->numChildren();
+ << _orExpression->numChildren();
branchResult->cachedSolution.reset(rawCS);
} else {
// No CachedSolution found. We'll have to plan from scratch.
- LOG(5) << "Subplanner: planning child " << i << " of " << orExpr->numChildren();
+ LOG(5) << "Subplanner: planning child " << i << " of " << _orExpression->numChildren();
// We don't set NO_TABLE_SCAN because peeking at the cache data will keep us from
// considering any plan that's a collscan.
@@ -230,15 +294,11 @@ Status tagOrChildAccordingToCache(PlanCacheIndexTree* compositeCacheData,
} // namespace
Status SubplanStage::choosePlanForSubqueries(PlanYieldPolicy* yieldPolicy) {
- // This is what we annotate with the index selections and then turn into a solution.
- unique_ptr<OrMatchExpression> orExpr(
- static_cast<OrMatchExpression*>(_query->root()->shallowClone().release()));
-
// This is the skeleton of index selections that is inserted into the cache.
- unique_ptr<PlanCacheIndexTree> cacheData(new PlanCacheIndexTree());
+ std::unique_ptr<PlanCacheIndexTree> cacheData(new PlanCacheIndexTree());
- for (size_t i = 0; i < orExpr->numChildren(); ++i) {
- MatchExpression* orChild = orExpr->getChild(i);
+ for (size_t i = 0; i < _orExpression->numChildren(); ++i) {
+ MatchExpression* orChild = _orExpression->getChild(i);
BranchPlanningResult* branchResult = _branchResults[i];
if (branchResult->cachedSolution.get()) {
@@ -319,11 +379,11 @@ Status SubplanStage::choosePlanForSubqueries(PlanYieldPolicy* yieldPolicy) {
}
// Must do this before using the planner functionality.
- sortUsingTags(orExpr.get());
+ sortUsingTags(_orExpression.get());
- // Use the cached index assignments to build solnRoot. Takes ownership of 'orExpr'.
+ // Use the cached index assignments to build solnRoot. Takes ownership of '_orExpression'.
QuerySolutionNode* solnRoot = QueryPlannerAccess::buildIndexedDataAccess(
- *_query, orExpr.release(), false, _plannerParams.indices, _plannerParams);
+ *_query, _orExpression.release(), false, _plannerParams.indices, _plannerParams);
if (NULL == solnRoot) {
mongoutils::str::stream ss;
@@ -343,7 +403,7 @@ Status SubplanStage::choosePlanForSubqueries(PlanYieldPolicy* yieldPolicy) {
return Status(ErrorCodes::BadValue, ss);
}
- LOG(5) << "Subplanner: Composite solution is " << _compositeSolution->toString() << endl;
+ LOG(5) << "Subplanner: Composite solution is " << _compositeSolution->toString();
// Use the index tags from planning each branch to construct the composite solution,
// and set that solution as our child stage.
@@ -360,7 +420,7 @@ Status SubplanStage::choosePlanWholeQuery(PlanYieldPolicy* yieldPolicy) {
_ws->clear();
// Use the query planning module to plan the whole query.
- vector<QuerySolution*> rawSolutions;
+ std::vector<QuerySolution*> rawSolutions;
Status status = QueryPlanner::plan(*_query, _plannerParams, &rawSolutions);
if (!status.isOK()) {
return Status(ErrorCodes::BadValue,
@@ -499,8 +559,8 @@ void SubplanStage::invalidate(OperationContext* txn, const RecordId& dl, Invalid
}
}
-vector<PlanStage*> SubplanStage::getChildren() const {
- vector<PlanStage*> children;
+std::vector<PlanStage*> SubplanStage::getChildren() const {
+ std::vector<PlanStage*> children;
if (NULL != _child.get()) {
children.push_back(_child.get());
}
diff --git a/src/mongo/db/exec/subplan.h b/src/mongo/db/exec/subplan.h
index fe0d322db56..c2db2c34fee 100644
--- a/src/mongo/db/exec/subplan.h
+++ b/src/mongo/db/exec/subplan.h
@@ -39,6 +39,7 @@
#include "mongo/db/query/query_planner_params.h"
#include "mongo/db/query/query_solution.h"
#include "mongo/db/record_id.h"
+#include "mongo/stdx/memory.h"
namespace mongo {
@@ -109,6 +110,19 @@ public:
*/
Status pickBestPlan(PlanYieldPolicy* yieldPolicy);
+ /**
+ * Takes a match expression, 'root', which has a single "contained OR". This means that
+ * 'root' is an AND with exactly one OR child.
+ *
+ * Returns a logically equivalent query after rewriting so that the contained OR is at the
+ * root of the expression tree.
+ *
+ * Used internally so that the subplanner can be used for contained OR type queries, but
+ * exposed for testing.
+ */
+ static std::unique_ptr<MatchExpression> rewriteToRootedOr(
+ std::unique_ptr<MatchExpression> root);
+
//
// For testing.
//
@@ -180,6 +194,11 @@ private:
// Not owned here.
CanonicalQuery* _query;
+ // The copy of the query that we will annotate with tags and use to construct the composite
+ // solution. Must be a rooted $or query, or a contained $or that has been rewritten to a
+ // rooted $or.
+ std::unique_ptr<MatchExpression> _orExpression;
+
// If we successfully create a "composite solution" by planning each $or branch
// independently, that solution is owned here.
std::unique_ptr<QuerySolution> _compositeSolution;
diff --git a/src/mongo/db/query/canonical_query.cpp b/src/mongo/db/query/canonical_query.cpp
index e9cbd0153bd..26bbd2946ce 100644
--- a/src/mongo/db/query/canonical_query.cpp
+++ b/src/mongo/db/query/canonical_query.cpp
@@ -235,23 +235,26 @@ StatusWith<std::unique_ptr<CanonicalQuery>> CanonicalQuery::canonicalize(
const CanonicalQuery& baseQuery,
MatchExpression* root,
const MatchExpressionParser::WhereCallback& whereCallback) {
+ // Obtain filter for our LPQ by serializing the new match expression root.
+ BSONObjBuilder bob;
+ root->toBSON(&bob);
+ BSONObj filter = bob.obj();
+
// Pass empty sort and projection.
BSONObj emptyObj;
- // 0, 0, 0 is 'ntoskip', 'ntoreturn', and 'queryoptions'
- // false, false is 'snapshot' and 'explain'
auto lpqStatus = LiteParsedQuery::makeAsOpQuery(baseQuery.nss(),
- 0,
- 0,
- 0,
- baseQuery.getParsed().getFilter(),
+ 0, // ntoskip
+ 0, // ntoreturn
+ 0, // queryOptions
+ filter,
baseQuery.getParsed().getProj(),
baseQuery.getParsed().getSort(),
- emptyObj,
- emptyObj,
- emptyObj,
- false,
- false);
+ emptyObj, // hint
+ emptyObj, // min
+ emptyObj, // max
+ false, // snapshot
+ baseQuery.getParsed().isExplain());
if (!lpqStatus.isOK()) {
return lpqStatus.getStatus();
}
@@ -568,55 +571,6 @@ Status CanonicalQuery::isValid(MatchExpression* root, const LiteParsedQuery& par
return Status::OK();
}
-// static
-// XXX TODO: This does not belong here at all.
-MatchExpression* CanonicalQuery::logicalRewrite(MatchExpression* tree) {
- // Only thing we do is pull an OR up at the root.
- if (MatchExpression::AND != tree->matchType()) {
- return tree;
- }
-
- // We want to bail out ASAP if we have nothing to do here.
- size_t numOrs = 0;
- for (size_t i = 0; i < tree->numChildren(); ++i) {
- if (MatchExpression::OR == tree->getChild(i)->matchType()) {
- ++numOrs;
- }
- }
-
- // Only do this for one OR right now.
- if (1 != numOrs) {
- return tree;
- }
-
- // Detach the OR from the root.
- invariant(NULL != tree->getChildVector());
- std::vector<MatchExpression*>& rootChildren = *tree->getChildVector();
- MatchExpression* orChild = NULL;
- for (size_t i = 0; i < rootChildren.size(); ++i) {
- if (MatchExpression::OR == rootChildren[i]->matchType()) {
- orChild = rootChildren[i];
- rootChildren.erase(rootChildren.begin() + i);
- break;
- }
- }
-
- // AND the existing root with each or child.
- invariant(NULL != orChild);
- invariant(NULL != orChild->getChildVector());
- std::vector<MatchExpression*>& orChildren = *orChild->getChildVector();
- for (size_t i = 0; i < orChildren.size(); ++i) {
- AndMatchExpression* ama = new AndMatchExpression();
- ama->add(orChildren[i]);
- ama->add(tree->shallowClone().release());
- orChildren[i] = ama;
- }
- delete tree;
-
- // Clean up any consequences from this tomfoolery.
- return normalizeTree(orChild);
-}
-
std::string CanonicalQuery::toString() const {
str::stream ss;
ss << "ns=" << _pq->ns();
diff --git a/src/mongo/db/query/canonical_query.h b/src/mongo/db/query/canonical_query.h
index 81df734f1f9..34c93db9ab1 100644
--- a/src/mongo/db/query/canonical_query.h
+++ b/src/mongo/db/query/canonical_query.h
@@ -208,16 +208,6 @@ public:
*/
static size_t countNodes(const MatchExpression* root, MatchExpression::MatchType type);
- /**
- * Takes ownership of 'tree'. Performs some rewriting of the query to a logically
- * equivalent but more digestible form.
- *
- * TODO: This doesn't entirely belong here. Really we'd do this while exploring
- * solutions in an enumeration setting but given the current lack of pruning
- * while exploring the enumeration space we do it here.
- */
- static MatchExpression* logicalRewrite(MatchExpression* tree);
-
private:
// You must go through canonicalize to create a CanonicalQuery.
CanonicalQuery() {}
diff --git a/src/mongo/db/query/canonical_query_test.cpp b/src/mongo/db/query/canonical_query_test.cpp
index 35f3976afdf..7d90620e818 100644
--- a/src/mongo/db/query/canonical_query_test.cpp
+++ b/src/mongo/db/query/canonical_query_test.cpp
@@ -29,6 +29,7 @@
#include "mongo/db/query/canonical_query.h"
#include "mongo/db/json.h"
+#include "mongo/db/namespace_string.h"
#include "mongo/unittest/unittest.h"
namespace mongo {
@@ -454,10 +455,6 @@ TEST(CanonicalQueryTest, SortTreeNumChildrenComparison) {
"{$or: [{a: 1, b: 1}, {a: 1, b: 1, c: 1}]}");
}
-//
-// Tests for CanonicalQuery::logicalRewrite
-//
-
/**
* Utility function to create a CanonicalQuery
*/
@@ -479,31 +476,6 @@ std::unique_ptr<CanonicalQuery> canonicalize(const char* queryStr,
return std::move(statusWithCQ.getValue());
}
-// Don't do anything with a double OR.
-TEST(CanonicalQueryTest, RewriteNoDoubleOr) {
- string queryStr = "{$or:[{a:1}, {b:1}], $or:[{c:1}, {d:1}], e:1}";
- BSONObj queryObj = fromjson(queryStr);
- unique_ptr<MatchExpression> base(parseMatchExpression(queryObj));
- unique_ptr<MatchExpression> rewrite(
- CanonicalQuery::logicalRewrite(base->shallowClone().release()));
- assertEquivalent(queryStr.c_str(), base.get(), rewrite.get());
-}
-
-// Do something with a single or.
-TEST(CanonicalQueryTest, RewriteSingleOr) {
- // Rewrite of this...
- string queryStr = "{$or:[{a:1}, {b:1}], e:1}";
- BSONObj queryObj = fromjson(queryStr);
- unique_ptr<MatchExpression> rewrite(
- CanonicalQuery::logicalRewrite(parseMatchExpression(queryObj)));
-
- // Should look like this.
- string rewriteStr = "{$or:[{a:1, e:1}, {b:1, e:1}]}";
- BSONObj rewriteObj = fromjson(rewriteStr);
- unique_ptr<MatchExpression> base(parseMatchExpression(rewriteObj));
- assertEquivalent(queryStr.c_str(), base.get(), rewrite.get());
-}
-
/**
* Test function for CanonicalQuery::normalize.
*/
@@ -537,5 +509,25 @@ TEST(CanonicalQueryTest, NormalizeQueryTree) {
"{$and: [{a: 1}, {b: 1}, {c: 1}]}");
}
+TEST(CanonicalQueryTest, CanonicalizeFromBaseQuery) {
+ const bool isExplain = true;
+ const std::string cmdStr =
+ "{find:'bogusns', filter:{$or:[{a:1,b:1},{a:1,c:1}]}, projection:{a:1}, sort:{b:1}}";
+ auto lpq = assertGet(LiteParsedQuery::makeFromFindCommand(nss, fromjson(cmdStr), isExplain));
+ auto baseCq = assertGet(CanonicalQuery::canonicalize(lpq.release()));
+
+ MatchExpression* firstClauseExpr = baseCq->root()->getChild(0);
+ auto childCq = assertGet(CanonicalQuery::canonicalize(*baseCq, firstClauseExpr));
+
+ BSONObjBuilder expectedFilterBuilder;
+ firstClauseExpr->toBSON(&expectedFilterBuilder);
+ BSONObj expectedFilter = expectedFilterBuilder.obj();
+
+ ASSERT_EQ(childCq->getParsed().getFilter(), expectedFilter);
+ ASSERT_EQ(childCq->getParsed().getProj(), baseCq->getParsed().getProj());
+ ASSERT_EQ(childCq->getParsed().getSort(), baseCq->getParsed().getSort());
+ ASSERT_TRUE(childCq->getParsed().isExplain());
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/query/index_bounds.cpp b/src/mongo/db/query/index_bounds.cpp
index dafd6e7fce8..85708c6c09c 100644
--- a/src/mongo/db/query/index_bounds.cpp
+++ b/src/mongo/db/query/index_bounds.cpp
@@ -29,6 +29,7 @@
#include "mongo/db/query/index_bounds.h"
#include <algorithm>
+#include <tuple>
#include <utility>
namespace mongo {
@@ -93,6 +94,33 @@ Interval IndexBounds::getInterval(size_t i, size_t j) const {
}
}
+bool IndexBounds::operator==(const IndexBounds& other) const {
+ if (this->isSimpleRange != other.isSimpleRange) {
+ return false;
+ }
+
+ if (this->isSimpleRange) {
+ return std::tie(this->startKey, this->endKey, this->endKeyInclusive) ==
+ std::tie(other.startKey, other.endKey, other.endKeyInclusive);
+ }
+
+ if (this->fields.size() != other.fields.size()) {
+ return false;
+ }
+
+ for (size_t i = 0; i < this->fields.size(); ++i) {
+ if (this->fields[i] != other.fields[i]) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool IndexBounds::operator!=(const IndexBounds& other) const {
+ return !(*this == other);
+}
+
string OrderedIntervalList::toString() const {
mongoutils::str::stream ss;
ss << "['" << name << "']: ";
@@ -105,6 +133,28 @@ string OrderedIntervalList::toString() const {
return ss;
}
+bool OrderedIntervalList::operator==(const OrderedIntervalList& other) const {
+ if (this->name != other.name) {
+ return false;
+ }
+
+ if (this->intervals.size() != other.intervals.size()) {
+ return false;
+ }
+
+ for (size_t i = 0; i < this->intervals.size(); ++i) {
+ if (this->intervals[i] != other.intervals[i]) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool OrderedIntervalList::operator!=(const OrderedIntervalList& other) const {
+ return !(*this == other);
+}
+
// static
void OrderedIntervalList::complement() {
BSONObjBuilder minBob;
diff --git a/src/mongo/db/query/index_bounds.h b/src/mongo/db/query/index_bounds.h
index f70f75def8b..e3fabde17b7 100644
--- a/src/mongo/db/query/index_bounds.h
+++ b/src/mongo/db/query/index_bounds.h
@@ -47,7 +47,6 @@ struct OrderedIntervalList {
// Must be ordered according to the index order.
std::vector<Interval> intervals;
- // TODO: We could drop this. Only used in IndexBounds::isValidFor.
std::string name;
bool isValidFor(int expectedOrientation) const;
@@ -65,6 +64,9 @@ struct OrderedIntervalList {
* where this OIL has direction==1.
*/
void complement();
+
+ bool operator==(const OrderedIntervalList& other) const;
+ bool operator!=(const OrderedIntervalList& other) const;
};
/**
@@ -95,6 +97,9 @@ struct IndexBounds {
Interval getInterval(size_t i, size_t j) const;
std::string toString() const;
+ bool operator==(const IndexBounds& other) const;
+ bool operator!=(const IndexBounds& other) const;
+
/**
* BSON format for explain. The format is an array of strings for each field.
* Each string represents an interval. The strings use "[" and "]" if the interval
diff --git a/src/mongo/db/query/index_bounds_test.cpp b/src/mongo/db/query/index_bounds_test.cpp
index d1613ca0c94..f04d495cf4d 100644
--- a/src/mongo/db/query/index_bounds_test.cpp
+++ b/src/mongo/db/query/index_bounds_test.cpp
@@ -249,6 +249,275 @@ TEST(IndexBoundsTest, ComplementRanges2) {
}
//
+// Equality
+//
+
+TEST(IndexBoundsTest, IndexBoundsEqual) {
+ // Both sets of bounds are {a: [[1, 3), (4, 5]], b: [[1,1]]}
+ OrderedIntervalList oil1;
+ oil1.name = "a";
+ oil1.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false));
+ oil1.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, true));
+
+ OrderedIntervalList oil2;
+ oil2.name = "b";
+ oil2.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true));
+
+ IndexBounds bounds1;
+ bounds1.fields.push_back(oil1);
+ bounds1.fields.push_back(oil2);
+
+ OrderedIntervalList oil3;
+ oil3.name = "a";
+ oil3.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false));
+ oil3.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, true));
+
+ OrderedIntervalList oil4;
+ oil4.name = "b";
+ oil4.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true));
+
+ IndexBounds bounds2;
+ bounds2.fields.push_back(oil3);
+ bounds2.fields.push_back(oil4);
+
+ ASSERT_TRUE(bounds1 == bounds2);
+ ASSERT_FALSE(bounds1 != bounds2);
+}
+
+TEST(IndexBoundsTest, EmptyBoundsEqual) {
+ IndexBounds bounds1;
+ IndexBounds bounds2;
+ ASSERT_TRUE(bounds1 == bounds2);
+ ASSERT_FALSE(bounds1 != bounds2);
+}
+
+TEST(IndexBoundsTest, SimpleRangeBoundsEqual) {
+ IndexBounds bounds1;
+ bounds1.isSimpleRange = true;
+ bounds1.startKey = BSON("" << 1 << "" << 3);
+ bounds1.endKey = BSON("" << 2 << "" << 4);
+ bounds1.endKeyInclusive = false;
+
+ IndexBounds bounds2;
+ bounds2.isSimpleRange = true;
+ bounds2.startKey = BSON("" << 1 << "" << 3);
+ bounds2.endKey = BSON("" << 2 << "" << 4);
+ bounds2.endKeyInclusive = false;
+
+ ASSERT_TRUE(bounds1 == bounds2);
+ ASSERT_FALSE(bounds1 != bounds2);
+}
+
+TEST(IndexBoundsTest, BoundsNotEqualDifferentFieldNames) {
+ // First set of bounds: {a: [[1, 3), (4, 5]], b: [[1,1]]}
+ OrderedIntervalList oil1;
+ oil1.name = "a";
+ oil1.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false));
+ oil1.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, true));
+
+ OrderedIntervalList oil2;
+ oil2.name = "b";
+ oil2.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true));
+
+ IndexBounds bounds1;
+ bounds1.fields.push_back(oil1);
+ bounds1.fields.push_back(oil2);
+
+ // Second set of bounds identical except for the second field name:
+ // {a: [[1, 3), (4, 5]], c: [[1,1]]}
+ OrderedIntervalList oil3;
+ oil3.name = "a";
+ oil3.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false));
+ oil3.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, true));
+
+ OrderedIntervalList oil4;
+ oil4.name = "c";
+ oil4.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true));
+
+ IndexBounds bounds2;
+ bounds2.fields.push_back(oil3);
+ bounds2.fields.push_back(oil4);
+
+ ASSERT_FALSE(bounds1 == bounds2);
+ ASSERT_TRUE(bounds1 != bounds2);
+}
+
+TEST(IndexBoundsTest, BoundsNotEqualOneRangeExclusive) {
+ // First set of bounds: {a: [[1, 3), (4, 5]], b: [[1,1]]}
+ OrderedIntervalList oil1;
+ oil1.name = "a";
+ oil1.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false));
+ oil1.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, true));
+
+ OrderedIntervalList oil2;
+ oil2.name = "b";
+ oil2.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true));
+
+ IndexBounds bounds1;
+ bounds1.fields.push_back(oil1);
+ bounds1.fields.push_back(oil2);
+
+ // Second set of bounds identical except for (4, 5] changed to (4, 5):
+ // {a: [[1, 3), (4, 5)], b: [[1,1]]}
+ OrderedIntervalList oil3;
+ oil3.name = "a";
+ oil3.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false));
+ oil3.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, false));
+
+ OrderedIntervalList oil4;
+ oil4.name = "b";
+ oil4.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true));
+
+ IndexBounds bounds2;
+ bounds2.fields.push_back(oil3);
+ bounds2.fields.push_back(oil4);
+
+ ASSERT_FALSE(bounds1 == bounds2);
+ ASSERT_TRUE(bounds1 != bounds2);
+}
+
+TEST(IndexBoundsTest, BoundsNotEqualExtraInterval) {
+ // First set of bounds: {a: [[1, 3), (4, 5]], b: [[1,1]]}
+ OrderedIntervalList oil1;
+ oil1.name = "a";
+ oil1.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false));
+ oil1.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, true));
+
+ OrderedIntervalList oil2;
+ oil2.name = "b";
+ oil2.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true));
+
+ IndexBounds bounds1;
+ bounds1.fields.push_back(oil1);
+ bounds1.fields.push_back(oil2);
+
+ // Second set of bounds has an additional interval for field 'a':
+ // {a: [[1, 3), (4, 5], (6, 7)], b: [[1,1]]}
+ OrderedIntervalList oil3;
+ oil3.name = "a";
+ oil3.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false));
+ oil3.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, true));
+ oil3.intervals.push_back(Interval(BSON("" << 6 << "" << 7), false, false));
+
+ OrderedIntervalList oil4;
+ oil4.name = "b";
+ oil4.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true));
+
+ IndexBounds bounds2;
+ bounds2.fields.push_back(oil3);
+ bounds2.fields.push_back(oil4);
+
+ ASSERT_FALSE(bounds1 == bounds2);
+ ASSERT_TRUE(bounds1 != bounds2);
+}
+
+TEST(IndexBoundsTest, BoundsNotEqualExtraField) {
+ // First set of bounds: {a: [[1, 3), (4, 5]], b: [[1,1]]}
+ OrderedIntervalList oil1;
+ oil1.name = "a";
+ oil1.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false));
+ oil1.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, true));
+
+ OrderedIntervalList oil2;
+ oil2.name = "b";
+ oil2.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true));
+
+ IndexBounds bounds1;
+ bounds1.fields.push_back(oil1);
+ bounds1.fields.push_back(oil2);
+
+ // Second set of bounds has an additional field 'c':
+ // {a: [[1, 3), (4, 5]], b: [[1,1]], c: [[1,1]]}
+ OrderedIntervalList oil3;
+ oil3.name = "a";
+ oil3.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false));
+ oil3.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, true));
+
+ OrderedIntervalList oil4;
+ oil4.name = "b";
+ oil4.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true));
+
+ OrderedIntervalList oil5;
+ oil5.name = "c";
+ oil5.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true));
+
+ IndexBounds bounds2;
+ bounds2.fields.push_back(oil3);
+ bounds2.fields.push_back(oil4);
+ bounds2.fields.push_back(oil5);
+
+ ASSERT_FALSE(bounds1 == bounds2);
+ ASSERT_TRUE(bounds1 != bounds2);
+}
+
+TEST(IndexBoundsTest, SimpleRangeBoundsNotEqualToRegularBounds) {
+ IndexBounds bounds1;
+ bounds1.isSimpleRange = true;
+ bounds1.startKey = BSON("" << 1 << "" << 3);
+ bounds1.endKey = BSON("" << 2 << "" << 4);
+ bounds1.endKeyInclusive = false;
+
+ IndexBounds bounds2;
+ OrderedIntervalList oil;
+ oil.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true));
+ bounds2.fields.push_back(oil);
+
+ ASSERT_FALSE(bounds1 == bounds2);
+ ASSERT_TRUE(bounds1 != bounds2);
+}
+
+TEST(IndexBoundsTest, SimpleRangeBoundsNotEqualDifferentStartKey) {
+ IndexBounds bounds1;
+ bounds1.isSimpleRange = true;
+ bounds1.startKey = BSON("" << 1 << "" << 3);
+ bounds1.endKey = BSON("" << 2 << "" << 4);
+ bounds1.endKeyInclusive = false;
+
+ IndexBounds bounds2;
+ bounds2.isSimpleRange = true;
+ bounds2.startKey = BSON("" << 1 << "" << 1);
+ bounds2.endKey = BSON("" << 2 << "" << 4);
+ bounds2.endKeyInclusive = false;
+
+ ASSERT_FALSE(bounds1 == bounds2);
+ ASSERT_TRUE(bounds1 != bounds2);
+}
+
+TEST(IndexBoundsTest, SimpleRangeBoundsNotEqualDifferentEndKey) {
+ IndexBounds bounds1;
+ bounds1.isSimpleRange = true;
+ bounds1.startKey = BSON("" << 1 << "" << 3);
+ bounds1.endKey = BSON("" << 2 << "" << 4);
+ bounds1.endKeyInclusive = false;
+
+ IndexBounds bounds2;
+ bounds2.isSimpleRange = true;
+ bounds2.startKey = BSON("" << 1 << "" << 3);
+ bounds2.endKey = BSON("" << 2 << "" << 99);
+ bounds2.endKeyInclusive = false;
+
+ ASSERT_FALSE(bounds1 == bounds2);
+ ASSERT_TRUE(bounds1 != bounds2);
+}
+
+TEST(IndexBoundsTest, SimpleRangeBoundsNotEqualDifferentEndKeyInclusive) {
+ IndexBounds bounds1;
+ bounds1.isSimpleRange = true;
+ bounds1.startKey = BSON("" << 1 << "" << 3);
+ bounds1.endKey = BSON("" << 2 << "" << 4);
+ bounds1.endKeyInclusive = false;
+
+ IndexBounds bounds2;
+ bounds2.isSimpleRange = true;
+ bounds2.startKey = BSON("" << 1 << "" << 3);
+ bounds2.endKey = BSON("" << 2 << "" << 4);
+ bounds2.endKeyInclusive = true;
+
+ ASSERT_FALSE(bounds1 == bounds2);
+ ASSERT_TRUE(bounds1 != bounds2);
+}
+
+//
// Iteration over
//
diff --git a/src/mongo/db/query/interval.h b/src/mongo/db/query/interval.h
index 88309d33e05..9743a989321 100644
--- a/src/mongo/db/query/interval.h
+++ b/src/mongo/db/query/interval.h
@@ -190,4 +190,8 @@ inline bool operator==(const Interval& lhs, const Interval& rhs) {
return lhs.compare(rhs) == Interval::INTERVAL_EQUALS;
}
+inline bool operator!=(const Interval& lhs, const Interval& rhs) {
+ return !(lhs == rhs);
+}
+
} // namespace mongo
diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp
index c91b93bb785..fc494f255aa 100644
--- a/src/mongo/db/query/planner_access.cpp
+++ b/src/mongo/db/query/planner_access.cpp
@@ -28,11 +28,14 @@
#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kQuery
+#include "mongo/platform/basic.h"
+
#include "mongo/db/query/planner_access.h"
#include <algorithm>
#include <vector>
+#include "mongo/base/owned_pointer_vector.h"
#include "mongo/db/matcher/expression_array.h"
#include "mongo/db/matcher/expression_geo.h"
#include "mongo/db/matcher/expression_text.h"
@@ -56,6 +59,51 @@ bool isTextNode(const QuerySolutionNode* node) {
return STAGE_TEXT == node->getType();
}
+/**
+ * Casts 'node' to a FetchNode* if it is a FetchNode, otherwise returns null.
+ */
+FetchNode* getFetchNode(QuerySolutionNode* node) {
+ if (STAGE_FETCH != node->getType()) {
+ return nullptr;
+ }
+
+ return static_cast<FetchNode*>(node);
+}
+
+/**
+ * If 'node' is an index scan node, casts it to IndexScanNode*. If 'node' is a FetchNode with an
+ * IndexScanNode child, then returns a pointer to the child index scan node. Otherwise returns
+ * null.
+ */
+const IndexScanNode* getIndexScanNode(const QuerySolutionNode* node) {
+ if (STAGE_IXSCAN == node->getType()) {
+ return static_cast<const IndexScanNode*>(node);
+ } else if (STAGE_FETCH == node->getType()) {
+ invariant(1U == node->children.size());
+ const QuerySolutionNode* child = node->children[0];
+ if (STAGE_IXSCAN == child->getType()) {
+ return static_cast<const IndexScanNode*>(child);
+ }
+ }
+
+ return nullptr;
+}
+
+/**
+ * Takes as input two query solution nodes returned by processIndexScans(). If both are
+ * IndexScanNode or FetchNode with an IndexScanNode child and the index scan nodes are identical
+ * (same bounds, same filter, same direction, etc.), then returns true. Otherwise returns false.
+ */
+bool scansAreEquivalent(const QuerySolutionNode* lhs, const QuerySolutionNode* rhs) {
+ const IndexScanNode* leftIxscan = getIndexScanNode(lhs);
+ const IndexScanNode* rightIxscan = getIndexScanNode(rhs);
+ if (!leftIxscan || !rightIxscan) {
+ return false;
+ }
+
+ return *leftIxscan == *rightIxscan;
+}
+
} // namespace
namespace mongo {
@@ -567,6 +615,61 @@ void QueryPlannerAccess::findElemMatchChildren(const MatchExpression* node,
}
// static
+std::vector<QuerySolutionNode*> QueryPlannerAccess::collapseEquivalentScans(
+ const std::vector<QuerySolutionNode*> scans) {
+ OwnedPointerVector<QuerySolutionNode> ownedScans(scans);
+ invariant(ownedScans.size() > 0);
+
+ // Scans that need to be collapsed will be adjacent to each other in the list due to how we
+ // sort the query predicate. We step through the list, either merging the current scan into
+ // the last scan in 'collapsedScans', or adding a new entry to 'collapsedScans' if it can't
+ // be merged.
+ OwnedPointerVector<QuerySolutionNode> collapsedScans;
+
+ collapsedScans.push_back(ownedScans.releaseAt(0));
+ for (size_t i = 1; i < ownedScans.size(); ++i) {
+ if (scansAreEquivalent(collapsedScans.back(), ownedScans[i])) {
+ // We collapse the entry from 'ownedScans' into the back of 'collapsedScans'.
+ std::unique_ptr<QuerySolutionNode> collapseFrom(ownedScans.releaseAt(i));
+ FetchNode* collapseFromFetch = getFetchNode(collapseFrom.get());
+ FetchNode* collapseIntoFetch = getFetchNode(collapsedScans.back());
+
+ // If there's no filter associated with a fetch node on 'collapseFrom', all we have to
+ // do is clear the filter on the node that we are collapsing into.
+ if (!collapseFromFetch || !collapseFromFetch->filter.get()) {
+ if (collapseIntoFetch) {
+ collapseIntoFetch->filter.reset();
+ }
+ continue;
+ }
+
+ // If there's no filter associated with a fetch node on the back of the 'collapsedScans'
+ // list, then there's nothing more to do.
+ if (!collapseIntoFetch || !collapseIntoFetch->filter.get()) {
+ continue;
+ }
+
+ // Both the 'from' and 'into' nodes have filters. We join them with an
+ // OrMatchExpression.
+ std::unique_ptr<OrMatchExpression> collapsedFilter =
+ stdx::make_unique<OrMatchExpression>();
+ collapsedFilter->add(collapseFromFetch->filter.release());
+ collapsedFilter->add(collapseIntoFetch->filter.release());
+
+ // Normalize the filter and add it to 'into'.
+ collapseIntoFetch->filter.reset(
+ CanonicalQuery::normalizeTree(collapsedFilter.release()));
+ } else {
+ // Scans are not equivalent and can't be collapsed.
+ collapsedScans.push_back(ownedScans.releaseAt(i));
+ }
+ }
+
+ invariant(collapsedScans.size() > 0);
+ return collapsedScans.release();
+}
+
+// static
bool QueryPlannerAccess::processIndexScans(const CanonicalQuery& query,
MatchExpression* root,
bool inArrayOperator,
@@ -962,6 +1065,10 @@ QuerySolutionNode* QueryPlannerAccess::buildIndexedOr(const CanonicalQuery& quer
return NULL;
}
+ // If all index scans are identical, then we collapse them into a single scan. This prevents
+ // us from creating OR plans where the branches of the OR perform duplicate work.
+ ixscanNodes = collapseEquivalentScans(ixscanNodes);
+
QuerySolutionNode* orResult = NULL;
// An OR of one node is just that node.
diff --git a/src/mongo/db/query/planner_access.h b/src/mongo/db/query/planner_access.h
index 8ab6bf9d58a..a5844f4fa58 100644
--- a/src/mongo/db/query/planner_access.h
+++ b/src/mongo/db/query/planner_access.h
@@ -265,6 +265,28 @@ public:
std::vector<MatchExpression*>* subnodesOut);
/**
+ * Given a list of OR-related subtrees returned by processIndexScans(), looks for logically
+ * equivalent IndexScanNodes and combines them. This is an optimization to avoid creating
+ * plans that repeat index access work.
+ *
+ * Example:
+ * Suppose processIndexScans() returns a list of the following three query solutions:
+ * 1) IXSCAN (bounds: {b: [[2,2]]})
+ * 2) FETCH (filter: {d:1}) -> IXSCAN (bounds: {c: [[3,3]]})
+ * 3) FETCH (filter: {e:1}) -> IXSCAN (bounds: {c: [[3,3]]})
+ * This method would collapse scans #2 and #3, resulting in the following output:
+ * 1) IXSCAN (bounds: {b: [[2,2]]})
+ * 2) FETCH (filter: {$or:[{d:1}, {e:1}]}) -> IXSCAN (bounds: {c: [[3,3]]})
+ *
+ * Used as a helper for buildIndexedOr().
+ *
+ * Takes ownership of 'scans'. The caller assumes ownership of the pointers in the returned
+ * list of QuerySolutionNode*.
+ */
+ static std::vector<QuerySolutionNode*> collapseEquivalentScans(
+ const std::vector<QuerySolutionNode*> scans);
+
+ /**
* Helper used by buildIndexedAnd and buildIndexedOr.
*
* The children of AND and OR nodes are sorted by the index that the subtree rooted at
diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp
index 073e60f10fa..8cffd85bd5c 100644
--- a/src/mongo/db/query/query_planner.cpp
+++ b/src/mongo/db/query/query_planner.cpp
@@ -662,7 +662,7 @@ Status QueryPlanner::plan(const CanonicalQuery& query,
<< query.root()->toString();
// If there is a GEO_NEAR it must have an index it can use directly.
- MatchExpression* gnNode = NULL;
+ const MatchExpression* gnNode = NULL;
if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR, &gnNode)) {
// No index for GEO_NEAR? No query.
RelevantTag* tag = static_cast<RelevantTag*>(gnNode->getTag());
@@ -677,7 +677,7 @@ Status QueryPlanner::plan(const CanonicalQuery& query,
}
// Likewise, if there is a TEXT it must have an index it can use directly.
- MatchExpression* textNode = NULL;
+ const MatchExpression* textNode = NULL;
if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT, &textNode)) {
RelevantTag* tag = static_cast<RelevantTag*>(textNode->getTag());
diff --git a/src/mongo/db/query/query_planner_common.h b/src/mongo/db/query/query_planner_common.h
index 4fdf8f30815..d62c9c7a3ba 100644
--- a/src/mongo/db/query/query_planner_common.h
+++ b/src/mongo/db/query/query_planner_common.h
@@ -44,9 +44,9 @@ public:
*
* If 'out' is not NULL, sets 'out' to the first node of type 'type' encountered.
*/
- static bool hasNode(MatchExpression* root,
+ static bool hasNode(const MatchExpression* root,
MatchExpression::MatchType type,
- MatchExpression** out = NULL) {
+ const MatchExpression** out = NULL) {
if (type == root->matchType()) {
if (NULL != out) {
*out = root;
diff --git a/src/mongo/db/query/query_planner_test.cpp b/src/mongo/db/query/query_planner_test.cpp
index 4768eed4312..9950b295cc1 100644
--- a/src/mongo/db/query/query_planner_test.cpp
+++ b/src/mongo/db/query/query_planner_test.cpp
@@ -706,6 +706,128 @@ TEST_F(QueryPlannerTest, OrOfAnd6) {
" b: [[1,1,true,true], [5,5,true,true]]}}}]}}}}");
}
+// We do collapse OR of ANDs if branches of the OR plan are using identical index scans.
+TEST_F(QueryPlannerTest, RootedOrOfAndCollapseIdenticalScans) {
+ addIndex(BSON("a" << 1 << "b" << 1));
+ runQuery(fromjson("{$or: [{a:1, b:2}, {a:1, b:2}]}"));
+
+ assertNumSolutions(2U);
+ assertSolutionExists("{cscan: {dir: 1}}");
+ assertSolutionExists(
+ "{fetch: {filter: null, node: {ixscan: {pattern: {a: 1, b: 1},"
+ "bounds: {a: [[1,1,true,true]], b: [[2,2,true,true]]},"
+ "filter: null}}}}");
+}
+
+TEST_F(QueryPlannerTest, ContainedOrOfAndCollapseIdenticalScans) {
+ addIndex(BSON("a" << 1 << "b" << 1));
+ runQuery(fromjson("{c: 1, $or: [{a:1, b:2}, {a:1, b:2}]}"));
+
+ assertNumSolutions(2U);
+ assertSolutionExists("{cscan: {dir: 1}}");
+ assertSolutionExists(
+ "{fetch: {filter: {c: 1}, node: {ixscan: {pattern: {a: 1, b: 1},"
+ "bounds: {a: [[1,1,true,true]], b: [[2,2,true,true]]},"
+ "filter: null}}}}");
+}
+
+TEST_F(QueryPlannerTest, ContainedOrOfAndCollapseIdenticalScansWithFilter) {
+ addIndex(BSON("a" << 1 << "b" << 1));
+ runQuery(fromjson("{c: 1, $or: [{a:1, b:2}, {a:1, b:2, d:3}]}"));
+
+ assertNumSolutions(2U);
+ assertSolutionExists("{cscan: {dir: 1}}");
+ assertSolutionExists(
+ "{fetch: {filter: {c: 1}, node: {ixscan: {pattern: {a: 1, b: 1},"
+ "bounds: {a: [[1,1,true,true]], b: [[2,2,true,true]]},"
+ "filter: null}}}}");
+}
+
+TEST_F(QueryPlannerTest, ContainedOrOfAndCollapseIdenticalScansWithFilter2) {
+ addIndex(BSON("a" << 1 << "b" << 1));
+ runQuery(fromjson("{c: 1, $or: [{a:{$gte:1,$lte:1}, b:2}, {a:1, b:2, d:3}]}"));
+
+ assertNumSolutions(2U);
+ assertSolutionExists("{cscan: {dir: 1}}");
+ assertSolutionExists(
+ "{fetch: {filter: {c: 1}, node: {fetch: {filter: null, node: "
+ "{ixscan: {pattern: {a: 1, b: 1},"
+ "bounds: {a: [[1,1,true,true]], b: [[2,2,true,true]]},"
+ "filter: null}}}}}}");
+}
+
+TEST_F(QueryPlannerTest, ContainedOrOfAndCollapseIdenticalScansTwoFilters) {
+ addIndex(BSON("a" << 1 << "b" << 1));
+ runQuery(fromjson("{c: 1, $or: [{a:1, b:2, d:3}, {a:1, b:2, e:4}]}"));
+
+ assertNumSolutions(2U);
+ assertSolutionExists("{cscan: {dir: 1}}");
+ assertSolutionExists(
+ "{fetch: {filter: {c: 1}, node: {fetch: {filter: {$or:[{e:4},{d:3}]},"
+ "node: {ixscan: {pattern: {a: 1, b: 1}, filter: null,"
+ "bounds: {a: [[1,1,true,true]], b: [[2,2,true,true]]}}}}}}}");
+}
+
+TEST_F(QueryPlannerTest, RootedOrOfAndCollapseScansExistingOrFilter) {
+ addIndex(BSON("a" << 1 << "b" << 1));
+ runQuery(fromjson("{$or: [{a:1, b:2, $or: [{c:3}, {d:4}]}, {a:1, b:2, e:5}]}"));
+
+ assertNumSolutions(2U);
+ assertSolutionExists("{cscan: {dir: 1}}");
+ assertSolutionExists(
+ "{fetch: {filter: {$or: [{e:5},{c:3},{d:4}]}, node: {ixscan: "
+ "{filter: null, pattern: {a: 1, b: 1}, "
+ "bounds: {a: [[1,1,true,true]], b: [[2,2,true,true]]}}}}}");
+}
+
+TEST_F(QueryPlannerTest, RootedOrOfAndCollapseTwoScansButNotThird) {
+ addIndex(BSON("a" << 1 << "b" << 1));
+ addIndex(BSON("c" << 1 << "d" << 1));
+ runQuery(fromjson("{$or: [{a: 1, b: 2}, {c: 3, d: 4}, {a: 1, b: 2}]}"));
+
+ assertNumSolutions(2U);
+ assertSolutionExists("{cscan: {dir: 1}}");
+ assertSolutionExists(
+ "{fetch: {filter: null, node: {or: {nodes: ["
+ "{ixscan: {pattern: {a: 1, b: 1}, filter: null,"
+ "bounds: {a: [[1,1,true,true]], b: [[2,2,true,true]]}}},"
+ "{ixscan: {pattern: {c: 1, d: 1}, filter: null,"
+ "bounds: {c: [[3,3,true,true]], d: [[4,4,true,true]]}}}]}}}}");
+}
+
+TEST_F(QueryPlannerTest, RootedOrOfAndCollapseTwoScansButNotThirdWithFilters) {
+ addIndex(BSON("a" << 1 << "b" << 1));
+ addIndex(BSON("c" << 1 << "d" << 1));
+ runQuery(fromjson("{$or: [{a:1, b:2, e:5}, {c:3, d:4}, {a:1, b:2, f:6}]}"));
+
+ assertNumSolutions(2U);
+ assertSolutionExists("{cscan: {dir: 1}}");
+ assertSolutionExists(
+ "{fetch: {filter: null, node: {or: {nodes: ["
+ "{fetch: {filter: {$or: [{f:6},{e:5}]}, node: "
+ "{ixscan: {pattern: {a: 1, b: 1}, filter: null,"
+ "bounds: {a: [[1,1,true,true]], b: [[2,2,true,true]]}}}}},"
+ "{ixscan: {pattern: {c: 1, d: 1}, filter: null,"
+ "bounds: {c: [[3,3,true,true]], d: [[4,4,true,true]]}}}]}}}}");
+}
+
+TEST_F(QueryPlannerTest, RootedOrOfAndDontCollapseDifferentBounds) {
+ addIndex(BSON("a" << 1 << "b" << 1));
+ addIndex(BSON("c" << 1 << "d" << 1));
+ runQuery(fromjson("{$or: [{a: 1, b: 2}, {c: 3, d: 4}, {a: 1, b: 99}]}"));
+
+ assertNumSolutions(2U);
+ assertSolutionExists("{cscan: {dir: 1}}");
+ assertSolutionExists(
+ "{fetch: {filter: null, node: {or: {nodes: ["
+ "{ixscan: {pattern: {a: 1, b: 1}, filter: null,"
+ "bounds: {a: [[1,1,true,true]], b: [[2,2,true,true]]}}},"
+ "{ixscan: {pattern: {a: 1, b: 1}, filter: null,"
+ "bounds: {a: [[1,1,true,true]], b: [[99,99,true,true]]}}},"
+ "{ixscan: {pattern: {c: 1, d: 1}, filter: null,"
+ "bounds: {c: [[3,3,true,true]], d: [[4,4,true,true]]}}}]}}}}");
+}
+
// SERVER-13960: properly handle $or with a mix of exact and inexact predicates.
TEST_F(QueryPlannerTest, OrInexactWithExact) {
addIndex(BSON("name" << 1));
diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp
index 85b0cb7933c..5bc7ff5276c 100644
--- a/src/mongo/db/query/query_solution.cpp
+++ b/src/mongo/db/query/query_solution.cpp
@@ -578,6 +578,27 @@ QuerySolutionNode* IndexScanNode::clone() const {
return copy;
}
+namespace {
+
+bool filtersAreEquivalent(const MatchExpression* lhs, const MatchExpression* rhs) {
+ if (!lhs && !rhs) {
+ return true;
+ } else if (!lhs || !rhs) {
+ return false;
+ } else {
+ return lhs->equivalent(rhs);
+ }
+}
+
+} // namespace
+
+bool IndexScanNode::operator==(const IndexScanNode& other) const {
+ return filtersAreEquivalent(filter.get(), other.filter.get()) &&
+ indexKeyPattern == other.indexKeyPattern && indexIsMultiKey == other.indexIsMultiKey &&
+ direction == other.direction && maxScan == other.maxScan &&
+ addKeyMetadata == other.addKeyMetadata && bounds == other.bounds;
+}
+
//
// ProjectionNode
//
diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h
index 8de5dabb40f..c5c6ca36dd2 100644
--- a/src/mongo/db/query/query_solution.h
+++ b/src/mongo/db/query/query_solution.h
@@ -28,6 +28,7 @@
#pragma once
+#include <memory>
#include "mongo/db/jsobj.h"
#include "mongo/db/matcher/expression.h"
@@ -458,6 +459,8 @@ struct IndexScanNode : public QuerySolutionNode {
QuerySolutionNode* clone() const;
+ bool operator==(const IndexScanNode& other) const;
+
BSONObjSet _sorts;
BSONObj indexKeyPattern;
diff --git a/src/mongo/dbtests/query_stage_subplan.cpp b/src/mongo/dbtests/query_stage_subplan.cpp
index 2231240f2c4..ffe592eba90 100644
--- a/src/mongo/dbtests/query_stage_subplan.cpp
+++ b/src/mongo/dbtests/query_stage_subplan.cpp
@@ -26,6 +26,10 @@
* then also delete it in the license file.
*/
+#include "mongo/platform/basic.h"
+
+#include <memory>
+
#include "mongo/db/catalog/collection.h"
#include "mongo/db/catalog/database.h"
#include "mongo/db/db_raii.h"
@@ -62,6 +66,21 @@ public:
}
protected:
+ /**
+ * Parses the json string 'findCmd', specifying a find command, to a CanonicalQuery.
+ */
+ std::unique_ptr<CanonicalQuery> cqFromFindCommand(const std::string& findCmd) {
+ BSONObj cmdObj = fromjson(findCmd);
+
+ const NamespaceString nss("testns.testcoll");
+ bool isExplain = false;
+ auto lpq =
+ unittest::assertGet(LiteParsedQuery::makeFromFindCommand(nss, cmdObj, isExplain));
+
+ auto cq = unittest::assertGet(CanonicalQuery::canonicalize(lpq.release()));
+ return cq;
+ }
+
OperationContextImpl _txn;
private:
@@ -163,6 +182,254 @@ public:
}
};
+/**
+ * Unit test the subplan stage's canUseSubplanning() method.
+ */
+class QueryStageSubplanCanUseSubplanning : public QueryStageSubplanBase {
+public:
+ void run() {
+ // We won't try and subplan something that doesn't have an $or.
+ {
+ std::string findCmd = "{find: 'testns', filter: {$and:[{a:1}, {b:1}]}}";
+ std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd);
+ ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq));
+ }
+
+ // Don't try and subplan if there is no filter.
+ {
+ std::string findCmd = "{find: 'testns'}";
+ std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd);
+ ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq));
+ }
+
+ // We won't try and subplan two contained ORs.
+ {
+ std::string findCmd =
+ "{find: 'testns',"
+ "filter: {$or:[{a:1}, {b:1}], $or:[{c:1}, {d:1}], e:1}}";
+ std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd);
+ ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq));
+ }
+
+ // Can't use subplanning if there is a hint.
+ {
+ std::string findCmd =
+ "{find: 'testns',"
+ "filter: {$or: [{a:1, b:1}, {c:1, d:1}]},"
+ "hint: {a:1, b:1}}";
+ std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd);
+ ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq));
+ }
+
+ // Can't use subplanning with min.
+ {
+ std::string findCmd =
+ "{find: 'testns',"
+ "filter: {$or: [{a:1, b:1}, {c:1, d:1}]},"
+ "min: {a:1, b:1}}";
+ std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd);
+ ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq));
+ }
+
+ // Can't use subplanning with max.
+ {
+ std::string findCmd =
+ "{find: 'testns',"
+ "filter: {$or: [{a:1, b:1}, {c:1, d:1}]},"
+ "max: {a:2, b:2}}";
+ std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd);
+ ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq));
+ }
+
+ // Can't use subplanning with tailable.
+ {
+ std::string findCmd =
+ "{find: 'testns',"
+ "filter: {$or: [{a:1, b:1}, {c:1, d:1}]},"
+ "tailable: true}";
+ std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd);
+ ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq));
+ }
+
+ // Can't use subplanning with snapshot.
+ {
+ std::string findCmd =
+ "{find: 'testns',"
+ "filter: {$or: [{a:1, b:1}, {c:1, d:1}]},"
+ "snapshot: true}";
+ std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd);
+ ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq));
+ }
+
+ // Can use subplanning for rooted $or.
+ {
+ std::string findCmd =
+ "{find: 'testns',"
+ "filter: {$or: [{a:1, b:1}, {c:1, d:1}]}}";
+ std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd);
+ ASSERT_TRUE(SubplanStage::canUseSubplanning(*cq));
+
+ std::string findCmd2 =
+ "{find: 'testns',"
+ "filter: {$or: [{a:1}, {c:1}]}}";
+ std::unique_ptr<CanonicalQuery> cq2 = cqFromFindCommand(findCmd2);
+ ASSERT_TRUE(SubplanStage::canUseSubplanning(*cq2));
+ }
+
+ // Can use subplanning for a single contained $or.
+ {
+ std::string findCmd =
+ "{find: 'testns',"
+ "filter: {e: 1, $or: [{a:1, b:1}, {c:1, d:1}]}}";
+ std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd);
+ ASSERT_TRUE(SubplanStage::canUseSubplanning(*cq));
+ }
+
+ // Can use subplanning if the contained $or query has a geo predicate.
+ {
+ std::string findCmd =
+ "{find: 'testns',"
+ "filter: {loc: {$geoWithin: {$centerSphere: [[0,0], 1]}},"
+ "e: 1, $or: [{a:1, b:1}, {c:1, d:1}]}}";
+ std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd);
+ ASSERT_TRUE(SubplanStage::canUseSubplanning(*cq));
+ }
+
+ // Can't use subplanning if the contained $or query also has a $text predicate.
+ {
+ std::string findCmd =
+ "{find: 'testns',"
+ "filter: {$text: {$search: 'foo'},"
+ "e: 1, $or: [{a:1, b:1}, {c:1, d:1}]}}";
+ std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd);
+ ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq));
+ }
+
+ // Can't use subplanning if the contained $or query also has a $near predicate.
+ {
+ std::string findCmd =
+ "{find: 'testns',"
+ "filter: {loc: {$near: [0, 0]},"
+ "e: 1, $or: [{a:1, b:1}, {c:1, d:1}]}}";
+ std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd);
+ ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq));
+ }
+ }
+};
+
+/**
+ * Unit test the subplan stage's rewriteToRootedOr() method.
+ */
+class QueryStageSubplanRewriteToRootedOr : public QueryStageSubplanBase {
+public:
+ void run() {
+ // Rewrite (AND (OR a b) e) => (OR (AND a e) (AND b e))
+ {
+ BSONObj queryObj = fromjson("{$or:[{a:1}, {b:1}], e:1}");
+ StatusWithMatchExpression expr = MatchExpressionParser::parse(queryObj);
+ ASSERT_OK(expr.getStatus());
+ std::unique_ptr<MatchExpression> rewrittenExpr =
+ SubplanStage::rewriteToRootedOr(std::move(expr.getValue()));
+
+ std::string findCmdRewritten =
+ "{find: 'testns',"
+ "filter: {$or:[{a:1,e:1}, {b:1,e:1}]}}";
+ std::unique_ptr<CanonicalQuery> cqRewritten = cqFromFindCommand(findCmdRewritten);
+
+ ASSERT(rewrittenExpr->equivalent(cqRewritten->root()));
+ }
+
+ // Rewrite (AND (OR a b) e f) => (OR (AND a e f) (AND b e f))
+ {
+ BSONObj queryObj = fromjson("{$or:[{a:1}, {b:1}], e:1, f:1}");
+ StatusWithMatchExpression expr = MatchExpressionParser::parse(queryObj);
+ ASSERT_OK(expr.getStatus());
+ std::unique_ptr<MatchExpression> rewrittenExpr =
+ SubplanStage::rewriteToRootedOr(std::move(expr.getValue()));
+
+ std::string findCmdRewritten =
+ "{find: 'testns',"
+ "filter: {$or:[{a:1,e:1,f:1}, {b:1,e:1,f:1}]}}";
+ std::unique_ptr<CanonicalQuery> cqRewritten = cqFromFindCommand(findCmdRewritten);
+
+ ASSERT(rewrittenExpr->equivalent(cqRewritten->root()));
+ }
+
+ // Rewrite (AND (OR (AND a b) (AND c d)) e f) => (OR (AND a b e f) (AND c d e f))
+ {
+ BSONObj queryObj = fromjson("{$or:[{a:1,b:1}, {c:1,d:1}], e:1,f:1}");
+ StatusWithMatchExpression expr = MatchExpressionParser::parse(queryObj);
+ ASSERT_OK(expr.getStatus());
+ std::unique_ptr<MatchExpression> rewrittenExpr =
+ SubplanStage::rewriteToRootedOr(std::move(expr.getValue()));
+
+ std::string findCmdRewritten =
+ "{find: 'testns',"
+ "filter: {$or:[{a:1,b:1,e:1,f:1},"
+ "{c:1,d:1,e:1,f:1}]}}";
+ std::unique_ptr<CanonicalQuery> cqRewritten = cqFromFindCommand(findCmdRewritten);
+
+ ASSERT(rewrittenExpr->equivalent(cqRewritten->root()));
+ }
+ }
+};
+
+/**
+ * Test the subplan stage's ability to answer a contained $or query.
+ */
+class QueryStageSubplanPlanRootedOr : public QueryStageSubplanBase {
+public:
+ void run() {
+ OldClientWriteContext ctx(&_txn, ns());
+ addIndex(BSON("b" << 1 << "a" << 1));
+ addIndex(BSON("c" << 1 << "a" << 1));
+
+ BSONObj query = fromjson("{a: 1, $or: [{b: 2}, {c: 3}]}");
+
+ // Two of these documents match.
+ insert(BSON("_id" << 1 << "a" << 1 << "b" << 2));
+ insert(BSON("_id" << 2 << "a" << 2 << "b" << 2));
+ insert(BSON("_id" << 3 << "a" << 1 << "c" << 3));
+ insert(BSON("_id" << 4 << "a" << 1 << "c" << 4));
+
+ auto cq = unittest::assertGet(CanonicalQuery::canonicalize(ns(), query));
+
+ Collection* collection = ctx.getCollection();
+
+ // Get planner params.
+ QueryPlannerParams plannerParams;
+ fillOutPlannerParams(&_txn, collection, cq.get(), &plannerParams);
+
+ WorkingSet ws;
+ std::unique_ptr<SubplanStage> subplan(
+ new SubplanStage(&_txn, collection, &ws, plannerParams, cq.get()));
+
+ // Plan selection should succeed: each branch of the contained $or can use one of the indices.
+ PlanYieldPolicy yieldPolicy(NULL, PlanExecutor::YIELD_MANUAL);
+ ASSERT_OK(subplan->pickBestPlan(&yieldPolicy));
+
+ // Work the stage until it produces all results.
+ size_t numResults = 0;
+ PlanStage::StageState stageState = PlanStage::NEED_TIME;
+ while (stageState != PlanStage::IS_EOF) {
+ WorkingSetID id = WorkingSet::INVALID_ID;
+ stageState = subplan->work(&id);
+ ASSERT_NE(stageState, PlanStage::DEAD);
+ ASSERT_NE(stageState, PlanStage::FAILURE);
+
+ if (stageState == PlanStage::ADVANCED) {
+ ++numResults;
+ WorkingSetMember* member = ws.get(id);
+ ASSERT(member->hasObj());
+ ASSERT(member->obj.value() == BSON("_id" << 1 << "a" << 1 << "b" << 2) ||
+ member->obj.value() == BSON("_id" << 3 << "a" << 1 << "c" << 3));
+ }
+ }
+
+ ASSERT_EQ(numResults, 2U);
+ }
+};
+
class All : public Suite {
public:
All() : Suite("query_stage_subplan") {}
@@ -170,6 +437,9 @@ public:
void setupTests() {
add<QueryStageSubplanGeo2dOr>();
add<QueryStageSubplanPlanFromCache>();
+ add<QueryStageSubplanCanUseSubplanning>();
+ add<QueryStageSubplanRewriteToRootedOr>();
+ add<QueryStageSubplanPlanRootedOr>();
}
};