author     David Storch <david.storch@10gen.com>  2015-05-29 19:15:19 -0400
committer  David Storch <david.storch@10gen.com>  2015-07-10 17:11:16 -0400
commit     15c72c8570c63e2e6ba0a3d339a8286d0be604db (patch)
tree       96cc37b3259e9ce7472132b8c414eb614317c037
parent     9c63b79a8d5c8d19663850f9d668a3581dea77d5 (diff)
download   mongo-15c72c8570c63e2e6ba0a3d339a8286d0be604db.tar.gz
SERVER-13732 rewrite contained $or queries to rooted $or in the SubplanStage
This allows queries with an $or contained within an explicit or implicit $and to be answered with more efficient plans. It also expands the use of the SubplanStage to include contained $or queries and therefore may reduce the number of plans considered for these queries.
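For example (illustrative, mirroring the new unit tests), a contained $or such as

  {e: 1, $or: [{a: 1}, {b: 1}]}

is planned as though it had been rewritten to the rooted $or

  {$or: [{a: 1, e: 1}, {b: 1, e: 1}]}

so that each branch can be planned, and its plan cached, independently.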
-rw-r--r--  src/mongo/db/exec/subplan.cpp                 118
-rw-r--r--  src/mongo/db/exec/subplan.h                    19
-rw-r--r--  src/mongo/db/query/canonical_query.cpp         74
-rw-r--r--  src/mongo/db/query/canonical_query.h           10
-rw-r--r--  src/mongo/db/query/canonical_query_test.cpp    50
-rw-r--r--  src/mongo/db/query/index_bounds.cpp            50
-rw-r--r--  src/mongo/db/query/index_bounds.h               7
-rw-r--r--  src/mongo/db/query/index_bounds_test.cpp      269
-rw-r--r--  src/mongo/db/query/interval.h                   4
-rw-r--r--  src/mongo/db/query/planner_access.cpp         107
-rw-r--r--  src/mongo/db/query/planner_access.h            22
-rw-r--r--  src/mongo/db/query/query_planner.cpp            4
-rw-r--r--  src/mongo/db/query/query_planner_common.h       4
-rw-r--r--  src/mongo/db/query/query_planner_test.cpp     122
-rw-r--r--  src/mongo/db/query/query_solution.cpp          21
-rw-r--r--  src/mongo/db/query/query_solution.h             3
-rw-r--r--  src/mongo/dbtests/query_stage_subplan.cpp     270
17 files changed, 1021 insertions(+), 133 deletions(-)
diff --git a/src/mongo/db/exec/subplan.cpp b/src/mongo/db/exec/subplan.cpp
index 352dd5ca36c..b0a9717ba64 100644
--- a/src/mongo/db/exec/subplan.cpp
+++ b/src/mongo/db/exec/subplan.cpp
@@ -40,6 +40,7 @@
#include "mongo/db/query/planner_analysis.h"
#include "mongo/db/query/planner_access.h"
#include "mongo/db/query/query_planner.h"
+#include "mongo/db/query/query_planner_common.h"
#include "mongo/db/query/stage_builder.h"
#include "mongo/stdx/memory.h"
#include "mongo/util/log.h"
@@ -51,7 +52,6 @@ using std::unique_ptr;
using std::vector;
using stdx::make_unique;
-// static
const char* SubplanStage::kStageType = "SUBPLAN";
SubplanStage::SubplanStage(OperationContext* txn,
@@ -64,21 +64,36 @@ SubplanStage::SubplanStage(OperationContext* txn,
_ws(ws),
_plannerParams(params),
_query(cq),
- _child(nullptr),
_commonStats(kStageType) {
invariant(_collection);
}
-// static
-bool SubplanStage::canUseSubplanning(const CanonicalQuery& query) {
- const LiteParsedQuery& lpq = query.getParsed();
- const MatchExpression* expr = query.root();
+namespace {
- // Only rooted ORs work with the subplan scheme.
- if (MatchExpression::OR != expr->matchType()) {
+/**
+ * Returns true if 'expr' is an AND that contains a single OR child.
+ */
+bool isContainedOr(const MatchExpression* expr) {
+ if (MatchExpression::AND != expr->matchType()) {
return false;
}
+ size_t numOrs = 0;
+ for (size_t i = 0; i < expr->numChildren(); ++i) {
+ if (MatchExpression::OR == expr->getChild(i)->matchType()) {
+ ++numOrs;
+ }
+ }
+
+ return (numOrs == 1U);
+}
+
+} // namespace
+
+bool SubplanStage::canUseSubplanning(const CanonicalQuery& query) {
+ const LiteParsedQuery& lpq = query.getParsed();
+ const MatchExpression* expr = query.root();
+
// Hint provided
if (!lpq.getHint().isEmpty()) {
return false;
@@ -106,7 +121,52 @@ bool SubplanStage::canUseSubplanning(const CanonicalQuery& query) {
return false;
}
- return true;
+ // If it's a rooted $or and has none of the disqualifications above, then subplanning is
+ // allowed.
+ if (MatchExpression::OR == expr->matchType()) {
+ return true;
+ }
+
+ // We also allow subplanning if it's a contained OR that does not have a TEXT or GEO_NEAR node.
+ const bool hasTextOrGeoNear = QueryPlannerCommon::hasNode(expr, MatchExpression::TEXT) ||
+ QueryPlannerCommon::hasNode(expr, MatchExpression::GEO_NEAR);
+ return isContainedOr(expr) && !hasTextOrGeoNear;
+}
+
+std::unique_ptr<MatchExpression> SubplanStage::rewriteToRootedOr(
+ std::unique_ptr<MatchExpression> root) {
+ dassert(isContainedOr(root.get()));
+
+ // Detach the OR from the root.
+ std::vector<MatchExpression*>& rootChildren = *root->getChildVector();
+ std::unique_ptr<MatchExpression> orChild;
+ for (size_t i = 0; i < rootChildren.size(); ++i) {
+ if (MatchExpression::OR == rootChildren[i]->matchType()) {
+ orChild.reset(rootChildren[i]);
+ rootChildren.erase(rootChildren.begin() + i);
+ break;
+ }
+ }
+
+ // We should have found an OR, and the OR should have at least 2 children.
+ invariant(orChild);
+ invariant(orChild->getChildVector());
+ invariant(orChild->getChildVector()->size() > 1U);
+
+ // AND the existing root with each OR child.
+ std::vector<MatchExpression*>& orChildren = *orChild->getChildVector();
+ for (size_t i = 0; i < orChildren.size(); ++i) {
+ std::unique_ptr<AndMatchExpression> ama = stdx::make_unique<AndMatchExpression>();
+ ama->add(orChildren[i]);
+ ama->add(root->shallowClone().release());
+ orChildren[i] = ama.release();
+ }
+
+ // Normalize and sort the resulting match expression.
+ orChild = std::unique_ptr<MatchExpression>(CanonicalQuery::normalizeTree(orChild.release()));
+ CanonicalQuery::sortTree(orChild.get());
+
+ return orChild;
}
Status SubplanStage::planSubqueries() {
@@ -114,22 +174,26 @@ Status SubplanStage::planSubqueries() {
// work that happens here, so this is needed for the time accounting to make sense.
ScopedTimer timer(&_commonStats.executionTimeMillis);
- MatchExpression* orExpr = _query->root();
+ _orExpression = _query->root()->shallowClone();
+ if (isContainedOr(_orExpression.get())) {
+ _orExpression = rewriteToRootedOr(std::move(_orExpression));
+ invariant(CanonicalQuery::isValid(_orExpression.get(), _query->getParsed()).isOK());
+ }
for (size_t i = 0; i < _plannerParams.indices.size(); ++i) {
const IndexEntry& ie = _plannerParams.indices[i];
_indexMap[ie.keyPattern] = i;
- LOG(5) << "Subplanner: index " << i << " is " << ie.toString() << endl;
+ LOG(5) << "Subplanner: index " << i << " is " << ie.toString();
}
const WhereCallbackReal whereCallback(_txn, _collection->ns().db());
- for (size_t i = 0; i < orExpr->numChildren(); ++i) {
+ for (size_t i = 0; i < _orExpression->numChildren(); ++i) {
// We need a place to shove the results from planning this branch.
_branchResults.push_back(new BranchPlanningResult());
BranchPlanningResult* branchResult = _branchResults.back();
- MatchExpression* orChild = orExpr->getChild(i);
+ MatchExpression* orChild = _orExpression->getChild(i);
// Turn the i-th child into its own query.
auto statusWithCQ = CanonicalQuery::canonicalize(*_query, orChild, whereCallback);
@@ -152,12 +216,12 @@ Status SubplanStage::planSubqueries() {
.isOK()) {
// We have a CachedSolution. Store it for later.
LOG(5) << "Subplanner: cached plan found for child " << i << " of "
- << orExpr->numChildren();
+ << _orExpression->numChildren();
branchResult->cachedSolution.reset(rawCS);
} else {
// No CachedSolution found. We'll have to plan from scratch.
- LOG(5) << "Subplanner: planning child " << i << " of " << orExpr->numChildren();
+ LOG(5) << "Subplanner: planning child " << i << " of " << _orExpression->numChildren();
// We don't set NO_TABLE_SCAN because peeking at the cache data will keep us from
// considering any plan that's a collscan.
@@ -230,15 +294,11 @@ Status tagOrChildAccordingToCache(PlanCacheIndexTree* compositeCacheData,
} // namespace
Status SubplanStage::choosePlanForSubqueries(PlanYieldPolicy* yieldPolicy) {
- // This is what we annotate with the index selections and then turn into a solution.
- unique_ptr<OrMatchExpression> orExpr(
- static_cast<OrMatchExpression*>(_query->root()->shallowClone().release()));
-
// This is the skeleton of index selections that is inserted into the cache.
- unique_ptr<PlanCacheIndexTree> cacheData(new PlanCacheIndexTree());
+ std::unique_ptr<PlanCacheIndexTree> cacheData(new PlanCacheIndexTree());
- for (size_t i = 0; i < orExpr->numChildren(); ++i) {
- MatchExpression* orChild = orExpr->getChild(i);
+ for (size_t i = 0; i < _orExpression->numChildren(); ++i) {
+ MatchExpression* orChild = _orExpression->getChild(i);
BranchPlanningResult* branchResult = _branchResults[i];
if (branchResult->cachedSolution.get()) {
@@ -319,11 +379,11 @@ Status SubplanStage::choosePlanForSubqueries(PlanYieldPolicy* yieldPolicy) {
}
// Must do this before using the planner functionality.
- sortUsingTags(orExpr.get());
+ sortUsingTags(_orExpression.get());
- // Use the cached index assignments to build solnRoot. Takes ownership of 'orExpr'.
+ // Use the cached index assignments to build solnRoot. Takes ownership of '_orExpression'.
QuerySolutionNode* solnRoot = QueryPlannerAccess::buildIndexedDataAccess(
- *_query, orExpr.release(), false, _plannerParams.indices, _plannerParams);
+ *_query, _orExpression.release(), false, _plannerParams.indices, _plannerParams);
if (NULL == solnRoot) {
mongoutils::str::stream ss;
@@ -343,7 +403,7 @@ Status SubplanStage::choosePlanForSubqueries(PlanYieldPolicy* yieldPolicy) {
return Status(ErrorCodes::BadValue, ss);
}
- LOG(5) << "Subplanner: Composite solution is " << _compositeSolution->toString() << endl;
+ LOG(5) << "Subplanner: Composite solution is " << _compositeSolution->toString();
// Use the index tags from planning each branch to construct the composite solution,
// and set that solution as our child stage.
@@ -360,7 +420,7 @@ Status SubplanStage::choosePlanWholeQuery(PlanYieldPolicy* yieldPolicy) {
_ws->clear();
// Use the query planning module to plan the whole query.
- vector<QuerySolution*> rawSolutions;
+ std::vector<QuerySolution*> rawSolutions;
Status status = QueryPlanner::plan(*_query, _plannerParams, &rawSolutions);
if (!status.isOK()) {
return Status(ErrorCodes::BadValue,
@@ -499,8 +559,8 @@ void SubplanStage::invalidate(OperationContext* txn, const RecordId& dl, Invalid
}
}
-vector<PlanStage*> SubplanStage::getChildren() const {
- vector<PlanStage*> children;
+std::vector<PlanStage*> SubplanStage::getChildren() const {
+ std::vector<PlanStage*> children;
if (NULL != _child.get()) {
children.push_back(_child.get());
}
diff --git a/src/mongo/db/exec/subplan.h b/src/mongo/db/exec/subplan.h
index fe0d322db56..c2db2c34fee 100644
--- a/src/mongo/db/exec/subplan.h
+++ b/src/mongo/db/exec/subplan.h
@@ -39,6 +39,7 @@
#include "mongo/db/query/query_planner_params.h"
#include "mongo/db/query/query_solution.h"
#include "mongo/db/record_id.h"
+#include "mongo/stdx/memory.h"
namespace mongo {
@@ -109,6 +110,19 @@ public:
*/
Status pickBestPlan(PlanYieldPolicy* yieldPolicy);
+ /**
+ * Takes a match expression, 'root', which has a single "contained OR". This means that
+ * 'root' is an AND with exactly one OR child.
+ *
+ * Returns a logically equivalent query after rewriting so that the contained OR is at the
+ * root of the expression tree.
+ *
+ * Used internally so that the subplanner can be used for contained OR type queries, but
+ * exposed for testing.
+ */
+ static std::unique_ptr<MatchExpression> rewriteToRootedOr(
+ std::unique_ptr<MatchExpression> root);
+
//
// For testing.
//
@@ -180,6 +194,11 @@ private:
// Not owned here.
CanonicalQuery* _query;
+ // The copy of the query that we will annotate with tags and use to construct the composite
+ // solution. Must be a rooted $or query, or a contained $or that has been rewritten to a
+ // rooted $or.
+ std::unique_ptr<MatchExpression> _orExpression;
+
// If we successfully create a "composite solution" by planning each $or branch
// independently, that solution is owned here.
std::unique_ptr<QuerySolution> _compositeSolution;
diff --git a/src/mongo/db/query/canonical_query.cpp b/src/mongo/db/query/canonical_query.cpp
index e9cbd0153bd..26bbd2946ce 100644
--- a/src/mongo/db/query/canonical_query.cpp
+++ b/src/mongo/db/query/canonical_query.cpp
@@ -235,23 +235,26 @@ StatusWith<std::unique_ptr<CanonicalQuery>> CanonicalQuery::canonicalize(
const CanonicalQuery& baseQuery,
MatchExpression* root,
const MatchExpressionParser::WhereCallback& whereCallback) {
+ // Obtain filter for our LPQ by serializing the new match expression root.
+ BSONObjBuilder bob;
+ root->toBSON(&bob);
+ BSONObj filter = bob.obj();
+
// Pass empty sort and projection.
BSONObj emptyObj;
- // 0, 0, 0 is 'ntoskip', 'ntoreturn', and 'queryoptions'
- // false, false is 'snapshot' and 'explain'
auto lpqStatus = LiteParsedQuery::makeAsOpQuery(baseQuery.nss(),
- 0,
- 0,
- 0,
- baseQuery.getParsed().getFilter(),
+ 0, // ntoskip
+ 0, // ntoreturn
+ 0, // queryOptions
+ filter,
baseQuery.getParsed().getProj(),
baseQuery.getParsed().getSort(),
- emptyObj,
- emptyObj,
- emptyObj,
- false,
- false);
+ emptyObj, // hint
+ emptyObj, // min
+ emptyObj, // max
+ false, // snapshot
+ baseQuery.getParsed().isExplain());
if (!lpqStatus.isOK()) {
return lpqStatus.getStatus();
}
@@ -568,55 +571,6 @@ Status CanonicalQuery::isValid(MatchExpression* root, const LiteParsedQuery& par
return Status::OK();
}
-// static
-// XXX TODO: This does not belong here at all.
-MatchExpression* CanonicalQuery::logicalRewrite(MatchExpression* tree) {
- // Only thing we do is pull an OR up at the root.
- if (MatchExpression::AND != tree->matchType()) {
- return tree;
- }
-
- // We want to bail out ASAP if we have nothing to do here.
- size_t numOrs = 0;
- for (size_t i = 0; i < tree->numChildren(); ++i) {
- if (MatchExpression::OR == tree->getChild(i)->matchType()) {
- ++numOrs;
- }
- }
-
- // Only do this for one OR right now.
- if (1 != numOrs) {
- return tree;
- }
-
- // Detach the OR from the root.
- invariant(NULL != tree->getChildVector());
- std::vector<MatchExpression*>& rootChildren = *tree->getChildVector();
- MatchExpression* orChild = NULL;
- for (size_t i = 0; i < rootChildren.size(); ++i) {
- if (MatchExpression::OR == rootChildren[i]->matchType()) {
- orChild = rootChildren[i];
- rootChildren.erase(rootChildren.begin() + i);
- break;
- }
- }
-
- // AND the existing root with each or child.
- invariant(NULL != orChild);
- invariant(NULL != orChild->getChildVector());
- std::vector<MatchExpression*>& orChildren = *orChild->getChildVector();
- for (size_t i = 0; i < orChildren.size(); ++i) {
- AndMatchExpression* ama = new AndMatchExpression();
- ama->add(orChildren[i]);
- ama->add(tree->shallowClone().release());
- orChildren[i] = ama;
- }
- delete tree;
-
- // Clean up any consequences from this tomfoolery.
- return normalizeTree(orChild);
-}
-
std::string CanonicalQuery::toString() const {
str::stream ss;
ss << "ns=" << _pq->ns();
diff --git a/src/mongo/db/query/canonical_query.h b/src/mongo/db/query/canonical_query.h
index 81df734f1f9..34c93db9ab1 100644
--- a/src/mongo/db/query/canonical_query.h
+++ b/src/mongo/db/query/canonical_query.h
@@ -208,16 +208,6 @@ public:
*/
static size_t countNodes(const MatchExpression* root, MatchExpression::MatchType type);
- /**
- * Takes ownership of 'tree'. Performs some rewriting of the query to a logically
- * equivalent but more digestible form.
- *
- * TODO: This doesn't entirely belong here. Really we'd do this while exploring
- * solutions in an enumeration setting but given the current lack of pruning
- * while exploring the enumeration space we do it here.
- */
- static MatchExpression* logicalRewrite(MatchExpression* tree);
-
private:
// You must go through canonicalize to create a CanonicalQuery.
CanonicalQuery() {}
diff --git a/src/mongo/db/query/canonical_query_test.cpp b/src/mongo/db/query/canonical_query_test.cpp
index 35f3976afdf..7d90620e818 100644
--- a/src/mongo/db/query/canonical_query_test.cpp
+++ b/src/mongo/db/query/canonical_query_test.cpp
@@ -29,6 +29,7 @@
#include "mongo/db/query/canonical_query.h"
#include "mongo/db/json.h"
+#include "mongo/db/namespace_string.h"
#include "mongo/unittest/unittest.h"
namespace mongo {
@@ -454,10 +455,6 @@ TEST(CanonicalQueryTest, SortTreeNumChildrenComparison) {
"{$or: [{a: 1, b: 1}, {a: 1, b: 1, c: 1}]}");
}
-//
-// Tests for CanonicalQuery::logicalRewrite
-//
-
/**
* Utility function to create a CanonicalQuery
*/
@@ -479,31 +476,6 @@ std::unique_ptr<CanonicalQuery> canonicalize(const char* queryStr,
return std::move(statusWithCQ.getValue());
}
-// Don't do anything with a double OR.
-TEST(CanonicalQueryTest, RewriteNoDoubleOr) {
- string queryStr = "{$or:[{a:1}, {b:1}], $or:[{c:1}, {d:1}], e:1}";
- BSONObj queryObj = fromjson(queryStr);
- unique_ptr<MatchExpression> base(parseMatchExpression(queryObj));
- unique_ptr<MatchExpression> rewrite(
- CanonicalQuery::logicalRewrite(base->shallowClone().release()));
- assertEquivalent(queryStr.c_str(), base.get(), rewrite.get());
-}
-
-// Do something with a single or.
-TEST(CanonicalQueryTest, RewriteSingleOr) {
- // Rewrite of this...
- string queryStr = "{$or:[{a:1}, {b:1}], e:1}";
- BSONObj queryObj = fromjson(queryStr);
- unique_ptr<MatchExpression> rewrite(
- CanonicalQuery::logicalRewrite(parseMatchExpression(queryObj)));
-
- // Should look like this.
- string rewriteStr = "{$or:[{a:1, e:1}, {b:1, e:1}]}";
- BSONObj rewriteObj = fromjson(rewriteStr);
- unique_ptr<MatchExpression> base(parseMatchExpression(rewriteObj));
- assertEquivalent(queryStr.c_str(), base.get(), rewrite.get());
-}
-
/**
* Test function for CanonicalQuery::normalize.
*/
@@ -537,5 +509,25 @@ TEST(CanonicalQueryTest, NormalizeQueryTree) {
"{$and: [{a: 1}, {b: 1}, {c: 1}]}");
}
+TEST(CanonicalQueryTest, CanonicalizeFromBaseQuery) {
+ const bool isExplain = true;
+ const std::string cmdStr =
+ "{find:'bogusns', filter:{$or:[{a:1,b:1},{a:1,c:1}]}, projection:{a:1}, sort:{b:1}}";
+ auto lpq = assertGet(LiteParsedQuery::makeFromFindCommand(nss, fromjson(cmdStr), isExplain));
+ auto baseCq = assertGet(CanonicalQuery::canonicalize(lpq.release()));
+
+ MatchExpression* firstClauseExpr = baseCq->root()->getChild(0);
+ auto childCq = assertGet(CanonicalQuery::canonicalize(*baseCq, firstClauseExpr));
+
+ BSONObjBuilder expectedFilterBuilder;
+ firstClauseExpr->toBSON(&expectedFilterBuilder);
+ BSONObj expectedFilter = expectedFilterBuilder.obj();
+
+ ASSERT_EQ(childCq->getParsed().getFilter(), expectedFilter);
+ ASSERT_EQ(childCq->getParsed().getProj(), baseCq->getParsed().getProj());
+ ASSERT_EQ(childCq->getParsed().getSort(), baseCq->getParsed().getSort());
+ ASSERT_TRUE(childCq->getParsed().isExplain());
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/query/index_bounds.cpp b/src/mongo/db/query/index_bounds.cpp
index dafd6e7fce8..85708c6c09c 100644
--- a/src/mongo/db/query/index_bounds.cpp
+++ b/src/mongo/db/query/index_bounds.cpp
@@ -29,6 +29,7 @@
#include "mongo/db/query/index_bounds.h"
#include <algorithm>
+#include <tuple>
#include <utility>
namespace mongo {
@@ -93,6 +94,33 @@ Interval IndexBounds::getInterval(size_t i, size_t j) const {
}
}
+bool IndexBounds::operator==(const IndexBounds& other) const {
+ if (this->isSimpleRange != other.isSimpleRange) {
+ return false;
+ }
+
+ if (this->isSimpleRange) {
+ return std::tie(this->startKey, this->endKey, this->endKeyInclusive) ==
+ std::tie(other.startKey, other.endKey, other.endKeyInclusive);
+ }
+
+ if (this->fields.size() != other.fields.size()) {
+ return false;
+ }
+
+ for (size_t i = 0; i < this->fields.size(); ++i) {
+ if (this->fields[i] != other.fields[i]) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool IndexBounds::operator!=(const IndexBounds& other) const {
+ return !(*this == other);
+}
+
string OrderedIntervalList::toString() const {
mongoutils::str::stream ss;
ss << "['" << name << "']: ";
@@ -105,6 +133,28 @@ string OrderedIntervalList::toString() const {
return ss;
}
+bool OrderedIntervalList::operator==(const OrderedIntervalList& other) const {
+ if (this->name != other.name) {
+ return false;
+ }
+
+ if (this->intervals.size() != other.intervals.size()) {
+ return false;
+ }
+
+ for (size_t i = 0; i < this->intervals.size(); ++i) {
+ if (this->intervals[i] != other.intervals[i]) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool OrderedIntervalList::operator!=(const OrderedIntervalList& other) const {
+ return !(*this == other);
+}
+
// static
void OrderedIntervalList::complement() {
BSONObjBuilder minBob;
diff --git a/src/mongo/db/query/index_bounds.h b/src/mongo/db/query/index_bounds.h
index f70f75def8b..e3fabde17b7 100644
--- a/src/mongo/db/query/index_bounds.h
+++ b/src/mongo/db/query/index_bounds.h
@@ -47,7 +47,6 @@ struct OrderedIntervalList {
// Must be ordered according to the index order.
std::vector<Interval> intervals;
- // TODO: We could drop this. Only used in IndexBounds::isValidFor.
std::string name;
bool isValidFor(int expectedOrientation) const;
@@ -65,6 +64,9 @@ struct OrderedIntervalList {
* where this OIL has direction==1.
*/
void complement();
+
+ bool operator==(const OrderedIntervalList& other) const;
+ bool operator!=(const OrderedIntervalList& other) const;
};
/**
@@ -95,6 +97,9 @@ struct IndexBounds {
Interval getInterval(size_t i, size_t j) const;
std::string toString() const;
+ bool operator==(const IndexBounds& other) const;
+ bool operator!=(const IndexBounds& other) const;
+
/**
* BSON format for explain. The format is an array of strings for each field.
* Each string represents an interval. The strings use "[" and "]" if the interval
diff --git a/src/mongo/db/query/index_bounds_test.cpp b/src/mongo/db/query/index_bounds_test.cpp
index d1613ca0c94..f04d495cf4d 100644
--- a/src/mongo/db/query/index_bounds_test.cpp
+++ b/src/mongo/db/query/index_bounds_test.cpp
@@ -249,6 +249,275 @@ TEST(IndexBoundsTest, ComplementRanges2) {
}
//
+// Equality
+//
+
+TEST(IndexBoundsTest, IndexBoundsEqual) {
+ // Both sets of bounds are {a: [[1, 3), (4, 5]], b: [[1,1]]}
+ OrderedIntervalList oil1;
+ oil1.name = "a";
+ oil1.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false));
+ oil1.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, true));
+
+ OrderedIntervalList oil2;
+ oil2.name = "b";
+ oil2.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true));
+
+ IndexBounds bounds1;
+ bounds1.fields.push_back(oil1);
+ bounds1.fields.push_back(oil2);
+
+ OrderedIntervalList oil3;
+ oil3.name = "a";
+ oil3.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false));
+ oil3.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, true));
+
+ OrderedIntervalList oil4;
+ oil4.name = "b";
+ oil4.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true));
+
+ IndexBounds bounds2;
+ bounds2.fields.push_back(oil3);
+ bounds2.fields.push_back(oil4);
+
+ ASSERT_TRUE(bounds1 == bounds2);
+ ASSERT_FALSE(bounds1 != bounds2);
+}
+
+TEST(IndexBoundsTest, EmptyBoundsEqual) {
+ IndexBounds bounds1;
+ IndexBounds bounds2;
+ ASSERT_TRUE(bounds1 == bounds2);
+ ASSERT_FALSE(bounds1 != bounds2);
+}
+
+TEST(IndexBoundsTest, SimpleRangeBoundsEqual) {
+ IndexBounds bounds1;
+ bounds1.isSimpleRange = true;
+ bounds1.startKey = BSON("" << 1 << "" << 3);
+ bounds1.endKey = BSON("" << 2 << "" << 4);
+ bounds1.endKeyInclusive = false;
+
+ IndexBounds bounds2;
+ bounds2.isSimpleRange = true;
+ bounds2.startKey = BSON("" << 1 << "" << 3);
+ bounds2.endKey = BSON("" << 2 << "" << 4);
+ bounds2.endKeyInclusive = false;
+
+ ASSERT_TRUE(bounds1 == bounds2);
+ ASSERT_FALSE(bounds1 != bounds2);
+}
+
+TEST(IndexBoundsTest, BoundsNotEqualDifferentFieldNames) {
+ // First set of bounds: {a: [[1, 3), (4, 5]], b: [[1,1]]}
+ OrderedIntervalList oil1;
+ oil1.name = "a";
+ oil1.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false));
+ oil1.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, true));
+
+ OrderedIntervalList oil2;
+ oil2.name = "b";
+ oil2.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true));
+
+ IndexBounds bounds1;
+ bounds1.fields.push_back(oil1);
+ bounds1.fields.push_back(oil2);
+
+ // Second set of bounds identical except for the second field name:
+ // {a: [[1, 3), (4, 5]], c: [[1,1]]}
+ OrderedIntervalList oil3;
+ oil3.name = "a";
+ oil3.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false));
+ oil3.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, true));
+
+ OrderedIntervalList oil4;
+ oil4.name = "c";
+ oil4.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true));
+
+ IndexBounds bounds2;
+ bounds2.fields.push_back(oil3);
+ bounds2.fields.push_back(oil4);
+
+ ASSERT_FALSE(bounds1 == bounds2);
+ ASSERT_TRUE(bounds1 != bounds2);
+}
+
+TEST(IndexBoundsTest, BoundsNotEqualOneRangeExclusive) {
+ // First set of bounds: {a: [[1, 3), (4, 5]], b: [[1,1]]}
+ OrderedIntervalList oil1;
+ oil1.name = "a";
+ oil1.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false));
+ oil1.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, true));
+
+ OrderedIntervalList oil2;
+ oil2.name = "b";
+ oil2.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true));
+
+ IndexBounds bounds1;
+ bounds1.fields.push_back(oil1);
+ bounds1.fields.push_back(oil2);
+
+ // Second set of bounds identical except for (4, 5] changed to (4, 5):
+ // {a: [[1, 3), (4, 5)], b: [[1,1]]}
+ OrderedIntervalList oil3;
+ oil3.name = "a";
+ oil3.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false));
+ oil3.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, false));
+
+ OrderedIntervalList oil4;
+ oil4.name = "b";
+ oil4.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true));
+
+ IndexBounds bounds2;
+ bounds2.fields.push_back(oil3);
+ bounds2.fields.push_back(oil4);
+
+ ASSERT_FALSE(bounds1 == bounds2);
+ ASSERT_TRUE(bounds1 != bounds2);
+}
+
+TEST(IndexBoundsTest, BoundsNotEqualExtraInterval) {
+ // First set of bounds: {a: [[1, 3), (4, 5]], b: [[1,1]]}
+ OrderedIntervalList oil1;
+ oil1.name = "a";
+ oil1.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false));
+ oil1.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, true));
+
+ OrderedIntervalList oil2;
+ oil2.name = "b";
+ oil2.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true));
+
+ IndexBounds bounds1;
+ bounds1.fields.push_back(oil1);
+ bounds1.fields.push_back(oil2);
+
+ // Second set of bounds has an additional interval for field 'a':
+ // {a: [[1, 3), (4, 5], (6, 7)], b: [[1,1]]}
+ OrderedIntervalList oil3;
+ oil3.name = "a";
+ oil3.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false));
+ oil3.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, true));
+ oil3.intervals.push_back(Interval(BSON("" << 6 << "" << 7), false, false));
+
+ OrderedIntervalList oil4;
+ oil4.name = "b";
+ oil4.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true));
+
+ IndexBounds bounds2;
+ bounds2.fields.push_back(oil3);
+ bounds2.fields.push_back(oil4);
+
+ ASSERT_FALSE(bounds1 == bounds2);
+ ASSERT_TRUE(bounds1 != bounds2);
+}
+
+TEST(IndexBoundsTest, BoundsNotEqualExtraField) {
+ // First set of bounds: {a: [[1, 3), (4, 5]], b: [[1,1]]}
+ OrderedIntervalList oil1;
+ oil1.name = "a";
+ oil1.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false));
+ oil1.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, true));
+
+ OrderedIntervalList oil2;
+ oil2.name = "b";
+ oil2.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true));
+
+ IndexBounds bounds1;
+ bounds1.fields.push_back(oil1);
+ bounds1.fields.push_back(oil2);
+
+ // Second set of bounds has an additional field 'c':
+ // {a: [[1, 3), (4, 5]], b: [[1,1]], c: [[1,1]]}
+ OrderedIntervalList oil3;
+ oil3.name = "a";
+ oil3.intervals.push_back(Interval(BSON("" << 1 << "" << 3), true, false));
+ oil3.intervals.push_back(Interval(BSON("" << 4 << "" << 5), false, true));
+
+ OrderedIntervalList oil4;
+ oil4.name = "b";
+ oil4.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true));
+
+ OrderedIntervalList oil5;
+ oil5.name = "c";
+ oil5.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true));
+
+ IndexBounds bounds2;
+ bounds2.fields.push_back(oil3);
+ bounds2.fields.push_back(oil4);
+ bounds2.fields.push_back(oil5);
+
+ ASSERT_FALSE(bounds1 == bounds2);
+ ASSERT_TRUE(bounds1 != bounds2);
+}
+
+TEST(IndexBoundsTest, SimpleRangeBoundsNotEqualToRegularBounds) {
+ IndexBounds bounds1;
+ bounds1.isSimpleRange = true;
+ bounds1.startKey = BSON("" << 1 << "" << 3);
+ bounds1.endKey = BSON("" << 2 << "" << 4);
+ bounds1.endKeyInclusive = false;
+
+ IndexBounds bounds2;
+ OrderedIntervalList oil;
+ oil.intervals.push_back(Interval(BSON("" << 1 << "" << 1), true, true));
+ bounds2.fields.push_back(oil);
+
+ ASSERT_FALSE(bounds1 == bounds2);
+ ASSERT_TRUE(bounds1 != bounds2);
+}
+
+TEST(IndexBoundsTest, SimpleRangeBoundsNotEqualDifferentStartKey) {
+ IndexBounds bounds1;
+ bounds1.isSimpleRange = true;
+ bounds1.startKey = BSON("" << 1 << "" << 3);
+ bounds1.endKey = BSON("" << 2 << "" << 4);
+ bounds1.endKeyInclusive = false;
+
+ IndexBounds bounds2;
+ bounds2.isSimpleRange = true;
+ bounds2.startKey = BSON("" << 1 << "" << 1);
+ bounds2.endKey = BSON("" << 2 << "" << 4);
+ bounds2.endKeyInclusive = false;
+
+ ASSERT_FALSE(bounds1 == bounds2);
+ ASSERT_TRUE(bounds1 != bounds2);
+}
+
+TEST(IndexBoundsTest, SimpleRangeBoundsNotEqualDifferentEndKey) {
+ IndexBounds bounds1;
+ bounds1.isSimpleRange = true;
+ bounds1.startKey = BSON("" << 1 << "" << 3);
+ bounds1.endKey = BSON("" << 2 << "" << 4);
+ bounds1.endKeyInclusive = false;
+
+ IndexBounds bounds2;
+ bounds2.isSimpleRange = true;
+ bounds2.startKey = BSON("" << 1 << "" << 3);
+ bounds2.endKey = BSON("" << 2 << "" << 99);
+ bounds2.endKeyInclusive = false;
+
+ ASSERT_FALSE(bounds1 == bounds2);
+ ASSERT_TRUE(bounds1 != bounds2);
+}
+
+TEST(IndexBoundsTest, SimpleRangeBoundsNotEqualDifferentEndKeyInclusive) {
+ IndexBounds bounds1;
+ bounds1.isSimpleRange = true;
+ bounds1.startKey = BSON("" << 1 << "" << 3);
+ bounds1.endKey = BSON("" << 2 << "" << 4);
+ bounds1.endKeyInclusive = false;
+
+ IndexBounds bounds2;
+ bounds2.isSimpleRange = true;
+ bounds2.startKey = BSON("" << 1 << "" << 3);
+ bounds2.endKey = BSON("" << 2 << "" << 4);
+ bounds2.endKeyInclusive = true;
+
+ ASSERT_FALSE(bounds1 == bounds2);
+ ASSERT_TRUE(bounds1 != bounds2);
+}
+
+//
// Iteration over
//
diff --git a/src/mongo/db/query/interval.h b/src/mongo/db/query/interval.h
index 88309d33e05..9743a989321 100644
--- a/src/mongo/db/query/interval.h
+++ b/src/mongo/db/query/interval.h
@@ -190,4 +190,8 @@ inline bool operator==(const Interval& lhs, const Interval& rhs) {
return lhs.compare(rhs) == Interval::INTERVAL_EQUALS;
}
+inline bool operator!=(const Interval& lhs, const Interval& rhs) {
+ return !(lhs == rhs);
+}
+
} // namespace mongo
diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp
index c91b93bb785..fc494f255aa 100644
--- a/src/mongo/db/query/planner_access.cpp
+++ b/src/mongo/db/query/planner_access.cpp
@@ -28,11 +28,14 @@
#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kQuery
+#include "mongo/platform/basic.h"
+
#include "mongo/db/query/planner_access.h"
#include <algorithm>
#include <vector>
+#include "mongo/base/owned_pointer_vector.h"
#include "mongo/db/matcher/expression_array.h"
#include "mongo/db/matcher/expression_geo.h"
#include "mongo/db/matcher/expression_text.h"
@@ -56,6 +59,51 @@ bool isTextNode(const QuerySolutionNode* node) {
return STAGE_TEXT == node->getType();
}
+/**
+ * Casts 'node' to a FetchNode* if it is a FetchNode, otherwise returns null.
+ */
+FetchNode* getFetchNode(QuerySolutionNode* node) {
+ if (STAGE_FETCH != node->getType()) {
+ return nullptr;
+ }
+
+ return static_cast<FetchNode*>(node);
+}
+
+/**
+ * If 'node' is an index scan node, casts it to IndexScanNode*. If 'node' is a FetchNode with an
+ * IndexScanNode child, then returns a pointer to the child index scan node. Otherwise returns
+ * null.
+ */
+const IndexScanNode* getIndexScanNode(const QuerySolutionNode* node) {
+ if (STAGE_IXSCAN == node->getType()) {
+ return static_cast<const IndexScanNode*>(node);
+ } else if (STAGE_FETCH == node->getType()) {
+ invariant(1U == node->children.size());
+ const QuerySolutionNode* child = node->children[0];
+ if (STAGE_IXSCAN == child->getType()) {
+ return static_cast<const IndexScanNode*>(child);
+ }
+ }
+
+ return nullptr;
+}
+
+/**
+ * Takes as input two query solution nodes returned by processIndexScans(). If both are
+ * IndexScanNode or FetchNode with an IndexScanNode child and the index scan nodes are identical
+ * (same bounds, same filter, same direction, etc.), then returns true. Otherwise returns false.
+ */
+bool scansAreEquivalent(const QuerySolutionNode* lhs, const QuerySolutionNode* rhs) {
+ const IndexScanNode* leftIxscan = getIndexScanNode(lhs);
+ const IndexScanNode* rightIxscan = getIndexScanNode(rhs);
+ if (!leftIxscan || !rightIxscan) {
+ return false;
+ }
+
+ return *leftIxscan == *rightIxscan;
+}
+
} // namespace
namespace mongo {
@@ -567,6 +615,61 @@ void QueryPlannerAccess::findElemMatchChildren(const MatchExpression* node,
}
// static
+std::vector<QuerySolutionNode*> QueryPlannerAccess::collapseEquivalentScans(
+ const std::vector<QuerySolutionNode*> scans) {
+ OwnedPointerVector<QuerySolutionNode> ownedScans(scans);
+ invariant(ownedScans.size() > 0);
+
+ // Scans that need to be collapsed will be adjacent to each other in the list due to how we
+ // sort the query predicate. We step through the list, either merging the current scan into
+ // the last scan in 'collapsedScans', or adding a new entry to 'collapsedScans' if it can't
+ // be merged.
+ OwnedPointerVector<QuerySolutionNode> collapsedScans;
+
+ collapsedScans.push_back(ownedScans.releaseAt(0));
+ for (size_t i = 1; i < ownedScans.size(); ++i) {
+ if (scansAreEquivalent(collapsedScans.back(), ownedScans[i])) {
+ // We collapse the entry from 'ownedScans' into the back of 'collapsedScans'.
+ std::unique_ptr<QuerySolutionNode> collapseFrom(ownedScans.releaseAt(i));
+ FetchNode* collapseFromFetch = getFetchNode(collapseFrom.get());
+ FetchNode* collapseIntoFetch = getFetchNode(collapsedScans.back());
+
+ // If there's no filter associated with a fetch node on 'collapseFrom', all we have to
+ // do is clear the filter on the node that we are collapsing into.
+ if (!collapseFromFetch || !collapseFromFetch->filter.get()) {
+ if (collapseIntoFetch) {
+ collapseIntoFetch->filter.reset();
+ }
+ continue;
+ }
+
+ // If there's no filter associated with a fetch node on the back of the 'collapsedScans'
+ // list, then there's nothing more to do.
+ if (!collapseIntoFetch || !collapseIntoFetch->filter.get()) {
+ continue;
+ }
+
+ // Both the 'from' and 'into' nodes have filters. We join them with an
+ // OrMatchExpression.
+ std::unique_ptr<OrMatchExpression> collapsedFilter =
+ stdx::make_unique<OrMatchExpression>();
+ collapsedFilter->add(collapseFromFetch->filter.release());
+ collapsedFilter->add(collapseIntoFetch->filter.release());
+
+ // Normalize the filter and add it to 'into'.
+ collapseIntoFetch->filter.reset(
+ CanonicalQuery::normalizeTree(collapsedFilter.release()));
+ } else {
+ // Scans are not equivalent and can't be collapsed.
+ collapsedScans.push_back(ownedScans.releaseAt(i));
+ }
+ }
+
+ invariant(collapsedScans.size() > 0);
+ return collapsedScans.release();
+}
+
+// static
bool QueryPlannerAccess::processIndexScans(const CanonicalQuery& query,
MatchExpression* root,
bool inArrayOperator,
@@ -962,6 +1065,10 @@ QuerySolutionNode* QueryPlannerAccess::buildIndexedOr(const CanonicalQuery& quer
return NULL;
}
+ // If all index scans are identical, then we collapse them into a single scan. This prevents
+ // us from creating OR plans where the branches of the OR perform duplicate work.
+ ixscanNodes = collapseEquivalentScans(ixscanNodes);
+
QuerySolutionNode* orResult = NULL;
// An OR of one node is just that node.
diff --git a/src/mongo/db/query/planner_access.h b/src/mongo/db/query/planner_access.h
index 8ab6bf9d58a..a5844f4fa58 100644
--- a/src/mongo/db/query/planner_access.h
+++ b/src/mongo/db/query/planner_access.h
@@ -265,6 +265,28 @@ public:
std::vector<MatchExpression*>* subnodesOut);
/**
+ * Given a list of OR-related subtrees returned by processIndexScans(), looks for logically
+ * equivalent IndexScanNodes and combines them. This is an optimization to avoid creating
+ * plans that repeat index access work.
+ *
+ * Example:
+ * Suppose processIndexScans() returns a list of the following three query solutions:
+ * 1) IXSCAN (bounds: {b: [[2,2]]})
+ * 2) FETCH (filter: {d:1}) -> IXSCAN (bounds: {c: [[3,3]]})
+ * 3) FETCH (filter: {e:1}) -> IXSCAN (bounds: {c: [[3,3]]})
+ * This method would collapse scans #2 and #3, resulting in the following output:
+ * 1) IXSCAN (bounds: {b: [[2,2]]})
+ * 2) FETCH (filter: {$or:[{d:1}, {e:1}]}) -> IXSCAN (bounds: {c: [[3,3]]})
+ *
+ * Used as a helper for buildIndexedOr().
+ *
+ * Takes ownership of 'scans'. The caller assumes ownership of the pointers in the returned
+ * list of QuerySolutionNode*.
+ */
+ static std::vector<QuerySolutionNode*> collapseEquivalentScans(
+ const std::vector<QuerySolutionNode*> scans);
+
+ /**
* Helper used by buildIndexedAnd and buildIndexedOr.
*
* The children of AND and OR nodes are sorted by the index that the subtree rooted at
diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp
index 073e60f10fa..8cffd85bd5c 100644
--- a/src/mongo/db/query/query_planner.cpp
+++ b/src/mongo/db/query/query_planner.cpp
@@ -662,7 +662,7 @@ Status QueryPlanner::plan(const CanonicalQuery& query,
<< query.root()->toString();
// If there is a GEO_NEAR it must have an index it can use directly.
- MatchExpression* gnNode = NULL;
+ const MatchExpression* gnNode = NULL;
if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR, &gnNode)) {
// No index for GEO_NEAR? No query.
RelevantTag* tag = static_cast<RelevantTag*>(gnNode->getTag());
@@ -677,7 +677,7 @@ Status QueryPlanner::plan(const CanonicalQuery& query,
}
// Likewise, if there is a TEXT it must have an index it can use directly.
- MatchExpression* textNode = NULL;
+ const MatchExpression* textNode = NULL;
if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT, &textNode)) {
RelevantTag* tag = static_cast<RelevantTag*>(textNode->getTag());
diff --git a/src/mongo/db/query/query_planner_common.h b/src/mongo/db/query/query_planner_common.h
index 4fdf8f30815..d62c9c7a3ba 100644
--- a/src/mongo/db/query/query_planner_common.h
+++ b/src/mongo/db/query/query_planner_common.h
@@ -44,9 +44,9 @@ public:
*
* If 'out' is not NULL, sets 'out' to the first node of type 'type' encountered.
*/
- static bool hasNode(MatchExpression* root,
+ static bool hasNode(const MatchExpression* root,
MatchExpression::MatchType type,
- MatchExpression** out = NULL) {
+ const MatchExpression** out = NULL) {
if (type == root->matchType()) {
if (NULL != out) {
*out = root;
diff --git a/src/mongo/db/query/query_planner_test.cpp b/src/mongo/db/query/query_planner_test.cpp
index 4768eed4312..9950b295cc1 100644
--- a/src/mongo/db/query/query_planner_test.cpp
+++ b/src/mongo/db/query/query_planner_test.cpp
@@ -706,6 +706,128 @@ TEST_F(QueryPlannerTest, OrOfAnd6) {
" b: [[1,1,true,true], [5,5,true,true]]}}}]}}}}");
}
+// We do collapse OR of ANDs if branches of the OR plan are using identical index scans.
+TEST_F(QueryPlannerTest, RootedOrOfAndCollapseIdenticalScans) {
+ addIndex(BSON("a" << 1 << "b" << 1));
+ runQuery(fromjson("{$or: [{a:1, b:2}, {a:1, b:2}]}"));
+
+ assertNumSolutions(2U);
+ assertSolutionExists("{cscan: {dir: 1}}");
+ assertSolutionExists(
+ "{fetch: {filter: null, node: {ixscan: {pattern: {a: 1, b: 1},"
+ "bounds: {a: [[1,1,true,true]], b: [[2,2,true,true]]},"
+ "filter: null}}}}");
+}
+
+TEST_F(QueryPlannerTest, ContainedOrOfAndCollapseIdenticalScans) {
+ addIndex(BSON("a" << 1 << "b" << 1));
+ runQuery(fromjson("{c: 1, $or: [{a:1, b:2}, {a:1, b:2}]}"));
+
+ assertNumSolutions(2U);
+ assertSolutionExists("{cscan: {dir: 1}}");
+ assertSolutionExists(
+ "{fetch: {filter: {c: 1}, node: {ixscan: {pattern: {a: 1, b: 1},"
+ "bounds: {a: [[1,1,true,true]], b: [[2,2,true,true]]},"
+ "filter: null}}}}");
+}
+
+TEST_F(QueryPlannerTest, ContainedOrOfAndCollapseIdenticalScansWithFilter) {
+ addIndex(BSON("a" << 1 << "b" << 1));
+ runQuery(fromjson("{c: 1, $or: [{a:1, b:2}, {a:1, b:2, d:3}]}"));
+
+ assertNumSolutions(2U);
+ assertSolutionExists("{cscan: {dir: 1}}");
+ assertSolutionExists(
+ "{fetch: {filter: {c: 1}, node: {ixscan: {pattern: {a: 1, b: 1},"
+ "bounds: {a: [[1,1,true,true]], b: [[2,2,true,true]]},"
+ "filter: null}}}}");
+}
+
+TEST_F(QueryPlannerTest, ContainedOrOfAndCollapseIdenticalScansWithFilter2) {
+ addIndex(BSON("a" << 1 << "b" << 1));
+ runQuery(fromjson("{c: 1, $or: [{a:{$gte:1,$lte:1}, b:2}, {a:1, b:2, d:3}]}"));
+
+ assertNumSolutions(2U);
+ assertSolutionExists("{cscan: {dir: 1}}");
+ assertSolutionExists(
+ "{fetch: {filter: {c: 1}, node: {fetch: {filter: null, node: "
+ "{ixscan: {pattern: {a: 1, b: 1},"
+ "bounds: {a: [[1,1,true,true]], b: [[2,2,true,true]]},"
+ "filter: null}}}}}}");
+}
+
+TEST_F(QueryPlannerTest, ContainedOrOfAndCollapseIdenticalScansTwoFilters) {
+ addIndex(BSON("a" << 1 << "b" << 1));
+ runQuery(fromjson("{c: 1, $or: [{a:1, b:2, d:3}, {a:1, b:2, e:4}]}"));
+
+ assertNumSolutions(2U);
+ assertSolutionExists("{cscan: {dir: 1}}");
+ assertSolutionExists(
+ "{fetch: {filter: {c: 1}, node: {fetch: {filter: {$or:[{e:4},{d:3}]},"
+ "node: {ixscan: {pattern: {a: 1, b: 1}, filter: null,"
+ "bounds: {a: [[1,1,true,true]], b: [[2,2,true,true]]}}}}}}}");
+}
+
+TEST_F(QueryPlannerTest, RootedOrOfAndCollapseScansExistingOrFilter) {
+ addIndex(BSON("a" << 1 << "b" << 1));
+ runQuery(fromjson("{$or: [{a:1, b:2, $or: [{c:3}, {d:4}]}, {a:1, b:2, e:5}]}"));
+
+ assertNumSolutions(2U);
+ assertSolutionExists("{cscan: {dir: 1}}");
+ assertSolutionExists(
+ "{fetch: {filter: {$or: [{e:5},{c:3},{d:4}]}, node: {ixscan: "
+ "{filter: null, pattern: {a: 1, b: 1}, "
+ "bounds: {a: [[1,1,true,true]], b: [[2,2,true,true]]}}}}}");
+}
+
+TEST_F(QueryPlannerTest, RootedOrOfAndCollapseTwoScansButNotThird) {
+ addIndex(BSON("a" << 1 << "b" << 1));
+ addIndex(BSON("c" << 1 << "d" << 1));
+ runQuery(fromjson("{$or: [{a: 1, b: 2}, {c: 3, d: 4}, {a: 1, b: 2}]}"));
+
+ assertNumSolutions(2U);
+ assertSolutionExists("{cscan: {dir: 1}}");
+ assertSolutionExists(
+ "{fetch: {filter: null, node: {or: {nodes: ["
+ "{ixscan: {pattern: {a: 1, b: 1}, filter: null,"
+ "bounds: {a: [[1,1,true,true]], b: [[2,2,true,true]]}}},"
+ "{ixscan: {pattern: {c: 1, d: 1}, filter: null,"
+ "bounds: {c: [[3,3,true,true]], d: [[4,4,true,true]]}}}]}}}}");
+}
+
+TEST_F(QueryPlannerTest, RootedOrOfAndCollapseTwoScansButNotThirdWithFilters) {
+ addIndex(BSON("a" << 1 << "b" << 1));
+ addIndex(BSON("c" << 1 << "d" << 1));
+ runQuery(fromjson("{$or: [{a:1, b:2, e:5}, {c:3, d:4}, {a:1, b:2, f:6}]}"));
+
+ assertNumSolutions(2U);
+ assertSolutionExists("{cscan: {dir: 1}}");
+ assertSolutionExists(
+ "{fetch: {filter: null, node: {or: {nodes: ["
+ "{fetch: {filter: {$or: [{f:6},{e:5}]}, node: "
+ "{ixscan: {pattern: {a: 1, b: 1}, filter: null,"
+ "bounds: {a: [[1,1,true,true]], b: [[2,2,true,true]]}}}}},"
+ "{ixscan: {pattern: {c: 1, d: 1}, filter: null,"
+ "bounds: {c: [[3,3,true,true]], d: [[4,4,true,true]]}}}]}}}}");
+}
+
+TEST_F(QueryPlannerTest, RootedOrOfAndDontCollapseDifferentBounds) {
+ addIndex(BSON("a" << 1 << "b" << 1));
+ addIndex(BSON("c" << 1 << "d" << 1));
+ runQuery(fromjson("{$or: [{a: 1, b: 2}, {c: 3, d: 4}, {a: 1, b: 99}]}"));
+
+ assertNumSolutions(2U);
+ assertSolutionExists("{cscan: {dir: 1}}");
+ assertSolutionExists(
+ "{fetch: {filter: null, node: {or: {nodes: ["
+ "{ixscan: {pattern: {a: 1, b: 1}, filter: null,"
+ "bounds: {a: [[1,1,true,true]], b: [[2,2,true,true]]}}},"
+ "{ixscan: {pattern: {a: 1, b: 1}, filter: null,"
+ "bounds: {a: [[1,1,true,true]], b: [[99,99,true,true]]}}},"
+ "{ixscan: {pattern: {c: 1, d: 1}, filter: null,"
+ "bounds: {c: [[3,3,true,true]], d: [[4,4,true,true]]}}}]}}}}");
+}
+
// SERVER-13960: properly handle $or with a mix of exact and inexact predicates.
TEST_F(QueryPlannerTest, OrInexactWithExact) {
addIndex(BSON("name" << 1));
diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp
index 85b0cb7933c..5bc7ff5276c 100644
--- a/src/mongo/db/query/query_solution.cpp
+++ b/src/mongo/db/query/query_solution.cpp
@@ -578,6 +578,27 @@ QuerySolutionNode* IndexScanNode::clone() const {
return copy;
}
+namespace {
+
+bool filtersAreEquivalent(const MatchExpression* lhs, const MatchExpression* rhs) {
+ if (!lhs && !rhs) {
+ return true;
+ } else if (!lhs || !rhs) {
+ return false;
+ } else {
+ return lhs->equivalent(rhs);
+ }
+}
+
+} // namespace
+
+bool IndexScanNode::operator==(const IndexScanNode& other) const {
+ return filtersAreEquivalent(filter.get(), other.filter.get()) &&
+ indexKeyPattern == other.indexKeyPattern && indexIsMultiKey == other.indexIsMultiKey &&
+ direction == other.direction && maxScan == other.maxScan &&
+ addKeyMetadata == other.addKeyMetadata && bounds == other.bounds;
+}
+
//
// ProjectionNode
//
diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h
index 8de5dabb40f..c5c6ca36dd2 100644
--- a/src/mongo/db/query/query_solution.h
+++ b/src/mongo/db/query/query_solution.h
@@ -28,6 +28,7 @@
#pragma once
+#include <memory>
#include "mongo/db/jsobj.h"
#include "mongo/db/matcher/expression.h"
@@ -458,6 +459,8 @@ struct IndexScanNode : public QuerySolutionNode {
QuerySolutionNode* clone() const;
+ bool operator==(const IndexScanNode& other) const;
+
BSONObjSet _sorts;
BSONObj indexKeyPattern;
diff --git a/src/mongo/dbtests/query_stage_subplan.cpp b/src/mongo/dbtests/query_stage_subplan.cpp
index 2231240f2c4..ffe592eba90 100644
--- a/src/mongo/dbtests/query_stage_subplan.cpp
+++ b/src/mongo/dbtests/query_stage_subplan.cpp
@@ -26,6 +26,10 @@
* then also delete it in the license file.
*/
+#include "mongo/platform/basic.h"
+
+#include <memory>
+
#include "mongo/db/catalog/collection.h"
#include "mongo/db/catalog/database.h"
#include "mongo/db/db_raii.h"
@@ -62,6 +66,21 @@ public:
}
protected:
+ /**
+ * Parses the json string 'findCmd', specifying a find command, to a CanonicalQuery.
+ */
+ std::unique_ptr<CanonicalQuery> cqFromFindCommand(const std::string& findCmd) {
+ BSONObj cmdObj = fromjson(findCmd);
+
+ const NamespaceString nss("testns.testcoll");
+ bool isExplain = false;
+ auto lpq =
+ unittest::assertGet(LiteParsedQuery::makeFromFindCommand(nss, cmdObj, isExplain));
+
+ auto cq = unittest::assertGet(CanonicalQuery::canonicalize(lpq.release()));
+ return cq;
+ }
+
OperationContextImpl _txn;
private:
@@ -163,6 +182,254 @@ public:
}
};
+/**
+ * Unit test the subplan stage's canUseSubplanning() method.
+ */
+class QueryStageSubplanCanUseSubplanning : public QueryStageSubplanBase {
+public:
+ void run() {
+ // We won't try and subplan something that doesn't have an $or.
+ {
+ std::string findCmd = "{find: 'testns', filter: {$and:[{a:1}, {b:1}]}}";
+ std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd);
+ ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq));
+ }
+
+ // Don't try and subplan if there is no filter.
+ {
+ std::string findCmd = "{find: 'testns'}";
+ std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd);
+ ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq));
+ }
+
+ // We won't try and subplan two contained ORs.
+ {
+ std::string findCmd =
+ "{find: 'testns',"
+ "filter: {$or:[{a:1}, {b:1}], $or:[{c:1}, {d:1}], e:1}}";
+ std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd);
+ ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq));
+ }
+
+ // Can't use subplanning if there is a hint.
+ {
+ std::string findCmd =
+ "{find: 'testns',"
+ "filter: {$or: [{a:1, b:1}, {c:1, d:1}]},"
+ "hint: {a:1, b:1}}";
+ std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd);
+ ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq));
+ }
+
+ // Can't use subplanning with min.
+ {
+ std::string findCmd =
+ "{find: 'testns',"
+ "filter: {$or: [{a:1, b:1}, {c:1, d:1}]},"
+ "min: {a:1, b:1}}";
+ std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd);
+ ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq));
+ }
+
+ // Can't use subplanning with max.
+ {
+ std::string findCmd =
+ "{find: 'testns',"
+ "filter: {$or: [{a:1, b:1}, {c:1, d:1}]},"
+ "max: {a:2, b:2}}";
+ std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd);
+ ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq));
+ }
+
+ // Can't use subplanning with tailable.
+ {
+ std::string findCmd =
+ "{find: 'testns',"
+ "filter: {$or: [{a:1, b:1}, {c:1, d:1}]},"
+ "tailable: true}";
+ std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd);
+ ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq));
+ }
+
+ // Can't use subplanning with snapshot.
+ {
+ std::string findCmd =
+ "{find: 'testns',"
+ "filter: {$or: [{a:1, b:1}, {c:1, d:1}]},"
+ "snapshot: true}";
+ std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd);
+ ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq));
+ }
+
+ // Can use subplanning for rooted $or.
+ {
+ std::string findCmd =
+ "{find: 'testns',"
+ "filter: {$or: [{a:1, b:1}, {c:1, d:1}]}}";
+ std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd);
+ ASSERT_TRUE(SubplanStage::canUseSubplanning(*cq));
+
+ std::string findCmd2 =
+ "{find: 'testns',"
+ "filter: {$or: [{a:1}, {c:1}]}}";
+ std::unique_ptr<CanonicalQuery> cq2 = cqFromFindCommand(findCmd2);
+ ASSERT_TRUE(SubplanStage::canUseSubplanning(*cq2));
+ }
+
+ // Can use subplanning for a single contained $or.
+ {
+ std::string findCmd =
+ "{find: 'testns',"
+ "filter: {e: 1, $or: [{a:1, b:1}, {c:1, d:1}]}}";
+ std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd);
+ ASSERT_TRUE(SubplanStage::canUseSubplanning(*cq));
+ }
+
+ // Can use subplanning if the contained $or query has a geo predicate.
+ {
+ std::string findCmd =
+ "{find: 'testns',"
+ "filter: {loc: {$geoWithin: {$centerSphere: [[0,0], 1]}},"
+ "e: 1, $or: [{a:1, b:1}, {c:1, d:1}]}}";
+ std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd);
+ ASSERT_TRUE(SubplanStage::canUseSubplanning(*cq));
+ }
+
+ // Can't use subplanning if the contained $or query also has a $text predicate.
+ {
+ std::string findCmd =
+ "{find: 'testns',"
+ "filter: {$text: {$search: 'foo'},"
+ "e: 1, $or: [{a:1, b:1}, {c:1, d:1}]}}";
+ std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd);
+ ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq));
+ }
+
+ // Can't use subplanning if the contained $or query also has a $near predicate.
+ {
+ std::string findCmd =
+ "{find: 'testns',"
+ "filter: {loc: {$near: [0, 0]},"
+ "e: 1, $or: [{a:1, b:1}, {c:1, d:1}]}}";
+ std::unique_ptr<CanonicalQuery> cq = cqFromFindCommand(findCmd);
+ ASSERT_FALSE(SubplanStage::canUseSubplanning(*cq));
+ }
+ }
+};
+
+/**
+ * Unit test the subplan stage's rewriteToRootedOr() method.
+ */
+class QueryStageSubplanRewriteToRootedOr : public QueryStageSubplanBase {
+public:
+ void run() {
+ // Rewrite (AND (OR a b) e) => (OR (AND a e) (AND b e))
+ {
+ BSONObj queryObj = fromjson("{$or:[{a:1}, {b:1}], e:1}");
+ StatusWithMatchExpression expr = MatchExpressionParser::parse(queryObj);
+ ASSERT_OK(expr.getStatus());
+ std::unique_ptr<MatchExpression> rewrittenExpr =
+ SubplanStage::rewriteToRootedOr(std::move(expr.getValue()));
+
+ std::string findCmdRewritten =
+ "{find: 'testns',"
+ "filter: {$or:[{a:1,e:1}, {b:1,e:1}]}}";
+ std::unique_ptr<CanonicalQuery> cqRewritten = cqFromFindCommand(findCmdRewritten);
+
+ ASSERT(rewrittenExpr->equivalent(cqRewritten->root()));
+ }
+
+ // Rewrite (AND (OR a b) e f) => (OR (AND a e f) (AND b e f))
+ {
+ BSONObj queryObj = fromjson("{$or:[{a:1}, {b:1}], e:1, f:1}");
+ StatusWithMatchExpression expr = MatchExpressionParser::parse(queryObj);
+ ASSERT_OK(expr.getStatus());
+ std::unique_ptr<MatchExpression> rewrittenExpr =
+ SubplanStage::rewriteToRootedOr(std::move(expr.getValue()));
+
+ std::string findCmdRewritten =
+ "{find: 'testns',"
+ "filter: {$or:[{a:1,e:1,f:1}, {b:1,e:1,f:1}]}}";
+ std::unique_ptr<CanonicalQuery> cqRewritten = cqFromFindCommand(findCmdRewritten);
+
+ ASSERT(rewrittenExpr->equivalent(cqRewritten->root()));
+ }
+
+ // Rewrite (AND (OR (AND a b) (AND c d)) e f) => (OR (AND a b e f) (AND c d e f))
+ {
+ BSONObj queryObj = fromjson("{$or:[{a:1,b:1}, {c:1,d:1}], e:1,f:1}");
+ StatusWithMatchExpression expr = MatchExpressionParser::parse(queryObj);
+ ASSERT_OK(expr.getStatus());
+ std::unique_ptr<MatchExpression> rewrittenExpr =
+ SubplanStage::rewriteToRootedOr(std::move(expr.getValue()));
+
+ std::string findCmdRewritten =
+ "{find: 'testns',"
+ "filter: {$or:[{a:1,b:1,e:1,f:1},"
+ "{c:1,d:1,e:1,f:1}]}}";
+ std::unique_ptr<CanonicalQuery> cqRewritten = cqFromFindCommand(findCmdRewritten);
+
+ ASSERT(rewrittenExpr->equivalent(cqRewritten->root()));
+ }
+ }
+};
+
+/**
+ * Test the subplan stage's ability to answer a contained $or query.
+ */
+class QueryStageSubplanPlanRootedOr : public QueryStageSubplanBase {
+public:
+ void run() {
+ OldClientWriteContext ctx(&_txn, ns());
+ addIndex(BSON("b" << 1 << "a" << 1));
+ addIndex(BSON("c" << 1 << "a" << 1));
+
+ BSONObj query = fromjson("{a: 1, $or: [{b: 2}, {c: 3}]}");
+
+ // Two of these documents match.
+ insert(BSON("_id" << 1 << "a" << 1 << "b" << 2));
+ insert(BSON("_id" << 2 << "a" << 2 << "b" << 2));
+ insert(BSON("_id" << 3 << "a" << 1 << "c" << 3));
+ insert(BSON("_id" << 4 << "a" << 1 << "c" << 4));
+
+ auto cq = unittest::assertGet(CanonicalQuery::canonicalize(ns(), query));
+
+ Collection* collection = ctx.getCollection();
+
+ // Get planner params.
+ QueryPlannerParams plannerParams;
+ fillOutPlannerParams(&_txn, collection, cq.get(), &plannerParams);
+
+ WorkingSet ws;
+ std::unique_ptr<SubplanStage> subplan(
+ new SubplanStage(&_txn, collection, &ws, plannerParams, cq.get()));
+
+ // Plan selection should succeed: each branch of the contained $or can use one of the indices.
+ PlanYieldPolicy yieldPolicy(NULL, PlanExecutor::YIELD_MANUAL);
+ ASSERT_OK(subplan->pickBestPlan(&yieldPolicy));
+
+ // Work the stage until it produces all results.
+ size_t numResults = 0;
+ PlanStage::StageState stageState = PlanStage::NEED_TIME;
+ while (stageState != PlanStage::IS_EOF) {
+ WorkingSetID id = WorkingSet::INVALID_ID;
+ stageState = subplan->work(&id);
+ ASSERT_NE(stageState, PlanStage::DEAD);
+ ASSERT_NE(stageState, PlanStage::FAILURE);
+
+ if (stageState == PlanStage::ADVANCED) {
+ ++numResults;
+ WorkingSetMember* member = ws.get(id);
+ ASSERT(member->hasObj());
+ ASSERT(member->obj.value() == BSON("_id" << 1 << "a" << 1 << "b" << 2) ||
+ member->obj.value() == BSON("_id" << 3 << "a" << 1 << "c" << 3));
+ }
+ }
+
+ ASSERT_EQ(numResults, 2U);
+ }
+};
+
class All : public Suite {
public:
All() : Suite("query_stage_subplan") {}
@@ -170,6 +437,9 @@ public:
void setupTests() {
add<QueryStageSubplanGeo2dOr>();
add<QueryStageSubplanPlanFromCache>();
+ add<QueryStageSubplanCanUseSubplanning>();
+ add<QueryStageSubplanRewriteToRootedOr>();
+ add<QueryStageSubplanPlanRootedOr>();
}
};