SERVER-10026 SERVER-10471 planner bug fixes from going through tests

author: Hari Khalsa <hkhalsa@10gen.com> 2013-09-20 12:44:35 -0400
committer: Hari Khalsa <hkhalsa@10gen.com> 2013-09-23 10:38:13 -0400
commit: fc8201aec8acb3ee46fde6915702f1269b448c6c (patch)
tree: 2334ca68d9f3031f0d4d44bbeeea5208dde10ca6 /src
parent: ca1fa9d82a3b180f623774f86ab4ee3a8a43b6e3 (diff)
download: mongo-fc8201aec8acb3ee46fde6915702f1269b448c6c.tar.gz
8 files changed, 107 insertions, 57 deletions
diff --git a/src/mongo/db/exec/query_projection.cpp b/src/mongo/db/exec/query_projection.cpp
index 843961abadb..0bb06ca1f4b 100644
--- a/src/mongo/db/exec/query_projection.cpp
+++ b/src/mongo/db/exec/query_projection.cpp
@@ -55,14 +55,14 @@ namespace mongo {
 
             if (_fieldsInclusive) {
                 // We only want stuff in _fields.
-                for (unordered_set<string>::const_iterator it = _fields.begin();
-                     it != _fields.end(); ++it) {
+                for (vector<string>::const_iterator it = _includedFields.begin();
+                     it != _includedFields.end(); ++it) {
                     BSONElement elt;
-                    if (!wsm->getFieldDotted(*it, &elt)) {
-                        // XXX: needs to be propagated better
-                        return Status(ErrorCodes::BadValue, "no field " + *it + " in wsm to proj");
+                    // We can project a field that doesn't exist.  We just ignore it.
+                    // UNITTEST 11738048
+                    if (wsm->getFieldDotted(*it, &elt) && !elt.eoo()) {
+                        bob.append(elt);
                     }
-                    bob.append(elt);
                 }
             }
             else {
@@ -75,7 +75,7 @@ namespace mongo {
                 while (it.more()) {
                     BSONElement elt = it.next();
                     if (!mongoutils::str::equals("_id", elt.fieldName())) {
-                        if (_fields.end() == _fields.find(elt.fieldName())) {
+                        if (_excludedFields.end() == _excludedFields.find(elt.fieldName())) {
                             bob.append(elt);
                         }
                     }
@@ -95,9 +95,13 @@ namespace mongo {
         // _id can be included/excluded separately and is by default included.
         bool _includeID;
 
-        // Either we include all of _fields or we exclude all of _fields.
+        // Either we include all of _includedFields or we exclude all of _excludedFields.
         bool _fieldsInclusive;
-        unordered_set<string> _fields;
+        unordered_set<string> _excludedFields;
+
+        // Fields can be ordered if they're included.
+        // UNITTEST 11738048
+        vector<string> _includedFields;
     };
 
     // static
@@ -118,14 +122,21 @@ namespace mongo {
             }
             else {
                 bool newFieldValue = elt.trueValue();
-                if (qp->_fields.size() > 0) {
+                if (qp->_includedFields.size() > 0 || qp->_excludedFields.size() > 0) {
                     // make sure we're all true or all false otherwise error
                     if (newFieldValue != lastNonIDValue) {
                         return Status(ErrorCodes::BadValue, "Inconsistent projection specs");
                     }
                 }
                 lastNonIDValue = newFieldValue;
-                qp->_fields.insert(elt.fieldName());
+                if (lastNonIDValue) {
+                    // inclusive
+                    qp->_includedFields.push_back(elt.fieldName());
+                }
+                else {
+                    // exclusive
+                    qp->_excludedFields.insert(elt.fieldName());
+                }
             }
         }
 
diff --git a/src/mongo/db/matcher/expression_array.cpp b/src/mongo/db/matcher/expression_array.cpp
index 5dcd3d3dc1e..f7c3a9daf1f 100644
--- a/src/mongo/db/matcher/expression_array.cpp
+++ b/src/mongo/db/matcher/expression_array.cpp
@@ -118,6 +118,12 @@ namespace mongo {
         _debugAddSpace( debug, level );
         debug << path() << " $elemMatch\n";
         _sub->debugString( debug, level + 1 );
+
+        MatchExpression::TagData* td = getTag();
+        if (NULL != td) {
+            debug << " ";
+            td->debugString(&debug);
+        }
     }
 
 
@@ -174,6 +180,12 @@ namespace mongo {
         for ( unsigned i = 0; i < _subs.size(); i++ ) {
             _subs[i]->debugString( debug, level + 1 );
         }
+
+        MatchExpression::TagData* td = getTag();
+        if (NULL != td) {
+            debug << " ";
+            td->debugString(&debug);
+        }
     }
 
 
@@ -234,6 +246,12 @@ namespace mongo {
         for ( size_t i = 0; i < _list.size(); i++ ) {
             _list[i]->debugString( debug, level + 1);
         }
+
+        MatchExpression::TagData* td = getTag();
+        if (NULL != td) {
+            debug << " ";
+            td->debugString(&debug);
+        }
     }
 
     bool AllElemMatchOp::equivalent( const MatchExpression* other ) const {
@@ -271,6 +289,12 @@ namespace mongo {
     void SizeMatchExpression::debugString( StringBuilder& debug, int level ) const {
         _debugAddSpace( debug, level );
         debug << path() << " $size : " << _size << "\n";
+
+        MatchExpression::TagData* td = getTag();
+        if (NULL != td) {
+            debug << " ";
+            td->debugString(&debug);
+        }
     }
 
     bool SizeMatchExpression::equivalent( const MatchExpression* other ) const {
diff --git a/src/mongo/db/matcher/expression_array.h b/src/mongo/db/matcher/expression_array.h
index 36eb861fd42..f5fb1f55d50 100644
--- a/src/mongo/db/matcher/expression_array.h
+++ b/src/mongo/db/matcher/expression_array.h
@@ -80,6 +80,9 @@ namespace mongo {
         virtual ElemMatchObjectMatchExpression* shallowClone() const {
             ElemMatchObjectMatchExpression* e = new ElemMatchObjectMatchExpression();
             e->init(path(), _sub->shallowClone());
+            if ( getTag() ) {
+                e->setTag(getTag()->clone());
+            }
             return e;
         }
 
@@ -110,6 +113,9 @@ namespace mongo {
             for (size_t i = 0; i < _subs.size(); ++i) {
                 e->add(_subs[i]->shallowClone());
             }
+            if ( getTag() ) {
+                e->setTag(getTag()->clone());
+            }
             return e;
         }
 
@@ -133,6 +139,9 @@ namespace mongo {
         virtual SizeMatchExpression* shallowClone() const {
             SizeMatchExpression* e = new SizeMatchExpression();
             e->init(path(), _size);
+            if ( getTag() ) {
+                e->setTag(getTag()->clone());
+            }
             return e;
         }
 
@@ -166,6 +175,9 @@ namespace mongo {
                 e->add(reinterpret_cast<ArrayMatchingMatchExpression*>(
                     _list[i]->shallowClone()));
             }
+            if ( getTag() ) {
+                e->setTag(getTag()->clone());
+            }
             return e;
         }
 
diff --git a/src/mongo/db/query/canonical_query.cpp b/src/mongo/db/query/canonical_query.cpp
index 2c5f420078b..beca421af84 100644
--- a/src/mongo/db/query/canonical_query.cpp
+++ b/src/mongo/db/query/canonical_query.cpp
@@ -67,6 +67,13 @@ namespace mongo {
     void normalizeTree(MatchExpression* root) {
         // root->isLogical() is true now.  We care about AND and OR.  Negations currently scare us.
         if (MatchExpression::AND == root->matchType() || MatchExpression::OR == root->matchType()) {
+            // We could have AND of AND of AND.  Make sure we clean up our children before merging
+            // them.
+            // UNITTEST 11738048
+            for (size_t i = 0; i < root->numChildren(); ++i) {
+                normalizeTree(root->getChild(i));
+            }
+
             // If any of our children are of the same logical operator that we are, we remove the
             // child's children and append them to ourselves after we examine all children.
             vector<MatchExpression*> absorbedChildren;
@@ -78,9 +85,10 @@ namespace mongo {
                     for (size_t j = 0; j < child->numChildren(); ++j) {
                         absorbedChildren.push_back(child->getChild(j));
                     }
-                    child->getChildVector()->clear();
                     // TODO(opt): this is possibly n^2-ish
                     root->getChildVector()->erase(root->getChildVector()->begin() + i);
+                    child->getChildVector()->clear();
+                    // Note that this only works because we cleared the child's children
                     delete child;
                     // Don't increment 'i' as the current child 'i' used to be child 'i+1'
                 }
@@ -90,10 +98,6 @@ namespace mongo {
             }
 
             root->getChildVector()->insert(root->getChildVector()->end(), absorbedChildren.begin(), absorbedChildren.end());
-
-            for (size_t i = 0; i < root->numChildren(); ++i) {
-                normalizeTree(root->getChild(i));
-            }
         }
     }
 
diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp
index 5515562546d..d9ef7ee0be4 100644
--- a/src/mongo/db/query/index_bounds_builder.cpp
+++ b/src/mongo/db/query/index_bounds_builder.cpp
@@ -70,7 +70,8 @@ namespace mongo {
                 // BSONValue will be useful here.
                 BSONObj dataObj = objFromElement(node->getData());
 
-                if (dataObj.couldBeArray()) {
+                // UNITTEST 11738048
+                if (Array == dataObj.firstElement().type()) {
                     // XXX: build better bounds
                     warning() << "building lazy bounds for " << expr->toString() << endl;
                     interval = allValues();
diff --git a/src/mongo/db/query/new_find.cpp b/src/mongo/db/query/new_find.cpp
index e3b12dcdaa4..1045dc46a6c 100644
--- a/src/mongo/db/query/new_find.cpp
+++ b/src/mongo/db/query/new_find.cpp
@@ -547,6 +547,8 @@ namespace mongo {
             bob.append("n", numResults);
             BSONObj obj = bob.done();
             bb.appendBuf((void*)obj.objdata(), obj.objsize());
+            // The explain output is actually a result.
+            numResults = 1;
         }
 
         // Add the results from the query into the output buffer.
diff --git a/src/mongo/db/query/plan_enumerator.cpp b/src/mongo/db/query/plan_enumerator.cpp
index 8ca57d64208..754e760bd40 100644
--- a/src/mongo/db/query/plan_enumerator.cpp
+++ b/src/mongo/db/query/plan_enumerator.cpp
@@ -43,7 +43,7 @@ namespace mongo {
         //
 
         _done = false;
-        // cout << "enumerator received root: " << _root->toString() << endl;
+        cout << "enumerator received root: " << _root->toString() << endl;
 
         // If we fail to prepare, there's some OR clause or other index-requiring predicate that
         // doesn't have an index.
@@ -55,6 +55,8 @@ namespace mongo {
             _assignedCounter.resize(_leavesRequireIndex.size(), 0);
         }
 
+        cout << "prepped enum base tree " << _root->toString() << endl;
+
         //
         // Final initialization
         //
@@ -73,7 +75,7 @@ namespace mongo {
         // each getNext.
 
         for (size_t i = 0; i < _leavesRequireIndex.size(); ++i) {
-            // cout << "Leaf requires index: " << _leavesRequireIndex[i]->toString();
+            cout << "Leaf requires index: " << _leavesRequireIndex[i]->toString();
 
             // XXX: this is a slow lookup due to str stuff
             PredicateMap::const_iterator pmit = _pm.find(_leavesRequireIndex[i]->path().toString());
@@ -100,6 +102,8 @@ namespace mongo {
             _leavesRequireIndex[i]->setTag(tag);
         }
 
+        cout << "enum tag iter tree " << _root->toString() << endl;
+
         // Move to next index.
         size_t carry = 1;
         for (size_t i = 0; i < _assignedCounter.size(); ++i) {
diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp
index d616150849d..0026509400c 100644
--- a/src/mongo/db/query/query_planner.cpp
+++ b/src/mongo/db/query/query_planner.cpp
@@ -208,6 +208,12 @@ namespace mongo {
         IndexBoundsBuilder::translate(expr, direction, &oil, exact);
         // TODO(opt): this is a surplus copy, could build right in the original
         isn->bounds.fields.push_back(oil);
+
+        // Pad the remaining fields if it's a compound index.
+        while (it.more()) {
+            isn->bounds.fields.push_back(IndexBoundsBuilder::allValuesForField(it.next()));
+        }
+
         return isn;
     }
 
@@ -357,8 +363,10 @@ namespace mongo {
                 // This is the node we're about to return.
                 QuerySolutionNode* andResult;
 
+                // We must use an index for at least one child of the AND.
+                verify(theAnd->children.size() >= 1);
+
                 // Short-circuit: an AND of one child is just the child.
-                verify(theAnd->children.size() > 0);
                 if (theAnd->children.size() == 1) {
                     andResult = theAnd->children[0];
                     theAnd->children.clear();
@@ -434,7 +442,7 @@ namespace mongo {
                             // XXX: Do we delete the curChild-th child??
                         }
                         else {
-                            // We keep curChild in the AND for affixing later.
+                            // We keep curChild in the OR for affixing later as a filter.
                             ++curChild;
                         }
                         continue;
@@ -503,7 +511,7 @@ namespace mongo {
                             // XXX: Do we delete the curChild-th child??
                         }
                         else {
-                            // We keep curChild in the AND for affixing later.
+                            // We keep curChild in the OR for affixing later.
                             ++curChild;
                         }
                     }
@@ -514,22 +522,28 @@ namespace mongo {
                     theOr->children.push_back(currentScan.release());
                 }
 
-                // Unlike an AND, an OR cannot have filters hanging off of it.
-                // TODO: Should we verify?
-                if (root->numChildren() > 0) {
+                // Unlike an AND, an OR cannot have filters hanging off of it.  We stop processing
+                // when any of our children lack index tags.  If a node lacks an index tag it cannot
+                // be answered via an index.
+                if (curChild != root->numChildren()) {
                     warning() << "planner OR error, non-indexed branch.";
+                    // XXX: don't verify in prod environment but good for debugging now.
                     verify(0);
                     return NULL;
                 }
 
-                // Short-circuit: the OR of one child is just the child.
-                if (1 == theOr->children.size()) {
-                    QuerySolutionNode* child = theOr->children[0];
-                    theOr->children.clear();
-                    return child;
+                // If there are any nodes still attached to the OR, we can't answer them using the
+                // index, so we put a fetch with filter.
+                if (root->numChildren() > 0) {
+                    FetchNode* fetch = new FetchNode();
+                    fetch->filter = root;
+                    // takes ownership
+                    fetch->child.reset(theOr.release());
+                    return fetch;
+                }
+                else {
+                    return theOr.release();
                 }
-
-                return theOr.release();
             }
             else {
                 // NOT or NOR, can't do anything.
@@ -550,15 +564,6 @@ namespace mongo {
                 bool exact = false;
                 auto_ptr<IndexScanNode> isn(makeIndexScan(indexKeyPatterns[tag->index], root, &exact));
 
-                BSONObjIterator it(isn->indexKeyPattern);
-                // Skip first field, as we've filled out the bounds in makeIndexScan.
-                it.next();
-
-                // The rest is filler for any trailing fields.
-                while (it.more()) {
-                    isn->bounds.fields.push_back(IndexBoundsBuilder::allValuesForField(it.next()));
-                }
-
                 // If the bounds are exact, the set of documents that satisfy the predicate is exactly
                 // equal to the set of documents that the scan provides.
                 //
@@ -580,6 +585,7 @@ namespace mongo {
 
     QuerySolution* makeSolution(const CanonicalQuery& query, MatchExpression* taggedRoot,
                                 const vector<BSONObj>& indexKeyPatterns) {
+        cout << "about to build solntree from tagged tree:\n" << taggedRoot->toString() << endl;
         QuerySolutionNode* solnRoot = buildSolutionTree(taggedRoot, indexKeyPatterns);
         if (NULL == solnRoot) { return NULL; }
 
@@ -715,7 +721,7 @@ namespace mongo {
 
         // Figure out how useful each index is to each predicate.
         rateIndices(relevantIndices, &predicates);
-        // dumpPredMap(predicates);
+        dumpPredMap(predicates);
 
         //
         // Planner Section 2: Use predicate/index data to output sets of indices that we can use.
@@ -740,32 +746,18 @@ namespace mongo {
             // this plan.  If not, add a fetch.
             //
 
-            if (!query.getParsed().getProj().isEmpty()) {
-                warning() << "Can't deal with proj yet" << endl;
-            }
-            else {
-                // Note that we need a fetch, possibly tack on to end?
-            }
-
             //
             // Planner Section 5: Sort.  If we're sorting, see if the plan gives us a sort for
             // free.  If not, add a sort.
             //
 
-            if (!query.getParsed().getSort().isEmpty()) {
-            }
-            else {
-                // Note that we need a sort, possibly tack on to end?  may want to see if sort is
-                // covered and then tack fetch on after the covered sort...
-            }
-
             //
             // Planner Section 6: Final check.  Make sure that we build a valid solution.
             // TODO: Validate.
             //
 
             if (NULL != soln) {
-                // cout << "Adding solution:\n" << soln->toString() << endl;
+                cout << "Adding solution:\n" << soln->toString() << endl;
                 out->push_back(soln);
             }
         }
author	Hari Khalsa <hkhalsa@10gen.com>	2013-09-20 12:44:35 -0400
committer	Hari Khalsa <hkhalsa@10gen.com>	2013-09-23 10:38:13 -0400
commit	fc8201aec8acb3ee46fde6915702f1269b448c6c (patch)
tree	2334ca68d9f3031f0d4d44bbeeea5208dde10ca6 /src
parent	ca1fa9d82a3b180f623774f86ab4ee3a8a43b6e3 (diff)
download	mongo-fc8201aec8acb3ee46fde6915702f1269b448c6c.tar.gz