diff options
author | Hari Khalsa <hkhalsa@10gen.com> | 2013-09-18 11:57:21 -0400 |
---|---|---|
committer | Hari Khalsa <hkhalsa@10gen.com> | 2013-09-18 16:42:20 -0400 |
commit | 311ebc33f399b555309ee6eba04af8c605108529 (patch) | |
tree | be50e3cfea0c124759ded943a5d1c7cc5a79910b | |
parent | de25d5b966fae434669df47b41c076445d2303f6 (diff) | |
download | mongo-311ebc33f399b555309ee6eba04af8c605108529.tar.gz |
SERVER-10026 enumeration as strategies, bug fixes galore, build plans
44 files changed, 1587 insertions, 267 deletions
diff --git a/src/mongo/db/exec/SConscript b/src/mongo/db/exec/SConscript index 5a088b45855..9b974356f90 100644 --- a/src/mongo/db/exec/SConscript +++ b/src/mongo/db/exec/SConscript @@ -43,6 +43,8 @@ env.StaticLibrary( "limit.cpp", "merge_sort.cpp", "or.cpp", + "projection.cpp", + "query_projection.cpp", "skip.cpp", "sort.cpp", "stagedebug_cmd.cpp", diff --git a/src/mongo/db/exec/and_hash.cpp b/src/mongo/db/exec/and_hash.cpp index 8c1c495224c..d1a8ad988f6 100644 --- a/src/mongo/db/exec/and_hash.cpp +++ b/src/mongo/db/exec/and_hash.cpp @@ -59,12 +59,12 @@ namespace mongo { // We read the first child into our hash table. if (_shouldScanChildren && (0 == _currentChild)) { - return readFirstChild(); + return readFirstChild(out); } // Probing into our hash table with other children. if (_shouldScanChildren) { - return hashOtherChildren(); + return hashOtherChildren(out); } // Returning results. @@ -93,7 +93,7 @@ namespace mongo { } } - PlanStage::StageState AndHashStage::readFirstChild() { + PlanStage::StageState AndHashStage::readFirstChild(WorkingSetID* out) { verify(_currentChild == 0); WorkingSetID id; @@ -126,6 +126,7 @@ namespace mongo { } else { if (PlanStage::NEED_FETCH == childStatus) { + *out = id; ++_commonStats.needFetch; } else if (PlanStage::NEED_TIME == childStatus) { @@ -136,7 +137,7 @@ namespace mongo { } } - PlanStage::StageState AndHashStage::hashOtherChildren() { + PlanStage::StageState AndHashStage::hashOtherChildren(WorkingSetID* out) { verify(_currentChild > 0); WorkingSetID id; @@ -197,6 +198,7 @@ namespace mongo { } else { if (PlanStage::NEED_FETCH == childStatus) { + *out = id; ++_commonStats.needFetch; } else if (PlanStage::NEED_TIME == childStatus) { diff --git a/src/mongo/db/exec/and_hash.h b/src/mongo/db/exec/and_hash.h index e722312b9fb..48a1667c1f8 100644 --- a/src/mongo/db/exec/and_hash.h +++ b/src/mongo/db/exec/and_hash.h @@ -67,8 +67,8 @@ namespace mongo { virtual PlanStageStats* getStats(); private: - StageState readFirstChild(); - StageState hashOtherChildren(); + StageState readFirstChild(WorkingSetID* out); + StageState hashOtherChildren(WorkingSetID* out); // Not owned by us. WorkingSet* _ws; diff --git a/src/mongo/db/exec/and_sorted.cpp b/src/mongo/db/exec/and_sorted.cpp index 0ffdb43b326..c3241eb8e4b 100644 --- a/src/mongo/db/exec/and_sorted.cpp +++ b/src/mongo/db/exec/and_sorted.cpp @@ -60,7 +60,7 @@ namespace mongo { // If we don't have any nodes that we're work()-ing until they hit a certain DiskLoc... if (0 == _workingTowardRep.size()) { // Get a target DiskLoc. - return getTargetLoc(); + return getTargetLoc(out); } // Move nodes toward the target DiskLoc. @@ -69,7 +69,7 @@ namespace mongo { return moveTowardTargetLoc(out); } - PlanStage::StageState AndSortedStage::getTargetLoc() { + PlanStage::StageState AndSortedStage::getTargetLoc(WorkingSetID* out) { verify(numeric_limits<size_t>::max() == _targetNode); verify(WorkingSet::INVALID_ID == _targetId); verify(DiskLoc() == _targetLoc); @@ -104,6 +104,7 @@ namespace mongo { } else { if (PlanStage::NEED_FETCH == state) { + *out = id; ++_commonStats.needFetch; } else if (PlanStage::NEED_TIME == state) { @@ -200,6 +201,7 @@ namespace mongo { } else { if (PlanStage::NEED_FETCH == state) { + *out = id; ++_commonStats.needFetch; } else if (PlanStage::NEED_TIME == state) { diff --git a/src/mongo/db/exec/and_sorted.h b/src/mongo/db/exec/and_sorted.h index 3e91ddedf16..40b2b1cc3eb 100644 --- a/src/mongo/db/exec/and_sorted.h +++ b/src/mongo/db/exec/and_sorted.h @@ -69,7 +69,7 @@ namespace mongo { private: // Find a node to AND against. - PlanStage::StageState getTargetLoc(); + PlanStage::StageState getTargetLoc(WorkingSetID* out); // Move a child which hasn't advanced to the target node forward. // Returns the target node in 'out' if all children successfully advance to it. diff --git a/src/mongo/db/exec/collection_scan_common.h b/src/mongo/db/exec/collection_scan_common.h index a0ad324bc82..880e7b56fc7 100644 --- a/src/mongo/db/exec/collection_scan_common.h +++ b/src/mongo/db/exec/collection_scan_common.h @@ -34,8 +34,8 @@ namespace mongo { struct CollectionScanParams { enum Direction { - FORWARD, - BACKWARD, + FORWARD = 1, + BACKWARD = -1, }; CollectionScanParams() : start(DiskLoc()), diff --git a/src/mongo/db/exec/fetch.cpp b/src/mongo/db/exec/fetch.cpp index 77bc122558d..0e1f5b7257d 100644 --- a/src/mongo/db/exec/fetch.cpp +++ b/src/mongo/db/exec/fetch.cpp @@ -73,6 +73,7 @@ namespace mongo { // If we asked our parent for a page-in last time work(...) was called, finish the fetch. if (WorkingSet::INVALID_ID != _idBeingPagedIn) { + cout << "fetch completed, id being paged on " << _idBeingPagedIn << endl; return fetchCompleted(out); } @@ -115,6 +116,7 @@ namespace mongo { } else { if (PlanStage::NEED_FETCH == status) { + *out = id; ++_commonStats.needFetch; } else if (PlanStage::NEED_TIME == status) { diff --git a/src/mongo/db/exec/index_scan.cpp b/src/mongo/db/exec/index_scan.cpp index 54a70b1ca83..859842521fc 100644 --- a/src/mongo/db/exec/index_scan.cpp +++ b/src/mongo/db/exec/index_scan.cpp @@ -104,12 +104,15 @@ namespace mongo { _checker.reset(new IndexBoundsChecker(&_params.bounds, _descriptor->keyPattern(), _params.direction)); + + int nFields = _descriptor->keyPattern().nFields(); vector<const BSONElement*> key; vector<bool> inc; + key.resize(nFields); + inc.resize(nFields); _checker->getStartKey(&key, &inc); _btreeCursor->seek(key, inc); - int nFields = _descriptor->keyPattern().nFields(); _keyElts.resize(nFields); _keyEltsInc.resize(nFields); } @@ -121,8 +124,12 @@ namespace mongo { // Note that we're not calling next() here. } else { - _indexCursor->next(); - checkEnd(); + // You're allowed to call work() even if the stage is EOF, but we can't call + // _indexCursor->next() if we're EOF. + if (!isEOF()) { + _indexCursor->next(); + checkEnd(); + } } if (isEOF()) { return PlanStage::IS_EOF; } diff --git a/src/mongo/db/exec/index_scan.h b/src/mongo/db/exec/index_scan.h index 5bb58e82b2f..eded608e581 100644 --- a/src/mongo/db/exec/index_scan.h +++ b/src/mongo/db/exec/index_scan.h @@ -31,6 +31,7 @@ #include "mongo/db/exec/plan_stage.h" #include "mongo/db/diskloc.h" #include "mongo/db/index/btree_index_cursor.h" +#include "mongo/db/index/index_access_method.h" #include "mongo/db/jsobj.h" #include "mongo/db/matcher/expression.h" #include "mongo/db/query/index_bounds.h" diff --git a/src/mongo/db/exec/limit.cpp b/src/mongo/db/exec/limit.cpp index 4fef72eed4e..e060e3f4835 100644 --- a/src/mongo/db/exec/limit.cpp +++ b/src/mongo/db/exec/limit.cpp @@ -54,6 +54,7 @@ namespace mongo { } else { if (PlanStage::NEED_FETCH == status) { + *out = id; ++_commonStats.needFetch; } else if (PlanStage::NEED_TIME == status) { diff --git a/src/mongo/db/exec/merge_sort.cpp b/src/mongo/db/exec/merge_sort.cpp index 085d54a29cb..f3e8ee24208 100644 --- a/src/mongo/db/exec/merge_sort.cpp +++ b/src/mongo/db/exec/merge_sort.cpp @@ -122,6 +122,7 @@ namespace mongo { } else { if (PlanStage::NEED_FETCH == code) { + *out = id; ++_commonStats.needFetch; } else if (PlanStage::NEED_TIME == code) { diff --git a/src/mongo/db/exec/or.cpp b/src/mongo/db/exec/or.cpp index 364a8344cab..0cb762be433 100644 --- a/src/mongo/db/exec/or.cpp +++ b/src/mongo/db/exec/or.cpp @@ -109,6 +109,7 @@ namespace mongo { } else { if (PlanStage::NEED_FETCH == childStatus) { + *out = id; ++_commonStats.needFetch; } else if (PlanStage::NEED_TIME == childStatus) { diff --git a/src/mongo/db/exec/plan_stage.h b/src/mongo/db/exec/plan_stage.h index b761c836c54..ff6dc6d5890 100644 --- a/src/mongo/db/exec/plan_stage.h +++ b/src/mongo/db/exec/plan_stage.h @@ -136,6 +136,26 @@ namespace mongo { NEED_FETCH, }; + static string stateStr(const StageState& state) { + if (ADVANCED == state) { + return "ADVANCED"; + } + else if (IS_EOF == state) { + return "IS_EOF"; + } + else if (NEED_TIME == state) { + return "NEED_TIME"; + } + else if (NEED_FETCH == state) { + return "NEED_FETCH"; + } + else { + verify(FAILURE == state); + return "FAILURE"; + } + } + + /** * Perform a unit of work on the query. Ask the stage to produce the next unit of output. * Stage returns StageState::ADVANCED if *out is set to the next unit of output. Otherwise, diff --git a/src/mongo/db/exec/projection.cpp b/src/mongo/db/exec/projection.cpp new file mode 100644 index 00000000000..f7e0edde052 --- /dev/null +++ b/src/mongo/db/exec/projection.cpp @@ -0,0 +1,87 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/diskloc.h" +#include "mongo/db/exec/plan_stage.h" +#include "mongo/db/exec/projection.h" +#include "mongo/db/jsobj.h" +#include "mongo/db/matcher/expression.h" + +namespace mongo { + + ProjectionStage::ProjectionStage(QueryProjection* projection, WorkingSet* ws, PlanStage* child, + const MatchExpression* filter) + : _projection(projection), _ws(ws), _child(child), _filter(filter) { } + + ProjectionStage::~ProjectionStage() { } + + bool ProjectionStage::isEOF() { return _child->isEOF(); } + + PlanStage::StageState ProjectionStage::work(WorkingSetID* out) { + ++_commonStats.works; + + if (isEOF()) { return PlanStage::IS_EOF; } + WorkingSetID id; + StageState status = _child->work(&id); + + if (PlanStage::ADVANCED == status) { + WorkingSetMember* member = _ws->get(id); + Status status = _projection->project(member); + if (!status.isOK()) { return PlanStage::FAILURE; } + *out = id; + } + else if (PlanStage::NEED_FETCH == status) { + *out = id; + } + + return status; + } + + void ProjectionStage::prepareToYield() { + ++_commonStats.yields; + _child->prepareToYield(); + } + + void ProjectionStage::recoverFromYield() { + ++_commonStats.unyields; + _child->recoverFromYield(); + } + + void ProjectionStage::invalidate(const DiskLoc& dl) { + ++_commonStats.invalidates; + _child->invalidate(dl); + } + + PlanStageStats* ProjectionStage::getStats() { + _commonStats.isEOF = isEOF(); + auto_ptr<PlanStageStats> ret(new PlanStageStats(_commonStats)); + ret->children.push_back(_child->getStats()); + return ret.release(); + } + +} // namespace mongo diff --git a/src/mongo/db/exec/projection.h b/src/mongo/db/exec/projection.h new file mode 100644 index 00000000000..ade2bbfab84 --- /dev/null +++ b/src/mongo/db/exec/projection.h @@ -0,0 +1,71 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/diskloc.h" +#include "mongo/db/exec/plan_stage.h" +#include "mongo/db/exec/query_projection.h" +#include "mongo/db/jsobj.h" +#include "mongo/db/matcher/expression.h" + +namespace mongo { + + /** + * This stage computes a projection. + */ + class ProjectionStage : public PlanStage { + public: + ProjectionStage(QueryProjection* projection, WorkingSet* ws, PlanStage* child, + const MatchExpression* filter); + virtual ~ProjectionStage(); + + virtual bool isEOF(); + virtual StageState work(WorkingSetID* out); + + virtual void prepareToYield(); + virtual void recoverFromYield(); + virtual void invalidate(const DiskLoc& dl); + + PlanStageStats* getStats(); + + private: + scoped_ptr<QueryProjection> _projection; + + // _ws is not owned by us. + WorkingSet* _ws; + scoped_ptr<PlanStage> _child; + + // The filter is not owned by us. + const MatchExpression* _filter; + + // Stats + CommonStats _commonStats; + }; + +} // namespace mongo diff --git a/src/mongo/db/query/query_projection.cpp b/src/mongo/db/exec/query_projection.cpp index 5936761120f..843961abadb 100644 --- a/src/mongo/db/query/query_projection.cpp +++ b/src/mongo/db/exec/query_projection.cpp @@ -26,7 +26,7 @@ * it in the license file. */ -#include "mongo/db/query/query_projection.h" +#include "mongo/db/exec/query_projection.h" #include "mongo/db/exec/working_set.h" #include "mongo/db/jsobj.h" @@ -43,11 +43,11 @@ namespace mongo { public: virtual ~InclExclProjection() { } - Status project(const WorkingSetMember& wsm, BSONObj* out) { + Status project(WorkingSetMember* wsm) { BSONObjBuilder bob; if (_includeID) { BSONElement elt; - if (!wsm.getFieldDotted("_id", &elt)) { + if (!wsm->getFieldDotted("_id", &elt)) { return Status(ErrorCodes::BadValue, "Couldn't get _id field in proj"); } bob.append(elt); @@ -58,7 +58,8 @@ namespace mongo { for (unordered_set<string>::const_iterator it = _fields.begin(); it != _fields.end(); ++it) { BSONElement elt; - if (!wsm.getFieldDotted(*it, &elt)) { + if (!wsm->getFieldDotted(*it, &elt)) { + // XXX: needs to be propagated better return Status(ErrorCodes::BadValue, "no field " + *it + " in wsm to proj"); } bob.append(elt); @@ -66,20 +67,25 @@ namespace mongo { } else { // We want stuff NOT in _fields. This can't be covered, so we expect an obj. - if (!wsm.hasObj()) { + if (!wsm->hasObj()) { return Status(ErrorCodes::BadValue, "exclusion specified for projection but no obj to iter over"); } - BSONObjIterator it(wsm.obj); + BSONObjIterator it(wsm->obj); while (it.more()) { BSONElement elt = it.next(); - if (_fields.end() == _fields.find(elt.fieldName())) { - bob.append(elt); + if (!mongoutils::str::equals("_id", elt.fieldName())) { + if (_fields.end() == _fields.find(elt.fieldName())) { + bob.append(elt); + } } } } - *out = bob.obj(); + wsm->state = WorkingSetMember::OWNED_OBJ; + wsm->obj = bob.obj(); + wsm->keyData.clear(); + wsm->loc = DiskLoc(); return Status::OK(); } diff --git a/src/mongo/db/query/query_projection.h b/src/mongo/db/exec/query_projection.h index 9b35222c06a..5a7dcc83e3d 100644 --- a/src/mongo/db/query/query_projection.h +++ b/src/mongo/db/exec/query_projection.h @@ -42,9 +42,9 @@ namespace mongo { virtual ~QueryProjection() { } /** - * Compute the projection over the WSM. Place the output in 'out'. + * Compute the projection over the WSM. Place the output in the provided WSM. */ - virtual Status project(const WorkingSetMember& wsm, BSONObj* out) = 0; + virtual Status project(WorkingSetMember* wsm) = 0; /** * This projection handles the inclusion/exclusion syntax of the .find() command. diff --git a/src/mongo/db/exec/skip.cpp b/src/mongo/db/exec/skip.cpp index 36d89eac7cc..7a62bac5479 100644 --- a/src/mongo/db/exec/skip.cpp +++ b/src/mongo/db/exec/skip.cpp @@ -61,6 +61,7 @@ namespace mongo { } else { if (PlanStage::NEED_FETCH == status) { + *out = id; ++_commonStats.needFetch; } else if (PlanStage::NEED_TIME == status) { diff --git a/src/mongo/db/exec/sort.cpp b/src/mongo/db/exec/sort.cpp index c8bff563f2d..cd7523c5826 100644 --- a/src/mongo/db/exec/sort.cpp +++ b/src/mongo/db/exec/sort.cpp @@ -118,6 +118,7 @@ namespace mongo { } else { if (PlanStage::NEED_FETCH == code) { + *out = id; ++_commonStats.needFetch; } else if (PlanStage::NEED_TIME == code) { diff --git a/src/mongo/db/matcher/expression.h b/src/mongo/db/matcher/expression.h index 6b765761ae8..2a4d3adbb39 100644 --- a/src/mongo/db/matcher/expression.h +++ b/src/mongo/db/matcher/expression.h @@ -90,6 +90,11 @@ namespace mongo { virtual MatchExpression* getChild( size_t i ) const { return NULL; } /** + * Get all the children of a node + */ + virtual std::vector<MatchExpression*>* getChildVector() { return NULL; } + + /** * Get the path of the leaf. Returns StringData() if there is no path (node is logical). */ virtual const StringData path() const { return StringData(); } diff --git a/src/mongo/db/matcher/expression_array.cpp b/src/mongo/db/matcher/expression_array.cpp index 33c6814d6ce..5dcd3d3dc1e 100644 --- a/src/mongo/db/matcher/expression_array.cpp +++ b/src/mongo/db/matcher/expression_array.cpp @@ -220,7 +220,8 @@ namespace mongo { if ( _list.size() == 0 ) return false; for ( unsigned i = 0; i < _list.size(); i++ ) { - if ( !_list[i]->matchesArray( anArray, NULL ) ) + if ( !static_cast<ArrayMatchingMatchExpression*>(_list[i])->matchesArray( + anArray, NULL ) ) return false; } return true; diff --git a/src/mongo/db/matcher/expression_array.h b/src/mongo/db/matcher/expression_array.h index dbc06e46059..36eb861fd42 100644 --- a/src/mongo/db/matcher/expression_array.h +++ b/src/mongo/db/matcher/expression_array.h @@ -86,6 +86,7 @@ namespace mongo { virtual void debugString( StringBuilder& debug, int level ) const; virtual size_t numChildren() const { return 1; } + virtual MatchExpression* getChild( size_t i ) const { return _sub.get(); } private: @@ -115,6 +116,7 @@ namespace mongo { virtual void debugString( StringBuilder& debug, int level ) const; virtual size_t numChildren() const { return _subs.size(); } + virtual MatchExpression* getChild( size_t i ) const { return _subs[i]; } private: @@ -179,7 +181,10 @@ namespace mongo { virtual bool equivalent( const MatchExpression* other ) const; virtual size_t numChildren() const { return _list.size(); } - virtual ArrayMatchingMatchExpression* getChild( size_t i ) const { return _list[i]; } + + virtual MatchExpression* getChild( size_t i ) const { return _list[i]; } + + virtual std::vector<MatchExpression*>* getChildVector() { return &_list; } const StringData path() const { return _path; } @@ -190,7 +195,7 @@ namespace mongo { StringData _path; ElementPath _elementPath; - std::vector<ArrayMatchingMatchExpression*> _list; + std::vector<MatchExpression*> _list; }; } diff --git a/src/mongo/db/matcher/expression_tree.h b/src/mongo/db/matcher/expression_tree.h index 5536b96d436..31f2a513057 100644 --- a/src/mongo/db/matcher/expression_tree.h +++ b/src/mongo/db/matcher/expression_tree.h @@ -57,8 +57,11 @@ namespace mongo { void clearAndRelease() { _expressions.clear(); } virtual size_t numChildren() const { return _expressions.size(); } + virtual MatchExpression* getChild( size_t i ) const { return _expressions[i]; } + virtual std::vector<MatchExpression*>* getChildVector() { return &_expressions; } + bool equivalent( const MatchExpression* other ) const; protected: @@ -190,6 +193,7 @@ namespace mongo { bool equivalent( const MatchExpression* other ) const; virtual size_t numChildren() const { return 1; } + virtual MatchExpression* getChild( size_t i ) const { return _exp.get(); } virtual void resetTag() { diff --git a/src/mongo/db/query/SConscript b/src/mongo/db/query/SConscript index 14ae82418e7..223fc274257 100644 --- a/src/mongo/db/query/SConscript +++ b/src/mongo/db/query/SConscript @@ -7,6 +7,7 @@ env.StaticLibrary( source=[ "canonical_query.cpp", "index_bounds_builder.cpp", + "index_tag.cpp", "lite_parsed_query.cpp", "plan_enumerator.cpp", "query_planner.cpp", diff --git a/src/mongo/db/query/canonical_query.cpp b/src/mongo/db/query/canonical_query.cpp index a012625dfb4..2c5f420078b 100644 --- a/src/mongo/db/query/canonical_query.cpp +++ b/src/mongo/db/query/canonical_query.cpp @@ -64,6 +64,40 @@ namespace mongo { return Status::OK(); } + void normalizeTree(MatchExpression* root) { + // root->isLogical() is true now. We care about AND and OR. Negations currently scare us. + if (MatchExpression::AND == root->matchType() || MatchExpression::OR == root->matchType()) { + // If any of our children are of the same logical operator that we are, we remove the + // child's children and append them to ourselves after we examine all children. + vector<MatchExpression*> absorbedChildren; + + for (size_t i = 0; i < root->numChildren();) { + MatchExpression* child = root->getChild(i); + if (child->matchType() == root->matchType()) { + // AND of an AND or OR of an OR. Absorb child's children into ourself. + for (size_t j = 0; j < child->numChildren(); ++j) { + absorbedChildren.push_back(child->getChild(j)); + } + child->getChildVector()->clear(); + // TODO(opt): this is possibly n^2-ish + root->getChildVector()->erase(root->getChildVector()->begin() + i); + delete child; + // Don't increment 'i' as the current child 'i' used to be child 'i+1' + } + else { + ++i; + } + } + + root->getChildVector()->insert(root->getChildVector()->end(), absorbedChildren.begin(), absorbedChildren.end()); + + for (size_t i = 0; i < root->numChildren(); ++i) { + normalizeTree(root->getChild(i)); + } + } + } + + Status CanonicalQuery::init(LiteParsedQuery* lpq) { _pq.reset(lpq); @@ -71,7 +105,10 @@ namespace mongo { StatusWithMatchExpression swme = MatchExpressionParser::parse(_pq->getFilter()); if (!swme.isOK()) { return swme.getStatus(); } - _root.reset(swme.getValue()); + MatchExpression* root = swme.getValue(); + normalizeTree(root); + _root.reset(root); + return Status::OK(); } diff --git a/src/mongo/db/query/index_bounds.cpp b/src/mongo/db/query/index_bounds.cpp index bd2ded64e00..ec72bbbc8c3 100644 --- a/src/mongo/db/query/index_bounds.cpp +++ b/src/mongo/db/query/index_bounds.cpp @@ -166,6 +166,9 @@ namespace mongo { void IndexBoundsChecker::getStartKey(vector<const BSONElement*>* valueOut, vector<bool>* inclusiveOut) { + verify(valueOut->size() == _bounds->fields.size()); + verify(inclusiveOut->size() == _bounds->fields.size()); + for (size_t i = 0; i < _bounds->fields.size(); ++i) { (*valueOut)[i] = &_bounds->fields[i].intervals[0].start; (*inclusiveOut)[i] = _bounds->fields[i].intervals[0].startInclusive; diff --git a/src/mongo/db/query/index_bounds.h b/src/mongo/db/query/index_bounds.h index a7a126c31e8..c1c164b4173 100644 --- a/src/mongo/db/query/index_bounds.h +++ b/src/mongo/db/query/index_bounds.h @@ -54,9 +54,29 @@ namespace mongo { * interpret. Previously known as FieldRangeVector. */ struct IndexBounds { + IndexBounds() : isSimpleRange(false) { } + // For each indexed field, the values that the field is allowed to take on. vector<OrderedIntervalList> fields; + /** + * The provided oil is AND-related to the bounds. If we have an existing oil over the + * field, intersect. Otherwise, add it to the right location in 'fields'. + */ + void joinAnd(const OrderedIntervalList& oil, const BSONObj& keyPattern) { + // XXX XXX + verify(0); + } + + /** + * The provided oil is OR-related to the bounds. If we have an existing oil over the + * field, find the union. Otherwise, add it to the right location in 'fields'. + */ + void joinOr(const OrderedIntervalList& oil, const BSONObj& keyPattern) { + // XXX XXX + verify(0); + } + // Debugging check. // We must have as many fields the key pattern does. // The fields must be oriented in the direction we'd encounter them given the indexing diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp index 147846f3614..5515562546d 100644 --- a/src/mongo/db/query/index_bounds_builder.cpp +++ b/src/mongo/db/query/index_bounds_builder.cpp @@ -50,70 +50,94 @@ namespace mongo { return oil; } + Interval IndexBoundsBuilder::allValues() { + BSONObjBuilder bob; + bob.appendMinKey(""); + bob.appendMaxKey(""); + return makeRangeInterval(bob.obj(), true, true); + } + // static - void IndexBoundsBuilder::translate(const LeafMatchExpression* expr, const BSONElement& idxElt, + void IndexBoundsBuilder::translate(const MatchExpression* expr, int direction, OrderedIntervalList* oilOut, bool* exactOut) { Interval interval; bool exact = false; - if (MatchExpression::EQ == expr->matchType()) { - const EqualityMatchExpression* node = static_cast<const EqualityMatchExpression*>(expr); - // We have to copy the data out of the parse tree and stuff it into the index bounds. - // BSONValue will be useful here. - // XXX: This is wrong when idxElt is an array. - // XXX: equality with arrays is weird, see queryutil.cpp:203 - BSONObj dataObj = objFromElement(node->getData()); - verify(dataObj.isOwned()); - interval = makePointInterval(dataObj); - // TODO ALBERTO - // XXX: This is false when idxElt is an array. - exact = true; - } - else if (MatchExpression::LTE == expr->matchType()) { - const LTEMatchExpression* node = static_cast<const LTEMatchExpression*>(expr); - BSONObjBuilder bob; - bob.appendMinKey(""); - bob.append(node->getData()); - BSONObj dataObj = bob.obj(); - verify(dataObj.isOwned()); - interval = makeRangeInterval(dataObj, true, true); - exact = true; - } - else if (MatchExpression::LT == expr->matchType()) { - const LTMatchExpression* node = static_cast<const LTMatchExpression*>(expr); - BSONObjBuilder bob; - bob.appendMinKey(""); - bob.append(node->getData()); - BSONObj dataObj = bob.obj(); - verify(dataObj.isOwned()); - interval = makeRangeInterval(dataObj, true, false); - exact = true; - } - else if (MatchExpression::GT == expr->matchType()) { - const GTMatchExpression* node = static_cast<const GTMatchExpression*>(expr); - BSONObjBuilder bob; - bob.append(node->getData()); - bob.appendMaxKey(""); - BSONObj dataObj = bob.obj(); - verify(dataObj.isOwned()); - interval = makeRangeInterval(dataObj, false, true); - exact = true; - } - else if (MatchExpression::GTE == expr->matchType()) { - const GTEMatchExpression* node = static_cast<const GTEMatchExpression*>(expr); - BSONObjBuilder bob; - bob.append(node->getData()); - bob.appendMaxKey(""); - BSONObj dataObj = bob.obj(); - verify(dataObj.isOwned()); - interval = makeRangeInterval(dataObj, true, true); - exact = true; + if (expr->isLeaf()) { + if (MatchExpression::EQ == expr->matchType()) { + const EqualityMatchExpression* node = static_cast<const EqualityMatchExpression*>(expr); + // We have to copy the data out of the parse tree and stuff it into the index bounds. + // BSONValue will be useful here. + BSONObj dataObj = objFromElement(node->getData()); + + if (dataObj.couldBeArray()) { + // XXX: build better bounds + warning() << "building lazy bounds for " << expr->toString() << endl; + interval = allValues(); + exact = false; + } + else { + verify(dataObj.isOwned()); + interval = makePointInterval(dataObj); + exact = true; + } + } + else if (MatchExpression::LTE == expr->matchType()) { + const LTEMatchExpression* node = static_cast<const LTEMatchExpression*>(expr); + BSONObjBuilder bob; + bob.appendMinKey(""); + bob.append(node->getData()); + BSONObj dataObj = bob.obj(); + verify(dataObj.isOwned()); + interval = makeRangeInterval(dataObj, true, true); + exact = true; + } + else if (MatchExpression::LT == expr->matchType()) { + const LTMatchExpression* node = static_cast<const LTMatchExpression*>(expr); + BSONObjBuilder bob; + bob.appendMinKey(""); + bob.append(node->getData()); + BSONObj dataObj = bob.obj(); + verify(dataObj.isOwned()); + interval = makeRangeInterval(dataObj, true, false); + exact = true; + } + else if (MatchExpression::GT == expr->matchType()) { + const GTMatchExpression* node = static_cast<const GTMatchExpression*>(expr); + BSONObjBuilder bob; + bob.append(node->getData()); + bob.appendMaxKey(""); + BSONObj dataObj = bob.obj(); + verify(dataObj.isOwned()); + interval = makeRangeInterval(dataObj, false, true); + exact = true; + } + else if (MatchExpression::GTE == expr->matchType()) { + const GTEMatchExpression* node = static_cast<const GTEMatchExpression*>(expr); + BSONObjBuilder bob; + bob.append(node->getData()); + bob.appendMaxKey(""); + BSONObj dataObj = bob.obj(); + verify(dataObj.isOwned()); + interval = makeRangeInterval(dataObj, true, true); + exact = true; + } + else { + // XXX: build better bounds + warning() << "building lazy bounds for " << expr->toString() << endl; + interval = allValues(); + exact = false; + } } else { - verify(0); + // XXX: build better bounds + verify(expr->isArray()); + warning() << "building lazy bounds for " << expr->toString() << endl; + interval = allValues(); + exact = false; } - if (-1 == idxElt.number()) { + if (-1 == direction) { reverseInterval(&interval); } diff --git a/src/mongo/db/query/index_bounds_builder.h b/src/mongo/db/query/index_bounds_builder.h index 85549f83555..96c8ea57f78 100644 --- a/src/mongo/db/query/index_bounds_builder.h +++ b/src/mongo/db/query/index_bounds_builder.h @@ -46,10 +46,13 @@ namespace mongo { static OrderedIntervalList allValuesForField(const BSONElement& elt); /** - * Turn the LeafMatchExpression in 'expr' into a set of index bounds. The field that 'expr' - * is concerned with is indexed according to 'idxElt'. + * Turn the MatchExpression in 'expr' into a set of index bounds. The field that 'expr' + * is concerned with is indexed in the direction 'direction'. + * + * The expression must be a predicate over one field. That is, expr->isLeaf() or + * expr->isArray() must be true, and expr->isLogical() must be false. */ - static void translate(const LeafMatchExpression* expr, const BSONElement& idxElt, + static void translate(const MatchExpression* expr, int direction, OrderedIntervalList* oilOut, bool* exactOut); private: @@ -79,6 +82,8 @@ namespace mongo { * Swap start/end in the provided interval. */ static void reverseInterval(Interval* ival); + + static Interval allValues(); }; } // namespace mongo diff --git a/src/mongo/db/query/index_tag.cpp b/src/mongo/db/query/index_tag.cpp new file mode 100644 index 00000000000..9742d14c1eb --- /dev/null +++ b/src/mongo/db/query/index_tag.cpp @@ -0,0 +1,61 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "mongo/db/query/index_tag.h" + +#include <algorithm> +#include <limits> + +namespace mongo { + + const size_t IndexTag::kNoIndex = std::numeric_limits<size_t>::max(); + + void tagForSort(MatchExpression* tree) { + if (!tree->isLeaf()) { + size_t myTagValue = IndexTag::kNoIndex; + for (size_t i = 0; i < tree->numChildren(); ++i) { + MatchExpression* child = tree->getChild(i); + tagForSort(child); + IndexTag* childTag = static_cast<IndexTag*>(child->getTag()); + if (NULL != childTag) { + myTagValue = std::min(myTagValue, childTag->index); + } + } + if (myTagValue != IndexTag::kNoIndex) { + tree->setTag(new IndexTag(myTagValue)); + } + } + } + + bool TagComparison(const MatchExpression* lhs, const MatchExpression* rhs) { + IndexTag* lhsTag = static_cast<IndexTag*>(lhs->getTag()); + size_t lhsValue = (NULL == lhsTag) ? IndexTag::kNoIndex : lhsTag->index; + IndexTag* rhsTag = static_cast<IndexTag*>(rhs->getTag()); + size_t rhsValue = (NULL == rhsTag) ? IndexTag::kNoIndex : rhsTag->index; + return lhsValue < rhsValue; + } + + void sortUsingTags(MatchExpression* tree) { + for (size_t i = 0; i < tree->numChildren(); ++i) { + sortUsingTags(tree->getChild(i)); + } + std::vector<MatchExpression*>* children = tree->getChildVector(); + if (NULL != children) { + std::sort(children->begin(), children->end(), TagComparison); + } + } + +} // namespace mongo diff --git a/src/mongo/db/query/index_tag.h b/src/mongo/db/query/index_tag.h index ee8791c38ba..51178af9149 100644 --- a/src/mongo/db/query/index_tag.h +++ b/src/mongo/db/query/index_tag.h @@ -24,7 +24,9 @@ namespace mongo { // XXX class IndexTag : public MatchExpression::TagData { public: + static const size_t kNoIndex; + IndexTag() : index(kNoIndex) {} IndexTag(size_t i) : index(i) { } virtual ~IndexTag() { } @@ -41,4 +43,16 @@ namespace mongo { size_t index; }; + /** + * Tags each node of the tree with the lowest numbered indexed that the sub-tree + * rooted at that node uses. + */ + void tagForSort(MatchExpression* tree); + + /** + * Then sorts the tree using its IndexTag()s. The outcome is that nodes that use the same index + * are adjacent to one another. + */ + void sortUsingTags(MatchExpression* tree); + } // namespace mongo diff --git a/src/mongo/db/query/internal_plans.h b/src/mongo/db/query/internal_plans.h index 12bb632c571..71e7b1a9fbf 100644 --- a/src/mongo/db/query/internal_plans.h +++ b/src/mongo/db/query/internal_plans.h @@ -34,7 +34,6 @@ #include "mongo/db/exec/index_scan.h" #include "mongo/db/index/catalog_hack.h" #include "mongo/db/query/internal_runner.h" -#include "mongo/db/query/query_projection.h" namespace mongo { diff --git a/src/mongo/db/query/multi_plan_runner.cpp b/src/mongo/db/query/multi_plan_runner.cpp index 23c23407efa..1ab3b2efabb 100644 --- a/src/mongo/db/query/multi_plan_runner.cpp +++ b/src/mongo/db/query/multi_plan_runner.cpp @@ -36,7 +36,7 @@ namespace mongo { MultiPlanRunner::MultiPlanRunner(CanonicalQuery* query) - : _failure(false), _policy(Runner::YIELD_MANUAL) { } + : _failure(false), _policy(Runner::YIELD_MANUAL), _query(query) { } MultiPlanRunner::~MultiPlanRunner() { for (size_t i = 0; i < _candidates.size(); ++i) { @@ -186,9 +186,11 @@ namespace mongo { _candidates[bestChild].root)); _bestPlan->setYieldPolicy(_policy); _alreadyProduced = _candidates[bestChild].results; - // TODO: Normally we'd hand this to the cache, who would own it. - delete _candidates[bestChild].solution; + _bestSolution.reset(_candidates[bestChild].solution); + // XXX + // cout << "Winning solution:\n" << _bestSolution->toString() << endl; + // TODO: // Store the choice we just made in the cache. // QueryPlanCache* cache = PlanCache::get(somenamespace); // cache->add(_query, *_candidates[bestChild]->solution, decision->bestPlanStats); @@ -238,7 +240,7 @@ namespace mongo { if (NULL != _yieldPolicy.get()) { saveState(); _yieldPolicy->yield(record); - if (_failure) { return Runner::RUNNER_DEAD; } + if (_failure) { return false; } restoreState(); } else { diff --git a/src/mongo/db/query/multi_plan_runner.h b/src/mongo/db/query/multi_plan_runner.h index aa5c8c6c631..6f1603c25fa 100644 --- a/src/mongo/db/query/multi_plan_runner.h +++ b/src/mongo/db/query/multi_plan_runner.h @@ -112,6 +112,8 @@ namespace mongo { scoped_ptr<PlanExecutor> _bestPlan; // ...and any results it produced while working toward winning. std::queue<WorkingSetID> _alreadyProduced; + // ...and the solution, for caching. + scoped_ptr<QuerySolution> _bestSolution; // Candidate plans. vector<CandidatePlan> _candidates; diff --git a/src/mongo/db/query/plan_enumerator.cpp b/src/mongo/db/query/plan_enumerator.cpp index 397da78c73f..8ca57d64208 100644 --- a/src/mongo/db/query/plan_enumerator.cpp +++ b/src/mongo/db/query/plan_enumerator.cpp @@ -38,51 +38,198 @@ namespace mongo { return Status(ErrorCodes::BadValue, "Cannot enumerate indexed plans with no indices"); } - // See "navigation state" assumptions on this class's header. - const set<RelevantIndex>& indexes = _pm.begin()->second.relevant; - const vector<MatchExpression*>& nodes = _pm.begin()->second.nodes; + // + // Legacy Strategy initialization + // - verify(indexes.size()); + _done = false; + // cout << "enumerator received root: " << _root->toString() << endl; - IndexInfo indexInfo; - indexInfo.index = indexes.begin()->index; - indexInfo.node = *nodes.begin(); - _indexes.push_back(indexInfo); + // If we fail to prepare, there's some OR clause or other index-requiring predicate that + // doesn't have an index. + if (!prepLegacyStrategy(_root)) { + _done = true; + } + else { + // We increment this from the beginning and roll forward. + _assignedCounter.resize(_leavesRequireIndex.size(), 0); + } - // Prepare to return the first plan. - _iter = 0; + // + // Final initialization + // return Status::OK(); } bool PlanEnumerator::getNext(MatchExpression** tree) { - dassert(_indexes.size()); + if (_done) { return false; } - // If we have used all indices that are useful in any number predicates, there's - // nothing left to do. - if (_iter == _indexes.size()) { - return false; + // + // Legacy Strategy + // + + // _assignedCounter is the next selection of indies to try. We increment after + // each getNext. + + for (size_t i = 0; i < _leavesRequireIndex.size(); ++i) { + // cout << "Leaf requires index: " << _leavesRequireIndex[i]->toString(); + + // XXX: this is a slow lookup due to str stuff + PredicateMap::const_iterator pmit = _pm.find(_leavesRequireIndex[i]->path().toString()); + verify(pmit != _pm.end()); + const PredicateInfo& pi = pmit->second; + verify(!pi.relevant.empty()); + + // XXX: relevant indices should be array + set<RelevantIndex>::const_iterator it = pi.relevant.begin(); + for (size_t j = 0; j < _assignedCounter[i]; ++j) { + ++it; + } + + // it now points to which idx to assign + + // XXX: ignoring compound indices entirely for now. can only choose a NOT_FIRST index + // if we chose a FIRST index for a node and-related to us. We know what nodes are + // directly AND-related when we create _leavesRequireIndex. Cache there somehow. + // use map of MatchExpression -> some # that represents the and, perhaps the parent. + // Only need for leaves. + verify(RelevantIndex::FIRST == it->relevance); + + IndexTag* tag = new IndexTag(it->index); + _leavesRequireIndex[i]->setTag(tag); } - // Annotate in the output tree, which nodes could use the index we're currently - // working with here. - int indexNum = _indexes[_iter].index; - while (_iter < _indexes.size() && _indexes[_iter].index == indexNum) { + // Move to next index. + size_t carry = 1; + for (size_t i = 0; i < _assignedCounter.size(); ++i) { + if (!carry) break; + + _assignedCounter[i] += carry; - IndexTag* indexTag = new IndexTag(indexNum); - MatchExpression* node = _indexes[_iter].node; - node->setTag(indexTag); + // The max value is the size of the relevant index set + PredicateMap::const_iterator it = _pm.find(_leavesRequireIndex[i]->path().toString()); + verify(it != _pm.end()); + const PredicateInfo& pi = it->second; - ++_iter; + if (_assignedCounter[i] >= pi.relevant.size()) { + _assignedCounter[i] = 0; + carry = 1; + } } - // XXX put the tree back the way it was. - // XXX if we're not really manipulating the clone this is wasted work. + if (carry > 0) { _done = true; } + + // + // _root is now tagged with the indices we use, selected by a strategy above + // + + // tags are cloned w/the tree clone. MatchExpression* ret = _root->shallowClone(); + // clear out copy of tags from tree we walk _root->resetTag(); + // TODO: Document thoroughly and/or move up into the planner. + tagForSort(ret); + sortUsingTags(ret); + *tree = ret; return true; } + // + // Legacy strategy. Each OR clause has one index. + // + + bool PlanEnumerator::hasIndexAvailable(MatchExpression* node) { + PredicateMap::const_iterator it = _pm.find(node->path().toString()); + if (it == _pm.end()) { + return false; + } + // XXX XXX: Check to see if we have any entries that are FIRST. Will not work with compound + // right now. + return it->second.relevant.size() > 0; + } + + bool PlanEnumerator::prepLegacyStrategy(MatchExpression* root) { + if (root->isLeaf() || root->isArray()) { + if (!hasIndexAvailable(root)) { return false; } + _leavesRequireIndex.push_back(root); + return true; + } + else { + verify(root->isLogical()); + if (MatchExpression::OR == root->matchType()) { + for (size_t i = 0; i < root->numChildren(); ++i) { + MatchExpression* child = root->getChild(i); + bool willHaveIndex = prepLegacyStrategy(root->getChild(i)); + if (!willHaveIndex) { + warning() << "OR child " << child->toString() << " won't have idx"; + return false; + } + } + // Each of our children has an index so we'll have an index. + return true; + } + else if (MatchExpression::AND == root->matchType()) { + MatchExpression* expressionToIndex = NULL; + bool indexAssignedToAtLeastOneChild = false; + + // XXX: is this non-deterministic depending on the order of clauses of the and? I + // think it is. if we see an OR clause first as a child, those have an index + // assigned, and we hang any further preds off as a filter. if we see a filter + // first, we create an ixscan and AND it with any subsequent OR children. + // + // A solution here is to sort the children in the desired order, so the ORs are + // first, if we're really trying to minimize indices. + + for (size_t i = 0; i < root->numChildren(); ++i) { + MatchExpression* child = root->getChild(i); + + // If the child requires an index, use an index. + // TODO: Text goes here. + if (MatchExpression::GEO_NEAR == child->matchType()) { + // We must use an index in this case. + if (!prepLegacyStrategy(child)) { + return false; + } + indexAssignedToAtLeastOneChild = true; + } + else if (child->isLogical()) { + // We must use an index in this case as well. + + // We've squashed AND-AND and OR-OR into AND/OR respectively, so this should + // be an OR. + verify(MatchExpression::OR == child->matchType()); + if (!prepLegacyStrategy(child)) { + return false; + } + indexAssignedToAtLeastOneChild = true; + } + else { + verify(child->isArray() || child->isLeaf()); + + if (!indexAssignedToAtLeastOneChild && hasIndexAvailable(child)) { + verify(NULL == expressionToIndex); + expressionToIndex = child; + indexAssignedToAtLeastOneChild = true; + } + } + } + + // We've only filled out expressionToIndex if it has an index available. + if (NULL != expressionToIndex) { + verify(prepLegacyStrategy(expressionToIndex)); + } + + return indexAssignedToAtLeastOneChild; + } + else { + warning() << "Can't deal w/logical node in enum: " << root->toString(); + verify(0); + return false; + } + } + } + } // namespace mongo diff --git a/src/mongo/db/query/plan_enumerator.h b/src/mongo/db/query/plan_enumerator.h index 730343e4a5a..d65cbd2d9f5 100644 --- a/src/mongo/db/query/plan_enumerator.h +++ b/src/mongo/db/query/plan_enumerator.h @@ -66,6 +66,11 @@ namespace mongo { bool getNext(MatchExpression** tree); private: + + // + // Data used by all enumeration strategies + // + // Match expression we're planning for. Not owned by us. MatchExpression* _root; @@ -77,29 +82,38 @@ namespace mongo { const std::vector<BSONObj>& _indices; // - // navigation state (work in progress) + // Enumeration Strategies // - // We intend to enumerate, initially, solely based on the distinct index access - // patterns that a query would accept. The enumeration state, below, will change as we - // progress toward that goal. For now, we simplify by imposing the following - // assumptions + // - // + Each index can help only one predicate in a query - // + There is only one index that can help a query + // Legacy strategy. // - // TODO: Relax the above + // The legacy strategy assigns the absolute fewest number of indices require to satisfy a + // query. Some predicates require an index (GEO_NEAR and TEXT). Each branch of an OR requires + // an index. // - // List of all the useful indices and which node in the match expression each of them - // applies to. - struct IndexInfo{ - int index; - MatchExpression* node; - }; - vector<IndexInfo> _indexes; + // Which leaves require an index? + vector<MatchExpression*> _leavesRequireIndex; + + // For each leaf, a counter of which index we've assigned so far. + vector<size_t> _assignedCounter; + + // Are we done with the legacy strategy? + bool _done; + + /** + * Fill out _leavesRequireIndex such that each OR clause and each index-requiring leaf has + * an index. If there are no OR clauses, we use only one index. + */ + bool prepLegacyStrategy(MatchExpression* root); + + /** + * Does the provided node have any indices that can be used to answer it? + */ + bool hasIndexAvailable(MatchExpression* node); - // Iterator over _indices. Points to the next index to be used when getNext is called. - size_t _iter; + // XXX TODO: Add a dump() or toString() for legacy strategy. }; } // namespace mongo diff --git a/src/mongo/db/query/plan_executor.h b/src/mongo/db/query/plan_executor.h index 793394f2367..966f7211c3d 100644 --- a/src/mongo/db/query/plan_executor.h +++ b/src/mongo/db/query/plan_executor.h @@ -97,6 +97,13 @@ namespace mongo { for (;;) { WorkingSetID id; PlanStage::StageState code = _root->work(&id); + /* + cout << "gotNext state " << PlanStage::stateStr(code); + if (PlanStage::ADVANCED == code || PlanStage::NEED_FETCH == code) { + cout << " wsid " << id; + } + cout << endl; + */ if (PlanStage::ADVANCED == code) { WorkingSetMember* member = _workingSet->get(id); diff --git a/src/mongo/db/query/plan_ranker.cpp b/src/mongo/db/query/plan_ranker.cpp index b89ff216f71..dc395e48096 100644 --- a/src/mongo/db/query/plan_ranker.cpp +++ b/src/mongo/db/query/plan_ranker.cpp @@ -85,7 +85,10 @@ namespace mongo { } else { // This is a placeholder for better ranking logic. - return static_cast<double>(stats.common.advanced) + // + // We start all scores at 1. Our "no plan selected" score is 0 and we want all plans to + // be greater than that. + return 1 + static_cast<double>(stats.common.advanced) / static_cast<double>(stats.common.works); } } diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp index 4d5eac4ab53..a1ea7d12d2c 100644 --- a/src/mongo/db/query/query_planner.cpp +++ b/src/mongo/db/query/query_planner.cpp @@ -100,8 +100,8 @@ namespace mongo { // We're looking at the first element in the index. We can definitely use any index // prefixed by the predicate's field to answer that predicate. - for (PredicateMap::iterator it = predicates->find(elt.fieldName()); - it != predicates->end(); ++it) { + PredicateMap::iterator it = predicates->find(elt.fieldName()); + if (it != predicates->end()) { it->second.relevant.insert(RelevantIndex(i, RelevantIndex::FIRST)); } @@ -110,8 +110,8 @@ namespace mongo { // later. while (kpIt.more()) { elt = kpIt.next(); - for (PredicateMap::iterator it = predicates->find(elt.fieldName()); - it != predicates->end(); ++it) { + PredicateMap::iterator it = predicates->find(elt.fieldName()); + if (it != predicates->end()) { it->second.relevant.insert(RelevantIndex(i, RelevantIndex::NOT_FIRST)); } } @@ -137,6 +137,8 @@ namespace mongo { soln->filter.reset(query.root()->shallowClone()); // BSONValue, where are you? soln->filterData = query.getQueryObj(); + verify(soln->filterData.isOwned()); + soln->ns = query.ns(); // Make the (only) node, a collection scan. CollectionScanNode* csn = new CollectionScanNode(); @@ -145,15 +147,40 @@ namespace mongo { csn->tailable = tailable; const BSONObj& sortObj = query.getParsed().getSort(); + + QuerySolutionNode* solnRoot = csn; + // TODO: We need better checking for this. Should be done in CanonicalQuery and we should // be able to assume it's correct. if (!sortObj.isEmpty()) { BSONElement natural = sortObj.getFieldDotted("$natural"); - csn->direction = natural.numberInt() >= 0 ? 1 : -1; + if (!natural.eoo()) { + csn->direction = natural.numberInt() >= 0 ? 1 : -1; + } + else { + SortNode* sort = new SortNode(); + sort->pattern = sortObj; + sort->child.reset(csn); + solnRoot = sort; + } } - // Add this solution to the list of solutions. - soln->root.reset(csn); + if (!query.getParsed().getProj().isEmpty()) { + ProjectionNode* proj = new ProjectionNode(); + proj->projection = query.getParsed().getProj(); + proj->child.reset(solnRoot); + solnRoot = proj; + } + + if (0 != query.getParsed().getSkip()) { + SkipNode* skip = new SkipNode(); + skip->skip = query.getParsed().getSkip(); + skip->child.reset(solnRoot); + solnRoot = skip; + } + + soln->root.reset(solnRoot); + // cout << "Outputting collscan " << soln->toString() << endl; return soln.release(); } @@ -164,92 +191,470 @@ namespace mongo { FilterInfo(MatchExpression* f, int c) : filterNode(f), currChild(c) {} }; - // TODO: Document when this settles - QuerySolution* makeIndexedPath(const CanonicalQuery& query, - const vector<BSONObj>& indexKeyPatterns, - MatchExpression* filter) { + IndexScanNode* makeIndexScan(const BSONObj& indexKeyPattern, MatchExpression* expr, bool* exact) { + IndexScanNode* isn = new IndexScanNode(); + isn->indexKeyPattern = indexKeyPattern; - auto_ptr<QuerySolution> soln(new QuerySolution()); + if (indexKeyPattern.firstElement().fieldName() != expr->path().toString()) { + // cout << "indexkp fn is " << indexKeyPattern.firstElement().fieldName() << " path is " << expr->path().toString() << endl; + verify(0); + } - // We'll build a tree of solutions nodes as we traverse the filter. For - // now, we're not generating multi-index solutions, so we can hold on to a - // single node here. - IndexScanNode* isn = new IndexScanNode(); + BSONObjIterator it(isn->indexKeyPattern); + BSONElement elt = it.next(); + + OrderedIntervalList oil(expr->path().toString()); + int direction = (elt.numberInt() >= 0) ? 1 : -1; + IndexBoundsBuilder::translate(expr, direction, &oil, exact); + // TODO(opt): this is a surplus copy, could build right in the original + isn->bounds.fields.push_back(oil); + return isn; + } + + QuerySolutionNode* buildSolutionTree(MatchExpression* root, const vector<BSONObj>& indexKeyPatterns) { + if (root->isLogical()) { + // The children of AND and OR nodes are sorted by the index that the subtree rooted at + // that node uses. Child nodes that use the same index are adjacent to one another to + // facilitate grouping of index scans. + // + // See tagForSort and sortUsingTags in index_tag.h + if (MatchExpression::AND == root->matchType()) { + auto_ptr<AndHashNode> theAnd(new AndHashNode()); + + // Process all IXSCANs, possibly combining them. + auto_ptr<IndexScanNode> currentScan; + size_t currentIndexNumber = IndexTag::kNoIndex; + size_t curChild = 0; + while (curChild < root->numChildren()) { + MatchExpression* child = root->getChild(curChild); + + // No tag, it's not an IXSCAN. We've sorted our children such that the tagged + // children are first, so we stop now. + if (NULL == child->getTag()) { break; } + + IndexTag* ixtag = static_cast<IndexTag*>(child->getTag()); + // If there's a tag it must be valid. + verify(IndexTag::kNoIndex != ixtag->index); + + // TODO(opt): If the child is logical, it could collapse into an ixscan. We + // ignore this for now. + if (child->isLogical()) { + QuerySolutionNode* childSolution = buildSolutionTree(child, indexKeyPatterns); + if (NULL == childSolution) { return NULL; } + theAnd->children.push_back(childSolution); + // The logical sub-tree is responsible for fully evaluating itself. Any + // required filters or fetches are already hung on it. As such, we remove + // the filter branch from our tree. + // XXX: Verify this is the right policy. + root->getChildVector()->erase(root->getChildVector()->begin() + curChild); + // XXX: Do we delete the curChild-th child?? + continue; + } + + // We now know that 'child' can use an index and that it's a predicate over a + // field (leaf). There is no current index scan, make the current scan this + // child. + if (NULL == currentScan.get()) { + verify(IndexTag::kNoIndex == currentIndexNumber); + currentIndexNumber = ixtag->index; + + bool exact = false; + currentScan.reset(makeIndexScan(indexKeyPatterns[currentIndexNumber], child, &exact)); + + if (exact) { + // The bounds answer the predicate, and we can remove the expression from the root. + // TODO(opt): Erasing entry 0, 1, 2, ... could be kind of n^2, maybe optimize later. + root->getChildVector()->erase(root->getChildVector()->begin() + curChild); + // Don't increment curChild. + // XXX: Do we delete the curChild-th child?? + } + else { + // We keep curChild in the AND for affixing later. + ++curChild; + } + continue; + } + + // If the child uses a different index than the current index scan, and there is + // a valid current index scan, add the current index scan as a child to the AND, + // as we're done with it. + // + // The child then becomes our new current index scan. + + // XXX temporary until we can merge bounds. + bool childUsesNewIndex = true; + + // XXX: uncomment when we can combine ixscans via bounds merging. + // bool childUsesNewIndex = (currentIndexNumber != ixtag->index); + + if (childUsesNewIndex) { + verify(NULL != currentScan.get()); + theAnd->children.push_back(currentScan.release()); + currentIndexNumber = ixtag->index; + + bool exact = false; + currentScan.reset(makeIndexScan(indexKeyPatterns[currentIndexNumber], child, &exact)); + + if (exact) { + // The bounds answer the predicate. + // TODO(opt): Erasing entry 0, 1, 2, ... could be kind of n^2, maybe optimize later. + root->getChildVector()->erase(root->getChildVector()->begin() + curChild); + // XXX: Do we delete the curChild-th child?? + } + else { + // We keep curChild in the AND for affixing later. + ++curChild; + } + continue; + } + else { + // The child uses the same index we're currently building a scan for. Merge the + // bounds and filters. + verify(currentIndexNumber == ixtag->index); + + // First, make the bounds. + OrderedIntervalList oil(child->path().toString()); + + // TODO(opt): We can cache this as part of the index rating process + BSONObjIterator kpIt(indexKeyPatterns[currentIndexNumber]); + BSONElement elt = kpIt.next(); + while (elt.fieldName() != oil.name) { + verify(kpIt.more()); + elt = kpIt.next(); + } + verify(!elt.eoo()); + int direction = (elt.numberInt() >= 0) ? 1 : -1; + bool exact = false; + IndexBoundsBuilder::translate(child, direction, &oil, &exact); + + // Merge the bounds with the existing. + currentScan->bounds.joinAnd(oil, indexKeyPatterns[currentIndexNumber]); + + if (exact) { + // The bounds answer the predicate. + // TODO(opt): Erasing entry 0, 1, 2, ... could be kind of n^2, maybe optimize later. + root->getChildVector()->erase(root->getChildVector()->begin() + curChild); + // XXX: Do we delete the curChild-th child?? + } + else { + // We keep curChild in the AND for affixing later. + ++curChild; + } + } + } - // We'll do a post order traversal of the filter, which is a n-ary tree. We descend the - // tree keeping track of which child is next to visit. - std::stack<FilterInfo> filterNodes; - FilterInfo rootFilter(filter, 0); - filterNodes.push(rootFilter); - - while (!filterNodes.empty()) { - - FilterInfo& fi = filterNodes.top(); - MatchExpression* filterNode = fi.filterNode; - size_t& currChild = fi.currChild; - if (filterNode->numChildren() == currChild) { - - // Visit leaf or node. If we find an index tag, we compute the bounds and - // fill up the index information on the current IXScan node. - IndexTag* indexTag = static_cast<IndexTag*>(filterNode->getTag()); - bool exactBounds = false; - if (indexTag != NULL) { - - isn->indexKeyPattern = indexKeyPatterns[indexTag->index]; - - // We assume that this is in the same direction of the index. We may later - // change our minds if the query requires sorting in the opposite - // direction. But, right now, the direction of traversal is not in - // question. - isn->direction = 1; - - // TODO: handle combining oils if this is not the first predicate we'eve - // seen for this field. - // TODO: handle compound indexes - OrderedIntervalList oil(filterNode->path().toString()); - IndexBoundsBuilder::translate( - static_cast<LeafMatchExpression*>(filterNode), - isn->indexKeyPattern.firstElement(), - &oil, - &exactBounds); - - // TODO: union or intersect oils - // It can be the case that this is not the first "oil" for a given - // field. That is, there are several predicates agains a same field which - // happens to have a useful index. In that case, we may need to combine - // oils in a "larger" bound to accomodate all the predicates. - vector<OrderedIntervalList>& fields = isn->bounds.fields; - fields.push_back(oil); + // Output the scan we're done with. + if (NULL != currentScan.get()) { + theAnd->children.push_back(currentScan.release()); } - // TODO: possibly trim redundant MatchExpressions nodes - // It can be the case that the bounds for a query will only return exact - // results (as opposed to just limiting the index range in which the results - // lie). When results are exact, we don't need to retest them and thus such - // nodes can be eliminated from the filter. Note that some non-leaf nodes - // (e.g. and's, or's) may be left with single children and can therefore be - // eliminated as well. - isn->filter = filterNode; - - // TODO: Multiple IXScans nodes in a solution - // If this is not the first index tag found, then we add another IXScan node to - // the queryNodes stack and start using $or and $ands in the filter expession - // to tie the IXScan together into a tree of QuerySolutionNode's. - - filterNodes.pop(); + // + // Process all non-indexed predicates. We hang these above the AND with a fetch and + // filter. + // + + // This is the node we're about to return. + QuerySolutionNode* andResult; + + // Short-circuit: an AND of one child is just the child. + verify(theAnd->children.size() > 0); + if (theAnd->children.size() == 1) { + andResult = theAnd->children[0]; + theAnd->children.clear(); + // Deletes theAnd but we cleared the children. + theAnd.reset(); + } + else { + andResult = theAnd.release(); + } + + // If there are any nodes still attached to the AND, we can't answer them using the + // index, so we put a fetch with filter. + if (root->numChildren() > 0) { + FetchNode* fetch = new FetchNode(); + fetch->filter = root; + // takes ownership + fetch->child.reset(andResult); + andResult = fetch; + } + else { + // XXX: If root has no children, who deletes it/owns it? What happens? + } + + return andResult; + } + else if (MatchExpression::OR == root->matchType()) { + auto_ptr<OrNode> theOr(new OrNode()); + + // Process all IXSCANs, possibly combining them. + auto_ptr<IndexScanNode> currentScan; + size_t currentIndexNumber = IndexTag::kNoIndex; + size_t curChild = 0; + while (curChild < root->numChildren()) { + MatchExpression* child = root->getChild(curChild); + + // No tag, it's not an IXSCAN. + if (NULL == child->getTag()) { break; } + + IndexTag* ixtag = static_cast<IndexTag*>(child->getTag()); + // If there's a tag it must be valid. + verify(IndexTag::kNoIndex != ixtag->index); + + // TODO(opt): If the child is logical, it could collapse into an ixscan. We + // ignore this for now. + if (child->isLogical()) { + QuerySolutionNode* childSolution = buildSolutionTree(child, indexKeyPatterns); + if (NULL == childSolution) { return NULL; } + theOr->children.push_back(childSolution); + // The logical sub-tree is responsible for fully evaluating itself. Any + // required filters or fetches are already hung on it. As such, we remove + // the filter branch from our tree. + // XXX: Verify this is the right policy. + // XXX: Do we delete the curChild-th child?? + root->getChildVector()->erase(root->getChildVector()->begin() + curChild); + continue; + } + + // We now know that 'child' can use an index and that it's a predicate over a + // field. There is no current index scan, make the current scan this child. + if (NULL == currentScan.get()) { + verify(IndexTag::kNoIndex == currentIndexNumber); + currentIndexNumber = ixtag->index; + + bool exact = false; + currentScan.reset(makeIndexScan(indexKeyPatterns[currentIndexNumber], child, &exact)); + + if (exact) { + // The bounds answer the predicate, and we can remove the expression from the root. + // TODO(opt): Erasing entry 0, 1, 2, ... could be kind of n^2, maybe optimize later. + root->getChildVector()->erase(root->getChildVector()->begin() + curChild); + // Don't increment curChild. + // XXX: Do we delete the curChild-th child?? + } + else { + // We keep curChild in the AND for affixing later. + ++curChild; + } + continue; + } + + // If the child uses a different index than the current index scan, and there is + // a valid current index scan, add the current index scan as a child to the AND, + // as we're done with it. + // + // The child then becomes our new current index scan. + + // XXX temporary until we can merge bounds. + bool childUsesNewIndex = true; + + // XXX: uncomment when we can combine ixscans via bounds merging. + // bool childUsesNewIndex = (currentIndexNumber != ixtag->index); + + if (childUsesNewIndex) { + verify(NULL != currentScan.get()); + theOr->children.push_back(currentScan.release()); + currentIndexNumber = ixtag->index; + + bool exact = false; + currentScan.reset(makeIndexScan(indexKeyPatterns[currentIndexNumber], child, &exact)); + + if (exact) { + // The bounds answer the predicate. + // TODO(opt): Erasing entry 0, 1, 2, ... could be kind of n^2, maybe optimize later. + root->getChildVector()->erase(root->getChildVector()->begin() + curChild); + // XXX: Do we delete the curChild-th child?? + } + else { + // We keep curChild in the AND for affixing later. + ++curChild; + } + continue; + } + else { + // The child uses the same index we're currently building a scan for. Merge the + // bounds and filters. + verify(currentIndexNumber == ixtag->index); + + // First, make the bounds. + OrderedIntervalList oil(child->path().toString()); + + // TODO(opt): We can cache this as part of the index rating process + BSONObjIterator kpIt(indexKeyPatterns[currentIndexNumber]); + BSONElement elt = kpIt.next(); + while (elt.fieldName() != oil.name) { + verify(kpIt.more()); + elt = kpIt.next(); + } + verify(!elt.eoo()); + int direction = (elt.numberInt() >= 0) ? 1 : -1; + bool exact = false; + IndexBoundsBuilder::translate(child, direction, &oil, &exact); + + // Merge the bounds with the existing. + currentScan->bounds.joinOr(oil, indexKeyPatterns[currentIndexNumber]); + + if (exact) { + // The bounds answer the predicate. + // TODO(opt): Erasing entry 0, 1, 2, ... could be kind of n^2, maybe optimize later. + root->getChildVector()->erase(root->getChildVector()->begin() + curChild); + + // XXX: Do we delete the curChild-th child?? + } + else { + // We keep curChild in the AND for affixing later. + ++curChild; + } + } + } + + // Output the scan we're done with. + if (NULL != currentScan.get()) { + theOr->children.push_back(currentScan.release()); + } + + // Unlike an AND, an OR cannot have filters hanging off of it. + // TODO: Should we verify? + if (root->numChildren() > 0) { + warning() << "planner OR error, non-indexed branch."; + verify(0); + return NULL; + } + + // Short-circuit: the OR of one child is just the child. + if (1 == theOr->children.size()) { + QuerySolutionNode* child = theOr->children[0]; + theOr->children.clear(); + return child; + } + + return theOr.release(); + } + else { + // NOT or NOR, can't do anything. + return NULL; + } + } + else { + // isArray or isLeaf is true. Either way, it's over one field, and the bounds builder + // deals with it. + if (NULL == root->getTag()) { + // No index to use here, not in the context of logical operator, so we're SOL. + return NULL; } else { - // Continue the traversal. - FilterInfo fiChild(filterNode->getChild(currChild), 0); - filterNodes.push(fiChild); - currChild++; + // Make an index scan over the tagged index #. + IndexTag* tag = static_cast<IndexTag*>(root->getTag()); + + bool exact = false; + auto_ptr<IndexScanNode> isn(makeIndexScan(indexKeyPatterns[tag->index], root, &exact)); + + BSONObjIterator it(isn->indexKeyPattern); + // Skip first field, as we've filled out the bounds in makeIndexScan. + it.next(); + + // The rest is filler for any trailing fields. + while (it.more()) { + isn->bounds.fields.push_back(IndexBoundsBuilder::allValuesForField(it.next())); + } + + // If the bounds are exact, the set of documents that satisfy the predicate is exactly + // equal to the set of documents that the scan provides. + // + // If the bounds are not exact, the set of documents returned from the scan is a + // superset of documents that satisfy the predicate, and we must check the + // predicate. + if (!exact) { + FetchNode* fetch = new FetchNode(); + fetch->filter = root; + fetch->child.reset(isn.release()); + return fetch; + } + + return isn.release(); } } + return NULL; + } + + QuerySolution* makeSolution(const CanonicalQuery& query, MatchExpression* taggedRoot, + const vector<BSONObj>& indexKeyPatterns) { + QuerySolutionNode* solnRoot = buildSolutionTree(taggedRoot, indexKeyPatterns); + if (NULL == solnRoot) { return NULL; } + + // TODO XXX: Solutions need properties, need to use those properties to see when things + // covered, sorts provided, etc. + + // Fetch before proj + bool addFetch = (STAGE_FETCH != solnRoot->getType()); + if (addFetch) { + FetchNode* fetch = new FetchNode(); + fetch->child.reset(solnRoot); + solnRoot = fetch; + } + + if (!query.getParsed().getProj().isEmpty()) { + ProjectionNode* proj = new ProjectionNode(); + proj->projection = query.getParsed().getProj(); + proj->child.reset(solnRoot); + solnRoot = proj; + } + + if (!query.getParsed().getSort().isEmpty()) { + SortNode* sort = new SortNode(); + sort->pattern = query.getParsed().getSort(); + sort->child.reset(solnRoot); + solnRoot = sort; + } + + if (0 != query.getParsed().getSkip()) { + SkipNode* skip = new SkipNode(); + skip->skip = query.getParsed().getSkip(); + skip->child.reset(solnRoot); + solnRoot = skip; + } - soln->root.reset(isn); + auto_ptr<QuerySolution> soln(new QuerySolution()); + soln->filter.reset(taggedRoot); + soln->filterData = query.getQueryObj(); + verify(soln->filterData.isOwned()); + soln->root.reset(solnRoot); + soln->ns = query.ns(); return soln.release(); } + void dumpPredMap(const PredicateMap& predicates) { + for (PredicateMap::const_iterator it = predicates.begin(); it != predicates.end(); ++it) { + cout << "field " << it->first << endl; + const PredicateInfo& pi = it->second; + cout << "\tRelevant indices:\n"; + for (set<RelevantIndex>::iterator si = pi.relevant.begin(); si != pi.relevant.end(); ++si) { + cout << "\t\tidx #" << si->index << " relevance: "; + if (RelevantIndex::FIRST == si->relevance) { + cout << "first"; + } + else { + cout << "second"; + } + } + cout << "\n\tNodes:\n"; + for (size_t i = 0; i < pi.nodes.size(); ++i) { + cout << "\t\t" << pi.nodes[i]->toString(); + } + } + } + + bool hasNode(MatchExpression* root, MatchExpression::MatchType type) { + if (type == root->matchType()) { + return true; + } + for (size_t i = 0; i < root->numChildren(); ++i) { + if (hasNode(root->getChild(i), type)) { + return true; + } + } + return false; + } + // static void QueryPlanner::plan(const CanonicalQuery& query, const vector<BSONObj>& indexKeyPatterns, vector<QuerySolution*>* out) { @@ -277,8 +682,8 @@ namespace mongo { // NOR and NOT we can't handle well with indices. If we see them here, they weren't // rewritten. Just output a collscan for those. - if (hasPredicate(predicates, MatchExpression::NOT) - || hasPredicate(predicates, MatchExpression::NOR)) { + if (hasNode(query.root(), MatchExpression::NOT) + || hasNode(query.root(), MatchExpression::NOR)) { // If there's a near predicate, we can't handle this. // TODO: Should canonicalized query detect this? @@ -300,8 +705,15 @@ namespace mongo { return; } + /* + for (size_t i = 0; i < relevantIndices.size(); ++i) { + cout << "relevant idx " << i << " is " << relevantIndices[i].toString() << endl; + } + */ + // Figure out how useful each index is to each predicate. rateIndices(relevantIndices, &predicates); + // dumpPredMap(predicates); // // Planner Section 2: Use predicate/index data to output sets of indices that we can use. @@ -318,7 +730,8 @@ namespace mongo { // to try to rewrite the tree. TODO: Do this for real. We treat the tree as static. // - QuerySolution* soln = makeIndexedPath(query, indexKeyPatterns, rawTree); + QuerySolution* soln = makeSolution(query, rawTree, relevantIndices); + if (NULL == soln) { continue; } // // Planner Section 4: Covering. If we're projecting, See if we get any covering from @@ -350,13 +763,16 @@ namespace mongo { // if (NULL != soln) { + // cout << "Adding solution:\n" << soln->toString() << endl; out->push_back(soln); } } // TODO: Do we always want to offer a collscan solution? if (!hasPredicate(predicates, MatchExpression::GEO_NEAR)) { - out->push_back(makeCollectionScan(query, false)); + QuerySolution* collscan = makeCollectionScan(query, false); + // cout << "default collscan = " << collscan->toString() << endl; + out->push_back(collscan); } } diff --git a/src/mongo/db/query/query_planner_test.cpp b/src/mongo/db/query/query_planner_test.cpp index 3f19ce812b2..bdc865a6ca8 100644 --- a/src/mongo/db/query/query_planner_test.cpp +++ b/src/mongo/db/query/query_planner_test.cpp @@ -147,8 +147,10 @@ namespace { // TODO check filter QuerySolution* indexedSolution; - getPlanByType(STAGE_IXSCAN, &indexedSolution); - IndexScanNode* ixNode = static_cast<IndexScanNode*>(indexedSolution->root.get()); + getPlanByType(STAGE_FETCH, &indexedSolution); + FetchNode* fn = static_cast<FetchNode*>(indexedSolution->root.get()); + IndexScanNode* ixNode = static_cast<IndexScanNode*>(fn->child.get()); + boundsEqual(fromjson("{x: [ [5, 5, true, true] ] }"), ixNode->bounds); // TODO check filter } @@ -162,10 +164,14 @@ namespace { getPlanByType(STAGE_COLLSCAN, &collScanSolution); // TODO check filter - QuerySolution* indexedSolution; - getPlanByType(STAGE_IXSCAN, &indexedSolution); - IndexScanNode* ixNode = static_cast<IndexScanNode*>(indexedSolution->root.get()); - boundsEqual(fromjson("{x: [ [5, 5, true, true] ] }"), ixNode->bounds); + //QuerySolution* indexedSolution; + //getPlanByType(STAGE_FETCH, &indexedSolution); + //FetchNode* fn = static_cast<FetchNode*>(indexedSolution->root.get()); + //IndexScanNode* ixNode = static_cast<IndexScanNode*>(fn->child.get()); + + // XXX: this boundsEqual check is bogus, need bounds on y. + // boundsEqual(fromjson("{x: [ [5, 5, true, true] ] }"), ixNode->bounds); + // TODO check filter } @@ -186,8 +192,10 @@ namespace { // TODO check filter QuerySolution* indexedSolution; - getPlanByType(STAGE_IXSCAN, &indexedSolution); - IndexScanNode* ixNode = static_cast<IndexScanNode*>(indexedSolution->root.get()); + getPlanByType(STAGE_FETCH, &indexedSolution); + FetchNode* fn = static_cast<FetchNode*>(indexedSolution->root.get()); + IndexScanNode* ixNode = static_cast<IndexScanNode*>(fn->child.get()); + BSONObj bounds = BSON("x" << BSON_ARRAY(BSON_ARRAY(MINKEY << 5 << true << false))); boundsEqual(bounds, ixNode->bounds); // todo check filter @@ -207,8 +215,10 @@ namespace { // TODO check filter QuerySolution* indexedSolution; - getPlanByType(STAGE_IXSCAN, &indexedSolution); - IndexScanNode* ixNode = static_cast<IndexScanNode*>(indexedSolution->root.get()); + getPlanByType(STAGE_FETCH, &indexedSolution); + FetchNode* fn = static_cast<FetchNode*>(indexedSolution->root.get()); + IndexScanNode* ixNode = static_cast<IndexScanNode*>(fn->child.get()); + BSONObj bounds = BSON("x" << BSON_ARRAY(BSON_ARRAY(MINKEY << 5 << true << true))); boundsEqual(bounds, ixNode->bounds); // todo check filter @@ -228,8 +238,9 @@ namespace { // TODO check filter QuerySolution* indexedSolution; - getPlanByType(STAGE_IXSCAN, &indexedSolution); - IndexScanNode* ixNode = static_cast<IndexScanNode*>(indexedSolution->root.get()); + getPlanByType(STAGE_FETCH, &indexedSolution); + FetchNode* fn = static_cast<FetchNode*>(indexedSolution->root.get()); + IndexScanNode* ixNode = static_cast<IndexScanNode*>(fn->child.get()); BSONObj bounds = BSON("x" << BSON_ARRAY(BSON_ARRAY(5 << MAXKEY << false << true))); boundsEqual(bounds, ixNode->bounds); // todo check filter @@ -249,8 +260,9 @@ namespace { // TODO check filter QuerySolution* indexedSolution; - getPlanByType(STAGE_IXSCAN, &indexedSolution); - IndexScanNode* ixNode = static_cast<IndexScanNode*>(indexedSolution->root.get()); + getPlanByType(STAGE_FETCH, &indexedSolution); + FetchNode* fn = static_cast<FetchNode*>(indexedSolution->root.get()); + IndexScanNode* ixNode = static_cast<IndexScanNode*>(fn->child.get()); BSONObj bounds = BSON("x" << BSON_ARRAY(BSON_ARRAY(5 << MAXKEY << true << true))); boundsEqual(bounds, ixNode->bounds); // todo check filter @@ -260,26 +272,76 @@ namespace { // tree operations // - // STOPPED HERE - need to hook up machinery for multiple indexed predicates - // first test is segfaulting - // second is not working (until the machinery is in place) + TEST_F(SingleIndexTest, TwoPredicatesAnding) { + setIndex(BSON("x" << 1)); + runQuery(fromjson("{$and: [ {x: {$gt: 1}}, {x: {$lt: 3}} ] }")); + ASSERT_EQUALS(getNumSolutions(), 2U); + + QuerySolution* collScanSolution; + getPlanByType(STAGE_COLLSCAN, &collScanSolution); + // TODO check filter + + QuerySolution* indexedSolution; + // This is a fetch not an ixscan because our index tagging isn't good so far and we don't + // know that the index is used for the second predicate. + getPlanByType(STAGE_FETCH, &indexedSolution); + FetchNode* fn = static_cast<FetchNode*>(indexedSolution->root.get()); + IndexScanNode* ixNode = static_cast<IndexScanNode*>(fn->child.get()); + boundsEqual(BSON("x" << BSON_ARRAY(BSON_ARRAY(1 << MAXKEY << false << true))), ixNode->bounds); + // TODO: use this when we tag both indices. + // boundsEqual(fromjson("{x: [ [1, 3, false, false] ] }"), ixNode->bounds); + // TODO check filter + } + + TEST_F(SingleIndexTest, SimpleOr) { + setIndex(BSON("a" << 1)); + runQuery(fromjson("{$or: [ {a: 20}, {a: 21}]}")); + ASSERT_EQUALS(getNumSolutions(), 2U); + QuerySolution* indexedSolution = NULL; + getPlanByType(STAGE_FETCH, &indexedSolution); + cout << indexedSolution->toString() << endl; + } + + TEST_F(SingleIndexTest, OrWithAndChild) { + setIndex(BSON("a" << 1)); + runQuery(fromjson("{$or: [ {a: 20}, {$and: [{a:1}, {b:7}]}]}")); + ASSERT_EQUALS(getNumSolutions(), 2U); + QuerySolution* indexedSolution = NULL; + getPlanByType(STAGE_FETCH, &indexedSolution); + cout << indexedSolution->toString() << endl; + } + + // + // Tree operations that require simple tree rewriting. // - // TEST_F(SingleIndexTest, TwoPredicatesAnding) { - // setIndex(BSON("x" << 1)); - // runQuery(fromjson("{$and: [ {$gt: 1}, {$lt: 3} ] }")); - // ASSERT_EQUALS(getNumSolutions(), 2U); - // QuerySolution* collScanSolution; - // getPlanByType(STAGE_COLLSCAN, &collScanSolution); - // // TODO check filter + TEST_F(SingleIndexTest, AndOfAnd) { + setIndex(BSON("x" << 1)); + runQuery(fromjson("{$and: [ {$and: [ {x: 2.5}]}, {x: {$gt: 1}}, {x: {$lt: 3}} ] }")); + ASSERT_EQUALS(getNumSolutions(), 2U); + + QuerySolution* collScanSolution; + getPlanByType(STAGE_COLLSCAN, &collScanSolution); + // TODO check filter + + QuerySolution* indexedSolution; + // This is a fetch not an ixscan because our index tagging isn't good so far and we don't + // know that the index is used for the second predicate. + getPlanByType(STAGE_FETCH, &indexedSolution); + cout << indexedSolution->toString() << endl; + + //FetchNode* fn = static_cast<FetchNode*>(indexedSolution->root.get()); + //IndexScanNode* ixNode = static_cast<IndexScanNode*>(fn->child.get()); + //boundsEqual(BSON("x" << BSON_ARRAY(BSON_ARRAY(1 << MAXKEY << false << true))), ixNode->bounds); + // TODO: use this when we tag both indices. + // boundsEqual(fromjson("{x: [ [1, 3, false, false] ] }"), ixNode->bounds); + // TODO check filter + } - // QuerySolution* indexedSolution; - // getPlanByType(STAGE_IXSCAN, &indexedSolution); - // IndexScanNode* ixNode = static_cast<IndexScanNode*>(indexedSolution->root.get()); - // boundsEqual(fromjson("{x: [ [1, 3, false, false] ] }"), ixNode->bounds); - // // TODO check filter - // } + // STOPPED HERE - need to hook up machinery for multiple indexed predicates + // second is not working (until the machinery is in place) + // // TEST_F(SingleIndexTest, TwoPredicatesOring) { // setIndex(BSON("x" << 1)); // runQuery(fromjson("{$or: [ {a: 1}, {a: 2} ] }")); diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h index 24b36fadf7d..d5333ec68bb 100644 --- a/src/mongo/db/query/query_solution.h +++ b/src/mongo/db/query/query_solution.h @@ -49,8 +49,18 @@ namespace mongo { /** * Internal function called by toString() + * + * TODO: Consider outputting into a BSONObj or builder thereof. */ - virtual void appendToString(stringstream* ss) const = 0; + virtual void appendToString(stringstream* ss, int indent) const = 0; + + protected: + static void addIndent(stringstream* ss, int level) { + for (int i = 0; i < level; ++i) { + *ss << "---"; + } + } + private: MONGO_DISALLOW_COPYING(QuerySolutionNode); }; @@ -73,6 +83,8 @@ namespace mongo { // Any filters in root or below point into this. Must be owned. BSONObj filterData; + string ns; + /** * Output a human-readable string representing the plan. */ @@ -82,7 +94,7 @@ namespace mongo { } stringstream ss; - root->appendToString(&ss); + root->appendToString(&ss, 0); return ss.str(); } private: @@ -94,8 +106,15 @@ namespace mongo { virtual StageType getType() const { return STAGE_COLLSCAN; } - virtual void appendToString(stringstream* ss) const { - *ss << "COLLSCAN ns=" << name << " filter= " << filter->toString() << endl; + virtual void appendToString(stringstream* ss, int indent) const { + addIndent(ss, indent); + *ss << "COLLSCAN"; + addIndent(ss, indent + 1); + *ss << "ns = " << name; + if (NULL != filter) { + addIndent(ss, indent + 1); + *ss << " filter = " << filter->toString() << endl; + } } // Name of the namespace. @@ -112,18 +131,96 @@ namespace mongo { MatchExpression* filter; }; + struct AndHashNode : public QuerySolutionNode { + AndHashNode() : filter(NULL) { } + ~AndHashNode() { + for (size_t i = 0; i < children.size(); ++i) { + delete children[i]; + } + } + virtual StageType getType() const { return STAGE_AND_HASH; } + virtual void appendToString(stringstream* ss, int indent) const { + addIndent(ss, indent); + *ss << "AND_HASH"; + if (NULL != filter) { + addIndent(ss, indent + 1); + *ss << " filter = " << filter->toString() << endl; + } + for (size_t i = 0; i < children.size(); ++i) { + *ss << "Child " << i << ": "; + children[i]->appendToString(ss, indent + 1); + } + } + MatchExpression* filter; + vector<QuerySolutionNode*> children; + }; + + struct OrNode : public QuerySolutionNode { + OrNode() : dedup(true), filter(NULL) { } + ~OrNode() { + for (size_t i = 0; i < children.size(); ++i) { + delete children[i]; + } + } + virtual StageType getType() const { return STAGE_OR; } + virtual void appendToString(stringstream* ss, int indent) const { + addIndent(ss, indent); + *ss << "OR\n"; + if (NULL != filter) { + addIndent(ss, indent + 1); + *ss << " filter = " << filter->toString() << endl; + } + for (size_t i = 0; i < children.size(); ++i) { + addIndent(ss, indent + 1); + *ss << "Child " << i << ":\n"; + children[i]->appendToString(ss, indent + 2); + *ss << endl; + } + } + bool dedup; + MatchExpression* filter; + vector<QuerySolutionNode*> children; + }; + + struct FetchNode : public QuerySolutionNode { + FetchNode() : filter(NULL) { } + virtual StageType getType() const { return STAGE_FETCH; } + virtual void appendToString(stringstream* ss, int indent) const { + addIndent(ss, indent); + *ss << "FETCH\n"; + if (NULL != filter) { + addIndent(ss, indent + 1); + StringBuilder sb; + *ss << "filter:\n"; + filter->debugString(sb, indent + 2); + *ss << sb.str(); + } + addIndent(ss, indent + 1); + *ss << "Child:" << endl; + child->appendToString(ss, indent + 2); + } + MatchExpression* filter; + scoped_ptr<QuerySolutionNode> child; + }; + struct IndexScanNode : public QuerySolutionNode { IndexScanNode() : filter(NULL), limit(0), direction(1) { } virtual StageType getType() const { return STAGE_IXSCAN; } - virtual void appendToString(stringstream* ss) const { - *ss << "IXSCAN kp=" << indexKeyPattern; + virtual void appendToString(stringstream* ss, int indent) const { + addIndent(ss, indent); + *ss << "IXSCAN\n"; + addIndent(ss, indent + 1); + *ss << "keyPattern = " << indexKeyPattern << endl; if (NULL != filter) { - *ss << " filter= " << filter->toString(); + addIndent(ss, indent + 1); + *ss << " filter= " << filter->toString() << endl; } - *ss << " dir = " << direction; - *ss << " bounds = " << bounds.toString(); + addIndent(ss, indent + 1); + *ss << "dir = " << direction << endl; + addIndent(ss, indent + 1); + *ss << "bounds = " << bounds.toString(); } BSONObj indexKeyPattern; @@ -141,4 +238,78 @@ namespace mongo { IndexBounds bounds; }; + struct ProjectionNode : public QuerySolutionNode { + ProjectionNode() { } + virtual StageType getType() const { return STAGE_PROJECTION; } + + virtual void appendToString(stringstream* ss, int indent) const { + addIndent(ss, indent); + *ss << "PROJ\n"; + addIndent(ss, indent + 1); + *ss << "proj = " << projection.toString() << endl; + addIndent(ss, indent + 1); + *ss << "Child:" << endl; + child->appendToString(ss, indent + 2); + } + + BSONObj projection; + scoped_ptr<QuerySolutionNode> child; + // TODO: Filter + }; + + struct SortNode : public QuerySolutionNode { + SortNode() { } + virtual StageType getType() const { return STAGE_SORT; } + + virtual void appendToString(stringstream* ss, int indent) const { + addIndent(ss, indent); + *ss << "SORT\n"; + addIndent(ss, indent + 1); + *ss << "pattern = " << pattern.toString() << endl; + addIndent(ss, indent + 1); + *ss << "Child:" << endl; + child->appendToString(ss, indent + 2); + } + + BSONObj pattern; + scoped_ptr<QuerySolutionNode> child; + // TODO: Filter + }; + + struct LimitNode : public QuerySolutionNode { + LimitNode() { } + virtual StageType getType() const { return STAGE_LIMIT; } + + virtual void appendToString(stringstream* ss, int indent) const { + addIndent(ss, indent); + *ss << "LIMIT\n"; + addIndent(ss, indent + 1); + *ss << "limit = " << limit << endl; + addIndent(ss, indent + 1); + *ss << "Child:" << endl; + child->appendToString(ss, indent + 2); + } + + int limit; + scoped_ptr<QuerySolutionNode> child; + }; + + struct SkipNode : public QuerySolutionNode { + SkipNode() { } + virtual StageType getType() const { return STAGE_SKIP; } + + virtual void appendToString(stringstream* ss, int indent) const { + addIndent(ss, indent); + *ss << "SKIP\n"; + addIndent(ss, indent + 1); + *ss << "skip= " << skip << endl; + addIndent(ss, indent + 1); + *ss << "Child:" << endl; + child->appendToString(ss, indent + 2); + } + + int skip; + scoped_ptr<QuerySolutionNode> child; + }; + } // namespace mongo diff --git a/src/mongo/db/query/stage_builder.cpp b/src/mongo/db/query/stage_builder.cpp index e11910aec0d..dc2d59f1ceb 100644 --- a/src/mongo/db/query/stage_builder.cpp +++ b/src/mongo/db/query/stage_builder.cpp @@ -28,16 +28,21 @@ #include "mongo/db/query/stage_builder.h" +#include "mongo/db/exec/and_hash.h" #include "mongo/db/exec/collection_scan.h" +#include "mongo/db/exec/fetch.h" +#include "mongo/db/exec/index_scan.h" +#include "mongo/db/exec/limit.h" +#include "mongo/db/exec/or.h" +#include "mongo/db/exec/projection.h" +#include "mongo/db/exec/sort.h" +#include "mongo/db/exec/skip.h" +#include "mongo/db/index/catalog_hack.h" +#include "mongo/db/namespace_details.h" namespace mongo { - //static - bool StageBuilder::build(const QuerySolution& solution, PlanStage** rootOut, - WorkingSet** wsOut) { - QuerySolutionNode* root = solution.root.get(); - if (NULL == root) { return false; } - + PlanStage* buildStages(const string& ns, const QuerySolutionNode* root, WorkingSet* ws) { if (STAGE_COLLSCAN == root->getType()) { const CollectionScanNode* csn = static_cast<const CollectionScanNode*>(root); CollectionScanParams params; @@ -45,8 +50,114 @@ namespace mongo { params.tailable = csn->tailable; params.direction = (csn->direction == 1) ? CollectionScanParams::FORWARD : CollectionScanParams::BACKWARD; - *wsOut = new WorkingSet(); - *rootOut = new CollectionScan(params, *wsOut, csn->filter); + return new CollectionScan(params, ws, csn->filter); + } + else if (STAGE_IXSCAN == root->getType()) { + const IndexScanNode* ixn = static_cast<const IndexScanNode*>(root); + // + // XXX XXX + // Given that this grabs data from the catalog, we must do this inside of a lock. + // We should change this to take a (ns, index key pattern) pair so that the params + // don't involve any on-disk data, just descriptions thereof. + // XXX XXX + // + IndexScanParams params; + NamespaceDetails* nsd = nsdetails(ns.c_str()); + if (NULL == nsd) { + warning() << "Can't ixscan null ns " << ns << endl; + return NULL; + } + int idxNo = nsd->findIndexByKeyPattern(ixn->indexKeyPattern); + if (-1 == idxNo) { + warning() << "Can't find idx " << ixn->indexKeyPattern.toString() + << "in ns " << ns << endl; + return NULL; + } + params.descriptor = CatalogHack::getDescriptor(nsd, idxNo); + params.bounds = ixn->bounds; + params.direction = ixn->direction; + params.limit = ixn->limit; + return new IndexScan(params, ws, ixn->filter); + } + else if (STAGE_FETCH == root->getType()) { + const FetchNode* fn = static_cast<const FetchNode*>(root); + PlanStage* childStage = buildStages(ns, fn->child.get(), ws); + if (NULL == childStage) { return NULL; } + return new FetchStage(ws, childStage, fn->filter); + } + else if (STAGE_SORT == root->getType()) { + const SortNode* sn = static_cast<const SortNode*>(root); + PlanStage* childStage = buildStages(ns, sn->child.get(), ws); + if (NULL == childStage) { return NULL; } + SortStageParams params; + params.pattern = sn->pattern; + return new SortStage(params, ws, childStage); + } + else if (STAGE_PROJECTION == root->getType()) { + const ProjectionNode* pn = static_cast<const ProjectionNode*>(root); + QueryProjection* proj; + if (!QueryProjection::newInclusionExclusion(pn->projection, &proj).isOK()) { + return NULL; + } + PlanStage* childStage = buildStages(ns, pn->child.get(), ws); + if (NULL == childStage) { + delete proj; + return NULL; + } + return new ProjectionStage(proj, ws, childStage, NULL); + } + else if (STAGE_LIMIT == root->getType()) { + const LimitNode* ln = static_cast<const LimitNode*>(root); + PlanStage* childStage = buildStages(ns, ln->child.get(), ws); + if (NULL == childStage) { return NULL; } + return new LimitStage(ln->limit, ws, childStage); + } + else if (STAGE_SKIP == root->getType()) { + const SkipNode* sn = static_cast<const SkipNode*>(root); + PlanStage* childStage = buildStages(ns, sn->child.get(), ws); + if (NULL == childStage) { return NULL; } + return new SkipStage(sn->skip, ws, childStage); + } + else if (STAGE_AND_HASH == root->getType()) { + const AndHashNode* ahn = static_cast<const AndHashNode*>(root); + auto_ptr<AndHashStage> ret(new AndHashStage(ws, ahn->filter)); + for (size_t i = 0; i < ahn->children.size(); ++i) { + PlanStage* childStage = buildStages(ns, ahn->children[i], ws); + if (NULL == childStage) { return NULL; } + ret->addChild(childStage); + } + return ret.release(); + } + else if (STAGE_OR == root->getType()) { + const OrNode * orn = static_cast<const OrNode*>(root); + auto_ptr<OrStage> ret(new OrStage(ws, orn->dedup, orn->filter)); + for (size_t i = 0; i < orn->children.size(); ++i) { + PlanStage* childStage = buildStages(ns, orn->children[i], ws); + if (NULL == childStage) { return NULL; } + ret->addChild(childStage); + } + return ret.release(); + } + else { + stringstream ss; + root->appendToString(&ss, 0); + warning() << "Could not build exec tree for node " << ss.str() << endl; + return NULL; + } + } + + //static + bool StageBuilder::build(const QuerySolution& solution, PlanStage** rootOut, + WorkingSet** wsOut) { + QuerySolutionNode* root = solution.root.get(); + if (NULL == root) { return false; } + + auto_ptr<WorkingSet> ws(new WorkingSet()); + PlanStage* stageRoot = buildStages(solution.ns, root, ws.get()); + + if (NULL != stageRoot) { + *rootOut = stageRoot; + *wsOut = ws.release(); return true; } else { diff --git a/src/mongo/db/query/stage_types.h b/src/mongo/db/query/stage_types.h index 4f11069b663..98d4de94a22 100644 --- a/src/mongo/db/query/stage_types.h +++ b/src/mongo/db/query/stage_types.h @@ -41,6 +41,7 @@ namespace mongo { STAGE_IXSCAN, STAGE_LIMIT, STAGE_OR, + STAGE_PROJECTION, STAGE_SKIP, STAGE_SORT, STAGE_SORT_MERGE, diff --git a/src/mongo/db/storage/extent_manager.cpp b/src/mongo/db/storage/extent_manager.cpp index 04bef34cd3e..5275949ef0f 100644 --- a/src/mongo/db/storage/extent_manager.cpp +++ b/src/mongo/db/storage/extent_manager.cpp @@ -273,7 +273,7 @@ namespace mongo { break; // entire extent could be empty, keep looking } - return e->firstRecord; + return e->lastRecord; } Extent* ExtentManager::getNextExtent( Extent* e ) const { |