diff options
author | Hari Khalsa <hkhalsa@10gen.com> | 2013-07-02 20:01:25 -0400 |
---|---|---|
committer | Hari Khalsa <hkhalsa@10gen.com> | 2013-07-05 16:42:23 -0400 |
commit | 3ac4551322eb2307e4957b4a1014f03768b17a82 (patch) | |
tree | a15f6b319e9a65b0b91672bae2e50d34df311254 /src/mongo/db | |
parent | 1961a5d66cee7d9bc102cc2ff6f189c4c4306895 (diff) | |
download | mongo-3ac4551322eb2307e4957b4a1014f03768b17a82.tar.gz |
SERVER-10026 index intersection hashed and sorted
Diffstat (limited to 'src/mongo/db')
-rw-r--r-- | src/mongo/db/exec/and_common-inl.h | 45 | ||||
-rw-r--r-- | src/mongo/db/exec/and_hash.cpp | 211 | ||||
-rw-r--r-- | src/mongo/db/exec/and_hash.h | 84 | ||||
-rw-r--r-- | src/mongo/db/exec/and_sorted.cpp | 199 | ||||
-rw-r--r-- | src/mongo/db/exec/and_sorted.h | 84 | ||||
-rw-r--r-- | src/mongo/db/exec/index_scan.cpp | 13 | ||||
-rw-r--r-- | src/mongo/db/exec/stagedebug_cmd.cpp | 59 | ||||
-rw-r--r-- | src/mongo/db/exec/working_set.cpp | 10 | ||||
-rw-r--r-- | src/mongo/db/exec/working_set.h | 18 |
9 files changed, 719 insertions, 4 deletions
diff --git a/src/mongo/db/exec/and_common-inl.h b/src/mongo/db/exec/and_common-inl.h new file mode 100644 index 00000000000..364936e92e9 --- /dev/null +++ b/src/mongo/db/exec/and_common-inl.h @@ -0,0 +1,45 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +namespace mongo { + + class AndCommon { + public: + /** + * If src has any data dest doesn't, add that data to dest. + */ + static void mergeFrom(WorkingSetMember* dest, WorkingSetMember* src) { + verify(dest->hasLoc()); + verify(src->hasLoc()); + verify(dest->loc == src->loc); + + // This is N^2 but N is probably pretty small. Easy enough to revisit. + // Merge key data. + for (size_t i = 0; i < src->keyData.size(); ++i) { + bool found = false; + for (size_t j = 0; j < dest->keyData.size(); ++j) { + if (dest->keyData[j].indexKeyPattern == src->keyData[i].indexKeyPattern) { + found = true; + break; + } + } + if (!found) { dest->keyData.push_back(src->keyData[i]); } + } + } + }; + +} // namespace mongo + diff --git a/src/mongo/db/exec/and_hash.cpp b/src/mongo/db/exec/and_hash.cpp new file mode 100644 index 00000000000..2bce7bccbfb --- /dev/null +++ b/src/mongo/db/exec/and_hash.cpp @@ -0,0 +1,211 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "mongo/db/exec/and_hash.h" + +#include "mongo/db/exec/and_common-inl.h" +#include "mongo/db/exec/working_set_common.h" + +namespace mongo { + + AndHashStage::AndHashStage(WorkingSet* ws, Matcher* matcher) + : _ws(ws), _matcher(matcher), _resultIterator(_dataMap.end()), + _shouldScanChildren(true), _currentChild(0) {} + + AndHashStage::~AndHashStage() { + for (size_t i = 0; i < _children.size(); ++i) { delete _children[i]; } + } + + void AndHashStage::addChild(PlanStage* child) { _children.push_back(child); } + + bool AndHashStage::isEOF() { + if (_shouldScanChildren) { return false; } + return _dataMap.end() == _resultIterator; + } + + PlanStage::StageState AndHashStage::work(WorkingSetID* out) { + if (isEOF()) { return PlanStage::IS_EOF; } + + // An AND is either reading the first child into the hash table, probing against the hash + // table with subsequent children, or returning results. + + // We read the first child into our hash table. + if (_shouldScanChildren && (0 == _currentChild)) { + return readFirstChild(); + } + + // Probing into our hash table with other children. + if (_shouldScanChildren) { + return hashOtherChildren(); + } + + // Returning results. + verify(!_shouldScanChildren); + + // Keep the thing we're returning so we can remove it from our internal map later. + DataMap::iterator returnedIt = _resultIterator; + ++_resultIterator; + + WorkingSetID idToReturn = returnedIt->second; + _dataMap.erase(returnedIt); + WorkingSetMember* member = _ws->get(idToReturn); + + // We should check for matching at the end so the matcher can use information in the + // indices of all our children. + if (NULL == _matcher || _matcher->matches(member)) { + *out = idToReturn; + return PlanStage::ADVANCED; + } + else { + _ws->free(idToReturn); + // Skip over the non-matching thing we currently point at. + return PlanStage::NEED_TIME; + } + } + + PlanStage::StageState AndHashStage::readFirstChild() { + verify(_currentChild == 0); + + WorkingSetID id; + StageState childStatus = _children[0]->work(&id); + + if (PlanStage::ADVANCED == childStatus) { + WorkingSetMember* member = _ws->get(id); + + verify(member->hasLoc()); + verify(_dataMap.end() == _dataMap.find(member->loc)); + + _dataMap[member->loc] = id; + return PlanStage::NEED_TIME; + } + else if (PlanStage::IS_EOF == childStatus) { + // Done reading child 0. + _currentChild = 1; + // If our first child was empty, don't scan any others, no possible results. + if (_dataMap.empty()) { + _shouldScanChildren = false; + return PlanStage::IS_EOF; + } + return PlanStage::NEED_TIME; + } + else { + return childStatus; + } + } + + PlanStage::StageState AndHashStage::hashOtherChildren() { + verify(_currentChild > 0); + + WorkingSetID id; + StageState childStatus = _children[_currentChild]->work(&id); + + if (PlanStage::ADVANCED == childStatus) { + WorkingSetMember* member = _ws->get(id); + verify(member->hasLoc()); + if (_dataMap.end() == _dataMap.find(member->loc)) { + // Ignore. It's not in any previous child. + } + else { + // We have a hit. Copy data into the WSM we already have. + _seenMap.insert(member->loc); + WorkingSetMember* olderMember = _ws->get(_dataMap[member->loc]); + AndCommon::mergeFrom(olderMember, member); + } + _ws->free(id); + return PlanStage::NEED_TIME; + } + else if (PlanStage::IS_EOF == childStatus) { + // Finished with a child. + ++_currentChild; + + // Keep elements of _dataMap that are in _seenMap. + DataMap::iterator it = _dataMap.begin(); + while (it != _dataMap.end()) { + if (_seenMap.end() == _seenMap.find(it->first)) { + DataMap::iterator toErase = it; + ++it; + _ws->free(toErase->second); + _dataMap.erase(toErase); + } + else { ++it; } + } + + _seenMap.clear(); + + // _dataMap is now the intersection of the first _currentChild nodes. + + // If we have nothing to AND with after finishing any child, stop. + if (_dataMap.empty()) { + _shouldScanChildren = false; + return PlanStage::IS_EOF; + } + + // We've finished scanning all children. Return results with the next call to work(). + if (_currentChild == _children.size()) { + _shouldScanChildren = false; + _resultIterator = _dataMap.begin(); + } + + return PlanStage::NEED_TIME; + } + else { + // NEED_YIELD or FAILURE. + return childStatus; + } + } + + void AndHashStage::prepareToYield() { + for (size_t i = 0; i < _children.size(); ++i) { + _children[i]->prepareToYield(); + } + } + + void AndHashStage::recoverFromYield() { + for (size_t i = 0; i < _children.size(); ++i) { + _children[i]->recoverFromYield(); + } + } + + void AndHashStage::invalidate(const DiskLoc& dl) { + if (isEOF()) { return; } + + for (size_t i = 0; i < _children.size(); ++i) { + _children[i]->invalidate(dl); + } + + _seenMap.erase(dl); + + // If we're pointing at the DiskLoc, move past it. It will be deleted. + if (_dataMap.end() != _resultIterator && (_resultIterator->first == dl)) { + ++_resultIterator; + } + + DataMap::iterator it = _dataMap.find(dl); + if (_dataMap.end() != it) { + WorkingSetID id = it->second; + WorkingSetMember* member = _ws->get(id); + verify(member->loc == dl); + + // The loc is about to be invalidated. Fetch it and clear the loc. + WorkingSetCommon::fetchAndInvalidateLoc(member); + + // Add the WSID to the to-be-reviewed list in the WS. + _ws->flagForReview(id); + _dataMap.erase(it); + } + } + +} // namespace mongo diff --git a/src/mongo/db/exec/and_hash.h b/src/mongo/db/exec/and_hash.h new file mode 100644 index 00000000000..1cef926a824 --- /dev/null +++ b/src/mongo/db/exec/and_hash.h @@ -0,0 +1,84 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#pragma once + +#include <boost/scoped_ptr.hpp> +#include <vector> + +#include "mongo/db/diskloc.h" +#include "mongo/db/jsobj.h" +#include "mongo/db/matcher.h" +#include "mongo/db/exec/plan_stage.h" +#include "mongo/platform/unordered_set.h" + +namespace mongo { + + /** + * Reads from N children, each of which must have a valid DiskLoc. Uses a hash table to + * intersect the outputs of the N children, and outputs the intersection. + * + * Preconditions: Valid DiskLoc. More than one child. + * + * Any DiskLoc that we keep a reference to that is invalidated before we are able to return it + * is fetched and added to the WorkingSet as "flagged for further review." Because this stage + * operates with DiskLocs, we are unable to evaluate the AND for the invalidated DiskLoc, and it + * must be fully matched later. + */ + class AndHashStage : public PlanStage { + public: + AndHashStage(WorkingSet* ws, Matcher* matcher); + virtual ~AndHashStage(); + + void addChild(PlanStage* child); + + virtual StageState work(WorkingSetID* out); + virtual bool isEOF(); + + virtual void prepareToYield(); + virtual void recoverFromYield(); + virtual void invalidate(const DiskLoc& dl); + + private: + StageState readFirstChild(); + StageState hashOtherChildren(); + + // Not owned by us. + WorkingSet* _ws; + scoped_ptr<Matcher> _matcher; + + // The stages we read from. Owned by us. + vector<PlanStage*> _children; + + // _dataMap is filled out by the first child and probed by subsequent children. + typedef unordered_map<DiskLoc, WorkingSetID, DiskLoc::Hasher> DataMap; + DataMap _dataMap; + + // Keeps track of what elements from _dataMap subsequent children have seen. + typedef unordered_set<DiskLoc, DiskLoc::Hasher> SeenMap; + SeenMap _seenMap; + + // Iterator over the members of _dataMap that survive. + DataMap::iterator _resultIterator; + + // True if we're still scanning _children for results. + bool _shouldScanChildren; + + // Which child are we currently working on? + size_t _currentChild; + }; + +} // namespace mongo diff --git a/src/mongo/db/exec/and_sorted.cpp b/src/mongo/db/exec/and_sorted.cpp new file mode 100644 index 00000000000..650fce0cdd5 --- /dev/null +++ b/src/mongo/db/exec/and_sorted.cpp @@ -0,0 +1,199 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "mongo/db/exec/and_sorted.h" + +#include "mongo/db/exec/and_common-inl.h" +#include "mongo/db/exec/working_set_common.h" + +namespace mongo { + + AndSortedStage::AndSortedStage(WorkingSet* ws, Matcher* matcher) + : _ws(ws), _matcher(matcher), _targetNode(NULL), _targetId(WorkingSet::INVALID_ID), _isEOF(false) + { } + + AndSortedStage::~AndSortedStage() { + for (size_t i = 0; i < _children.size(); ++i) { delete _children[i]; } + } + + void AndSortedStage::addChild(PlanStage* child) { + _children.push_back(child); + } + + bool AndSortedStage::isEOF() { return _isEOF; } + + PlanStage::StageState AndSortedStage::work(WorkingSetID* out) { + if (isEOF()) { return PlanStage::IS_EOF; } + + // If we don't have any nodes that we're work()-ing until they hit a certain DiskLoc... + if (0 == _workingTowardRep.size()) { + // Get a target DiskLoc. + return getTargetLoc(); + } + + // Move nodes toward the target DiskLoc. + // If all nodes reach the target DiskLoc, return it. The next call to work() will set a new + // target. + return moveTowardTargetLoc(out); + } + + PlanStage::StageState AndSortedStage::getTargetLoc() { + verify(NULL == _targetNode); + verify(WorkingSet::INVALID_ID == _targetId); + verify(DiskLoc() == _targetLoc); + + // Pick one, and get a loc to work toward. + WorkingSetID id; + StageState state = _children[0]->work(&id); + + if (PlanStage::ADVANCED == state) { + WorkingSetMember* member = _ws->get(id); + + // AND only works with DiskLocs. If we don't have a loc, something went wrong with + // query planning. + verify(member->hasLoc()); + + // We have a value from one child to AND with. + _targetNode = _children[0]; + _targetId = id; + _targetLoc = member->loc; + + // We have to AND with all other children. + for (size_t i = 1; i < _children.size(); ++i) { + _workingTowardRep.push(_children[i]); + } + + return PlanStage::NEED_TIME; + } + else if (PlanStage::IS_EOF == state || PlanStage::FAILURE == state) { + _isEOF = true; + return state; + } + else { + // NEED_TIME, NEED_YIELD. + return state; + } + } + + PlanStage::StageState AndSortedStage::moveTowardTargetLoc(WorkingSetID* out) { + verify(NULL != _targetNode); + verify(WorkingSet::INVALID_ID != _targetId); + + // We have nodes that haven't hit _targetLoc yet. + PlanStage* next = _workingTowardRep.front(); + WorkingSetID id; + StageState state = next->work(&id); + + if (PlanStage::ADVANCED == state) { + WorkingSetMember* member = _ws->get(id); + + verify(member->hasLoc()); + + if (member->loc == _targetLoc) { + // The front element has hit _targetLoc. Don't move it forward anymore/work on another + // element. + _workingTowardRep.pop(); + AndCommon::mergeFrom(_ws->get(_targetId), member); + _ws->free(id); + + if (0 == _workingTowardRep.size()) { + WorkingSetID toReturn = _targetId; + WorkingSetMember* toMatchTest = _ws->get(toReturn); + + _targetNode = NULL; + _targetId = WorkingSet::INVALID_ID; + _targetLoc = DiskLoc(); + + // Everyone hit it, hooray. Return it, if it matches. + if (NULL == _matcher || _matcher->matches(toMatchTest)) { + *out = toReturn; + return PlanStage::ADVANCED; + } + else { + _ws->free(toReturn); + return PlanStage::NEED_TIME; + } + } + // More children need to be advanced to _targetLoc. + return PlanStage::NEED_TIME; + } + else if (member->loc < _targetLoc) { + // The front element of _workingTowardRep hasn't hit the thing we're AND-ing with + // yet. Try again later. + _ws->free(id); + return PlanStage::NEED_TIME; + } + else { + // member->loc > _targetLoc. + // _targetLoc wasn't successfully AND-ed with the other sub-plans. We toss it and try + // AND-ing with the next value. + _ws->free(_targetId); + _targetNode = next; + _targetLoc = member->loc; + _targetId = id; + _workingTowardRep = queue<PlanStage*>(); + for (size_t i = 0; i < _children.size(); ++i) { + if (next != _children[i]) { + _workingTowardRep.push(_children[i]); + } + } + // Need time to chase after the new _targetLoc. + return PlanStage::NEED_TIME; + } + } + else if (PlanStage::IS_EOF == state || PlanStage::FAILURE == state) { + _isEOF = true; + _ws->free(_targetId); + return state; + } + else { + // NEED_TIME, NEED_YIELD. + return state; + } + } + + void AndSortedStage::prepareToYield() { + for (size_t i = 0; i < _children.size(); ++i) { + _children[i]->prepareToYield(); + } + } + + void AndSortedStage::recoverFromYield() { + for (size_t i = 0; i < _children.size(); ++i) { + _children[i]->recoverFromYield(); + } + } + + void AndSortedStage::invalidate(const DiskLoc& dl) { + if (isEOF()) { return; } + + for (size_t i = 0; i < _children.size(); ++i) { + _children[i]->invalidate(dl); + } + + if (dl == _targetLoc) { + // We're in the middle of moving children forward until they hit _targetLoc, which is no + // longer a valid target. Fetch it, flag for review, and find another _targetLoc. + WorkingSetCommon::fetchAndInvalidateLoc(_ws->get(_targetId)); + _ws->flagForReview(_targetId); + _targetId = WorkingSet::INVALID_ID; + _targetNode = NULL; + _targetLoc = DiskLoc(); + _workingTowardRep = queue<PlanStage*>(); + } + } + +} // namespace mongo diff --git a/src/mongo/db/exec/and_sorted.h b/src/mongo/db/exec/and_sorted.h new file mode 100644 index 00000000000..db671344ced --- /dev/null +++ b/src/mongo/db/exec/and_sorted.h @@ -0,0 +1,84 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#pragma once + +#include <queue> +#include <vector> + +#include "mongo/db/diskloc.h" +#include "mongo/db/jsobj.h" +#include "mongo/db/matcher.h" +#include "mongo/db/exec/plan_stage.h" +#include "mongo/platform/unordered_set.h" + +namespace mongo { + + /** + * Reads from N children, each of which must have a valid DiskLoc. Assumes each child produces + * DiskLocs in sorted order. Outputs the intersection of the DiskLocs outputted by the + * children. + * + * Preconditions: Valid DiskLoc. More than one child. + * + * Any DiskLoc that we keep a reference to that is invalidated before we are able to return it + * is fetched and added to the WorkingSet as "flagged for further review." Because this stage + * operates with DiskLocs, we are unable to evaluate the AND for the invalidated DiskLoc, and it + * must be fully matched later. + */ + class AndSortedStage : public PlanStage { + public: + AndSortedStage(WorkingSet* ws, Matcher* matcher); + virtual ~AndSortedStage(); + + void addChild(PlanStage* child); + + virtual StageState work(WorkingSetID* out); + virtual bool isEOF(); + + virtual void prepareToYield(); + virtual void recoverFromYield(); + virtual void invalidate(const DiskLoc& dl); + + private: + // Find a node to AND against. + PlanStage::StageState getTargetLoc(); + + // Move a child which hasn't advanced to the target node forward. + // Returns the target node in 'out' if all children successfully advance to it. + PlanStage::StageState moveTowardTargetLoc(WorkingSetID* out); + + // Not owned by us. + WorkingSet* _ws; + scoped_ptr<Matcher> _matcher; + + // Owned by us. + vector<PlanStage*> _children; + + // The current node we're AND-ing against. + PlanStage* _targetNode; + DiskLoc _targetLoc; + WorkingSetID _targetId; + + // Nodes we're moving forward until they hit the element we're AND-ing. + // Everything in here has not advanced to _targetLoc yet. + queue<PlanStage*> _workingTowardRep; + + // If any child hits EOF or if we have any errors, we're EOF. + bool _isEOF; + }; + +} // namespace mongo diff --git a/src/mongo/db/exec/index_scan.cpp b/src/mongo/db/exec/index_scan.cpp index c4601996b18..1f16190e928 100644 --- a/src/mongo/db/exec/index_scan.cpp +++ b/src/mongo/db/exec/index_scan.cpp @@ -119,17 +119,24 @@ namespace mongo { return PlanStage::NEED_TIME; } - bool IndexScan::isEOF() { return _indexCursor->isEOF() || _hitEnd; } + bool IndexScan::isEOF() { + if (NULL == _indexCursor.get()) { + // Have to call work() at least once. + return false; + } + + return _indexCursor->isEOF() || _hitEnd; + } void IndexScan::prepareToYield() { - if (isEOF()) { return; } + if (isEOF() || (NULL == _indexCursor.get())) { return; } _savedKey = _indexCursor->getKey().getOwned(); _savedLoc = _indexCursor->getValue(); _indexCursor->savePosition(); } void IndexScan::recoverFromYield() { - if (isEOF()) { return; } + if (isEOF() || (NULL == _indexCursor.get())) { return; } _indexCursor->restorePosition(); diff --git a/src/mongo/db/exec/stagedebug_cmd.cpp b/src/mongo/db/exec/stagedebug_cmd.cpp index b8a876cbba0..9d34aca40f2 100644 --- a/src/mongo/db/exec/stagedebug_cmd.cpp +++ b/src/mongo/db/exec/stagedebug_cmd.cpp @@ -18,6 +18,8 @@ #include "mongo/db/auth/action_type.h" #include "mongo/db/auth/privilege.h" #include "mongo/db/commands.h" +#include "mongo/db/exec/and_hash.h" +#include "mongo/db/exec/and_sorted.h" #include "mongo/db/exec/index_scan.h" #include "mongo/db/exec/simple_plan_runner.h" #include "mongo/db/index/catalog_hack.h" @@ -44,10 +46,14 @@ namespace mongo { * stop: stopObj, endInclusive: true/false, direction: -1/1, * limit: int}}} * + * Internal Nodes: + * + * node -> {andHash: {filter: {filter}, args: { nodes: [node, node]}}} + * node -> {andSorted: {filter: {filter}, args: { nodes: [node, node]}}} + * * Forthcoming Nodes: * * node -> {cscan: {filter: {filter}, args: {name: "collectionname" }}} - * node -> {and: {filter: {filter}, args: { nodes: [node, node]}}} * node -> {or: {filter: {filter}, args: { dedup:bool, nodes:[node, node]}}} * node -> {fetch: {filter: {filter}, args: {node: node}}} * node -> {sort: {filter: {filter}, args: {node: node, pattern: objWithSortCriterion}}} @@ -147,6 +153,57 @@ namespace mongo { return new IndexScan(params, workingSet, matcher.release()); } + else if ("andHash" == nodeName) { + uassert(16921, "Nodes argument must be provided to AND", + nodeArgs["nodes"].isABSONObj()); + + auto_ptr<AndHashStage> andStage(new AndHashStage(workingSet, matcher.release())); + + int nodesAdded = 0; + BSONObjIterator it(nodeArgs["nodes"].Obj()); + while (it.more()) { + BSONElement e = it.next(); + uassert(16922, "node of AND isn't an obj?: " + e.toString(), + e.isABSONObj()); + + PlanStage* subNode = parseQuery(dbname, e.Obj(), workingSet); + uassert(16923, "Can't parse sub-node of AND: " + e.Obj().toString(), + NULL != subNode); + // takes ownership + andStage->addChild(subNode); + ++nodesAdded; + } + + uassert(16927, "AND requires more than one child", nodesAdded >= 2); + + return andStage.release(); + } + else if ("andSorted" == nodeName) { + uassert(16924, "Nodes argument must be provided to AND", + nodeArgs["nodes"].isABSONObj()); + + auto_ptr<AndSortedStage> andStage(new AndSortedStage(workingSet, + matcher.release())); + + int nodesAdded = 0; + BSONObjIterator it(nodeArgs["nodes"].Obj()); + while (it.more()) { + BSONElement e = it.next(); + uassert(16925, "node of AND isn't an obj?: " + e.toString(), + e.isABSONObj()); + + PlanStage* subNode = parseQuery(dbname, e.Obj(), workingSet); + uassert(16926, "Can't parse sub-node of AND: " + e.Obj().toString(), + NULL != subNode); + // takes ownership + andStage->addChild(subNode); + ++nodesAdded; + } + + uassert(16928, "AND requires more than one child", nodesAdded >= 2); + + return andStage.release(); + } else { return NULL; } diff --git a/src/mongo/db/exec/working_set.cpp b/src/mongo/db/exec/working_set.cpp index 7a4b96e54d4..248d3671a0c 100644 --- a/src/mongo/db/exec/working_set.cpp +++ b/src/mongo/db/exec/working_set.cpp @@ -49,6 +49,16 @@ namespace mongo { _data.erase(it); } + void WorkingSet::flagForReview(const WorkingSetID& i) { + WorkingSetMember* member = get(i); + verify(WorkingSetMember::OWNED_OBJ == member->state); + _flagged.push_back(i); + } + + const vector<WorkingSetID>& WorkingSet::getFlagged() const { + return _flagged; + } + WorkingSetMember::WorkingSetMember() : state(WorkingSetMember::INVALID) { } bool WorkingSetMember::hasLoc() const { diff --git a/src/mongo/db/exec/working_set.h b/src/mongo/db/exec/working_set.h index f152871c029..e6705f4342f 100644 --- a/src/mongo/db/exec/working_set.h +++ b/src/mongo/db/exec/working_set.h @@ -54,6 +54,21 @@ namespace mongo { */ void free(const WorkingSetID& i); + /** + * The DiskLoc in WSM 'i' was invalidated while being processed. Any predicates over the + * WSM could not be fully evaluated, so the WSM may or may not satisfy them. As such, if we + * wish to output the WSM, we must do some clean-up work later. Adds the WSM with id 'i' to + * the list of flagged WSIDs. + * + * The WSM must be in the state OWNED_OBJ. + */ + void flagForReview(const WorkingSetID& i); + + /** + * Return a vector of all WSIDs passed to flagForReview. + */ + const vector<WorkingSetID>& getFlagged() const; + private: typedef unordered_map<WorkingSetID, WorkingSetMember*> DataMap; @@ -62,6 +77,9 @@ namespace mongo { // The WorkingSetID returned by the next call to allocate(). Should refer to the next valid // ID. IDs allocated contiguously. Should never point at an in-use ID. WorkingSetID _nextId; + + // All WSIDs invalidated during evaluation of a predicate (AND). + vector<WorkingSetID> _flagged; }; /** |