 src/mongo/db/exec/and_common-inl.h        |  16
 src/mongo/db/exec/and_hash.cpp            | 116
 src/mongo/db/exec/and_hash.h              |  11
 src/mongo/db/exec/and_sorted.cpp          |   2
 src/mongo/db/query/planner_access.cpp     |  11
 src/mongo/db/query/query_planner_test.cpp |  18
 src/mongo/db/query/query_solution.h       |   2
 src/mongo/dbtests/query_stage_and.cpp     |  60
 8 files changed, 180 insertions(+), 56 deletions(-)
diff --git a/src/mongo/db/exec/and_common-inl.h b/src/mongo/db/exec/and_common-inl.h
index 31f6fe33c87..48aa8313dc1 100644
--- a/src/mongo/db/exec/and_common-inl.h
+++ b/src/mongo/db/exec/and_common-inl.h
@@ -33,29 +33,29 @@ namespace mongo {

     /**
      * If src has any data dest doesn't, add that data to dest.
      */
-    static void mergeFrom(WorkingSetMember* dest, WorkingSetMember* src) {
+    static void mergeFrom(WorkingSetMember* dest, const WorkingSetMember& src) {
         verify(dest->hasLoc());
-        verify(src->hasLoc());
-        verify(dest->loc == src->loc);
+        verify(src.hasLoc());
+        verify(dest->loc == src.loc);

         // This is N^2 but N is probably pretty small.  Easy enough to revisit.
         // Merge key data.
-        for (size_t i = 0; i < src->keyData.size(); ++i) {
+        for (size_t i = 0; i < src.keyData.size(); ++i) {
             bool found = false;
             for (size_t j = 0; j < dest->keyData.size(); ++j) {
-                if (dest->keyData[j].indexKeyPattern == src->keyData[i].indexKeyPattern) {
+                if (dest->keyData[j].indexKeyPattern == src.keyData[i].indexKeyPattern) {
                     found = true;
                     break;
                 }
             }
-            if (!found) { dest->keyData.push_back(src->keyData[i]); }
+            if (!found) { dest->keyData.push_back(src.keyData[i]); }
         }

         // Merge computed data.
         typedef WorkingSetComputedDataType WSCD;
         for (WSCD i = WSCD(0); i < WSM_COMPUTED_NUM_TYPES; i = WSCD(i + 1)) {
-            if (!dest->hasComputed(i) && src->hasComputed(i)) {
-                dest->addComputed(src->getComputed(i)->clone());
+            if (!dest->hasComputed(i) && src.hasComputed(i)) {
+                dest->addComputed(src.getComputed(i)->clone());
             }
         }
     }
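The signature change above makes it explicit that mergeFrom() only reads from its source: src becomes a const reference instead of a mutable pointer. A minimal standalone sketch of the same merge pattern, using hypothetical Member/KeyData stand-ins rather than the real WorkingSetMember types:

    #include <cassert>
    #include <string>
    #include <vector>

    // Hypothetical stand-ins for WorkingSetMember and its index key data.
    struct KeyData {
        std::string indexKeyPattern;   // e.g. "{a: 1}"
        std::string key;
    };
    struct Member {
        long long loc;                 // stands in for DiskLoc
        std::vector<KeyData> keyData;
    };

    // If src has key data dest doesn't, add it to dest.  Quadratic, but the
    // number of indices contributing to one member is small in practice.
    static void mergeFrom(Member* dest, const Member& src) {
        assert(dest->loc == src.loc);
        for (size_t i = 0; i < src.keyData.size(); ++i) {
            bool found = false;
            for (size_t j = 0; j < dest->keyData.size(); ++j) {
                if (dest->keyData[j].indexKeyPattern == src.keyData[i].indexKeyPattern) {
                    found = true;
                    break;
                }
            }
            if (!found) { dest->keyData.push_back(src.keyData[i]); }
        }
    }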
diff --git a/src/mongo/db/exec/and_hash.cpp b/src/mongo/db/exec/and_hash.cpp
index 1116684e51f..8b96570a3f5 100644
--- a/src/mongo/db/exec/and_hash.cpp
+++ b/src/mongo/db/exec/and_hash.cpp
@@ -35,8 +35,10 @@ namespace mongo {

     AndHashStage::AndHashStage(WorkingSet* ws, const MatchExpression* filter)
-        : _ws(ws), _filter(filter), _resultIterator(_dataMap.end()),
-          _shouldScanChildren(true), _currentChild(0) {}
+        : _ws(ws),
+          _filter(filter),
+          _hashingChildren(true),
+          _currentChild(0) {}

     AndHashStage::~AndHashStage() {
         for (size_t i = 0; i < _children.size(); ++i) { delete _children[i]; }
@@ -45,8 +47,16 @@ namespace mongo {
     void AndHashStage::addChild(PlanStage* child) { _children.push_back(child); }

     bool AndHashStage::isEOF() {
-        if (_shouldScanChildren) { return false; }
-        return _dataMap.end() == _resultIterator;
+        // Either we're busy hashing children, in which case we're not done yet.
+        if (_hashingChildren) { return false; }
+
+        // Or we're streaming in results from the last child.
+
+        // If there's nothing to probe against, we're EOF.
+        if (_dataMap.empty()) { return true; }
+
+        // Otherwise, we're done when the last child is done.
+        return _children[_children.size() - 1]->isEOF();
     }

     PlanStage::StageState AndHashStage::work(WorkingSetID* out) {
@@ -55,42 +65,74 @@ namespace mongo {

         if (isEOF()) { return PlanStage::IS_EOF; }

         // An AND is either reading the first child into the hash table, probing against the hash
-        // table with subsequent children, or returning results.
+        // table with subsequent children, or checking the last child's results to see if they're
+        // in the hash table.

         // We read the first child into our hash table.
-        if (_shouldScanChildren && (0 == _currentChild)) {
-            return readFirstChild(out);
-        }
-
-        // Probing into our hash table with other children.
-        if (_shouldScanChildren) {
-            return hashOtherChildren(out);
+        if (_hashingChildren) {
+            if (0 == _currentChild) {
+                return readFirstChild(out);
+            }
+            else if (_currentChild < _children.size() - 1) {
+                return hashOtherChildren(out);
+            }
+            else {
+                _hashingChildren = false;
+                // We don't hash our last child.  Instead, we probe the table created from the
+                // previous children, returning results in the order of the last child.
+                // Fall through to below.
+            }
         }

-        // Returning results.
-        verify(!_shouldScanChildren);
+        // Returning results.  We read from the last child and return the results that are in our
+        // hash map.

-        // Keep the thing we're returning so we can remove it from our internal map later.
-        DataMap::iterator returnedIt = _resultIterator;
-        ++_resultIterator;
+        // We should be EOF if we're not hashing results and the dataMap is empty.
+        verify(!_dataMap.empty());

-        WorkingSetID idToReturn = returnedIt->second;
-        _dataMap.erase(returnedIt);
-        WorkingSetMember* member = _ws->get(idToReturn);
+        // We probe _dataMap with the last child.
+        verify(_currentChild == _children.size() - 1);

-        // We should check for matching at the end so the matcher can use information in the
-        // indices of all our children.
-        if (Filter::passes(member, _filter)) {
-            *out = idToReturn;
-            ++_commonStats.advanced;
-            return PlanStage::ADVANCED;
+        // Work the last child.
+        StageState childStatus = _children[_children.size() - 1]->work(out);
+        if (PlanStage::ADVANCED != childStatus) {
+            return childStatus;
         }
-        else {
-            _ws->free(idToReturn);
-            // Skip over the non-matching thing we currently point at.
+
+        // We know that we've ADVANCED.  See if the WSM is in our table.
+        WorkingSetMember* member = _ws->get(*out);
+        verify(member->hasLoc());
+
+        DataMap::iterator it = _dataMap.find(member->loc);
+        if (_dataMap.end() == it) {
+            // Child's output wasn't in every previous child.  Throw it out.
+            _ws->free(*out);
             ++_commonStats.needTime;
             return PlanStage::NEED_TIME;
         }
+        else {
+            // Child's output was in every previous child.  Merge any key data in
+            // the child's output and free the child's just-outputted WSM.
+            WorkingSetID hashID = it->second;
+            _dataMap.erase(it);
+
+            WorkingSetMember* olderMember = _ws->get(hashID);
+            AndCommon::mergeFrom(olderMember, *member);
+            _ws->free(*out);
+
+            // We should check for matching at the end so the matcher can use information in the
+            // indices of all our children.
+            if (Filter::passes(olderMember, _filter)) {
+                *out = hashID;
+                ++_commonStats.advanced;
+                return PlanStage::ADVANCED;
+            }
+            else {
+                _ws->free(hashID);
+                ++_commonStats.needTime;
+                return PlanStage::NEED_TIME;
+            }
+        }
     }

@@ -115,7 +157,7 @@ namespace mongo {

         // If our first child was empty, don't scan any others, no possible results.
         if (_dataMap.empty()) {
-            _shouldScanChildren = false;
+            _hashingChildren = false;
             return PlanStage::IS_EOF;
         }

@@ -153,7 +195,7 @@ namespace mongo {
                 // We have a hit.  Copy data into the WSM we already have.
                 _seenMap.insert(member->loc);
                 WorkingSetMember* olderMember = _ws->get(_dataMap[member->loc]);
-                AndCommon::mergeFrom(olderMember, member);
+                AndCommon::mergeFrom(olderMember, *member);
             }
             _ws->free(id);
             ++_commonStats.needTime;
@@ -183,14 +225,13 @@ namespace mongo {

             // If we have nothing to AND with after finishing any child, stop.
             if (_dataMap.empty()) {
-                _shouldScanChildren = false;
+                _hashingChildren = false;
                 return PlanStage::IS_EOF;
             }

             // We've finished scanning all children.  Return results with the next call to work().
             if (_currentChild == _children.size()) {
-                _shouldScanChildren = false;
-                _resultIterator = _dataMap.begin();
+                _hashingChildren = false;
             }

             ++_commonStats.needTime;
@@ -236,18 +277,13 @@ namespace mongo {

         _seenMap.erase(dl);

-        // If we're pointing at the DiskLoc, move past it.  It will be deleted.
-        if (_dataMap.end() != _resultIterator && (_resultIterator->first == dl)) {
-            ++_resultIterator;
-        }
-
         DataMap::iterator it = _dataMap.find(dl);
         if (_dataMap.end() != it) {
             WorkingSetID id = it->second;
             WorkingSetMember* member = _ws->get(id);
             verify(member->loc == dl);

-            if (_shouldScanChildren) {
+            if (_hashingChildren) {
                 ++_specificStats.flaggedInProgress;
             }
             else {
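The rewritten work() turns AndHashStage into a textbook hash join: the first child builds the table, the middle children prune it, and the last child is streamed through it, so results come out in the last child's order and one result can be returned per work() call instead of buffering the whole intersection. A self-contained sketch of that control flow, with plain ints standing in for DiskLocs and eager vectors standing in for child stages (names are illustrative, not the stage's API):

    #include <unordered_set>
    #include <vector>

    // Intersect child result streams the way the new AndHashStage does.
    // Output order is the order of the last child, which is never hashed.
    std::vector<int> andHash(const std::vector<std::vector<int> >& children) {
        // Build the hash table from the first child.
        std::unordered_set<int> table(children[0].begin(), children[0].end());

        // Probe with every middle child, keeping only locs it has seen too
        // (the role _seenMap plays in the real stage).
        for (size_t c = 1; c + 1 < children.size(); ++c) {
            std::unordered_set<int> seen;
            for (size_t i = 0; i < children[c].size(); ++i) {
                if (table.count(children[c][i])) { seen.insert(children[c][i]); }
            }
            table.swap(seen);
            if (table.empty()) { return std::vector<int>(); }  // nothing to AND with
        }

        // Stream the last child, emitting only locs present in the table.
        std::vector<int> out;
        const std::vector<int>& last = children.back();
        for (size_t i = 0; i < last.size(); ++i) {
            if (table.count(last[i])) { out.push_back(last[i]); }
        }
        return out;
    }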
diff --git a/src/mongo/db/exec/and_hash.h b/src/mongo/db/exec/and_hash.h
index 48a1667c1f8..116db71c928 100644
--- a/src/mongo/db/exec/and_hash.h
+++ b/src/mongo/db/exec/and_hash.h
@@ -79,19 +79,18 @@ namespace mongo {
         // The stages we read from.  Owned by us.
         vector<PlanStage*> _children;

-        // _dataMap is filled out by the first child and probed by subsequent children.
+        // _dataMap is filled out by the first child and probed by subsequent children.  This is the
+        // hash table that we create by intersecting _children and probe with the last child.
         typedef unordered_map<DiskLoc, WorkingSetID, DiskLoc::Hasher> DataMap;
         DataMap _dataMap;

         // Keeps track of what elements from _dataMap subsequent children have seen.
+        // Only used while _hashingChildren.
         typedef unordered_set<DiskLoc, DiskLoc::Hasher> SeenMap;
         SeenMap _seenMap;

-        // Iterator over the members of _dataMap that survive.
-        DataMap::iterator _resultIterator;
-
-        // True if we're still scanning _children for results.
-        bool _shouldScanChildren;
+        // True if we're still intersecting _children[0..._children.size()-1].
+        bool _hashingChildren;

         // Which child are we currently working on?
         size_t _currentChild;
diff --git a/src/mongo/db/exec/and_sorted.cpp b/src/mongo/db/exec/and_sorted.cpp
index 0f884088845..cd3e4472345 100644
--- a/src/mongo/db/exec/and_sorted.cpp
+++ b/src/mongo/db/exec/and_sorted.cpp
@@ -135,7 +135,7 @@ namespace mongo {
                 // The front element has hit _targetLoc.  Don't move it forward anymore/work on
                 // another element.
                 _workingTowardRep.pop();
-                AndCommon::mergeFrom(_ws->get(_targetId), member);
+                AndCommon::mergeFrom(_ws->get(_targetId), *member);
                 _ws->free(id);

                 if (0 == _workingTowardRep.size()) {
diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp
index 41d34742859..16245186bae 100644
--- a/src/mongo/db/query/planner_access.cpp
+++ b/src/mongo/db/query/planner_access.cpp
@@ -603,6 +603,17 @@ namespace mongo {
                 AndHashNode* ahn = new AndHashNode();
                 ahn->children.swap(ixscanNodes);
                 andResult = ahn;
+                // The AndHashNode provides the sort order of its last child.  If any of the
+                // possible subnodes of AndHashNode provides the sort order we care about, we put
+                // that one last.
+                for (size_t i = 0; i < ahn->children.size(); ++i) {
+                    ahn->children[i]->computeProperties();
+                    const BSONObjSet& sorts = ahn->children[i]->getSort();
+                    if (sorts.end() != sorts.find(query.getParsed().getSort())) {
+                        std::swap(ahn->children[i], ahn->children.back());
+                        break;
+                    }
+                }
             }
         }
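Because the AND now inherits the sort order of whichever child runs last, the planner change above just looks for a child that can provide the requested sort and swaps it into the last slot. The same idea over toy types (illustrative names, not the planner's):

    #include <algorithm>
    #include <set>
    #include <string>
    #include <vector>

    struct ToyNode {
        std::set<std::string> sorts;  // sort orders this subtree can provide
    };

    // Move a child that provides 'desiredSort' to the back so an
    // AndHash-style parent inherits that order from it.
    void putSortProviderLast(std::vector<ToyNode*>& children,
                             const std::string& desiredSort) {
        for (size_t i = 0; i < children.size(); ++i) {
            if (children[i]->sorts.count(desiredSort)) {
                std::swap(children[i], children.back());
                break;
            }
        }
    }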
diff --git a/src/mongo/db/query/query_planner_test.cpp b/src/mongo/db/query/query_planner_test.cpp
index fd5e9de5a55..d1c380dcaa5 100644
--- a/src/mongo/db/query/query_planner_test.cpp
+++ b/src/mongo/db/query/query_planner_test.cpp
@@ -1891,6 +1891,24 @@ namespace {
                                 "{ixscan: {filter: null, pattern: {'a.c':1}}}]}}}}");
     }

+    TEST_F(QueryPlannerTest, IntersectSortFromAndHash) {
+        params.options = QueryPlannerParams::NO_TABLE_SCAN | QueryPlannerParams::INDEX_INTERSECTION;
+        addIndex(BSON("a" << 1));
+        addIndex(BSON("b" << 1));
+        runQuerySortProj(fromjson("{a: 1, b:{$gt: 1}}"), fromjson("{b:1}"), BSONObj());
+
+        // This provides the sort.
+        assertSolutionExists("{fetch: {filter: null, node: {andHash: {nodes: ["
+                                 "{ixscan: {filter: null, pattern: {a:1}}},"
+                                 "{ixscan: {filter: null, pattern: {b:1}}}]}}}}");
+
+        // Rearrange the preds, shouldn't matter.
+        runQuerySortProj(fromjson("{b: 1, a:{$lt: 7}}"), fromjson("{b:1}"), BSONObj());
+        assertSolutionExists("{fetch: {filter: null, node: {andHash: {nodes: ["
+                                 "{ixscan: {filter: null, pattern: {a:1}}},"
+                                 "{ixscan: {filter: null, pattern: {b:1}}}]}}}}");
+    }
+
     //
     // Test bad input to query planner helpers.
     //
diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h
index 8a30cd169b4..846b5d8e990 100644
--- a/src/mongo/db/query/query_solution.h
+++ b/src/mongo/db/query/query_solution.h
@@ -247,7 +247,7 @@ namespace mongo {
         bool fetched() const;
         bool hasField(const string& field) const;
         bool sortedByDiskLoc() const { return false; }
-        const BSONObjSet& getSort() const { return _sort; }
+        const BSONObjSet& getSort() const { return children.back()->getSort(); }

         BSONObjSet _sort;
     };
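The getSort() change is what makes the planner test above pass: AndHashNode no longer reports its own _sort but advertises whatever its streamed last child provides, and the property bubbles up through any fetch node on top. A minimal model of that delegation, again with hypothetical types:

    #include <set>
    #include <string>
    #include <vector>

    struct PlanNodeModel {
        std::vector<PlanNodeModel*> children;
        std::set<std::string> ownSorts;
        virtual ~PlanNodeModel() {}

        // Leaves report their own sort orders.
        virtual const std::set<std::string>& getSort() const { return ownSorts; }
    };

    // An AndHash-like node reports the sorts of its last (streamed) child.
    struct AndHashNodeModel : PlanNodeModel {
        virtual const std::set<std::string>& getSort() const {
            return children.back()->getSort();
        }
    };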
diff --git a/src/mongo/dbtests/query_stage_and.cpp b/src/mongo/dbtests/query_stage_and.cpp
index 4024a1c63aa..9889d1435e7 100644
--- a/src/mongo/dbtests/query_stage_and.cpp
+++ b/src/mongo/dbtests/query_stage_and.cpp
@@ -710,6 +710,65 @@ namespace QueryStageAnd {
         }
     };

+    // Verify that AND preserves the order of the last child.
+    class QueryStageAndSortedByLastChild : public QueryStageAndBase {
+    public:
+        void run() {
+            Client::WriteContext ctx(ns());
+            Database* db = ctx.ctx().db();
+            Collection* coll = db->getCollection(ns());
+            if (!coll) {
+                coll = db->createCollection(ns());
+            }
+
+            for (int i = 0; i < 50; ++i) {
+                insert(BSON("foo" << 1 << "bar" << i));
+            }
+
+            addIndex(BSON("foo" << 1));
+            addIndex(BSON("bar" << 1));
+
+            WorkingSet ws;
+            scoped_ptr<AndHashStage> ah(new AndHashStage(&ws, NULL));
+
+            // Scan over foo == 1.
+            IndexScanParams params;
+            params.descriptor = getIndex(BSON("foo" << 1), coll);
+            params.bounds.isSimpleRange = true;
+            params.bounds.startKey = BSON("" << 1);
+            params.bounds.endKey = BSON("" << 1);
+            params.bounds.endKeyInclusive = true;
+            params.direction = 1;
+            ah->addChild(new IndexScan(params, &ws, NULL));
+
+            // Intersect with 7 <= bar < 10000.
+            params.descriptor = getIndex(BSON("bar" << 1), coll);
+            params.bounds.startKey = BSON("" << 7);
+            params.bounds.endKey = BSON("" << 10000);
+            ah->addChild(new IndexScan(params, &ws, NULL));
+
+            WorkingSetID lastId = WorkingSet::INVALID_ID;
+
+            int count = 0;
+            while (!ah->isEOF()) {
+                WorkingSetID id;
+                PlanStage::StageState status = ah->work(&id);
+                if (PlanStage::ADVANCED != status) { continue; }
+
+                BSONObj thisObj = ws.get(id)->loc.obj();
+                ASSERT_EQUALS(7 + count, thisObj["bar"].numberInt());
+                ++count;
+                if (WorkingSet::INVALID_ID != lastId) {
+                    BSONObj lastObj = ws.get(lastId)->loc.obj();
+                    ASSERT_LESS_THAN(lastObj["bar"].woCompare(thisObj["bar"]), 0);
+                }
+                lastId = id;
+            }
+
+            ASSERT_EQUALS(count, 43);
+        }
+    };
+
     class All : public Suite {
     public:
         All() : Suite( "query_stage_and" ) { }
@@ -725,6 +784,7 @@ namespace QueryStageAnd {
             add<QueryStageAndSortedWithNothing>();
             add<QueryStageAndSortedProducesNothing>();
             add<QueryStageAndSortedWithMatcher>();
+            add<QueryStageAndSortedByLastChild>();
         }
     } queryStageAndAll;
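The expected numbers in the new dbtest follow from the data: all 50 documents match foo == 1, the last child scans bar upward from 7 (its bounds reach 10000 but the data stops at 49), so 43 documents survive, in ascending bar order because that is the last child's scan order. A quick standalone check of that arithmetic:

    #include <cassert>

    int main() {
        int count = 0;
        int lastBar = -1;
        for (int bar = 0; bar < 50; ++bar) {   // docs: {foo: 1, bar: 0..49}
            // First child matches everything (foo == 1); the last child's
            // bounds keep 7 <= bar <= 10000.
            if (bar >= 7 && bar <= 10000) {
                assert(bar > lastBar);         // ascending: the last child's order
                lastBar = bar;
                ++count;
            }
        }
        assert(count == 43);                   // matches ASSERT_EQUALS(count, 43)
        return 0;
    }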