 src/mongo/db/exec/and_common-inl.h        |  16
 src/mongo/db/exec/and_hash.cpp            | 116
 src/mongo/db/exec/and_hash.h              |  11
 src/mongo/db/exec/and_sorted.cpp          |   2
 src/mongo/db/query/planner_access.cpp     |  11
 src/mongo/db/query/query_planner_test.cpp |  18
 src/mongo/db/query/query_solution.h       |   2
 src/mongo/dbtests/query_stage_and.cpp     |  60
 8 files changed, 180 insertions(+), 56 deletions(-)
diff --git a/src/mongo/db/exec/and_common-inl.h b/src/mongo/db/exec/and_common-inl.h
index 31f6fe33c87..48aa8313dc1 100644
--- a/src/mongo/db/exec/and_common-inl.h
+++ b/src/mongo/db/exec/and_common-inl.h
@@ -33,29 +33,29 @@ namespace mongo {

     /**
      * If src has any data dest doesn't, add that data to dest.
      */
-    static void mergeFrom(WorkingSetMember* dest, WorkingSetMember* src) {
+    static void mergeFrom(WorkingSetMember* dest, const WorkingSetMember& src) {
         verify(dest->hasLoc());
-        verify(src->hasLoc());
-        verify(dest->loc == src->loc);
+        verify(src.hasLoc());
+        verify(dest->loc == src.loc);

         // This is N^2 but N is probably pretty small.  Easy enough to revisit.
         // Merge key data.
-        for (size_t i = 0; i < src->keyData.size(); ++i) {
+        for (size_t i = 0; i < src.keyData.size(); ++i) {
             bool found = false;
             for (size_t j = 0; j < dest->keyData.size(); ++j) {
-                if (dest->keyData[j].indexKeyPattern == src->keyData[i].indexKeyPattern) {
+                if (dest->keyData[j].indexKeyPattern == src.keyData[i].indexKeyPattern) {
                     found = true;
                     break;
                 }
             }
-            if (!found) { dest->keyData.push_back(src->keyData[i]); }
+            if (!found) { dest->keyData.push_back(src.keyData[i]); }
         }

         // Merge computed data.
         typedef WorkingSetComputedDataType WSCD;
         for (WSCD i = WSCD(0); i < WSM_COMPUTED_NUM_TYPES; i = WSCD(i + 1)) {
-            if (!dest->hasComputed(i) && src->hasComputed(i)) {
-                dest->addComputed(src->getComputed(i)->clone());
+            if (!dest->hasComputed(i) && src.hasComputed(i)) {
+                dest->addComputed(src.getComputed(i)->clone());
             }
         }
     }
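The signature change above makes it explicit that mergeFrom() only reads from its source: src becomes a const reference instead of a mutable pointer. A minimal standalone sketch of the same merge pattern, using hypothetical Member/KeyData stand-ins rather than the real WorkingSetMember types:

    #include <cassert>
    #include <string>
    #include <vector>

    // Hypothetical stand-ins for WorkingSetMember and its index key data.
    struct KeyData {
        std::string indexKeyPattern;   // e.g. "{a: 1}"
        std::string key;
    };
    struct Member {
        long long loc;                 // stands in for DiskLoc
        std::vector<KeyData> keyData;
    };

    // If src has key data dest doesn't, add it to dest.  Quadratic, but the
    // number of indices contributing to one member is small in practice.
    static void mergeFrom(Member* dest, const Member& src) {
        assert(dest->loc == src.loc);
        for (size_t i = 0; i < src.keyData.size(); ++i) {
            bool found = false;
            for (size_t j = 0; j < dest->keyData.size(); ++j) {
                if (dest->keyData[j].indexKeyPattern == src.keyData[i].indexKeyPattern) {
                    found = true;
                    break;
                }
            }
            if (!found) { dest->keyData.push_back(src.keyData[i]); }
        }
    }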
diff --git a/src/mongo/db/exec/and_hash.cpp b/src/mongo/db/exec/and_hash.cpp
index 1116684e51f..8b96570a3f5 100644
--- a/src/mongo/db/exec/and_hash.cpp
+++ b/src/mongo/db/exec/and_hash.cpp
@@ -35,8 +35,10 @@ namespace mongo {

     AndHashStage::AndHashStage(WorkingSet* ws, const MatchExpression* filter)
-        : _ws(ws), _filter(filter), _resultIterator(_dataMap.end()),
-          _shouldScanChildren(true), _currentChild(0) {}
+        : _ws(ws),
+          _filter(filter),
+          _hashingChildren(true),
+          _currentChild(0) {}

     AndHashStage::~AndHashStage() {
         for (size_t i = 0; i < _children.size(); ++i) { delete _children[i]; }
@@ -45,8 +47,16 @@ namespace mongo {
     void AndHashStage::addChild(PlanStage* child) { _children.push_back(child); }

     bool AndHashStage::isEOF() {
-        if (_shouldScanChildren) { return false; }
-        return _dataMap.end() == _resultIterator;
+        // Either we're busy hashing children, in which case we're not done yet.
+        if (_hashingChildren) { return false; }
+
+        // Or we're streaming in results from the last child.
+
+        // If there's nothing to probe against, we're EOF.
+        if (_dataMap.empty()) { return true; }
+
+        // Otherwise, we're done when the last child is done.
+        return _children[_children.size() - 1]->isEOF();
     }

     PlanStage::StageState AndHashStage::work(WorkingSetID* out) {
@@ -55,42 +65,74 @@ namespace mongo {

         if (isEOF()) { return PlanStage::IS_EOF; }

         // An AND is either reading the first child into the hash table, probing against the hash
-        // table with subsequent children, or returning results.
+        // table with subsequent children, or checking the last child's results to see if they're
+        // in the hash table.

         // We read the first child into our hash table.
-        if (_shouldScanChildren && (0 == _currentChild)) {
-            return readFirstChild(out);
-        }
-
-        // Probing into our hash table with other children.
-        if (_shouldScanChildren) {
-            return hashOtherChildren(out);
+        if (_hashingChildren) {
+            if (0 == _currentChild) {
+                return readFirstChild(out);
+            }
+            else if (_currentChild < _children.size() - 1) {
+                return hashOtherChildren(out);
+            }
+            else {
+                _hashingChildren = false;
+                // We don't hash our last child.  Instead, we probe the table created from the
+                // previous children, returning results in the order of the last child.
+                // Fall through to below.
+            }
         }

-        // Returning results.
-        verify(!_shouldScanChildren);
+        // Returning results.  We read from the last child and return the results that are in our
+        // hash map.

-        // Keep the thing we're returning so we can remove it from our internal map later.
-        DataMap::iterator returnedIt = _resultIterator;
-        ++_resultIterator;
+        // We should be EOF if we're not hashing results and the dataMap is empty.
+        verify(!_dataMap.empty());

-        WorkingSetID idToReturn = returnedIt->second;
-        _dataMap.erase(returnedIt);
-        WorkingSetMember* member = _ws->get(idToReturn);
+        // We probe _dataMap with the last child.
+        verify(_currentChild == _children.size() - 1);

-        // We should check for matching at the end so the matcher can use information in the
-        // indices of all our children.
-        if (Filter::passes(member, _filter)) {
-            *out = idToReturn;
-            ++_commonStats.advanced;
-            return PlanStage::ADVANCED;
+        // Work the last child.
+        StageState childStatus = _children[_children.size() - 1]->work(out);
+        if (PlanStage::ADVANCED != childStatus) {
+            return childStatus;
         }
-        else {
-            _ws->free(idToReturn);
-            // Skip over the non-matching thing we currently point at.
+
+        // We know that we've ADVANCED.  See if the WSM is in our table.
+        WorkingSetMember* member = _ws->get(*out);
+        verify(member->hasLoc());
+
+        DataMap::iterator it = _dataMap.find(member->loc);
+        if (_dataMap.end() == it) {
+            // Child's output wasn't in every previous child.  Throw it out.
+            _ws->free(*out);
             ++_commonStats.needTime;
             return PlanStage::NEED_TIME;
         }
+        else {
+            // Child's output was in every previous child.  Merge any key data in
+            // the child's output and free the child's just-outputted WSM.
+            WorkingSetID hashID = it->second;
+            _dataMap.erase(it);
+
+            WorkingSetMember* olderMember = _ws->get(hashID);
+            AndCommon::mergeFrom(olderMember, *member);
+            _ws->free(*out);
+
+            // We should check for matching at the end so the matcher can use information in the
+            // indices of all our children.
+            if (Filter::passes(olderMember, _filter)) {
+                *out = hashID;
+                ++_commonStats.advanced;
+                return PlanStage::ADVANCED;
+            }
+            else {
+                _ws->free(hashID);
+                ++_commonStats.needTime;
+                return PlanStage::NEED_TIME;
+            }
+        }
     }

@@ -115,7 +157,7 @@ namespace mongo {

         // If our first child was empty, don't scan any others, no possible results.
         if (_dataMap.empty()) {
-            _shouldScanChildren = false;
+            _hashingChildren = false;
             return PlanStage::IS_EOF;
         }

@@ -153,7 +195,7 @@ namespace mongo {
                 // We have a hit.  Copy data into the WSM we already have.
                 _seenMap.insert(member->loc);
                 WorkingSetMember* olderMember = _ws->get(_dataMap[member->loc]);
-                AndCommon::mergeFrom(olderMember, member);
+                AndCommon::mergeFrom(olderMember, *member);
             }
             _ws->free(id);
             ++_commonStats.needTime;
@@ -183,14 +225,13 @@ namespace mongo {

             // If we have nothing to AND with after finishing any child, stop.
             if (_dataMap.empty()) {
-                _shouldScanChildren = false;
+                _hashingChildren = false;
                 return PlanStage::IS_EOF;
             }

             // We've finished scanning all children.  Return results with the next call to work().
             if (_currentChild == _children.size()) {
-                _shouldScanChildren = false;
-                _resultIterator = _dataMap.begin();
+                _hashingChildren = false;
             }

             ++_commonStats.needTime;
@@ -236,18 +277,13 @@ namespace mongo {

         _seenMap.erase(dl);

-        // If we're pointing at the DiskLoc, move past it.  It will be deleted.
-        if (_dataMap.end() != _resultIterator && (_resultIterator->first == dl)) {
-            ++_resultIterator;
-        }
-
         DataMap::iterator it = _dataMap.find(dl);
         if (_dataMap.end() != it) {
             WorkingSetID id = it->second;
             WorkingSetMember* member = _ws->get(id);
             verify(member->loc == dl);

-            if (_shouldScanChildren) {
+            if (_hashingChildren) {
                 ++_specificStats.flaggedInProgress;
             }
             else {
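The rewritten work() turns AndHashStage into a textbook hash join: the first child builds the table, the middle children prune it, and the last child is streamed through it, so results come out in the last child's order and one result can be returned per work() call instead of buffering the whole intersection. A self-contained sketch of that control flow, with plain ints standing in for DiskLocs and eager vectors standing in for child stages (names are illustrative, not the stage's API):

    #include <unordered_set>
    #include <vector>

    // Intersect child result streams the way the new AndHashStage does.
    // Output order is the order of the last child, which is never hashed.
    std::vector<int> andHash(const std::vector<std::vector<int> >& children) {
        // Build the hash table from the first child.
        std::unordered_set<int> table(children[0].begin(), children[0].end());

        // Probe with every middle child, keeping only locs it has seen too
        // (the role _seenMap plays in the real stage).
        for (size_t c = 1; c + 1 < children.size(); ++c) {
            std::unordered_set<int> seen;
            for (size_t i = 0; i < children[c].size(); ++i) {
                if (table.count(children[c][i])) { seen.insert(children[c][i]); }
            }
            table.swap(seen);
            if (table.empty()) { return std::vector<int>(); }  // nothing to AND with
        }

        // Stream the last child, emitting only locs present in the table.
        std::vector<int> out;
        const std::vector<int>& last = children.back();
        for (size_t i = 0; i < last.size(); ++i) {
            if (table.count(last[i])) { out.push_back(last[i]); }
        }
        return out;
    }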
diff --git a/src/mongo/db/exec/and_hash.h b/src/mongo/db/exec/and_hash.h
index 48a1667c1f8..116db71c928 100644
--- a/src/mongo/db/exec/and_hash.h
+++ b/src/mongo/db/exec/and_hash.h
@@ -79,19 +79,18 @@ namespace mongo {
         // The stages we read from.  Owned by us.
         vector<PlanStage*> _children;

-        // _dataMap is filled out by the first child and probed by subsequent children.
+        // _dataMap is filled out by the first child and probed by subsequent children.  This is the
+        // hash table that we create by intersecting _children and probe with the last child.
         typedef unordered_map<DiskLoc, WorkingSetID, DiskLoc::Hasher> DataMap;
         DataMap _dataMap;

         // Keeps track of what elements from _dataMap subsequent children have seen.
+        // Only used while _hashingChildren.
         typedef unordered_set<DiskLoc, DiskLoc::Hasher> SeenMap;
         SeenMap _seenMap;

-        // Iterator over the members of _dataMap that survive.
-        DataMap::iterator _resultIterator;
-
-        // True if we're still scanning _children for results.
-        bool _shouldScanChildren;
+        // True if we're still intersecting _children[0..._children.size()-1].
+        bool _hashingChildren;

         // Which child are we currently working on?
         size_t _currentChild;
diff --git a/src/mongo/db/exec/and_sorted.cpp b/src/mongo/db/exec/and_sorted.cpp
index 0f884088845..cd3e4472345 100644
--- a/src/mongo/db/exec/and_sorted.cpp
+++ b/src/mongo/db/exec/and_sorted.cpp
@@ -135,7 +135,7 @@ namespace mongo {
                 // The front element has hit _targetLoc.  Don't move it forward anymore/work on
                 // another element.
                 _workingTowardRep.pop();
-                AndCommon::mergeFrom(_ws->get(_targetId), member);
+                AndCommon::mergeFrom(_ws->get(_targetId), *member);
                 _ws->free(id);

                 if (0 == _workingTowardRep.size()) {
diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp
index 41d34742859..16245186bae 100644
--- a/src/mongo/db/query/planner_access.cpp
+++ b/src/mongo/db/query/planner_access.cpp
@@ -603,6 +603,17 @@ namespace mongo {
                 AndHashNode* ahn = new AndHashNode();
                 ahn->children.swap(ixscanNodes);
                 andResult = ahn;
+                // The AndHashNode provides the sort order of its last child.  If any of the
+                // possible subnodes of AndHashNode provides the sort order we care about, we put
+                // that one last.
+                for (size_t i = 0; i < ahn->children.size(); ++i) {
+                    ahn->children[i]->computeProperties();
+                    const BSONObjSet& sorts = ahn->children[i]->getSort();
+                    if (sorts.end() != sorts.find(query.getParsed().getSort())) {
+                        std::swap(ahn->children[i], ahn->children.back());
+                        break;
+                    }
+                }
             }
         }
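Because the AND now inherits the sort order of whichever child runs last, the planner change above just looks for a child that can provide the requested sort and swaps it into the last slot. The same idea over toy types (illustrative names, not the planner's):

    #include <algorithm>
    #include <set>
    #include <string>
    #include <vector>

    struct ToyNode {
        std::set<std::string> sorts;  // sort orders this subtree can provide
    };

    // Move a child that provides 'desiredSort' to the back so an
    // AndHash-style parent inherits that order from it.
    void putSortProviderLast(std::vector<ToyNode*>& children,
                             const std::string& desiredSort) {
        for (size_t i = 0; i < children.size(); ++i) {
            if (children[i]->sorts.count(desiredSort)) {
                std::swap(children[i], children.back());
                break;
            }
        }
    }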
diff --git a/src/mongo/db/query/query_planner_test.cpp b/src/mongo/db/query/query_planner_test.cpp
index fd5e9de5a55..d1c380dcaa5 100644
--- a/src/mongo/db/query/query_planner_test.cpp
+++ b/src/mongo/db/query/query_planner_test.cpp
@@ -1891,6 +1891,24 @@ namespace {
                                 "{ixscan: {filter: null, pattern: {'a.c':1}}}]}}}}");
     }

+    TEST_F(QueryPlannerTest, IntersectSortFromAndHash) {
+        params.options = QueryPlannerParams::NO_TABLE_SCAN | QueryPlannerParams::INDEX_INTERSECTION;
+        addIndex(BSON("a" << 1));
+        addIndex(BSON("b" << 1));
+        runQuerySortProj(fromjson("{a: 1, b:{$gt: 1}}"), fromjson("{b:1}"), BSONObj());
+
+        // This provides the sort.
+        assertSolutionExists("{fetch: {filter: null, node: {andHash: {nodes: ["
+                                 "{ixscan: {filter: null, pattern: {a:1}}},"
+                                 "{ixscan: {filter: null, pattern: {b:1}}}]}}}}");
+
+        // Rearrange the preds, shouldn't matter.
+        runQuerySortProj(fromjson("{b: 1, a:{$lt: 7}}"), fromjson("{b:1}"), BSONObj());
+        assertSolutionExists("{fetch: {filter: null, node: {andHash: {nodes: ["
+                                 "{ixscan: {filter: null, pattern: {a:1}}},"
+                                 "{ixscan: {filter: null, pattern: {b:1}}}]}}}}");
+    }
+
     //
     // Test bad input to query planner helpers.
     //
diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h
index 8a30cd169b4..846b5d8e990 100644
--- a/src/mongo/db/query/query_solution.h
+++ b/src/mongo/db/query/query_solution.h
@@ -247,7 +247,7 @@ namespace mongo {
         bool fetched() const;
         bool hasField(const string& field) const;
         bool sortedByDiskLoc() const { return false; }
-        const BSONObjSet& getSort() const { return _sort; }
+        const BSONObjSet& getSort() const { return children.back()->getSort(); }

         BSONObjSet _sort;
     };
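The getSort() change is what makes the planner test above pass: AndHashNode no longer reports its own _sort but advertises whatever its streamed last child provides, and the property bubbles up through any fetch node on top. A minimal model of that delegation, again with hypothetical types:

    #include <set>
    #include <string>
    #include <vector>

    struct PlanNodeModel {
        std::vector<PlanNodeModel*> children;
        std::set<std::string> ownSorts;
        virtual ~PlanNodeModel() {}

        // Leaves report their own sort orders.
        virtual const std::set<std::string>& getSort() const { return ownSorts; }
    };

    // An AndHash-like node reports the sorts of its last (streamed) child.
    struct AndHashNodeModel : PlanNodeModel {
        virtual const std::set<std::string>& getSort() const {
            return children.back()->getSort();
        }
    };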
diff --git a/src/mongo/dbtests/query_stage_and.cpp b/src/mongo/dbtests/query_stage_and.cpp
index 4024a1c63aa..9889d1435e7 100644
--- a/src/mongo/dbtests/query_stage_and.cpp
+++ b/src/mongo/dbtests/query_stage_and.cpp
@@ -710,6 +710,65 @@ namespace QueryStageAnd {
         }
     };

+    // Verify that AND preserves the order of the last child.
+    class QueryStageAndSortedByLastChild : public QueryStageAndBase {
+    public:
+        void run() {
+            Client::WriteContext ctx(ns());
+            Database* db = ctx.ctx().db();
+            Collection* coll = db->getCollection(ns());
+            if (!coll) {
+                coll = db->createCollection(ns());
+            }
+
+            for (int i = 0; i < 50; ++i) {
+                insert(BSON("foo" << 1 << "bar" << i));
+            }
+
+            addIndex(BSON("foo" << 1));
+            addIndex(BSON("bar" << 1));
+
+            WorkingSet ws;
+            scoped_ptr<AndHashStage> ah(new AndHashStage(&ws, NULL));
+
+            // Scan over foo == 1.
+            IndexScanParams params;
+            params.descriptor = getIndex(BSON("foo" << 1), coll);
+            params.bounds.isSimpleRange = true;
+            params.bounds.startKey = BSON("" << 1);
+            params.bounds.endKey = BSON("" << 1);
+            params.bounds.endKeyInclusive = true;
+            params.direction = 1;
+            ah->addChild(new IndexScan(params, &ws, NULL));
+
+            // Intersect with 7 <= bar < 10000.
+            params.descriptor = getIndex(BSON("bar" << 1), coll);
+            params.bounds.startKey = BSON("" << 7);
+            params.bounds.endKey = BSON("" << 10000);
+            ah->addChild(new IndexScan(params, &ws, NULL));
+
+            WorkingSetID lastId = WorkingSet::INVALID_ID;
+
+            int count = 0;
+            while (!ah->isEOF()) {
+                WorkingSetID id;
+                PlanStage::StageState status = ah->work(&id);
+                if (PlanStage::ADVANCED != status) { continue; }
+
+                BSONObj thisObj = ws.get(id)->loc.obj();
+                ASSERT_EQUALS(7 + count, thisObj["bar"].numberInt());
+                ++count;
+                if (WorkingSet::INVALID_ID != lastId) {
+                    BSONObj lastObj = ws.get(lastId)->loc.obj();
+                    ASSERT_LESS_THAN(lastObj["bar"].woCompare(thisObj["bar"]), 0);
+                }
+                lastId = id;
+            }
+
+            ASSERT_EQUALS(count, 43);
+        }
+    };
+
     class All : public Suite {
     public:
         All() : Suite( "query_stage_and" ) { }
@@ -725,6 +784,7 @@ namespace QueryStageAnd {
             add<QueryStageAndSortedWithNothing>();
             add<QueryStageAndSortedProducesNothing>();
             add<QueryStageAndSortedWithMatcher>();
+            add<QueryStageAndSortedByLastChild>();
         }
     } queryStageAndAll;
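The expected numbers in the new dbtest follow from the data: all 50 documents match foo == 1, the last child scans bar upward from 7 (its bounds reach 10000 but the data stops at 49), so 43 documents survive, in ascending bar order because that is the last child's scan order. A quick standalone check of that arithmetic:

    #include <cassert>

    int main() {
        int count = 0;
        int lastBar = -1;
        for (int bar = 0; bar < 50; ++bar) {   // docs: {foo: 1, bar: 0..49}
            // First child matches everything (foo == 1); the last child's
            // bounds keep 7 <= bar <= 10000.
            if (bar >= 7 && bar <= 10000) {
                assert(bar > lastBar);         // ascending: the last child's order
                lastBar = bar;
                ++count;
            }
        }
        assert(count == 43);                   // matches ASSERT_EQUALS(count, 43)
        return 0;
    }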