2 files changed, 125 insertions, 134 deletions
diff --git a/src/mongo/db/exec/index_scan.cpp b/src/mongo/db/exec/index_scan.cpp
index ffde08be60e..3e6532126ac 100644
--- a/src/mongo/db/exec/index_scan.cpp
+++ b/src/mongo/db/exec/index_scan.cpp
@@ -56,12 +56,10 @@ namespace mongo {
                          WorkingSet* workingSet,
                          const MatchExpression* filter)
         : _txn(txn),
-          _checkEndKeys(0),
           _workingSet(workingSet),
-          _hitEnd(false),
-          _filter(filter), 
+          _scanState(INITIALIZING),
+          _filter(filter),
           _shouldDedup(true),
-          _yieldMovedCursor(false),
           _params(params),
           _btreeCursor(NULL),
           _commonStats(kStageType) {
@@ -71,6 +69,10 @@ namespace mongo {
     }
 
     void IndexScan::initIndexScan() {
+        // This function transitions from the initializing state to CHECKING_END. If
+        // the initialization fails, however, then the state transitions to HIT_END.
+        invariant(INITIALIZING == _scanState);
+
         // Perform the possibly heavy-duty initialization of the underlying index cursor.
         if (_params.doNotDedup) {
             _shouldDedup = false;
@@ -104,7 +106,7 @@ namespace mongo {
             Status status = _indexCursor->seek(_params.bounds.startKey);
             if (!status.isOK()) {
                 warning() << "IndexCursor seek failed: " << status.toString();
-                _hitEnd = true;
+                _scanState = HIT_END;
             }
             if (!isEOF()) {
                 _specificStats.keysExamined = 1;
@@ -128,9 +130,17 @@ namespace mongo {
                 _keyEltsInc.resize(nFields);
             }
             else {
-                _hitEnd = true;
+                _scanState = HIT_END;
             }
         }
+
+        // This method may throw an execption while it's doing intialization. If we've gotten
+        // here, then we've done all the initialization without an exception being thrown. This
+        // means it is safe to transition to the CHECKING_END state. In error cases, we transition
+        // to HIT_END, so we should not change state again here.
+        if (HIT_END != _scanState) {
+            _scanState = CHECKING_END;
+        }
     }
 
     PlanStage::StageState IndexScan::work(WorkingSetID* out) {
@@ -139,79 +149,73 @@ namespace mongo {
         // Adds the amount of time taken by work() to executionTimeMillis.
         ScopedTimer timer(&_commonStats.executionTimeMillis);
 
-        if (NULL == _indexCursor.get()) {
-            // First call to work().  Perform possibly heavy init.
+        if (INITIALIZING == _scanState) {
+            invariant(NULL == _indexCursor.get());
             initIndexScan();
-            checkEnd();
-        }
-        else if (_yieldMovedCursor) {
-            _yieldMovedCursor = false;
-            // Note that we're not calling next() here.  We got the next thing when we recovered
-            // from yielding.
         }
 
-        if (isEOF()) { return PlanStage::IS_EOF; }
-
-        // If we examined multiple keys in a prior work cycle, make up for it here by returning
-        // NEED_TIME. This is done for plan ranking. Refer to the comment for '_checkEndKeys'
-        // in the .h for details.
-        if (_checkEndKeys > 0) {
-            --_checkEndKeys;
-            ++_commonStats.needTime;
-            return PlanStage::NEED_TIME;
+        if (CHECKING_END == _scanState) {
+            checkEnd();
         }
 
-        // Grab the next (key, value) from the index.
-        BSONObj keyObj = _indexCursor->getKey();
-        DiskLoc loc = _indexCursor->getValue();
-
-        bool filterPasses = Filter::passes(keyObj, _keyPattern, _filter);
-        if ( filterPasses ) {
-            // We must make a copy of the on-disk data since it can mutate during the execution of
-            // this query.
-            keyObj = keyObj.getOwned();
+        if (isEOF()) {
+            _commonStats.isEOF = true;
+            return PlanStage::IS_EOF;
         }
 
-        // Move to the next result.
-        // The underlying IndexCursor points at the *next* thing we want to return.  We do this so
-        // that if we're scanning an index looking for docs to delete we don't continually clobber
-        // the thing we're pointing at.
-        _indexCursor->next();
-        checkEnd();
-
-        if (_shouldDedup) {
-            ++_specificStats.dupsTested;
-            if (_returned.end() != _returned.find(loc)) {
-                ++_specificStats.dupsDropped;
-                ++_commonStats.needTime;
-                return PlanStage::NEED_TIME;
-            }
-            else {
-                _returned.insert(loc);
-            }
-        }
+        if (GETTING_NEXT == _scanState) {
+            // Grab the next (key, value) from the index.
+            BSONObj keyObj = _indexCursor->getKey();
+            DiskLoc loc = _indexCursor->getValue();
 
-        if (filterPasses) {
-            if (NULL != _filter) {
-                ++_specificStats.matchTested;
+            bool filterPasses = Filter::passes(keyObj, _keyPattern, _filter);
+            if ( filterPasses ) {
+                // We must make a copy of the on-disk data since it can mutate during the execution
+                // of this query.
+                keyObj = keyObj.getOwned();
             }
 
-            // Fill out the WSM.
-            WorkingSetID id = _workingSet->allocate();
-            WorkingSetMember* member = _workingSet->get(id);
-            member->loc = loc;
-            member->keyData.push_back(IndexKeyDatum(_keyPattern, keyObj));
-            member->state = WorkingSetMember::LOC_AND_IDX;
-
-            if (_params.addKeyMetadata) {
-                BSONObjBuilder bob;
-                bob.appendKeys(_keyPattern, keyObj);
-                member->addComputed(new IndexKeyComputedData(bob.obj()));
+            // Move to the next result.
+            // The underlying IndexCursor points at the *next* thing we want to return.  We do this
+            // so that if we're scanning an index looking for docs to delete we don't continually
+            // clobber the thing we're pointing at.
+            _indexCursor->next();
+            _scanState = CHECKING_END;
+
+            if (_shouldDedup) {
+                ++_specificStats.dupsTested;
+                if (_returned.end() != _returned.find(loc)) {
+                    ++_specificStats.dupsDropped;
+                    ++_commonStats.needTime;
+                    return PlanStage::NEED_TIME;
+                }
+                else {
+                    _returned.insert(loc);
+                }
             }
 
-            *out = id;
-            ++_commonStats.advanced;
-            return PlanStage::ADVANCED;
+            if (filterPasses) {
+                if (NULL != _filter) {
+                    ++_specificStats.matchTested;
+                }
+
+                // Fill out the WSM.
+                WorkingSetID id = _workingSet->allocate();
+                WorkingSetMember* member = _workingSet->get(id);
+                member->loc = loc;
+                member->keyData.push_back(IndexKeyDatum(_keyPattern, keyObj));
+                member->state = WorkingSetMember::LOC_AND_IDX;
+
+                if (_params.addKeyMetadata) {
+                    BSONObjBuilder bob;
+                    bob.appendKeys(_keyPattern, keyObj);
+                    member->addComputed(new IndexKeyComputedData(bob.obj()));
+                }
+
+                *out = id;
+                ++_commonStats.advanced;
+                return PlanStage::ADVANCED;
+            }
         }
 
         ++_commonStats.needTime;
@@ -219,7 +223,7 @@ namespace mongo {
     }
 
     bool IndexScan::isEOF() {
-        if (NULL == _indexCursor.get()) {
+        if (INITIALIZING == _scanState) {
             // Have to call work() at least once.
             return false;
         }
@@ -231,17 +235,13 @@ namespace mongo {
             }
         }
 
-        if (_checkEndKeys != 0) {
-            return false;
-        }
-
-        return _hitEnd || _indexCursor->isEOF();
+        return HIT_END == _scanState || _indexCursor->isEOF();
     }
 
     void IndexScan::saveState() {
         ++_commonStats.yields;
 
-        if (_hitEnd || (NULL == _indexCursor.get())) { return; }
+        if (HIT_END == _scanState || INITIALIZING == _scanState) { return; }
         if (!_indexCursor->isEOF()) {
             _savedKey = _indexCursor->getKey().getOwned();
             _savedLoc = _indexCursor->getValue();
@@ -253,12 +253,12 @@ namespace mongo {
         _txn = opCtx;
         ++_commonStats.unyields;
 
-        if (_hitEnd || (NULL == _indexCursor.get())) { return; }
+        if (HIT_END == _scanState || INITIALIZING == _scanState) { return; }
 
         // We can have a valid position before we check isEOF(), restore the position, and then be
         // EOF upon restore.
         if (!_indexCursor->restorePosition( opCtx ).isOK() || _indexCursor->isEOF()) {
-            _hitEnd = true;
+            _scanState = HIT_END;
             return;
         }
 
@@ -266,12 +266,10 @@ namespace mongo {
             || _savedLoc != _indexCursor->getValue()) {
             // Our restored position isn't the same as the saved position.  When we call work()
             // again we want to return where we currently point, not past it.
-            _yieldMovedCursor = true;
-
             ++_specificStats.yieldMovedCursor;
 
             // Our restored position might be past endKey, see if we've hit the end.
-            checkEnd();
+            _scanState = CHECKING_END;
         }
     }
 
@@ -300,6 +298,8 @@ namespace mongo {
         }
 
         if (_params.bounds.isSimpleRange) {
+            _scanState = GETTING_NEXT;
+
             // "Normal" start -> end scanning.
             verify(NULL == _btreeCursor);
             verify(NULL == _checker.get());
@@ -311,12 +311,9 @@ namespace mongo {
 
             if ((cmp != 0 && cmp != _params.direction)
                 || (cmp == 0 && !_params.bounds.endKeyInclusive)) {
-
-                _hitEnd = true;
-                _commonStats.isEOF = true;
+                _scanState = HIT_END;
             }
-
-            if (!isEOF() && _params.bounds.isSimpleRange) {
+            else {
                 ++_specificStats.keysExamined;
             }
         }
@@ -324,41 +321,34 @@ namespace mongo {
             verify(NULL != _btreeCursor);
             verify(NULL != _checker.get());
 
-            // Use _checker to see how things are.
-            for (;;) {
-                //cout << "current index key is " << _indexCursor->getKey().toString() << endl;
-                //cout << "keysExamined is " << _specificStats.keysExamined << endl;
-                IndexBoundsChecker::KeyState keyState;
-                keyState = _checker->checkKey(_indexCursor->getKey(),
-                                              &_keyEltsToUse,
-                                              &_movePastKeyElts,
-                                              &_keyElts,
-                                              &_keyEltsInc);
-
-                if (IndexBoundsChecker::DONE == keyState) {
-                    _hitEnd = true;
-                    break;
-                }
+            IndexBoundsChecker::KeyState keyState;
+            keyState = _checker->checkKey(_indexCursor->getKey(),
+                                          &_keyEltsToUse,
+                                          &_movePastKeyElts,
+                                          &_keyElts,
+                                          &_keyEltsInc);
 
-                // This seems weird but it's the old definition of nscanned.
-                ++_specificStats.keysExamined;
+            if (IndexBoundsChecker::DONE == keyState) {
+                _scanState = HIT_END;
+                return;
+            }
 
-                if (IndexBoundsChecker::VALID == keyState) {
-                    break;
-                }
+            // This seems weird but it's the old definition of nscanned.
+            ++_specificStats.keysExamined;
 
-                //cout << "skipping...\n";
-                verify(IndexBoundsChecker::MUST_ADVANCE == keyState);
-                _btreeCursor->skip(_indexCursor->getKey(), _keyEltsToUse, _movePastKeyElts,
-                                   _keyElts, _keyEltsInc);
+            if (IndexBoundsChecker::VALID == keyState) {
+                _scanState = GETTING_NEXT;
+                return;
+            }
 
-                // Must check underlying cursor EOF after every cursor movement.
-                if (_btreeCursor->isEOF()) {
-                    _hitEnd = true;
-                    break;
-                }
+            verify(IndexBoundsChecker::MUST_ADVANCE == keyState);
+            _btreeCursor->skip(_indexCursor->getKey(), _keyEltsToUse, _movePastKeyElts,
+                               _keyElts, _keyEltsInc);
 
-                ++_checkEndKeys;
+            // Must check underlying cursor EOF after every cursor movement.
+            if (_btreeCursor->isEOF()) {
+                _scanState = HIT_END;
+                return;
             }
         }
     }
diff --git a/src/mongo/db/exec/index_scan.h b/src/mongo/db/exec/index_scan.h
index 012566c403e..4121010d3f4 100644
--- a/src/mongo/db/exec/index_scan.h
+++ b/src/mongo/db/exec/index_scan.h
@@ -80,6 +80,25 @@ namespace mongo {
      */
     class IndexScan : public PlanStage {
     public:
+
+        /**
+         * Keeps track of what this index scan is currently doing so that it
+         * can do the right thing on the next call to work().
+         */
+        enum ScanState {
+            // Need to initialize the underlying index traversal machinery.
+            INITIALIZING,
+
+            // Skipping keys in order to check whether we have reached the end.
+            CHECKING_END,
+
+            // Retrieving the next key, and applying the filter if necessary.
+            GETTING_NEXT,
+
+            // The index scan is finished.
+            HIT_END
+        };
+
         IndexScan(OperationContext* txn,
                   const IndexScanParams& params,
                   WorkingSet* workingSet,
@@ -117,21 +136,6 @@ namespace mongo {
         // transactional context for read locks. Not owned by us
         OperationContext* _txn;
 
-        // The number of keys examined during a call to checkEnd() that have not yet been
-        // accounted for by returning a NEED_TIME.
-        //
-        // Good plan ranking requires that the index scan uses one work cycle per index key
-        // examined. Since checkEnd() may examine multiple keys, we keep track of them here
-        // and make up for it later by returning NEED_TIME.
-        //
-        // Example of how this is useful for plan ranking:
-        //   Say you have indices {a: 1, b: 1} and {a: 1, x: 1, b: 1}, with predicates over
-        //   fields 'a' and 'b'. It's cheaper to use index {a: 1, b: 1}. Why? Because for
-        //   index {a: 1, x: 1, b: 1} you have to skip lots of keys due to the interceding
-        //   'x' field. This skipping is done inside checkEnd(), and we use '_checkEndKeys'
-        //   to account for it.
-        size_t _checkEndKeys;
-
         // The WorkingSet we annotate with results.  Not owned by us.
         WorkingSet* _workingSet;
 
@@ -140,8 +144,8 @@ namespace mongo {
         scoped_ptr<IndexCursor> _indexCursor;
         BSONObj _keyPattern;
 
-        // Have we hit the end of the index scan?
-        bool _hitEnd;
+        // Keeps track of what work we need to do next.
+        ScanState _scanState;
 
         // Contains expressions only over fields in the index key.  We assume this is built
         // correctly by whomever creates this class.
@@ -156,9 +160,6 @@ namespace mongo {
         BSONObj _savedKey;
         DiskLoc _savedLoc;
 
-        // True if there was a yield and the yield changed the cursor position.
-        bool _yieldMovedCursor;
-
         IndexScanParams _params;
 
         // For our "fast" Btree-only navigation AKA the index bounds optimization.