diff options
author | Mathias Stearn <mathias@10gen.com> | 2015-03-25 12:25:04 -0400 |
---|---|---|
committer | Mathias Stearn <mathias@10gen.com> | 2015-04-09 11:35:56 -0400 |
commit | db59e0f31b12966f127bf39df5674e3239c57350 (patch) | |
tree | 70699492e09cb72c3d7a0654e74107e512a990ad /src/mongo/db/exec/index_scan.cpp | |
parent | 8b31673665dd2a9d38e4b65ea880fc49acc0bd81 (diff) | |
download | mongo-db59e0f31b12966f127bf39df5674e3239c57350.tar.gz |
SERVER-17635 Improve SortedDataInterface::Cursor API
Major changes:
* Implementation now responsible for simple end point checking.
* No way to ask for current position. Relocating methods now return position.
* Simplified seeking methods so they have clear uses.
* Callers can use saveUnpositioned to indicate they don't care about position.
Diffstat (limited to 'src/mongo/db/exec/index_scan.cpp')
-rw-r--r-- | src/mongo/db/exec/index_scan.cpp | 406 |
1 files changed, 103 insertions, 303 deletions
diff --git a/src/mongo/db/exec/index_scan.cpp b/src/mongo/db/exec/index_scan.cpp index 8df75d2bdda..e8f59c07496 100644 --- a/src/mongo/db/exec/index_scan.cpp +++ b/src/mongo/db/exec/index_scan.cpp @@ -37,7 +37,6 @@ #include "mongo/db/exec/scoped_timer.h" #include "mongo/db/exec/working_set_computed_data.h" #include "mongo/db/index/index_access_method.h" -#include "mongo/db/index/index_cursor.h" #include "mongo/db/index/index_descriptor.h" #include "mongo/db/query/index_bounds_builder.h" #include "mongo/util/log.h" @@ -55,9 +54,6 @@ namespace { namespace mongo { - using std::auto_ptr; - using std::vector; - // static const char* IndexScan::kStageType = "IXSCAN"; @@ -67,16 +63,15 @@ namespace mongo { const MatchExpression* filter) : _txn(txn), _workingSet(workingSet), + _iam(params.descriptor->getIndexCatalog()->getIndex(params.descriptor)), + _keyPattern(params.descriptor->keyPattern().getOwned()), _scanState(INITIALIZING), _filter(filter), _shouldDedup(true), + _forward(params.direction == 1), _params(params), _commonStats(kStageType), - _keyEltsToUse(0), - _movePastKeyElts(false), _endKeyInclusive(false) { - _iam = _params.descriptor->getIndexCatalog()->getIndex(_params.descriptor); - _keyPattern = _params.descriptor->keyPattern().getOwned(); // We can't always access the descriptor in the call to getStats() so we pull // any info we need for stats reporting out here. @@ -86,44 +81,24 @@ namespace mongo { _specificStats.indexVersion = _params.descriptor->version(); } - void IndexScan::initIndexScan() { - // This function transitions from the initializing state to CHECKING_END. If - // the initialization fails, however, then the state transitions to HIT_END. - invariant(INITIALIZING == _scanState); - - // Perform the possibly heavy-duty initialization of the underlying index cursor. + boost::optional<IndexKeyEntry> IndexScan::initIndexScan() { if (_params.doNotDedup) { _shouldDedup = false; } else { + // TODO it is incorrect to rely on this not changing. SERVER-17678 _shouldDedup = _params.descriptor->isMultikey(_txn); } - // Set up the index cursor. - CursorOptions cursorOptions; - - if (1 == _params.direction) { - cursorOptions.direction = CursorOptions::INCREASING; - } - else { - cursorOptions.direction = CursorOptions::DECREASING; - } - - IndexCursor *cursor; - Status s = _iam->newCursor(_txn, cursorOptions, &cursor); - verify(s.isOK()); - _indexCursor.reset(cursor); + // Perform the possibly heavy-duty initialization of the underlying index cursor. + _indexCursor = _iam->newCursor(_txn, _forward); if (_params.bounds.isSimpleRange) { // Start at one key, end at another. - Status status = _indexCursor->seek(_params.bounds.startKey); - if (!status.isOK()) { - warning() << "IndexCursor seek failed: " << status.toString(); - _scanState = HIT_END; - } - if (!isEOF()) { - _specificStats.keysExamined = 1; - } + _endKey = _params.bounds.endKey; + _endKeyInclusive = _params.bounds.endKeyInclusive; + _indexCursor->setEndPosition(_endKey, _endKeyInclusive); + return _indexCursor->seek(_params.bounds.startKey, /*inclusive*/true); } else { // For single intervals, we can use an optimized scan which checks against the position @@ -136,45 +111,20 @@ namespace mongo { &startKeyInclusive, &_endKey, &_endKeyInclusive)) { - // We want to point at the start key if it's inclusive, and we want to point past - // the start key if it's exclusive. - _indexCursor->seek(startKey, !startKeyInclusive); - - IndexCursor* endCursor; - invariant(_iam->newCursor(_txn, cursorOptions, &endCursor).isOK()); - invariant(endCursor); - _endCursor.reset(endCursor); - // If the end key is inclusive, we want to point *past* it since that's the end. - _endCursor->seek(_endKey, _endKeyInclusive); + _indexCursor->setEndPosition(_endKey, _endKeyInclusive); + return _indexCursor->seek(startKey, startKeyInclusive); } else { _checker.reset(new IndexBoundsChecker(&_params.bounds, _keyPattern, _params.direction)); - int nFields = _keyPattern.nFields(); - vector<const BSONElement*> key; - vector<bool> inc; - key.resize(nFields); - inc.resize(nFields); - if (_checker->getStartKey(&key, &inc)) { - _indexCursor->seek(key, inc); - _keyElts.resize(nFields); - _keyEltsInc.resize(nFields); - } - else { - _scanState = HIT_END; - } - } - } + if (!_checker->getStartSeekPoint(&_seekPoint)) + return boost::none; - // This method may throw an exception while it's doing initialization. If we've gotten - // here, then we've done all the initialization without an exception being thrown. This - // means it is safe to transition to the CHECKING_END state. In error cases, we transition - // to HIT_END, so we should not change state again here. - if (HIT_END != _scanState) { - _scanState = CHECKING_END; + return _indexCursor->seek(_seekPoint); + } } } @@ -184,120 +134,105 @@ namespace mongo { // Adds the amount of time taken by work() to executionTimeMillis. ScopedTimer timer(&_commonStats.executionTimeMillis); - if (INITIALIZING == _scanState) { - invariant(NULL == _indexCursor.get()); - try { - initIndexScan(); - } - catch (const WriteConflictException& wce) { - // Release our owned cursors and try again next time. - _scanState = INITIALIZING; - _indexCursor.reset(); - _endCursor.reset(); - *out = WorkingSet::INVALID_ID; - return PlanStage::NEED_YIELD; + // Get the next kv pair from the index, if any. + boost::optional<IndexKeyEntry> kv; + try { + switch (_scanState) { + case INITIALIZING: kv = initIndexScan(); break; + case GETTING_NEXT: kv = _indexCursor->next(); break; + case NEED_SEEK: kv = _indexCursor->seek(_seekPoint); break; + case HIT_END: return PlanStage::IS_EOF; } } + catch (const WriteConflictException& wce) { + *out = WorkingSet::INVALID_ID; + return PlanStage::NEED_YIELD; + } - if (CHECKING_END == _scanState) { - try { - checkEnd(); + if (kv) { + // In debug mode, check that the cursor isn't lying to us. + if (kDebugBuild && !_endKey.isEmpty()) { + int cmp = kv->key.woCompare(_endKey, + Ordering::make(_params.descriptor->keyPattern()), + /*compareFieldNames*/false); + if (cmp == 0) dassert(_endKeyInclusive); + dassert(_forward ? cmp <= 0 : cmp >= 0); + } + + ++_specificStats.keysExamined; + if (_params.maxScan && _specificStats.keysExamined >= _params.maxScan) { + kv = boost::none; } - catch (const WriteConflictException& wce) { - // checkEnd only fails in ways that is safe to call again after yielding. - _scanState = CHECKING_END; - *out = WorkingSet::INVALID_ID; - return PlanStage::NEED_YIELD; + } + + if (kv && _checker) { + switch (_checker->checkKey(kv->key, &_seekPoint)) { + case IndexBoundsChecker::VALID: + break; + + case IndexBoundsChecker::DONE: + // This seems weird but it's the old definition of nscanned. + --_specificStats.keysExamined; + kv = boost::none; + break; + + case IndexBoundsChecker::MUST_ADVANCE: + _scanState = NEED_SEEK; + _commonStats.needTime++; + return PlanStage::NEED_TIME; } } - if (isEOF()) { + if (!kv) { + _scanState = HIT_END; _commonStats.isEOF = true; + _indexCursor.reset(); return PlanStage::IS_EOF; } - if (GETTING_NEXT == _scanState) { - // Grab the next (key, value) from the index. - BSONObj keyObj = _indexCursor->getKey(); - RecordId loc = _indexCursor->getValue(); - - bool filterPasses = Filter::passes(keyObj, _keyPattern, _filter); - if ( filterPasses ) { - // We must make a copy of the on-disk data since it can mutate during the execution - // of this query. - keyObj = keyObj.getOwned(); - } - - _scanState = CHECKING_END; + _scanState = GETTING_NEXT; - // Move to the next result. - // The underlying IndexCursor points at the *next* thing we want to return. We do this - // so that if we're scanning an index looking for docs to delete we don't continually - // clobber the thing we're pointing at. - try { - _indexCursor->next(); - } - catch (const WriteConflictException& wce) { - // If next throws, it leaves us at the original position. - invariant(_indexCursor->getValue() == loc); - *out = WorkingSet::INVALID_ID; - return PlanStage::NEED_YIELD; + if (_shouldDedup) { + ++_specificStats.dupsTested; + if (!_returned.insert(kv->loc).second) { + // We've seen this RecordId before. Skip it this time. + ++_specificStats.dupsDropped; + ++_commonStats.needTime; + return PlanStage::NEED_TIME; } + } - if (_shouldDedup) { - ++_specificStats.dupsTested; - if (_returned.end() != _returned.find(loc)) { - ++_specificStats.dupsDropped; - ++_commonStats.needTime; - return PlanStage::NEED_TIME; - } - else { - _returned.insert(loc); - } + if (_filter) { + if (!Filter::passes(kv->key, _keyPattern, _filter)) { + ++_commonStats.needTime; + return PlanStage::NEED_TIME; } - if (filterPasses) { - if (NULL != _filter) { - ++_specificStats.matchTested; - } - - // Fill out the WSM. - WorkingSetID id = _workingSet->allocate(); - WorkingSetMember* member = _workingSet->get(id); - member->loc = loc; - member->keyData.push_back(IndexKeyDatum(_keyPattern, keyObj, _iam)); - member->state = WorkingSetMember::LOC_AND_IDX; - - if (_params.addKeyMetadata) { - BSONObjBuilder bob; - bob.appendKeys(_keyPattern, keyObj); - member->addComputed(new IndexKeyComputedData(bob.obj())); - } - - *out = id; - ++_commonStats.advanced; - return PlanStage::ADVANCED; - } + ++_specificStats.matchTested; } + + if (!kv->key.isOwned()) kv->key = kv->key.getOwned(); - ++_commonStats.needTime; - return PlanStage::NEED_TIME; - } + // We found something to return, so fill out the WSM. + WorkingSetID id = _workingSet->allocate(); + WorkingSetMember* member = _workingSet->get(id); + member->loc = kv->loc; + member->keyData.push_back(IndexKeyDatum(_keyPattern, kv->key, _iam)); + member->state = WorkingSetMember::LOC_AND_IDX; - bool IndexScan::isEOF() { - if (INITIALIZING == _scanState) { - // Have to call work() at least once. - return false; + if (_params.addKeyMetadata) { + BSONObjBuilder bob; + bob.appendKeys(_keyPattern, kv->key); + member->addComputed(new IndexKeyComputedData(bob.obj())); } - // If there's a limit on how many keys we can scan, we may be EOF when we hit that. - if (0 != _params.maxScan) { - if (_specificStats.keysExamined >= _params.maxScan) { - return true; - } - } + *out = id; + ++_commonStats.advanced; + return PlanStage::ADVANCED; + } - return HIT_END == _scanState || _indexCursor->isEOF(); + bool IndexScan::isEOF() { + return _commonStats.isEOF; } void IndexScan::saveState() { @@ -308,17 +243,14 @@ namespace mongo { _txn = NULL; ++_commonStats.yields; + if (!_indexCursor) return; - if (HIT_END == _scanState || INITIALIZING == _scanState) { return; } - if (!_indexCursor->isEOF()) { - _savedKey = _indexCursor->getKey().getOwned(); - _savedLoc = _indexCursor->getValue(); + if (_scanState == NEED_SEEK) { + _indexCursor->saveUnpositioned(); + return; } - _indexCursor->savePosition(); - if (_endCursor) { - _endCursor->savePosition(); - } + _indexCursor->savePositioned(); } void IndexScan::restoreState(OperationContext* opCtx) { @@ -326,61 +258,7 @@ namespace mongo { _txn = opCtx; ++_commonStats.unyields; - if (HIT_END == _scanState || INITIALIZING == _scanState) { return; } - - // We can have a valid position before we check isEOF(), restore the position, and then be - // EOF upon restore. - if (!_indexCursor->restorePosition( opCtx ).isOK() || _indexCursor->isEOF()) { - _scanState = HIT_END; - return; - } - - if (_endCursor) { - // Single interval case. - if (!_endCursor->restorePosition(opCtx).isOK()) { - _scanState = HIT_END; - return; - } - - // If we were EOF when we yielded, we don't always want to have '_indexCursor' run until - // EOF. New documents may have been inserted after our end key, and our end marker may - // be before them. - // - // As an example, say we're counting from 5 to 10 and the index only has keys for 6, 7, - // 8, and 9. '_indexCursor' will point at key 6 at the start and '_endCursor' will be - // EOF. If we insert a document with key 11 during a yield, we need to relocate - // '_endCursor' to point at the new key as the end key of our scan. - _endCursor->seek(_endKey, _endKeyInclusive); - - // It is possible that the re-positioning of the end cursor above will move the end - // cursor so that it is on the other side of the scanning cursor. If this happens, the - // scan is over, so we transition to HIT_END state. - // - // Example: - // Suppose we're counting from 5 to 10 and the index has keys 6 and 15. The end - // cursor will initially point at 15. Say that the scanning cursor advances, returning - // key 6 and now points at 15. Then the index scan state is saved. While saved, - // key 11 is inserted. The end cursor will seek to point at key 11. If we didn't have - // the check below, then the scan could erroneously return key 15, which is not in the - // desired range of [5, 10]. - int cmp = _endKey.woCompare(_indexCursor->getKey(), _keyPattern); - const bool cursorPastEndKey = (_params.direction == 1 ? cmp < 0 : cmp > 0); - const bool cursorAtExclusiveEndKey = (cmp == 0 && !_endKeyInclusive); - if (cursorPastEndKey || cursorAtExclusiveEndKey) { - _scanState = HIT_END; - return; - } - } - - if (!_savedKey.binaryEqual(_indexCursor->getKey()) - || _savedLoc != _indexCursor->getValue()) { - // Our restored position isn't the same as the saved position. When we call work() - // again we want to return where we currently point, not past it. - ++_specificStats.yieldMovedCursor; - - // Our restored position might be past endKey, see if we've hit the end. - _scanState = CHECKING_END; - } + if (_indexCursor) _indexCursor->restore(opCtx); } void IndexScan::invalidate(OperationContext* txn, const RecordId& dl, InvalidationType type) { @@ -401,91 +279,13 @@ namespace mongo { } } - void IndexScan::checkEnd() { - if (isEOF()) { - _commonStats.isEOF = true; - return; - } - - if (_params.bounds.isSimpleRange) { - _scanState = GETTING_NEXT; - - // "Normal" start -> end scanning. - verify(NULL == _endCursor); - verify(NULL == _checker.get()); - - // If there is an empty endKey we will scan until we run out of index to scan over. - if (_params.bounds.endKey.isEmpty()) { return; } - - int cmp = sgn(_params.bounds.endKey.woCompare(_indexCursor->getKey(), _keyPattern)); - - if ((cmp != 0 && cmp != _params.direction) - || (cmp == 0 && !_params.bounds.endKeyInclusive)) { - _scanState = HIT_END; - } - else { - ++_specificStats.keysExamined; - } - } - else if (_endCursor) { - // We're in the single interval case, and we have a cursor pointing to the end position. - // We can check whether the scan is over by seeing if our cursor points at the same - // thing as the end cursor. - _scanState = GETTING_NEXT; - invariant(!_checker); - - if (_endCursor->pointsAt(*_indexCursor)) { - _scanState = HIT_END; - } - else { - ++_specificStats.keysExamined; - } - } - else { - verify(NULL != _indexCursor); - verify(NULL != _checker.get()); - - IndexBoundsChecker::KeyState keyState; - keyState = _checker->checkKey(_indexCursor->getKey(), - &_keyEltsToUse, - &_movePastKeyElts, - &_keyElts, - &_keyEltsInc); - - if (IndexBoundsChecker::DONE == keyState) { - _scanState = HIT_END; - return; - } - - // This seems weird but it's the old definition of nscanned. - ++_specificStats.keysExamined; - - if (IndexBoundsChecker::VALID == keyState) { - _scanState = GETTING_NEXT; - return; - } - - verify(IndexBoundsChecker::MUST_ADVANCE == keyState); - _indexCursor->skip(_indexCursor->getKey(), _keyEltsToUse, _movePastKeyElts, - _keyElts, _keyEltsInc); - - // Must check underlying cursor EOF after every cursor movement. - if (_indexCursor->isEOF()) { - _scanState = HIT_END; - return; - } - } - } - - vector<PlanStage*> IndexScan::getChildren() const { - vector<PlanStage*> empty; - return empty; + std::vector<PlanStage*> IndexScan::getChildren() const { + return {}; } PlanStageStats* IndexScan::getStats() { // WARNING: this could be called even if the collection was dropped. Do not access any // catalog information here. - _commonStats.isEOF = isEOF(); // Add a BSON representation of the filter to the stats tree, if there is one. if (NULL != _filter) { @@ -503,7 +303,7 @@ namespace mongo { _specificStats.direction = _params.direction; } - auto_ptr<PlanStageStats> ret(new PlanStageStats(_commonStats, STAGE_IXSCAN)); + std::unique_ptr<PlanStageStats> ret(new PlanStageStats(_commonStats, STAGE_IXSCAN)); ret->specific.reset(new IndexScanStats(_specificStats)); return ret.release(); } |