// @file queryoptimizercursor.cpp
/**
* Copyright (C) 2011 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "pch.h"
#include "queryoptimizer.h"
#include "pdfile.h"
#include "clientcursor.h"
#include "btree.h"
#include "queryoptimizercursor.h"
namespace mongo {
// Assertion code thrown by QueryOptimizerCursorOp::_init() when the plan would require
// scan and order; newQueryOptimizerCursor() catches it and returns an empty pointer.
static const int OutOfOrderDocumentsAssertionCode = 14810;
/**
 * A QueryOp implementation utilized by the QueryOptimizerCursor.
 *
 * Each op iterates the cursor of a single query plan and caches / counts matches in a
 * CachedMatchCounter. Iteration is requested to stop once the counter reports enough
 * cumulative matches to choose a plan.
 */
class QueryOptimizerCursorOp : public QueryOp {
public:
    /**
     * @param aggregateNscanned - shared long long counting total nscanned for
     * query ops for all cursors.
     * @param requireIndex - if unindexed scans should be prohibited.
     * @param cumulativeCount - initial match count handed to the match counter; child ops
     * created by _createChild() receive their parent's cumulative count.
     */
    QueryOptimizerCursorOp( long long &aggregateNscanned, bool requireIndex, int cumulativeCount = 0 ) :
        _matchCounter( aggregateNscanned, cumulativeCount ),
        _countingMatches(),
        _mustAdvance(),
        _capped(),
        _yieldRecoveryFailed(),
        _requireIndex( requireIndex ) {
    }

    /**
     * Creates the plan's cursor and primes the match counter.
     * Throws MsgAssertionException with OutOfOrderDocumentsAssertionCode if the plan requires
     * scan and order, or with code 9011 if an index is required but the plan's index key
     * is $natural.
     */
    virtual void _init() {
        if ( qp().scanAndOrderRequired() ) {
            throw MsgAssertionException( OutOfOrderDocumentsAssertionCode, "order spec cannot be satisfied with index" );
        }
        if ( _requireIndex && strcmp( qp().indexKey().firstElementFieldName(), "$natural" ) == 0 ) {
            throw MsgAssertionException( 9011, "Not an index cursor" );
        }
        _c = qp().newCursor();

        // The QueryOptimizerCursor::prepareToTouchEarlierIterate() implementation requires _c->prepareToYield() to work.
        verify( 15940, _c->supportYields() );
        _capped = _c->capped();

        // TODO This violates the current Cursor interface abstraction, but for now it's simpler to keep our own set of
        // dups rather than avoid poisoning the cursor's dup set with unreturned documents. Deduping documents
        // matched in this QueryOptimizerCursorOp will run against the takeover cursor.
        _matchCounter.setCheckDups( _c->isMultiKey() );
        _matchCounter.updateNscanned( _c->nscanned() );
    }

    /** @return the cursor's nscanned while a cursor exists, otherwise the value kept by the match counter. */
    virtual long long nscanned() {
        return _c ? _c->nscanned() : _matchCounter.nscanned();
    }

    /**
     * Lazily wraps the cursor in a ClientCursor, records the current location and prepares
     * the ClientCursor for a yield.
     * @return the ClientCursor's prepareToYield() result, or true when there is no active cursor.
     */
    virtual bool prepareToYield() {
        if ( _c && !_cc ) {
            _cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , _c , qp().ns() ) );
        }
        if ( _cc ) {
            recordCursorLocation();
            return _cc->prepareToYield( _yieldData );
        }
        // no active cursor - ok to yield
        return true;
    }

    /**
     * Restores state after a yield. If the ClientCursor cannot be recovered, the cursor and
     * ClientCursor are dropped and the failure is remembered; an assertion is raised for
     * capped collections (13338) or when the plan must assert on yield failure (15892).
     */
    virtual void recoverFromYield() {
        if ( _cc && !ClientCursor::recoverFromYield( _yieldData ) ) {
            _yieldRecoveryFailed = true;
            _c.reset();
            _cc.reset();

            if ( _capped ) {
                msgassertedNoTrace( 13338, str::stream() << "capped cursor overrun: " << qp().ns() );
            }
            else if ( qp().mustAssertOnYieldFailure() ) {
                msgassertedNoTrace( 15892, str::stream() << "QueryOptimizerCursorOp::recoverFromYield() failed to recover" );
            }
            else {
                // we don't fail query since we're fine with returning partial data if collection dropped
                // also, see SERVER-2454
            }
        }
        else {
            checkCursorAdvanced();
        }
    }

    /** Records the current location, then forwards to the cursor if there is one. */
    void prepareToTouchEarlierIterate() {
        recordCursorLocation();
        if ( _c ) {
            _c->prepareToTouchEarlierIterate();
        }
    }

    /** Forwards to the cursor if there is one, then checks whether the cursor position changed. */
    void recoverFromTouchingEarlierIterate() {
        if ( _c ) {
            _c->recoverFromTouchingEarlierIterate();
        }
        checkCursorAdvanced();
    }

    /**
     * Performs one iteration step: counts / advances as needed, then requests a stop when
     * enough cumulative matches were seen, or marks the op complete when the cursor is
     * missing or exhausted. Otherwise the next call must advance the cursor.
     */
    virtual void next() {
        mayAdvance();

        if ( _matchCounter.enoughCumulativeMatchesToChooseAPlan() ) {
            setStop();
            return;
        }
        if ( !_c || !_c->ok() ) {
            setComplete();
            return;
        }

        _mustAdvance = true;
    }

    /** @return a new op sharing the aggregate nscanned counter and carrying over the cumulative match count. */
    virtual QueryOp *_createChild() const {
        return new QueryOptimizerCursorOp( _matchCounter.aggregateNscanned(), _requireIndex, _matchCounter.cumulativeCount() );
    }

    /** @return the cursor's current location, or a default constructed DiskLoc without a cursor. */
    DiskLoc currLoc() const { return _c ? _c->currLoc() : DiskLoc(); }

    /** @return the cursor's current key, or an empty BSONObj without a cursor. */
    BSONObj currKey() const { return _c ? _c->currKey() : BSONObj(); }

    /**
     * @param details - forwarded to the matcher.
     * @return whether the current document matches; false when the cursor is missing or not ok().
     */
    bool currentMatches( MatchDetails *details ) {
        bool ret = ( _c && _c->ok() ) ? matcher( _c.get() )->matchesCurrent( _c.get(), details ) : false;
        // Cache the match, so we can count it in mayAdvance().
        _matchCounter.setMatch( ret );
        return ret;
    }

    /** @return true if this op completed without a failed yield recovery and, if stopped, saw enough matches to record the plan. */
    virtual bool mayRecordPlan() const {
        return !_yieldRecoveryFailed && complete() && ( !stopRequested() || _matchCounter.enoughMatchesToRecordPlan() );
    }

    /** @return the underlying plan cursor; empty before _init() and after a failed yield recovery. */
    shared_ptr<Cursor> cursor() const { return _c; }

private:
    /** Counts the current match (except on the first call), advances the cursor if required and refreshes nscanned. */
    void mayAdvance() {
        if ( !_c ) {
            return;
        }
        if ( countingMatches() ) {
            // Check match if not yet known.
            if ( !_matchCounter.knowMatch() ) {
                currentMatches( 0 );
            }
            _matchCounter.countMatch( currLoc() );
        }
        if ( _mustAdvance ) {
            _c->advance();
            handleCursorAdvanced();
        }
        _matchCounter.updateNscanned( _c->nscanned() );
    }

    // Don't count matches on the first call to next(), which occurs before the first result is returned.
    bool countingMatches() {
        if ( _countingMatches ) {
            return true;
        }
        _countingMatches = true;
        return false;
    }

    /** Remembers the current location so a later position change can be detected. */
    void recordCursorLocation() {
        _posBeforeYield = currLoc();
    }

    void checkCursorAdvanced() {
        // This check will not correctly determine if we are looking at a different document in
        // all cases, but it is adequate for updating the query plan's match count (just used to pick
        // plans, not returned to the client) and adjust iteration via _mustAdvance.
        if ( _posBeforeYield != currLoc() ) {
            // If the yield advanced our position, the next next() will be a no op.
            handleCursorAdvanced();
        }
    }

    /** Clears the pending advance and the cached match after the cursor position has changed. */
    void handleCursorAdvanced() {
        _mustAdvance = false;
        _matchCounter.resetMatch();
    }

    CachedMatchCounter _matchCounter;
    bool _countingMatches;               // false until the first countingMatches() call
    bool _mustAdvance;                   // the next mayAdvance() must advance the cursor
    bool _capped;                        // copied from the cursor in _init()
    shared_ptr<Cursor> _c;               // the plan's cursor
    ClientCursor::CleanupPointer _cc;    // created on the first prepareToYield() with a cursor
    DiskLoc _posBeforeYield;             // location saved by recordCursorLocation()
    ClientCursor::YieldData _yieldData;
    bool _yieldRecoveryFailed;
    bool _requireIndex;
};
/**
* This cursor runs a MultiPlanScanner iteratively and returns results from
* the scanner's cursors as they become available. Once the scanner chooses
* a single plan, this cursor becomes a simple wrapper around that single
* plan's cursor (called the 'takeover' cursor).
*/
class QueryOptimizerCursor : public Cursor {
public:
QueryOptimizerCursor( auto_ptr &mps, bool requireIndex ) :
_mps( mps ),
_originalOp( new QueryOptimizerCursorOp( _nscanned, requireIndex ) ),
_currOp(),
_nscanned() {
_mps->initialOp( _originalOp );
shared_ptr op = _mps->nextOp();
rethrowOnError( op );
if ( !op->complete() ) {
_currOp = dynamic_cast( op.get() );
}
}
virtual bool ok() { return _takeover ? _takeover->ok() : !currLoc().isNull(); }
virtual Record* _current() {
if ( _takeover ) {
return _takeover->_current();
}
assertOk();
return currLoc().rec();
}
virtual BSONObj current() {
if ( _takeover ) {
return _takeover->current();
}
assertOk();
return currLoc().obj();
}
virtual DiskLoc currLoc() { return _takeover ? _takeover->currLoc() : _currLoc(); }
DiskLoc _currLoc() const {
dassert( !_takeover );
return _currOp ? _currOp->currLoc() : DiskLoc();
}
virtual bool advance() {
return _advance( false );
}
virtual BSONObj currKey() const {
if ( _takeover ) {
return _takeover->currKey();
}
assertOk();
return _currOp->currKey();
}
/**
* When return value isNull(), our cursor will be ignored for yielding by the client cursor implementation.
* In such cases, an internal ClientCursor will update the position of component cursors when necessary.
*/
virtual DiskLoc refLoc() { return _takeover ? _takeover->refLoc() : DiskLoc(); }
virtual BSONObj indexKeyPattern() {
if ( _takeover ) {
return _takeover->indexKeyPattern();
}
assertOk();
return _currOp->cursor()->indexKeyPattern();
}
virtual bool supportGetMore() { return false; }
virtual bool supportYields() { return _takeover ? _takeover->supportYields() : true; }
virtual void prepareToTouchEarlierIterate() {
if ( _takeover ) {
_takeover->prepareToTouchEarlierIterate();
}
else if ( _currOp ) {
if ( _mps->currentNPlans() == 1 ) {
// This single plan version is a bit more performant, so we use it when possible.
_currOp->prepareToTouchEarlierIterate();
}
else {
// With multiple plans, the 'earlier iterate' could be the current iterate of one of
// the component plans. We do a full yield of all plans, using ClientCursors.
verify( 15941, _mps->prepareToYield() );
}
}
}
virtual void recoverFromTouchingEarlierIterate() {
if ( _takeover ) {
_takeover->recoverFromTouchingEarlierIterate();
}
else if ( _currOp ) {
if ( _mps->currentNPlans() == 1 ) {
_currOp->recoverFromTouchingEarlierIterate();
}
else {
recoverFromYield();
}
}
}
virtual bool prepareToYield() {
if ( _takeover ) {
return _takeover->prepareToYield();
}
else if ( _currOp ) {
return _mps->prepareToYield();
}
else {
// No state needs to be protected, so yielding is fine.
return true;
}
}
virtual void recoverFromYield() {
if ( _takeover ) {
_takeover->recoverFromYield();
return;
}
if ( _currOp ) {
_mps->recoverFromYield();
if ( _currOp->error() || !ok() ) {
// Advance to a non error op if on of the ops errored out.
// Advance to a following $or clause if the $or clause returned all results.
_advance( true );
}
}
}
virtual string toString() { return "QueryOptimizerCursor"; }
virtual bool getsetdup(DiskLoc loc) {
if ( _takeover ) {
if ( getdupInternal( loc ) ) {
return true;
}
return _takeover->getsetdup( loc );
}
assertOk();
return getsetdupInternal( loc );
}
/** Matcher needs to know if the the cursor being forwarded to is multikey. */
virtual bool isMultiKey() const {
if ( _takeover ) {
return _takeover->isMultiKey();
}
assertOk();
return _currOp->cursor()->isMultiKey();
}
virtual bool modifiedKeys() const { return true; }
/** Initial capped wrapping cases (before takeover) are handled internally by a component ClientCursor. */
virtual bool capped() const { return _takeover ? _takeover->capped() : false; }
virtual long long nscanned() { return _takeover ? _takeover->nscanned() : _nscanned; }
virtual shared_ptr matcherPtr() const {
if ( _takeover ) {
return _takeover->matcherPtr();
}
assertOk();
return _currOp->matcher( _currOp->cursor() );
}
virtual CoveredIndexMatcher* matcher() const {
if ( _takeover ) {
return _takeover->matcher();
}
assertOk();
return _currOp->matcher( _currOp->cursor() ).get();
}
virtual bool currentMatches( MatchDetails *details = 0 ) {
if ( _takeover ) {
return _takeover->currentMatches( details );
}
assertOk();
return _currOp->currentMatches( details );
}
private:
/**
* Advances the QueryPlanSet::Runner.
* @param force - advance even if the current query op is not valid. The 'force' param should only be specified
* when there are plans left in the runner.
*/
bool _advance( bool force ) {
if ( _takeover ) {
return _takeover->advance();
}
if ( !force && !ok() ) {
return false;
}
DiskLoc prevLoc = _currLoc();
_currOp = 0;
shared_ptr op = _mps->nextOp();
rethrowOnError( op );
// Avoiding dynamic_cast here for performance. Soon we won't need to
// do a cast at all.
QueryOptimizerCursorOp *qocop = (QueryOptimizerCursorOp*)( op.get() );
if ( !op->complete() ) {
// The 'qocop' will be valid until we call _mps->nextOp() again. We return 'current' values from this op.
_currOp = qocop;
}
else if ( op->stopRequested() ) {
if ( qocop->cursor() ) {
// Ensure that prepareToTouchEarlierIterate() may be called safely when a BasicCursor takes over.
if ( !prevLoc.isNull() && prevLoc == qocop->currLoc() ) {
qocop->cursor()->advance();
}
// Clear the Runner and any unnecessary QueryOps and their ClientCursors.
_mps->clearRunner();
_takeover.reset( new MultiCursor( _mps,
qocop->cursor(),
op->matcher( qocop->cursor() ),
*op,
_nscanned - qocop->cursor()->nscanned() ) );
}
}
return ok();
}
/** Forward an exception when the runner errs out. */
void rethrowOnError( const shared_ptr< QueryOp > &op ) {
if ( op->error() ) {
throw MsgAssertionException( op->exception() );
}
}
void assertOk() const {
massert( 14809, "Invalid access for cursor that is not ok()", !_currLoc().isNull() );
}
/** Insert and check for dups before takeover occurs */
bool getsetdupInternal(const DiskLoc &loc) {
return _dups.getsetdup( loc );
}
/** Just check for dups - after takeover occurs */
bool getdupInternal(const DiskLoc &loc) {
dassert( _takeover );
return _dups.getdup( loc );
}
auto_ptr _mps;
shared_ptr _originalOp;
QueryOptimizerCursorOp *_currOp;
shared_ptr _takeover;
long long _nscanned;
// Using a SmallDupSet seems a bit hokey, but I've measured a 5% performance improvement with ~100 document non multi key scans.
SmallDupSet _dups;
};
/**
 * Builds a QueryOptimizerCursor over the supplied scanner.
 * @param mps - scanner to iterate; ownership is handed to the new cursor.
 * @param requireIndex - if unindexed scans should be prohibited.
 * @return the new cursor, or an empty pointer when construction fails with
 * OutOfOrderDocumentsAssertionCode. Any other AssertionException is rethrown.
 */
shared_ptr<Cursor> newQueryOptimizerCursor( auto_ptr<MultiPlanScanner> mps, bool requireIndex ) {
    try {
        return shared_ptr<Cursor>( new QueryOptimizerCursor( mps, requireIndex ) );
    } catch( const AssertionException &e ) {
        if ( e.getCode() == OutOfOrderDocumentsAssertionCode ) {
            // If no indexes follow the requested sort order, return an
            // empty pointer. This is legacy behavior based on bestGuessCursor().
            return shared_ptr<Cursor>();
        }
        throw;
    }
    // Not reached: both the try and the catch paths return or throw.
    return shared_ptr<Cursor>();
}
/**
 * Chooses a cursor for the given query and sort order.
 *
 * In order, this tries: a full collection scan for an empty, unordered query that does not
 * require an index; a direct _id index lookup for a simple id query; the scanner's single
 * cursor when it has one (unless an index is required and that cursor is a BasicCursor);
 * and finally a QueryOptimizerCursor.
 *
 * @param ns - namespace to query.
 * @param query - query to run.
 * @param order - requested sort order.
 * @param requireIndex - if unindexed scans should be prohibited.
 * @param simpleEqualityMatch - optional out parameter; reset to false on entry and set to true
 * only on the single cursor path when the plan is an exact key match and the matcher does not
 * need the record.
 * @return the chosen cursor; may be empty (see newQueryOptimizerCursor()).
 */
shared_ptr<Cursor> NamespaceDetailsTransient::getCursor( const char *ns, const BSONObj &query,
                                                         const BSONObj &order, bool requireIndex,
                                                         bool *simpleEqualityMatch ) {
    if ( simpleEqualityMatch ) {
        *simpleEqualityMatch = false;
    }
    if ( query.isEmpty() && order.isEmpty() && !requireIndex ) {
        // TODO This will not use a covered index currently.
        return theDataFileMgr.findAll( ns );
    }
    if ( isSimpleIdQuery( query ) ) {
        Database *database = cc().database();
        verify( 15985, database );
        NamespaceDetails *d = database->namespaceIndex.details(ns);
        if ( d ) {
            int idxNo = d->findIdIndex();
            if ( idxNo >= 0 ) {
                IndexDetails& i = d->idx( idxNo );
                BSONObj key = i.getKeyFromQuery( query );
                // The same key is used as both the start and end bound.
                return shared_ptr<Cursor>( BtreeCursor::make( d, idxNo, i, key, key, true, 1 ) );
            }
        }
    }
    auto_ptr<MultiPlanScanner> mps( new MultiPlanScanner( ns, query, order ) ); // mayYield == false
    shared_ptr<Cursor> single = mps->singleCursor();
    if ( single ) {
        if ( !( requireIndex &&
                dynamic_cast<BasicCursor*>( single.get() ) /* May not use an unindexed cursor */ ) ) {
            if ( !query.isEmpty() && !single->matcher() ) {
                shared_ptr<CoveredIndexMatcher> matcher( new CoveredIndexMatcher( query, single->indexKeyPattern() ) );
                single->setMatcher( matcher );
            }
            if ( simpleEqualityMatch ) {
                const QueryPlan *qp = mps->singlePlan();
                // A matcher is only installed above for a non empty query, so guard against
                // dereferencing a missing matcher.
                if ( qp->exactKeyMatch() && single->matcher() && !single->matcher()->needRecord() ) {
                    *simpleEqualityMatch = true;
                }
            }
            return single;
        }
    }
    return newQueryOptimizerCursor( mps, requireIndex );
}
/**
 * This interface just available for testing.
 * Creates a MultiPlanScanner for the namespace, query and order, and wraps it in a
 * QueryOptimizerCursor via the scanner based overload.
 * @param requireIndex - if unindexed scans should be prohibited.
 */
shared_ptr<Cursor> newQueryOptimizerCursor( const char *ns, const BSONObj &query, const BSONObj &order, bool requireIndex ) {
    auto_ptr<MultiPlanScanner> mps( new MultiPlanScanner( ns, query, order ) ); // mayYield == false
    return newQueryOptimizerCursor( mps, requireIndex );
}
} // namespace mongo;