/** * Copyright (C) 2008 10gen Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License, version 3, * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ #include "mongo/base/disallow_copying.h" #include "mongo/db/clientcursor.h" #include "mongo/db/cursor.h" #include "mongo/db/explain.h" #include "mongo/db/matcher.h" #include "mongo/db/query_plan.h" #include "mongo/db/queryutil.h" #include "mongo/util/elapsed_tracker.h" #pragma once namespace mongo { static const int OutOfOrderDocumentsAssertionCode = 14810; class FieldRangeSetPair; class QueryPlanSelectionPolicy; class OrRangeGenerator; class ParsedQuery; /** * Helper class for a QueryPlanRunner to cache and count matches. One object of this type is * used per candidate QueryPlan (as there is one QueryPlanRunner per QueryPlan). * * Typical usage: * 1) resetMatch() - reset stored match value to Unkonwn. * 2) setMatch() - set match value to a definite true/false value. * 3) knowMatch() - check if setMatch() has been called. * 4) incMatch() - increment count if match is true. */ class CachedMatchCounter { public: /** * @param aggregateNscanned - shared count of nscanned for this and other plans. * @param cumulativeCount - starting point for accumulated count over a series of plans. */ CachedMatchCounter( long long& aggregateNscanned, int cumulativeCount ); /** Set whether dup checking is enabled when counting. */ void setCheckDups( bool checkDups ) { _checkDups = checkDups; } void resetMatch(); /** @return true if the match was not previously recorded. */ bool setMatch( bool match ); bool knowMatch() const { return _match != Unknown; } void incMatch( const DiskLoc& loc ); bool wouldIncMatch( const DiskLoc& loc ) const; bool enoughCumulativeMatchesToChooseAPlan() const; bool enoughMatchesToRecordPlan() const; int cumulativeCount() const { return _cumulativeCount; } int count() const { return _count; } /** Update local and aggregate nscanned counts. */ void updateNscanned( long long nscanned ); long long nscanned() const { return _nscanned; } long long& aggregateNscanned() const { return _aggregateNscanned; } private: bool getsetdup( const DiskLoc& loc ); bool getdup( const DiskLoc& loc ) const; long long& _aggregateNscanned; long long _nscanned; int _cumulativeCount; int _count; bool _checkDups; enum MatchState { Unknown, False, True }; MatchState _match; bool _counted; set _dups; }; /** * Iterates through a QueryPlan's candidate matches, keeping track of accumulated nscanned. * Generally used along with runners for other QueryPlans in a QueryPlanRunnerQueue priority * queue. Eg if there are three candidate QueryPlans evaluated in parallel, there are three * QueryPlanRunners, one checking for matches on each query. * * Typical usage: * 1) A new QueryPlanRunner is generated using createChild(). * 2) A QueryPlan is assigned using setQueryPlan(). * 3) init() is called to initialize the runner. * 4) next() is called repeatedly, with nscanned() checked after each call. * 5) In one of these calls to next(), setComplete() is called internally. * 6) The QueryPattern for the QueryPlan may be recorded as a winning plan. */ class QueryPlanRunner { MONGO_DISALLOW_COPYING( QueryPlanRunner ); public: /** * @param aggregateNscanned Shared long long counting total nscanned for runners for all * cursors. * @param selectionPolicy Characterizes the set of QueryPlans allowed for this operation. * See queryoptimizercursor.h for more information. * @param requireOrder Whether only ordered plans are allowed. * @param alwaysCountMatches Whether matches are to be counted regardless of ordering. * @param cumulativeCount Total count. */ QueryPlanRunner( long long& aggregateNscanned, const QueryPlanSelectionPolicy& selectionPolicy, const bool& requireOrder, bool alwaysCountMatches, int cumulativeCount = 0 ); /** @return QueryPlan assigned to this runner by the query optimizer. */ const QueryPlan& queryPlan() const { return *_queryPlan; } /** Advance to the next potential matching document (eg using a cursor). */ void next(); /** * @return current 'nscanned' metric for this runner. Used to compare cost to other * runners. */ long long nscanned() const; /** Take any steps necessary before the db mutex is yielded. */ void prepareToYield(); /** Recover once the db mutex is regained. */ void recoverFromYield(); /** Take any steps necessary before an earlier iterate of the cursor is modified. */ void prepareToTouchEarlierIterate(); /** Recover after the earlier iterate is modified. */ void recoverFromTouchingEarlierIterate(); DiskLoc currLoc() const { return _c ? _c->currLoc() : DiskLoc(); } BSONObj currKey() const { return _c ? _c->currKey() : BSONObj(); } bool currentMatches( MatchDetails* details ); /** * @return true iff the QueryPlan for this runner may be registered * as a winning plan. */ bool mayRecordPlan() const; shared_ptr cursor() const { return _c; } /** @return true iff the implementation called setComplete() or setStop(). */ bool complete() const { return _complete; } /** @return true iff the implementation called setStop(). */ bool stopRequested() const { return _stopRequested; } bool completeWithoutStop() const { return complete() && !stopRequested(); } /** @return true iff the implementation errored out. */ bool error() const { return _error; } /** @return the error information. */ ExceptionInfo exception() const { return _exception; } /** To be called by QueryPlanSet::Runner only. */ /** * @return a copy of the inheriting class, which will be run with its own query plan. The * child runner will assume its parent runner has completed execution. */ QueryPlanRunner* createChild() const; void setQueryPlan( const QueryPlan* queryPlan ); /** Handle initialization after a QueryPlan has been set. */ void init(); void setException( const DBException& e ); /** @return an ExplainPlanInfo object that will be updated as the query runs. */ shared_ptr generateExplainInfo(); shared_ptr explainInfo() const { return _explainPlanInfo; } const Projection::KeyOnly* keyFieldsOnly() const { return queryPlan().keyFieldsOnly().get(); } private: /** Call if all results have been found. */ void setComplete() { _complete = true; } /** Call if the scan is complete even if not all results have been found. */ void setStop() { setComplete(); _stopRequested = true; } void mayAdvance(); bool countingMatches(); bool countMatches() const; /** * @return true if the results generated by this query plan will be loaded from the record * store (not built from an index entry). */ bool hasDocumentLoadingQueryPlan() const; void recordCursorLocation(); void checkCursorAdvanced(); void handleCursorAdvanced(); void checkCursorOrdering(); bool _complete; bool _stopRequested; ExceptionInfo _exception; const QueryPlan* _queryPlan; bool _error; CachedMatchCounter _matchCounter; bool _countingMatches; bool _mustAdvance; bool _capped; shared_ptr _c; ClientCursor::Holder _cc; DiskLoc _posBeforeYield; ClientCursor::YieldData _yieldData; const QueryPlanSelectionPolicy& _selectionPolicy; const bool& _requireOrder; // TODO don't use a ref for this, but signal change explicitly shared_ptr _explainPlanInfo; bool _alwaysCountMatches; }; /** * This class works if T::operator< is variant unlike a regular stl priority queue, but it's * very slow. However if _vec.size() is always very small, it would be fine, maybe even faster * than a smart impl that does more memory allocations. * TODO Clean up this temporary code. */ template class PriorityQueue { MONGO_DISALLOW_COPYING( PriorityQueue ); public: PriorityQueue() { _vec.reserve(4); } int size() const { return _vec.size(); } bool empty() const { return _vec.empty(); } void push(const T& x) { _vec.push_back(x); } T pop() { size_t t = 0; for( size_t i = 1; i < _vec.size(); i++ ) { if( _vec[t] < _vec[i] ) t = i; } T ret = _vec[t]; _vec.erase(_vec.begin()+t); return ret; } private: vector _vec; }; class QueryPlanSet; /** Populates a provided QueryPlanSet with candidate query plans, when requested. */ class QueryPlanGenerator { public: /** Policies for utilizing recorded plans. */ typedef enum { Ignore, // Ignore the recorded plan and try all candidate plans. UseIfInOrder, // Use the recorded plan if it is properly ordered. Use // Always use the recorded plan. } RecordedPlanPolicy; /** @param qps The QueryPlanSet to which plans will be provided. */ QueryPlanGenerator( QueryPlanSet& qps, auto_ptr originalFrsp, const shared_ptr& parsedQuery, const BSONObj& hint, RecordedPlanPolicy recordedPlanPolicy, const BSONObj& min, const BSONObj& max, bool allowSpecial ); /** Populate the provided QueryPlanSet with an initial set of plans. */ void addInitialPlans(); /** Supplement a cached plan provided earlier by adding additional query plans. */ void addFallbackPlans(); private: bool addShortCircuitPlan( NamespaceDetails* d ); bool addHintPlan( NamespaceDetails* d ); bool addSpecialPlan( NamespaceDetails* d ); void addStandardPlans( NamespaceDetails* d ); bool addCachedPlan( NamespaceDetails* d ); shared_ptr newPlan( NamespaceDetails* d, int idxNo, const BSONObj& min = BSONObj(), const BSONObj& max = BSONObj(), const string& special = "" ) const; bool setUnindexedPlanIf( bool set, NamespaceDetails* d ); void setSingleUnindexedPlan( NamespaceDetails* d ); void setHintedPlanForIndex( IndexDetails& id ); void validateAndSetHintedPlan( const shared_ptr& plan ); void warnOnCappedIdTableScan() const; QueryPlanSet& _qps; auto_ptr _originalFrsp; shared_ptr _parsedQuery; BSONObj _hint; RecordedPlanPolicy _recordedPlanPolicy; BSONObj _min; BSONObj _max; bool _allowSpecial; }; /** A set of candidate query plans for a query. */ class QueryPlanSet { public: typedef boost::shared_ptr QueryPlanPtr; typedef vector PlanVector; /** * @param originalFrsp - original constraints for this query clause; if null, frsp will be * used. */ static QueryPlanSet* make( const char* ns, auto_ptr frsp, auto_ptr originalFrsp, const BSONObj& originalQuery, const BSONObj& order, const shared_ptr& parsedQuery, const BSONObj& hint, QueryPlanGenerator::RecordedPlanPolicy recordedPlanPolicy, const BSONObj& min, const BSONObj& max, bool allowSpecial ); /** @return number of candidate plans. */ int nPlans() const { return _plans.size(); } QueryPlanPtr firstPlan() const { return _plans[ 0 ]; } /** @return true if a plan is selected based on previous success of this plan. */ bool usingCachedPlan() const { return _usingCachedPlan; } /** @return true if some candidate plans may have been excluded due to plan caching. */ bool hasPossiblyExcludedPlans() const; /** @return a single plan that may work well for the specified query. */ QueryPlanPtr getBestGuess() const; const FieldRangeSetPair& frsp() const { return *_frsp; } BSONObj originalQuery() const { return _originalQuery; } BSONObj order() const { return _order; } /** @return true if an active plan is in order. */ bool haveInOrderPlan() const; /** @return true if an active or fallback plan is in order. */ bool possibleInOrderPlan() const; /** @return true if an active or fallback plan is out of order. */ bool possibleOutOfOrderPlan() const; CandidatePlanCharacter characterizeCandidatePlans() const; bool prepareToRetryQuery(); string toString() const; /** Configure a single query plan if one has not already been provided. */ void setSinglePlan( const QueryPlanPtr& plan ); /** Configure a query plan from the plan cache. */ void setCachedPlan( const QueryPlanPtr& plan, const CachedQueryPlan& cachedPlan ); /** Add a candidate query plan, potentially one of many. */ void addCandidatePlan( const QueryPlanPtr& plan ); const PlanVector& plans() const { return _plans; } bool mayRecordPlan() const { return _mayRecordPlan; } int oldNScanned() const { return _oldNScanned; } void addFallbackPlans(); void setUsingCachedPlan( bool usingCachedPlan ) { _usingCachedPlan = usingCachedPlan; } //for testing bool modifiedKeys() const; bool hasMultiKey() const; private: QueryPlanSet( const char* ns, auto_ptr frsp, auto_ptr originalFrsp, const BSONObj& originalQuery, const BSONObj& order, const shared_ptr& parsedQuery, const BSONObj& hint, QueryPlanGenerator::RecordedPlanPolicy recordedPlanPolicy, const BSONObj& min, const BSONObj& max, bool allowSpecial ); void init(); void pushPlan( const QueryPlanPtr& plan ); QueryPlanGenerator _generator; BSONObj _originalQuery; auto_ptr _frsp; PlanVector _plans; bool _mayRecordPlan; bool _usingCachedPlan; CandidatePlanCharacter _cachedPlanCharacter; BSONObj _order; long long _oldNScanned; ElapsedTracker _yieldSometimesTracker; bool _allowSpecial; }; /** * A priority queue of QueryPlanRunners ordered by their nscanned values. The QueryPlanRunners * are iterated sequentially and reinserted into the queue until one runner completes or all * runners error out. */ class QueryPlanRunnerQueue { public: QueryPlanRunnerQueue( QueryPlanSet& plans, const QueryPlanRunner& prototypeRunner ); /** * Pull a runner from the priority queue, advance it if possible, re-insert it into the * queue if it is not done, and return it. But if this runner errors out, retry with * another runner until a non error runner is found or all runners have errored out. * @return the next non error runner if there is one, otherwise an error runner. * If the returned runner is complete() or error(), this queue becomes done(). */ shared_ptr next(); /** @return true if done iterating. */ bool done() const { return _done; } /** Prepare all runners for a database mutex yield. */ void prepareToYield(); /** Restore all runners after a database mutex yield. */ void recoverFromYield(); /** @return an ExplainClauseInfo object that will be updated as the query runs. */ shared_ptr generateExplainInfo() { _explainClauseInfo.reset( new ExplainClauseInfo() ); return _explainClauseInfo; } private: const QueryPlanRunner& _prototypeRunner; QueryPlanSet& _plans; static void initRunner( QueryPlanRunner& runner ); static void nextRunner( QueryPlanRunner& runner ); static void prepareToYieldRunner( QueryPlanRunner& runner ); static void recoverFromYieldRunner( QueryPlanRunner& runner ); /** Initialize the Runner. */ shared_ptr init(); /** Move the Runner forward one iteration, and @return the plan for the iteration. */ shared_ptr _next(); vector > _runners; struct RunnerHolder { RunnerHolder( const shared_ptr& runner ) : _runner( runner ), _offset() { } shared_ptr _runner; long long _offset; bool operator<( const RunnerHolder& other ) const { return _runner->nscanned() + _offset > other._runner->nscanned() + other._offset; } }; PriorityQueue _queue; shared_ptr _explainClauseInfo; bool _done; }; /** Handles $or type queries by generating a QueryPlanSet for each $or clause. */ class MultiPlanScanner { public: static MultiPlanScanner* make( const StringData& ns, const BSONObj& query, const BSONObj& order, const shared_ptr& parsedQuery = shared_ptr(), const BSONObj& hint = BSONObj(), QueryPlanGenerator::RecordedPlanPolicy recordedPlanPolicy = QueryPlanGenerator::Use, const BSONObj& min = BSONObj(), const BSONObj& max = BSONObj() ); /** Set the originalRunner for QueryPlanSet iteration. */ void initialRunner( const shared_ptr& originalRunner ) { _baseRunner = originalRunner; } /** * Advance to the next runner, if not doneRunners(). * @return the next non error runner if there is one, otherwise an error runner. * If the returned runner is complete() or error(), the MultiPlanScanner becomes * doneRunners() and no further runner iteration is possible. */ shared_ptr nextRunner(); /** @return true if done with runner iteration. */ bool doneRunners() const { return _doneRunners; } /** * Advance to the next $or clause; hasMoreClauses() must be true. * @param currentPlan QueryPlan of the current $or clause * @return best guess query plan of the next $or clause, 0 if there is no such plan. */ const QueryPlan* nextClauseBestGuessPlan( const QueryPlan& currentPlan ); /** Add explain information for a new clause. */ void addClauseInfo( const shared_ptr& clauseInfo ) { verify( _explainQueryInfo ); _explainQueryInfo->addClauseInfo( clauseInfo ); } /** @return an ExplainQueryInfo object that will be updated as the query runs. */ shared_ptr generateExplainInfo() { _explainQueryInfo.reset( new ExplainQueryInfo() ); return _explainQueryInfo; } /** Yield the runner member. */ void prepareToYield(); void recoverFromYield(); /** Clear the runner member. */ void clearRunnerQueue(); void setRecordedPlanPolicy( QueryPlanGenerator::RecordedPlanPolicy recordedPlanPolicy ) { _recordedPlanPolicy = recordedPlanPolicy; } int currentNPlans() const; /** * @return the query plan that would be used if the scanner would run a single * cursor for this query, otherwise 0. The returned plan is invalid if this * MultiPlanScanner is destroyed, hence we return a raw pointer. */ const QueryPlan* singlePlan() const; /** @return true if more $or clauses need to be scanned. */ bool hasMoreClauses() const; /** * @return plan information if there is a cached plan for a non $or query, otherwise an * empty object. */ BSONObj cachedPlanExplainSummary() const; /** * @return true if this is not a $or query and some candidate plans may have been excluded * due to plan caching. */ bool hasPossiblyExcludedPlans() const { return !_or && _currentQps->hasPossiblyExcludedPlans(); } bool hasMultiKey() const { return _currentQps->hasMultiKey(); } /** Clear recorded indexes for the current QueryPlanSet's patterns. */ void clearIndexesForPatterns() const; /** @return true if an active plan of _currentQps is in order. */ bool haveInOrderPlan() const; /** @return true if an active or fallback plan of _currentQps is in order. */ bool possibleInOrderPlan() const; /** @return true if an active or fallback plan of _currentQps is out of order. */ bool possibleOutOfOrderPlan() const; int i() const { return _i; } string toString() const; private: MultiPlanScanner( const StringData& ns, const BSONObj& query, const shared_ptr& parsedQuery, const BSONObj& hint, QueryPlanGenerator::RecordedPlanPolicy recordedPlanPolicy ); void init( const BSONObj& order, const BSONObj& min, const BSONObj& max ); /** Initialize or iterate a runner generated from @param originalOp. */ shared_ptr iterateRunnerQueue( QueryPlanRunner& originalRunner, bool retried = false ); shared_ptr nextRunnerSimple(); shared_ptr nextRunnerOr(); void updateCurrentQps( QueryPlanSet* qps ); void assertNotOr() const { massert( 13266, "not implemented for $or query", !_or ); } void assertHasMoreClauses() const { massert( 13271, "no more clauses", hasMoreClauses() ); } void handleEndOfClause( const QueryPlan& clausePlan ); void handleBeginningOfClause(); bool mayHandleBeginningOfClause(); bool haveUselessOr() const; const string _ns; bool _or; BSONObj _query; shared_ptr _parsedQuery; scoped_ptr _org; // May be null in certain non $or query cases. scoped_ptr _currentQps; int _i; QueryPlanGenerator::RecordedPlanPolicy _recordedPlanPolicy; BSONObj _hint; bool _tableScanned; shared_ptr _baseRunner; scoped_ptr _runnerQueue; shared_ptr _explainQueryInfo; bool _doneRunners; }; /** * Provides a cursor interface for serial single Cursor iteration using a MultiPlanScanner. * Currently used internally by a QueryOptimizerCursor. * * A MultiCursor is backed by one BasicCursor or BtreeCursor at a time and forwards calls for * ensuring a consistent state after a write to its backing Cursor. */ class MultiCursor : public Cursor { public: /** @param nscanned is the initial nscanned value. */ MultiCursor( auto_ptr mps, const shared_ptr& c, const shared_ptr& matcher, const shared_ptr& explainPlanInfo, const QueryPlanRunner& runner, long long nscanned ); virtual bool ok() { return _c->ok(); } virtual Record* _current() { return _c->_current(); } virtual BSONObj current() { return _c->current(); } virtual DiskLoc currLoc() { return _c->currLoc(); } virtual bool advance(); virtual BSONObj currKey() const { return _c->currKey(); } virtual DiskLoc refLoc() { return _c->refLoc(); } virtual void noteLocation() { _c->noteLocation(); } virtual void checkLocation() { _c->checkLocation(); } virtual void recoverFromYield(); virtual bool supportGetMore() { return true; } virtual bool supportYields() { return true; } virtual BSONObj indexKeyPattern() { return _c->indexKeyPattern(); } /** Deduping documents from a prior cursor is handled by the matcher. */ virtual bool getsetdup(DiskLoc loc) { return _c->getsetdup( loc ); } virtual bool modifiedKeys() const { return true; } virtual bool isMultiKey() const { return _mps->hasMultiKey(); } virtual CoveredIndexMatcher* matcher() const { return _matcher.get(); } virtual bool capped() const { return _c->capped(); } virtual long long nscanned() { return _nscanned + _c->nscanned(); } void noteIterate( bool match, bool loadedRecord ); const QueryPlan& queryPlan() const { verify( _c->ok() && _queryPlan ); return *_queryPlan; } const Projection::KeyOnly* keyFieldsOnly() const { verify( _c->ok() && _queryPlan ); return _queryPlan->keyFieldsOnly().get(); } private: void advanceClause(); void advanceExhaustedClauses(); auto_ptr _mps; shared_ptr _c; shared_ptr _matcher; const QueryPlan* _queryPlan; long long _nscanned; shared_ptr _explainPlanInfo; }; /** NOTE min, max, and keyPattern will be updated to be consistent with the selected index. */ IndexDetails* indexDetailsForRange( const char* ns, string& errmsg, BSONObj& min, BSONObj& max, BSONObj& keyPattern ); class CachedQueryPlan; /** * Add-on functionality for queryutil classes requiring access to indexing * functionality not currently linked to mongos. * TODO Clean this up a bit, possibly with separate sharded and non sharded * implementations for the appropriate queryutil classes or by pulling index * related functionality into separate wrapper classes. */ struct QueryUtilIndexed { /** @return true if the index may be useful according to its KeySpec. */ static bool indexUseful( const FieldRangeSetPair& frsp, NamespaceDetails* d, int idxNo, const BSONObj& order ); /** Clear any indexes recorded as the best for either the single or multi key pattern. */ static void clearIndexesForPatterns( const FieldRangeSetPair& frsp, const BSONObj& order ); /** Return a recorded best index for the single or multi key pattern. */ static CachedQueryPlan bestIndexForPatterns( const FieldRangeSetPair& frsp, const BSONObj& order ); static bool uselessOr( const OrRangeGenerator& org, NamespaceDetails* d, int hintIdx ); }; } // namespace mongo