diff options
Diffstat (limited to 'src/mongo/db/exec/near.h')
-rw-r--r-- | src/mongo/db/exec/near.h | 360 |
1 files changed, 178 insertions, 182 deletions
diff --git a/src/mongo/db/exec/near.h b/src/mongo/db/exec/near.h index cc6d170f380..203ba8495a3 100644 --- a/src/mongo/db/exec/near.h +++ b/src/mongo/db/exec/near.h @@ -43,193 +43,189 @@ namespace mongo { +/** + * An abstract stage which uses a progressive sort to return results sorted by distance. This + * is useful when we do not have a full ordering computed over the distance metric and don't + * want to generate one. + * + * Child stages need to implement functionality which: + * + * - defines a distance metric + * - iterates through ordered distance intervals, nearest to furthest + * - provides a covering for each distance interval + * + * For example - given a distance search over documents with distances from [0 -> 10], the child + * stage might break up the search into intervals [0->5),[5,7),[7->10]. + * + * Each interval requires a PlanStage which *covers* the interval (returns all results in the + * interval). Results in each interval are buffered fully before being returned to ensure that + * ordering is preserved. + * + * For efficient search, the child stage which covers the distance interval in question should + * not return too many results outside the interval, but correctness only depends on the child + * stage returning all results inside the interval. As an example, a PlanStage which covers the + * interval [0->5) might just be a full collection scan - this will always cover every interval, + * but is slow. If there is an index available, an IndexScan stage might also return all + * documents with distance [0->5) but would be much faster. + * + * Also for efficient search, the intervals should not be too large or too small - though again + * correctness does not depend on interval size. + * + * TODO: Right now the interface allows the nextCovering() to be adaptive, but doesn't allow + * aborting and shrinking a covered range being buffered if we guess wrong. + */ +class NearStage : public PlanStage { +public: + struct CoveredInterval; + + virtual ~NearStage(); + + virtual bool isEOF(); + virtual StageState work(WorkingSetID* out); + + virtual void saveState(); + virtual void restoreState(OperationContext* opCtx); + virtual void invalidate(OperationContext* txn, const RecordId& dl, InvalidationType type); + + virtual std::vector<PlanStage*> getChildren() const; + + virtual StageType stageType() const; + virtual PlanStageStats* getStats(); + virtual const CommonStats* getCommonStats(); + virtual const SpecificStats* getSpecificStats(); + +protected: /** - * An abstract stage which uses a progressive sort to return results sorted by distance. This - * is useful when we do not have a full ordering computed over the distance metric and don't - * want to generate one. - * - * Child stages need to implement functionality which: - * - * - defines a distance metric - * - iterates through ordered distance intervals, nearest to furthest - * - provides a covering for each distance interval - * - * For example - given a distance search over documents with distances from [0 -> 10], the child - * stage might break up the search into intervals [0->5),[5,7),[7->10]. - * - * Each interval requires a PlanStage which *covers* the interval (returns all results in the - * interval). Results in each interval are buffered fully before being returned to ensure that - * ordering is preserved. - * - * For efficient search, the child stage which covers the distance interval in question should - * not return too many results outside the interval, but correctness only depends on the child - * stage returning all results inside the interval. As an example, a PlanStage which covers the - * interval [0->5) might just be a full collection scan - this will always cover every interval, - * but is slow. If there is an index available, an IndexScan stage might also return all - * documents with distance [0->5) but would be much faster. - * - * Also for efficient search, the intervals should not be too large or too small - though again - * correctness does not depend on interval size. + * Subclasses of NearStage must provide basics + a stats object which gets owned here. + * The stats object must have specific stats which are a subclass of NearStats, otherwise + * it's generated automatically. + */ + NearStage(OperationContext* txn, + WorkingSet* workingSet, + Collection* collection, + PlanStageStats* stats); + + /** + * Exposes NearStats for adaptive search, allows additional specific stats in subclasses. + */ + NearStats* getNearStats(); + + // + // Methods implemented for specific search functionality + // + + /** + * Constructs the next covering over the next interval to buffer results from, or NULL + * if the full range has been searched. Use the provided working set as the working + * set for the covering stage if required. * - * TODO: Right now the interface allows the nextCovering() to be adaptive, but doesn't allow - * aborting and shrinking a covered range being buffered if we guess wrong. + * Returns !OK on failure to create next stage. */ - class NearStage : public PlanStage { - public: - - struct CoveredInterval; - - virtual ~NearStage(); - - virtual bool isEOF(); - virtual StageState work(WorkingSetID* out); - - virtual void saveState(); - virtual void restoreState(OperationContext* opCtx); - virtual void invalidate(OperationContext* txn, const RecordId& dl, InvalidationType type); - - virtual std::vector<PlanStage*> getChildren() const; - - virtual StageType stageType() const; - virtual PlanStageStats* getStats(); - virtual const CommonStats* getCommonStats(); - virtual const SpecificStats* getSpecificStats(); - - protected: - - /** - * Subclasses of NearStage must provide basics + a stats object which gets owned here. - * The stats object must have specific stats which are a subclass of NearStats, otherwise - * it's generated automatically. - */ - NearStage(OperationContext* txn, - WorkingSet* workingSet, - Collection* collection, - PlanStageStats* stats); - - /** - * Exposes NearStats for adaptive search, allows additional specific stats in subclasses. - */ - NearStats* getNearStats(); - - // - // Methods implemented for specific search functionality - // - - /** - * Constructs the next covering over the next interval to buffer results from, or NULL - * if the full range has been searched. Use the provided working set as the working - * set for the covering stage if required. - * - * Returns !OK on failure to create next stage. - */ - virtual StatusWith<CoveredInterval*> nextInterval(OperationContext* txn, - WorkingSet* workingSet, - Collection* collection) = 0; - - /** - * Computes the distance value for the given member data, or -1 if the member should not be - * returned in the sorted results. - * - * Returns !OK on invalid member data. - */ - virtual StatusWith<double> computeDistance(WorkingSetMember* member) = 0; - - /* - * Initialize near stage before buffering the data. - * Return IS_EOF if subclass finishes the initialization. - * Return NEED_TIME if we need more time. - * Return errors if an error occurs. - * Can't return ADVANCED. - */ - virtual StageState initialize(OperationContext* txn, - WorkingSet* workingSet, - Collection* collection); - - private: - - // - // Save/restore/invalidate work specific to the search type. - // - - virtual void finishSaveState() = 0; - - virtual void finishRestoreState(OperationContext* txn) = 0; - - virtual void finishInvalidate(OperationContext* txn, - const RecordId& dl, - InvalidationType type) = 0; - - // - // Generic methods for progressive search functionality - // - - StageState initNext(); - StageState bufferNext(WorkingSetID* toReturn, Status* error); - StageState advanceNext(WorkingSetID* toReturn); - - // - // Generic state for progressive near search - // - - // Not owned here - OperationContext* _txn; - // Not owned here - WorkingSet* const _workingSet; - // Not owned here, used for fetching buffered results before invalidation - Collection* const _collection; - - // A progressive search works in stages of buffering and then advancing - enum SearchState { - SearchState_Initializing, - SearchState_Buffering, - SearchState_Advancing, - SearchState_Finished - } _searchState; - - // May need to track disklocs from the child stage to do our own deduping, also to do - // invalidation of buffered results. - unordered_map<RecordId, WorkingSetID, RecordId::Hasher> _nextIntervalSeen; - - // Stats for the stage covering this interval - boost::scoped_ptr<IntervalStats> _nextIntervalStats; - - // Sorted buffered results to be returned - the current interval - struct SearchResult; - std::priority_queue<SearchResult> _resultBuffer; - - // Stats - boost::scoped_ptr<PlanStageStats> _stats; - - // The current stage from which this stage should buffer results - // Pointer to the last interval in _childrenIntervals. Owned by _childrenIntervals. - CoveredInterval* _nextInterval; - - // All children CoveredIntervals and the sub-stages owned by them. - // - // All children intervals except the last active one are only used by getStats(), - // because they are all EOF. - OwnedPointerVector<CoveredInterval> _childrenIntervals; - }; + virtual StatusWith<CoveredInterval*> nextInterval(OperationContext* txn, + WorkingSet* workingSet, + Collection* collection) = 0; /** - * A covered interval over which a portion of a near search can be run. + * Computes the distance value for the given member data, or -1 if the member should not be + * returned in the sorted results. + * + * Returns !OK on invalid member data. */ - struct NearStage::CoveredInterval { - - CoveredInterval(PlanStage* covering, - bool dedupCovering, - double minDistance, - double maxDistance, - bool inclusiveMax); - - // Owned by NearStage - boost::scoped_ptr<PlanStage> const covering; - const bool dedupCovering; - - const double minDistance; - const double maxDistance; - const bool inclusiveMax; - }; + virtual StatusWith<double> computeDistance(WorkingSetMember* member) = 0; + + /* + * Initialize near stage before buffering the data. + * Return IS_EOF if subclass finishes the initialization. + * Return NEED_TIME if we need more time. + * Return errors if an error occurs. + * Can't return ADVANCED. + */ + virtual StageState initialize(OperationContext* txn, + WorkingSet* workingSet, + Collection* collection); + +private: + // + // Save/restore/invalidate work specific to the search type. + // + + virtual void finishSaveState() = 0; + + virtual void finishRestoreState(OperationContext* txn) = 0; + + virtual void finishInvalidate(OperationContext* txn, + const RecordId& dl, + InvalidationType type) = 0; + + // + // Generic methods for progressive search functionality + // + + StageState initNext(); + StageState bufferNext(WorkingSetID* toReturn, Status* error); + StageState advanceNext(WorkingSetID* toReturn); + + // + // Generic state for progressive near search + // + + // Not owned here + OperationContext* _txn; + // Not owned here + WorkingSet* const _workingSet; + // Not owned here, used for fetching buffered results before invalidation + Collection* const _collection; + + // A progressive search works in stages of buffering and then advancing + enum SearchState { + SearchState_Initializing, + SearchState_Buffering, + SearchState_Advancing, + SearchState_Finished + } _searchState; + + // May need to track disklocs from the child stage to do our own deduping, also to do + // invalidation of buffered results. + unordered_map<RecordId, WorkingSetID, RecordId::Hasher> _nextIntervalSeen; + + // Stats for the stage covering this interval + boost::scoped_ptr<IntervalStats> _nextIntervalStats; + + // Sorted buffered results to be returned - the current interval + struct SearchResult; + std::priority_queue<SearchResult> _resultBuffer; + + // Stats + boost::scoped_ptr<PlanStageStats> _stats; + + // The current stage from which this stage should buffer results + // Pointer to the last interval in _childrenIntervals. Owned by _childrenIntervals. + CoveredInterval* _nextInterval; + + // All children CoveredIntervals and the sub-stages owned by them. + // + // All children intervals except the last active one are only used by getStats(), + // because they are all EOF. + OwnedPointerVector<CoveredInterval> _childrenIntervals; +}; + +/** + * A covered interval over which a portion of a near search can be run. + */ +struct NearStage::CoveredInterval { + CoveredInterval(PlanStage* covering, + bool dedupCovering, + double minDistance, + double maxDistance, + bool inclusiveMax); + + // Owned by NearStage + boost::scoped_ptr<PlanStage> const covering; + const bool dedupCovering; + + const double minDistance; + const double maxDistance; + const bool inclusiveMax; +}; } // namespace mongo |