diff options
Diffstat (limited to 'src/mongo/db/exec/sort.h')
-rw-r--r-- | src/mongo/db/exec/sort.h | 373 |
1 files changed, 186 insertions, 187 deletions
diff --git a/src/mongo/db/exec/sort.h b/src/mongo/db/exec/sort.h index 692b4b3b4cb..b04a3d07e43 100644 --- a/src/mongo/db/exec/sort.h +++ b/src/mongo/db/exec/sort.h @@ -41,235 +41,234 @@ namespace mongo { - class BtreeKeyGenerator; +class BtreeKeyGenerator; - // Parameters that must be provided to a SortStage - class SortStageParams { - public: - SortStageParams() : collection(NULL), limit(0) { } +// Parameters that must be provided to a SortStage +class SortStageParams { +public: + SortStageParams() : collection(NULL), limit(0) {} - // Used for resolving RecordIds to BSON - const Collection* collection; + // Used for resolving RecordIds to BSON + const Collection* collection; - // How we're sorting. - BSONObj pattern; + // How we're sorting. + BSONObj pattern; - // The query. Used to create the IndexBounds for the sorting. - BSONObj query; + // The query. Used to create the IndexBounds for the sorting. + BSONObj query; - // Equal to 0 for no limit. - size_t limit; - }; + // Equal to 0 for no limit. + size_t limit; +}; +/** + * Maps a WSM value to a BSONObj key that can then be sorted via BSONObjCmp. + */ +class SortStageKeyGenerator { +public: /** - * Maps a WSM value to a BSONObj key that can then be sorted via BSONObjCmp. + * 'sortSpec' is the BSONObj in the .sort(...) clause. + * + * 'queryObj' is the BSONObj in the .find(...) clause. For multikey arrays we have to + * ensure that the value we select to sort by is within bounds generated by + * executing 'queryObj' using the virtual index with key pattern 'sortSpec'. */ - class SortStageKeyGenerator { - public: - /** - * 'sortSpec' is the BSONObj in the .sort(...) clause. - * - * 'queryObj' is the BSONObj in the .find(...) clause. For multikey arrays we have to - * ensure that the value we select to sort by is within bounds generated by - * executing 'queryObj' using the virtual index with key pattern 'sortSpec'. - */ - SortStageKeyGenerator(const Collection* collection, - const BSONObj& sortSpec, - const BSONObj& queryObj); - - /** - * Returns the key used to sort 'member'. - */ - Status getSortKey(const WorkingSetMember& member, - BSONObj* objOut) const; - - /** - * Passed to std::sort and used to sort the keys that are returned from getSortKey. - * - * Returned reference lives as long as 'this'. - */ - const BSONObj& getSortComparator() const { return _comparatorObj; } - - private: - Status getBtreeKey(const BSONObj& memberObj, BSONObj* objOut) const; - - /** - * In order to emulate the existing sort behavior we must make unindexed sort behavior as - * consistent as possible with indexed sort behavior. As such, we must only consider index - * keys that we would encounter if we were answering the query using the sort-providing - * index. - * - * Populates _hasBounds and _bounds. - */ - void getBoundsForSort(const BSONObj& queryObj, - const BSONObj& sortObj); - - // Not owned by us - const Collection* _collection; - - // The object that we use to call woCompare on our resulting key. Is equal to _rawSortSpec - // unless we have some $meta expressions. Each $meta expression has a default sort order. - BSONObj _comparatorObj; - - // The raw object in .sort() - BSONObj _rawSortSpec; - - // The sort pattern with any non-Btree sort pulled out. - BSONObj _btreeObj; - - // If we're not sorting with a $meta value we can short-cut some work. - bool _sortHasMeta; - - // True if the bounds are valid. - bool _hasBounds; - - // The bounds generated from the query we're sorting. - IndexBounds _bounds; - - // Helper to extract sorting keys from documents. - std::unique_ptr<BtreeKeyGenerator> _keyGen; - - // Helper to filter keys, ensuring keys generated with _keyGen are within _bounds. - std::unique_ptr<IndexBoundsChecker> _boundsChecker; - }; + SortStageKeyGenerator(const Collection* collection, + const BSONObj& sortSpec, + const BSONObj& queryObj); + + /** + * Returns the key used to sort 'member'. + */ + Status getSortKey(const WorkingSetMember& member, BSONObj* objOut) const; + + /** + * Passed to std::sort and used to sort the keys that are returned from getSortKey. + * + * Returned reference lives as long as 'this'. + */ + const BSONObj& getSortComparator() const { + return _comparatorObj; + } + +private: + Status getBtreeKey(const BSONObj& memberObj, BSONObj* objOut) const; /** - * Sorts the input received from the child according to the sort pattern provided. + * In order to emulate the existing sort behavior we must make unindexed sort behavior as + * consistent as possible with indexed sort behavior. As such, we must only consider index + * keys that we would encounter if we were answering the query using the sort-providing + * index. * - * Preconditions: For each field in 'pattern', all inputs in the child must handle a - * getFieldDotted for that field. + * Populates _hasBounds and _bounds. */ - class SortStage : public PlanStage { - public: - SortStage(const SortStageParams& params, - WorkingSet* ws, - PlanStage* child); + void getBoundsForSort(const BSONObj& queryObj, const BSONObj& sortObj); - virtual ~SortStage(); + // Not owned by us + const Collection* _collection; - virtual bool isEOF(); - virtual StageState work(WorkingSetID* out); + // The object that we use to call woCompare on our resulting key. Is equal to _rawSortSpec + // unless we have some $meta expressions. Each $meta expression has a default sort order. + BSONObj _comparatorObj; - virtual void saveState(); - virtual void restoreState(OperationContext* opCtx); - virtual void invalidate(OperationContext* txn, const RecordId& dl, InvalidationType type); + // The raw object in .sort() + BSONObj _rawSortSpec; - virtual std::vector<PlanStage*> getChildren() const; + // The sort pattern with any non-Btree sort pulled out. + BSONObj _btreeObj; - virtual StageType stageType() const { return STAGE_SORT; } + // If we're not sorting with a $meta value we can short-cut some work. + bool _sortHasMeta; - PlanStageStats* getStats(); + // True if the bounds are valid. + bool _hasBounds; - virtual const CommonStats* getCommonStats() const; + // The bounds generated from the query we're sorting. + IndexBounds _bounds; - virtual const SpecificStats* getSpecificStats() const; + // Helper to extract sorting keys from documents. + std::unique_ptr<BtreeKeyGenerator> _keyGen; - static const char* kStageType; + // Helper to filter keys, ensuring keys generated with _keyGen are within _bounds. + std::unique_ptr<IndexBoundsChecker> _boundsChecker; +}; - private: +/** + * Sorts the input received from the child according to the sort pattern provided. + * + * Preconditions: For each field in 'pattern', all inputs in the child must handle a + * getFieldDotted for that field. + */ +class SortStage : public PlanStage { +public: + SortStage(const SortStageParams& params, WorkingSet* ws, PlanStage* child); + + virtual ~SortStage(); - // - // Query Stage - // + virtual bool isEOF(); + virtual StageState work(WorkingSetID* out); - // Not owned by us. - const Collection* _collection; + virtual void saveState(); + virtual void restoreState(OperationContext* opCtx); + virtual void invalidate(OperationContext* txn, const RecordId& dl, InvalidationType type); - // Not owned by us. - WorkingSet* _ws; + virtual std::vector<PlanStage*> getChildren() const; - // Where we're reading data to sort from. - std::unique_ptr<PlanStage> _child; + virtual StageType stageType() const { + return STAGE_SORT; + } - // The raw sort _pattern as expressed by the user - BSONObj _pattern; + PlanStageStats* getStats(); - // The raw query as expressed by the user - BSONObj _query; + virtual const CommonStats* getCommonStats() const; - // Equal to 0 for no limit. - size_t _limit; + virtual const SpecificStats* getSpecificStats() const; - // - // Sort key generation - // - std::unique_ptr<SortStageKeyGenerator> _sortKeyGen; + static const char* kStageType; - // - // Data storage - // +private: + // + // Query Stage + // - // Have we sorted our data? If so, we can access _resultIterator. If not, - // we're still populating _data. - bool _sorted; + // Not owned by us. + const Collection* _collection; - // Collection of working set members to sort with their respective sort key. - struct SortableDataItem { - WorkingSetID wsid; - BSONObj sortKey; - // Since we must replicate the behavior of a covered sort as much as possible we use the - // RecordId to break sortKey ties. - // See sorta.js. - RecordId loc; - }; + // Not owned by us. + WorkingSet* _ws; - // Comparison object for data buffers (vector and set). - // Items are compared on (sortKey, loc). This is also how the items are - // ordered in the indices. - // Keys are compared using BSONObj::woCompare() with RecordId as a tie-breaker. - struct WorkingSetComparator { - explicit WorkingSetComparator(BSONObj p); + // Where we're reading data to sort from. + std::unique_ptr<PlanStage> _child; - bool operator()(const SortableDataItem& lhs, const SortableDataItem& rhs) const; + // The raw sort _pattern as expressed by the user + BSONObj _pattern; - BSONObj pattern; - }; + // The raw query as expressed by the user + BSONObj _query; - /** - * Inserts one item into data buffer (vector or set). - * If limit is exceeded, remove item with lowest key. - */ - void addToBuffer(const SortableDataItem& item); + // Equal to 0 for no limit. + size_t _limit; - /** - * Sorts data buffer. - * Assumes no more items will be added to buffer. - * If data is stored in set, copy set - * contents to vector and clear set. - */ - void sortBuffer(); + // + // Sort key generation + // + std::unique_ptr<SortStageKeyGenerator> _sortKeyGen; - // Comparator for data buffer - // Initialization follows sort key generator - std::unique_ptr<WorkingSetComparator> _sortKeyComparator; + // + // Data storage + // + + // Have we sorted our data? If so, we can access _resultIterator. If not, + // we're still populating _data. + bool _sorted; + + // Collection of working set members to sort with their respective sort key. + struct SortableDataItem { + WorkingSetID wsid; + BSONObj sortKey; + // Since we must replicate the behavior of a covered sort as much as possible we use the + // RecordId to break sortKey ties. + // See sorta.js. + RecordId loc; + }; - // The data we buffer and sort. - // _data will contain sorted data when all data is gathered - // and sorted. - // When _limit is greater than 1 and not all data has been gathered from child stage, - // _dataSet is used instead to maintain an ordered set of the incomplete data set. - // When the data set is complete, we copy the items from _dataSet to _data which will - // be used to provide the results of this stage through _resultIterator. - std::vector<SortableDataItem> _data; - typedef std::set<SortableDataItem, WorkingSetComparator> SortableDataItemSet; - std::unique_ptr<SortableDataItemSet> _dataSet; + // Comparison object for data buffers (vector and set). + // Items are compared on (sortKey, loc). This is also how the items are + // ordered in the indices. + // Keys are compared using BSONObj::woCompare() with RecordId as a tie-breaker. + struct WorkingSetComparator { + explicit WorkingSetComparator(BSONObj p); - // Iterates through _data post-sort returning it. - std::vector<SortableDataItem>::iterator _resultIterator; + bool operator()(const SortableDataItem& lhs, const SortableDataItem& rhs) const; - // We buffer a lot of data and we want to look it up by RecordId quickly upon invalidation. - typedef unordered_map<RecordId, WorkingSetID, RecordId::Hasher> DataMap; - DataMap _wsidByDiskLoc; - - // - // Stats - // - - CommonStats _commonStats; - SortStats _specificStats; - - // The usage in bytes of all buffered data that we're sorting. - size_t _memUsage; + BSONObj pattern; }; + /** + * Inserts one item into data buffer (vector or set). + * If limit is exceeded, remove item with lowest key. + */ + void addToBuffer(const SortableDataItem& item); + + /** + * Sorts data buffer. + * Assumes no more items will be added to buffer. + * If data is stored in set, copy set + * contents to vector and clear set. + */ + void sortBuffer(); + + // Comparator for data buffer + // Initialization follows sort key generator + std::unique_ptr<WorkingSetComparator> _sortKeyComparator; + + // The data we buffer and sort. + // _data will contain sorted data when all data is gathered + // and sorted. + // When _limit is greater than 1 and not all data has been gathered from child stage, + // _dataSet is used instead to maintain an ordered set of the incomplete data set. + // When the data set is complete, we copy the items from _dataSet to _data which will + // be used to provide the results of this stage through _resultIterator. + std::vector<SortableDataItem> _data; + typedef std::set<SortableDataItem, WorkingSetComparator> SortableDataItemSet; + std::unique_ptr<SortableDataItemSet> _dataSet; + + // Iterates through _data post-sort returning it. + std::vector<SortableDataItem>::iterator _resultIterator; + + // We buffer a lot of data and we want to look it up by RecordId quickly upon invalidation. + typedef unordered_map<RecordId, WorkingSetID, RecordId::Hasher> DataMap; + DataMap _wsidByDiskLoc; + + // + // Stats + // + + CommonStats _commonStats; + SortStats _specificStats; + + // The usage in bytes of all buffered data that we're sorting. + size_t _memUsage; +}; + } // namespace mongo |