// extsort.h /** * Copyright (C) 2008 10gen Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License, version 3, * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ #pragma once #include "mongo/pch.h" #include "mongo/db/index.h" #include "mongo/db/jsobj.h" #include "mongo/db/curop-inl.h" #include "mongo/util/array.h" #define MONGO_USE_NEW_SORTER 1 #if MONGO_USE_NEW_SORTER # include "mongo/db/sorter/sorter.h" #endif namespace mongo { typedef pair ExternalSortDatum; /** * To external sort, you provide a pointer to an implementation of this class. * The compare function follows the usual -1, 0, 1 semantics. */ class ExternalSortComparison { public: virtual ~ExternalSortComparison() { } virtual int compare(const ExternalSortDatum& l, const ExternalSortDatum& r) const = 0; }; #if MONGO_USE_NEW_SORTER // TODO This class will probably disappear in the future or be replaced with a typedef class BSONObjExternalSorter : boost::noncopyable { public: typedef pair Data; typedef SortIteratorInterface Iterator; BSONObjExternalSorter(const ExternalSortComparison* comp, long maxFileSize=100*1024*1024); void add( const BSONObj& o, const DiskLoc& loc, bool mayInterrupt ) { *_mayInterrupt = mayInterrupt; _sorter->add(o.getOwned(), loc); } auto_ptr iterator() { return auto_ptr(_sorter->done()); } void sort( bool mayInterrupt ) { *_mayInterrupt = mayInterrupt; } int numFiles() { return _sorter->numFiles(); } long getCurSizeSoFar() { return _sorter->memUsed(); } void hintNumObjects(long long) {} // unused private: shared_ptr _mayInterrupt; scoped_ptr > _sorter; }; #else /** for external (disk) sorting by BSONObj and attaching a value */ class BSONObjExternalSorter : boost::noncopyable { public: BSONObjExternalSorter(const ExternalSortComparison* cmp, long maxFileSize = 1024 * 1024 * 100 ); ~BSONObjExternalSorter(); private: static HLMutex _extSortMutex; static int _compare(const ExternalSortComparison* cmp, const ExternalSortDatum& l, const ExternalSortDatum& r); class MyCmp { public: MyCmp(const ExternalSortComparison* cmp) : _cmp(cmp) { } bool operator()( const ExternalSortDatum &l, const ExternalSortDatum &r ) const { return _cmp->compare(l, r) < 0; }; private: const ExternalSortComparison* _cmp; }; static bool extSortMayInterrupt; static int extSortComp( const void *lv, const void *rv ); static const ExternalSortComparison* staticExtSortCmp; class FileIterator : boost::noncopyable { public: FileIterator( const std::string& file ); ~FileIterator(); bool more(); ExternalSortDatum next(); private: bool _read( char* buf, long long count ); int _file; unsigned long long _length; unsigned long long _readSoFar; }; public: typedef FastArray InMemory; class Iterator : boost::noncopyable { public: Iterator( BSONObjExternalSorter * sorter ); ~Iterator(); bool more(); ExternalSortDatum next(); private: MyCmp _cmp; vector _files; vector< pair > _stash; InMemory * _in; InMemory::iterator _it; }; void add( const BSONObj& o, const DiskLoc& loc, bool mayInterrupt ); /* call after adding values, and before fetching the iterator */ void sort( bool mayInterrupt ); auto_ptr iterator() { uassert( 10052 , "not sorted" , _sorted ); return auto_ptr( new Iterator( this ) ); } int numFiles() { return _files.size(); } long getCurSizeSoFar() { return _curSizeSoFar; } void hintNumObjects( long long numObjects ) { if ( numObjects < _arraySize ) _arraySize = (int)(numObjects + 100); } private: void _sortInMem( bool mayInterrupt ); void sort( const std::string& file ); void finishMap( bool mayInterrupt ); const ExternalSortComparison* _cmp; long _maxFilesize; boost::filesystem::path _root; int _arraySize; InMemory * _cur; long _curSizeSoFar; list _files; bool _sorted; static unsigned long long _compares; static unsigned long long _uniqueNumber; }; #endif }