diff options
author | Mathias Stearn <mathias@10gen.com> | 2014-01-16 15:38:27 -0500 |
---|---|---|
committer | Mathias Stearn <mathias@10gen.com> | 2014-01-21 12:55:49 -0500 |
commit | 51e761f1f76581f6c43cd81cb132c3f5c39c12d3 (patch) | |
tree | 526a0bd3377441c8acb1417110c7bcc4f5e5972e | |
parent | 278537043b7f6149eedc6392bbaab4caa5820ab2 (diff) | |
download | mongo-51e761f1f76581f6c43cd81cb132c3f5c39c12d3.tar.gz |
SERVER-6851 Remove old external sort implementation
Also removes HLMutex as it was only used by the old implementation.
-rw-r--r-- | src/mongo/db/catalog/index_create.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/d_concurrency.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/d_concurrency.h | 11 | ||||
-rw-r--r-- | src/mongo/db/extsort.cpp | 340 | ||||
-rw-r--r-- | src/mongo/db/extsort.h | 123 | ||||
-rw-r--r-- | src/mongo/db/index/btree_based_access_method.cpp | 2 |
6 files changed, 12 insertions, 471 deletions
diff --git a/src/mongo/db/catalog/index_create.cpp b/src/mongo/db/catalog/index_create.cpp index 05ba18599f6..59528dd0cd8 100644 --- a/src/mongo/db/catalog/index_create.cpp +++ b/src/mongo/db/catalog/index_create.cpp @@ -35,6 +35,7 @@ #include "mongo/db/background.h" #include "mongo/db/structure/btree/btreebuilder.h" #include "mongo/db/clientcursor.h" +#include "mongo/db/curop.h" #include "mongo/db/extsort.h" #include "mongo/db/structure/catalog/index_details.h" #include "mongo/db/kill_current_op.h" @@ -47,6 +48,7 @@ #include "mongo/db/repl/rs.h" #include "mongo/db/catalog/collection.h" #include "mongo/util/processinfo.h" +#include "mongo/util/progress_meter.h" namespace mongo { diff --git a/src/mongo/db/d_concurrency.cpp b/src/mongo/db/d_concurrency.cpp index dc4d3ebada9..32e3c6326a3 100644 --- a/src/mongo/db/d_concurrency.cpp +++ b/src/mongo/db/d_concurrency.cpp @@ -84,11 +84,6 @@ namespace mongo { void releasingWriteLock(); } - // e.g. externalobjsortmutex uses hlmutex as it can be locked for very long times - // todo : report HLMutex status in db.currentOp() output - // perhaps move this elsewhere as this could be used in mongos and this file is for mongod - HLMutex::HLMutex(const char *name) : SimpleMutex(name) { } - /* dbname->lock Currently these are never deleted - will linger if db was closed. (that should be fine.) We don't put the lock inside the Database object as those can come and go with open and diff --git a/src/mongo/db/d_concurrency.h b/src/mongo/db/d_concurrency.h index c7261193ba2..50760662ced 100644 --- a/src/mongo/db/d_concurrency.h +++ b/src/mongo/db/d_concurrency.h @@ -248,15 +248,4 @@ namespace mongo { ~writelocktry(); bool got() const { return _got; } }; - - /** a mutex, but reported in curop() - thus a "high level" (HL) one - some overhead so we don't use this for everything. the externalobjsort mutex - uses this, as it can be held for eons. implementation still needed. 
*/ - class HLMutex : public SimpleMutex { - LockStat ls; - public: - HLMutex(const char *name); - }; - - } diff --git a/src/mongo/db/extsort.cpp b/src/mongo/db/extsort.cpp index e4aeb5f53ca..a25888c692e 100644 --- a/src/mongo/db/extsort.cpp +++ b/src/mongo/db/extsort.cpp @@ -28,36 +28,22 @@ * it in the license file. */ -#include "mongo/pch.h" +#include "mongo/platform/basic.h" #include "mongo/db/extsort.h" -#if defined(_WIN32) -# include <io.h> -#endif - -#include <boost/filesystem/convenience.hpp> -#include <boost/filesystem/operations.hpp> -#include <fcntl.h> -#include <fstream> -#include <sys/stat.h> -#include <sys/types.h> - #include "mongo/db/kill_current_op.h" #include "mongo/db/storage_options.h" -#include "mongo/platform/posix_fadvise.h" -#include "mongo/util/file.h" -#if MONGO_USE_NEW_SORTER namespace mongo { namespace { - class OldExtSortComparator { + class ComparatorWithInterruptCheck { public: typedef pair<BSONObj, DiskLoc> Data; - OldExtSortComparator(const ExternalSortComparison* comp, - boost::shared_ptr<const bool> mayInterrupt) + ComparatorWithInterruptCheck(const ExternalSortComparison* comp, + boost::shared_ptr<const bool> mayInterrupt) : _comp(comp) , _mayInterrupt(mayInterrupt) {} @@ -83,323 +69,9 @@ namespace mongo { SortOptions().TempDir(storageGlobalParams.dbpath + "/_tmp") .ExtSortAllowed() .MaxMemoryUsageBytes(maxFileSize), - OldExtSortComparator(comp, _mayInterrupt))) + ComparatorWithInterruptCheck(comp, _mayInterrupt))) {} } #include "mongo/db/sorter/sorter.cpp" -MONGO_CREATE_SORTER(mongo::BSONObj, mongo::DiskLoc, mongo::OldExtSortComparator); - -#else - -namespace mongo { - HLMutex BSONObjExternalSorter::_extSortMutex("s"); - bool BSONObjExternalSorter::extSortMayInterrupt( false ); - unsigned long long BSONObjExternalSorter::_compares = 0; - unsigned long long BSONObjExternalSorter::_uniqueNumber = 0; - const ExternalSortComparison* BSONObjExternalSorter::staticExtSortCmp = NULL; - static SimpleMutex _uniqueNumberMutex( 
"uniqueNumberMutex" ); - - /*static*/ - int BSONObjExternalSorter::_compare(const ExternalSortComparison* cmp, - const ExternalSortDatum& l, const ExternalSortDatum& r) { - _compares++; - return cmp->compare(l, r); - } - - /*static*/ - int BSONObjExternalSorter::extSortComp( const void *lv, const void *rv ) { - DEV RARELY { - _extSortMutex.dassertLocked(); // must be as we use a global var - } -#ifndef __sunos__ - // Some solaris gnu qsort implementations do not support callback exceptions. - RARELY killCurrentOp.checkForInterrupt(!extSortMayInterrupt); -#endif - ExternalSortDatum * l = (ExternalSortDatum*)lv; - ExternalSortDatum * r = (ExternalSortDatum*)rv; - return _compare(staticExtSortCmp, *l, *r); - }; - - BSONObjExternalSorter::BSONObjExternalSorter(const ExternalSortComparison* cmp, - long maxFileSize ) - : _cmp(cmp), _maxFilesize(maxFileSize), _arraySize(1000000), _cur(0), _curSizeSoFar(0), - _sorted(0) { - - stringstream rootpath; - rootpath << storageGlobalParams.dbpath; - if (storageGlobalParams.dbpath[storageGlobalParams.dbpath.size()-1] != '/') - rootpath << "/"; - - unsigned long long thisUniqueNumber; - { - SimpleMutex::scoped_lock lk(_uniqueNumberMutex); - thisUniqueNumber = _uniqueNumber; - ++_uniqueNumber; - } - rootpath << "_tmp/esort." << time(0) << "." 
<< thisUniqueNumber << "/"; - _root = rootpath.str(); - - LOG(1) << "external sort root: " << _root.string() << endl; - - create_directories( _root ); - _compares = 0; - } - - BSONObjExternalSorter::~BSONObjExternalSorter() { - if ( _cur ) { - delete _cur; - _cur = 0; - } - unsigned long removed = remove_all( _root ); - wassert( removed == 1 + _files.size() ); - } - - void BSONObjExternalSorter::_sortInMem( bool mayInterrupt ) { - // extSortComp needs to use glpbals - // qsort_r only seems available on bsd, which is what i really want to use - HLMutex::scoped_lock lk(_extSortMutex); - extSortMayInterrupt = mayInterrupt; - staticExtSortCmp = _cmp; - _cur->sort( BSONObjExternalSorter::extSortComp ); - } - - void BSONObjExternalSorter::sort( bool mayInterrupt ) { - uassert( 10048 , "already sorted" , ! _sorted ); - - _sorted = true; - - if ( _cur && _files.size() == 0 ) { - _sortInMem( mayInterrupt ); - LOG(1) << "\t\t not using file. size:" << _curSizeSoFar << " _compares:" - << _compares << endl; - return; - } - - if ( _cur ) { - finishMap( mayInterrupt ); - } - - if ( _cur ) { - delete _cur; - _cur = 0; - } - - if ( _files.size() == 0 ) - return; - } - - void BSONObjExternalSorter::add( const BSONObj& o, const DiskLoc& loc, bool mayInterrupt ) { - uassert( 10049 , "sorted already" , ! _sorted ); - - if ( ! _cur ) { - _cur = new InMemory( _arraySize ); - } - - ExternalSortDatum& d = _cur->getNext(); - d.first = o.getOwned(); - d.second = loc; - - long size = o.objsize(); - _curSizeSoFar += size + sizeof( DiskLoc ) + sizeof( BSONObj ); - - if ( _cur->hasSpace() == false || _curSizeSoFar > _maxFilesize ) { - finishMap( mayInterrupt ); - LOG(1) << "finishing map" << endl; - } - } - - void BSONObjExternalSorter::finishMap( bool mayInterrupt ) { - uassert( 10050 , "bad" , _cur ); - - _curSizeSoFar = 0; - if ( _cur->size() == 0 ) - return; - - _sortInMem( mayInterrupt ); - - stringstream ss; - ss << _root.string() << "/file." 
<< _files.size(); - string file = ss.str(); - - // todo: it may make sense to fadvise that this not be cached so that building the index - // doesn't eject other things the db is using from the file system cache. while we will - // soon be reading this back, if it fit in ram, there wouldn't have been a need for an - // external sort in the first place. - - ofstream out; - out.open( file.c_str() , ios_base::out | ios_base::binary ); - assertStreamGood( 10051 , (string)"couldn't open file: " + file , out ); - - int num = 0; - for ( InMemory::iterator i=_cur->begin(); i != _cur->end(); ++i ) { - ExternalSortDatum p = *i; - out.write( p.first.objdata() , p.first.objsize() ); - out.write( (char*)(&p.second) , sizeof( DiskLoc ) ); - num++; - } - - _cur->clear(); - - _files.push_back( file ); - out.close(); - - LOG(2) << "Added file: " << file << " with " << num << "objects for external sort" << endl; - } - - // --------------------------------- - - BSONObjExternalSorter::Iterator::Iterator( BSONObjExternalSorter * sorter ) : - _cmp( sorter->_cmp), _in(0) { - - for ( list<string>::iterator i=sorter->_files.begin(); i!=sorter->_files.end(); i++ ) { - _files.push_back( new FileIterator( *i ) ); - _stash.push_back( pair<ExternalSortDatum, bool>( - ExternalSortDatum( BSONObj() , DiskLoc() ) , false ) ); - } - - if ( _files.size() == 0 && sorter->_cur ) { - _in = sorter->_cur; - _it = sorter->_cur->begin(); - } - } - - BSONObjExternalSorter::Iterator::~Iterator() { - for ( vector<FileIterator*>::iterator i=_files.begin(); i!=_files.end(); i++ ) - delete *i; - _files.clear(); - } - - bool BSONObjExternalSorter::Iterator::more() { - if ( _in ) - return _it != _in->end(); - - for ( vector<FileIterator*>::iterator i=_files.begin(); i!=_files.end(); i++ ) - if ( (*i)->more() ) - return true; - for (vector< pair<ExternalSortDatum, bool> >::iterator i=_stash.begin(); - i!=_stash.end(); i++ ) { - if ( i->second ) { return true; } - } - return false; - } - - ExternalSortDatum 
BSONObjExternalSorter::Iterator::next() { - if ( _in ) { - ExternalSortDatum& d = *_it; - ++_it; - return d; - } - - ExternalSortDatum best; - int slot = -1; - - for ( unsigned i=0; i<_stash.size(); i++ ) { - if ( ! _stash[i].second ) { - if ( _files[i]->more() ) - _stash[i] = pair<ExternalSortDatum,bool>( _files[i]->next() , true ); - else - continue; - } - - if ( slot == -1 || _cmp( best , _stash[i].first ) == 0 ) { - best = _stash[i].first; - slot = i; - } - } - - verify( slot >= 0 ); - _stash[slot].second = false; - - return best; - } - - // ----------------------------------- - - BSONObjExternalSorter::FileIterator::FileIterator( const std::string& file ) { -#ifdef _WIN32 - _file = ::_open( file.c_str(), _O_BINARY | _O_RDWR | _O_CREAT , _S_IREAD | _S_IWRITE ); -#else -#ifndef O_NOATIME -#define O_NOATIME 0 -#endif - _file = ::open( file.c_str(), O_CREAT | O_RDWR | O_NOATIME , S_IRUSR | S_IWUSR ); -#endif - massert( 16392, - str::stream() << "FileIterator can't open file: " - << file << errnoWithDescription(), - _file >= 0 ); - -#ifdef POSIX_FADV_SEQUENTIAL - int err = posix_fadvise(_file, 0, 0, POSIX_FADV_SEQUENTIAL ); - if ( err ) - log() << "posix_fadvise failed: " << err << endl; -#endif - - _length = (unsigned long long)boost::filesystem::file_size( file ); - _readSoFar = 0; - } - BSONObjExternalSorter::FileIterator::~FileIterator() { - if ( _file >= 0 ) { -#ifdef _WIN32 - _close( _file ); -#else - ::close( _file ); -#endif - } - } - - bool BSONObjExternalSorter::FileIterator::more() { - return _readSoFar < _length; - } - - - bool BSONObjExternalSorter::FileIterator::_read( char* buf, long long count ) { - long long total = 0; - while ( total < count ) { -#ifdef _WIN32 - long long now = ::_read( _file, buf, count ); -#else - long long now = ::read( _file, buf, count ); -#endif - if ( now < 0 ) { - log() << "read failed for BSONObjExternalSorter " << errnoWithDescription() << endl; - return false; - } - if ( now == 0 ) { - return false; - } - total += now; 
- buf += now; - } - return true; - } - - ExternalSortDatum BSONObjExternalSorter::FileIterator::next() { - // read BSONObj - - int size; - verify( _read( reinterpret_cast<char*>(&size), 4 ) ); - char* buf = reinterpret_cast<char*>( malloc( sizeof(unsigned) + size ) ); - verify( buf ); - - memset( buf, 0, 4 ); // for Holder - memcpy( buf+sizeof(unsigned), reinterpret_cast<char*>(&size), sizeof(int) ); // size of doc - if ( ! _read( buf + sizeof(unsigned) + sizeof(int), size-sizeof(int) ) ) { // doc content - free( buf ); - msgasserted( 16394, std::string("reading doc for external sort failed:") + errnoWithDescription() ); - } - - // read DiskLoc - DiskLoc l; - if ( ! _read( reinterpret_cast<char*>(&l), 8 ) ) { - free( buf ); - msgasserted( 16393, std::string("reading DiskLoc for external sort failed") + errnoWithDescription() ); - } - _readSoFar += 8 + size; - - BSONObj::Holder* h = reinterpret_cast<BSONObj::Holder*>(buf); - return ExternalSortDatum( BSONObj(h), l ); - } -} -#endif +MONGO_CREATE_SORTER(mongo::BSONObj, mongo::DiskLoc, mongo::ComparatorWithInterruptCheck); diff --git a/src/mongo/db/extsort.h b/src/mongo/db/extsort.h index 9471e2f7876..73a3f35f06a 100644 --- a/src/mongo/db/extsort.h +++ b/src/mongo/db/extsort.h @@ -30,18 +30,9 @@ #pragma once -#include "mongo/pch.h" - -#include "mongo/db/structure/catalog/index_details.h" #include "mongo/db/jsobj.h" -#include "mongo/db/curop-inl.h" -#include "mongo/util/array.h" - -#define MONGO_USE_NEW_SORTER 1 - -#if MONGO_USE_NEW_SORTER -# include "mongo/db/sorter/sorter.h" -#endif +#include "mongo/db/diskloc.h" +#include "mongo/db/sorter/sorter.h" namespace mongo { @@ -57,7 +48,6 @@ namespace mongo { virtual int compare(const ExternalSortDatum& l, const ExternalSortDatum& r) const = 0; }; -#if MONGO_USE_NEW_SORTER // TODO This class will probably disappear in the future or be replaced with a typedef class BSONObjExternalSorter : boost::noncopyable { public: @@ -82,113 +72,4 @@ namespace mongo { shared_ptr<bool> 
_mayInterrupt; scoped_ptr<Sorter<BSONObj, DiskLoc> > _sorter; }; -#else - /** - for external (disk) sorting by BSONObj and attaching a value - */ - class BSONObjExternalSorter : boost::noncopyable { - public: - BSONObjExternalSorter(const ExternalSortComparison* cmp, - long maxFileSize = 1024 * 1024 * 100 ); - ~BSONObjExternalSorter(); - - private: - static HLMutex _extSortMutex; - - static int _compare(const ExternalSortComparison* cmp, const ExternalSortDatum& l, - const ExternalSortDatum& r); - - class MyCmp { - public: - MyCmp(const ExternalSortComparison* cmp) : _cmp(cmp) { } - bool operator()( const ExternalSortDatum &l, const ExternalSortDatum &r ) const { - return _cmp->compare(l, r) < 0; - }; - private: - const ExternalSortComparison* _cmp; - }; - - static bool extSortMayInterrupt; - static int extSortComp( const void *lv, const void *rv ); - static const ExternalSortComparison* staticExtSortCmp; - - class FileIterator : boost::noncopyable { - public: - FileIterator( const std::string& file ); - ~FileIterator(); - bool more(); - ExternalSortDatum next(); - private: - bool _read( char* buf, long long count ); - - int _file; - unsigned long long _length; - unsigned long long _readSoFar; - }; - - public: - - typedef FastArray<ExternalSortDatum> InMemory; - - class Iterator : boost::noncopyable { - public: - - Iterator( BSONObjExternalSorter * sorter ); - ~Iterator(); - bool more(); - ExternalSortDatum next(); - - private: - MyCmp _cmp; - vector<FileIterator*> _files; - vector< pair<ExternalSortDatum,bool> > _stash; - - InMemory * _in; - InMemory::iterator _it; - - }; - - void add( const BSONObj& o, const DiskLoc& loc, bool mayInterrupt ); - - /* call after adding values, and before fetching the iterator */ - void sort( bool mayInterrupt ); - - auto_ptr<Iterator> iterator() { - uassert( 10052 , "not sorted" , _sorted ); - return auto_ptr<Iterator>( new Iterator( this ) ); - } - - int numFiles() { - return _files.size(); - } - - long getCurSizeSoFar() { return 
_curSizeSoFar; } - - void hintNumObjects( long long numObjects ) { - if ( numObjects < _arraySize ) - _arraySize = (int)(numObjects + 100); - } - - private: - - void _sortInMem( bool mayInterrupt ); - - void sort( const std::string& file ); - void finishMap( bool mayInterrupt ); - - const ExternalSortComparison* _cmp; - long _maxFilesize; - boost::filesystem::path _root; - - int _arraySize; - InMemory * _cur; - long _curSizeSoFar; - - list<string> _files; - bool _sorted; - - static unsigned long long _compares; - static unsigned long long _uniqueNumber; - }; -#endif } diff --git a/src/mongo/db/index/btree_based_access_method.cpp b/src/mongo/db/index/btree_based_access_method.cpp index e4fd7992009..150e1e35a47 100644 --- a/src/mongo/db/index/btree_based_access_method.cpp +++ b/src/mongo/db/index/btree_based_access_method.cpp @@ -31,6 +31,7 @@ #include <vector> #include "mongo/base/status.h" +#include "mongo/db/curop.h" #include "mongo/db/extsort.h" #include "mongo/db/index/btree_index_cursor.h" #include "mongo/db/index/btree_interface.h" @@ -42,6 +43,7 @@ #include "mongo/db/repl/rs.h" #include "mongo/db/sort_phase_one.h" #include "mongo/db/structure/btree/btreebuilder.h" +#include "mongo/util/progress_meter.h" namespace mongo { |