summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMathias Stearn <mathias@10gen.com>2014-01-16 15:38:27 -0500
committerMathias Stearn <mathias@10gen.com>2014-01-21 12:55:49 -0500
commit51e761f1f76581f6c43cd81cb132c3f5c39c12d3 (patch)
tree526a0bd3377441c8acb1417110c7bcc4f5e5972e
parent278537043b7f6149eedc6392bbaab4caa5820ab2 (diff)
downloadmongo-51e761f1f76581f6c43cd81cb132c3f5c39c12d3.tar.gz
SERVER-6851 Remove old external sort implementation
Also removes HLMutex, as it was only used by the old implementation.
-rw-r--r--src/mongo/db/catalog/index_create.cpp2
-rw-r--r--src/mongo/db/d_concurrency.cpp5
-rw-r--r--src/mongo/db/d_concurrency.h11
-rw-r--r--src/mongo/db/extsort.cpp340
-rw-r--r--src/mongo/db/extsort.h123
-rw-r--r--src/mongo/db/index/btree_based_access_method.cpp2
6 files changed, 12 insertions, 471 deletions
diff --git a/src/mongo/db/catalog/index_create.cpp b/src/mongo/db/catalog/index_create.cpp
index 05ba18599f6..59528dd0cd8 100644
--- a/src/mongo/db/catalog/index_create.cpp
+++ b/src/mongo/db/catalog/index_create.cpp
@@ -35,6 +35,7 @@
#include "mongo/db/background.h"
#include "mongo/db/structure/btree/btreebuilder.h"
#include "mongo/db/clientcursor.h"
+#include "mongo/db/curop.h"
#include "mongo/db/extsort.h"
#include "mongo/db/structure/catalog/index_details.h"
#include "mongo/db/kill_current_op.h"
@@ -47,6 +48,7 @@
#include "mongo/db/repl/rs.h"
#include "mongo/db/catalog/collection.h"
#include "mongo/util/processinfo.h"
+#include "mongo/util/progress_meter.h"
namespace mongo {
diff --git a/src/mongo/db/d_concurrency.cpp b/src/mongo/db/d_concurrency.cpp
index dc4d3ebada9..32e3c6326a3 100644
--- a/src/mongo/db/d_concurrency.cpp
+++ b/src/mongo/db/d_concurrency.cpp
@@ -84,11 +84,6 @@ namespace mongo {
void releasingWriteLock();
}
- // e.g. externalobjsortmutex uses hlmutex as it can be locked for very long times
- // todo : report HLMutex status in db.currentOp() output
- // perhaps move this elsewhere as this could be used in mongos and this file is for mongod
- HLMutex::HLMutex(const char *name) : SimpleMutex(name) { }
-
/* dbname->lock
Currently these are never deleted - will linger if db was closed. (that should be fine.)
We don't put the lock inside the Database object as those can come and go with open and
diff --git a/src/mongo/db/d_concurrency.h b/src/mongo/db/d_concurrency.h
index c7261193ba2..50760662ced 100644
--- a/src/mongo/db/d_concurrency.h
+++ b/src/mongo/db/d_concurrency.h
@@ -248,15 +248,4 @@ namespace mongo {
~writelocktry();
bool got() const { return _got; }
};
-
- /** a mutex, but reported in curop() - thus a "high level" (HL) one
- some overhead so we don't use this for everything. the externalobjsort mutex
- uses this, as it can be held for eons. implementation still needed. */
- class HLMutex : public SimpleMutex {
- LockStat ls;
- public:
- HLMutex(const char *name);
- };
-
-
}
diff --git a/src/mongo/db/extsort.cpp b/src/mongo/db/extsort.cpp
index e4aeb5f53ca..a25888c692e 100644
--- a/src/mongo/db/extsort.cpp
+++ b/src/mongo/db/extsort.cpp
@@ -28,36 +28,22 @@
* it in the license file.
*/
-#include "mongo/pch.h"
+#include "mongo/platform/basic.h"
#include "mongo/db/extsort.h"
-#if defined(_WIN32)
-# include <io.h>
-#endif
-
-#include <boost/filesystem/convenience.hpp>
-#include <boost/filesystem/operations.hpp>
-#include <fcntl.h>
-#include <fstream>
-#include <sys/stat.h>
-#include <sys/types.h>
-
#include "mongo/db/kill_current_op.h"
#include "mongo/db/storage_options.h"
-#include "mongo/platform/posix_fadvise.h"
-#include "mongo/util/file.h"
-#if MONGO_USE_NEW_SORTER
namespace mongo {
namespace {
- class OldExtSortComparator {
+ class ComparatorWithInterruptCheck {
public:
typedef pair<BSONObj, DiskLoc> Data;
- OldExtSortComparator(const ExternalSortComparison* comp,
- boost::shared_ptr<const bool> mayInterrupt)
+ ComparatorWithInterruptCheck(const ExternalSortComparison* comp,
+ boost::shared_ptr<const bool> mayInterrupt)
: _comp(comp)
, _mayInterrupt(mayInterrupt)
{}
@@ -83,323 +69,9 @@ namespace mongo {
SortOptions().TempDir(storageGlobalParams.dbpath + "/_tmp")
.ExtSortAllowed()
.MaxMemoryUsageBytes(maxFileSize),
- OldExtSortComparator(comp, _mayInterrupt)))
+ ComparatorWithInterruptCheck(comp, _mayInterrupt)))
{}
}
#include "mongo/db/sorter/sorter.cpp"
-MONGO_CREATE_SORTER(mongo::BSONObj, mongo::DiskLoc, mongo::OldExtSortComparator);
-
-#else
-
-namespace mongo {
- HLMutex BSONObjExternalSorter::_extSortMutex("s");
- bool BSONObjExternalSorter::extSortMayInterrupt( false );
- unsigned long long BSONObjExternalSorter::_compares = 0;
- unsigned long long BSONObjExternalSorter::_uniqueNumber = 0;
- const ExternalSortComparison* BSONObjExternalSorter::staticExtSortCmp = NULL;
- static SimpleMutex _uniqueNumberMutex( "uniqueNumberMutex" );
-
- /*static*/
- int BSONObjExternalSorter::_compare(const ExternalSortComparison* cmp,
- const ExternalSortDatum& l, const ExternalSortDatum& r) {
- _compares++;
- return cmp->compare(l, r);
- }
-
- /*static*/
- int BSONObjExternalSorter::extSortComp( const void *lv, const void *rv ) {
- DEV RARELY {
- _extSortMutex.dassertLocked(); // must be as we use a global var
- }
-#ifndef __sunos__
- // Some solaris gnu qsort implementations do not support callback exceptions.
- RARELY killCurrentOp.checkForInterrupt(!extSortMayInterrupt);
-#endif
- ExternalSortDatum * l = (ExternalSortDatum*)lv;
- ExternalSortDatum * r = (ExternalSortDatum*)rv;
- return _compare(staticExtSortCmp, *l, *r);
- };
-
- BSONObjExternalSorter::BSONObjExternalSorter(const ExternalSortComparison* cmp,
- long maxFileSize )
- : _cmp(cmp), _maxFilesize(maxFileSize), _arraySize(1000000), _cur(0), _curSizeSoFar(0),
- _sorted(0) {
-
- stringstream rootpath;
- rootpath << storageGlobalParams.dbpath;
- if (storageGlobalParams.dbpath[storageGlobalParams.dbpath.size()-1] != '/')
- rootpath << "/";
-
- unsigned long long thisUniqueNumber;
- {
- SimpleMutex::scoped_lock lk(_uniqueNumberMutex);
- thisUniqueNumber = _uniqueNumber;
- ++_uniqueNumber;
- }
- rootpath << "_tmp/esort." << time(0) << "." << thisUniqueNumber << "/";
- _root = rootpath.str();
-
- LOG(1) << "external sort root: " << _root.string() << endl;
-
- create_directories( _root );
- _compares = 0;
- }
-
- BSONObjExternalSorter::~BSONObjExternalSorter() {
- if ( _cur ) {
- delete _cur;
- _cur = 0;
- }
- unsigned long removed = remove_all( _root );
- wassert( removed == 1 + _files.size() );
- }
-
- void BSONObjExternalSorter::_sortInMem( bool mayInterrupt ) {
- // extSortComp needs to use glpbals
- // qsort_r only seems available on bsd, which is what i really want to use
- HLMutex::scoped_lock lk(_extSortMutex);
- extSortMayInterrupt = mayInterrupt;
- staticExtSortCmp = _cmp;
- _cur->sort( BSONObjExternalSorter::extSortComp );
- }
-
- void BSONObjExternalSorter::sort( bool mayInterrupt ) {
- uassert( 10048 , "already sorted" , ! _sorted );
-
- _sorted = true;
-
- if ( _cur && _files.size() == 0 ) {
- _sortInMem( mayInterrupt );
- LOG(1) << "\t\t not using file. size:" << _curSizeSoFar << " _compares:"
- << _compares << endl;
- return;
- }
-
- if ( _cur ) {
- finishMap( mayInterrupt );
- }
-
- if ( _cur ) {
- delete _cur;
- _cur = 0;
- }
-
- if ( _files.size() == 0 )
- return;
- }
-
- void BSONObjExternalSorter::add( const BSONObj& o, const DiskLoc& loc, bool mayInterrupt ) {
- uassert( 10049 , "sorted already" , ! _sorted );
-
- if ( ! _cur ) {
- _cur = new InMemory( _arraySize );
- }
-
- ExternalSortDatum& d = _cur->getNext();
- d.first = o.getOwned();
- d.second = loc;
-
- long size = o.objsize();
- _curSizeSoFar += size + sizeof( DiskLoc ) + sizeof( BSONObj );
-
- if ( _cur->hasSpace() == false || _curSizeSoFar > _maxFilesize ) {
- finishMap( mayInterrupt );
- LOG(1) << "finishing map" << endl;
- }
- }
-
- void BSONObjExternalSorter::finishMap( bool mayInterrupt ) {
- uassert( 10050 , "bad" , _cur );
-
- _curSizeSoFar = 0;
- if ( _cur->size() == 0 )
- return;
-
- _sortInMem( mayInterrupt );
-
- stringstream ss;
- ss << _root.string() << "/file." << _files.size();
- string file = ss.str();
-
- // todo: it may make sense to fadvise that this not be cached so that building the index
- // doesn't eject other things the db is using from the file system cache. while we will
- // soon be reading this back, if it fit in ram, there wouldn't have been a need for an
- // external sort in the first place.
-
- ofstream out;
- out.open( file.c_str() , ios_base::out | ios_base::binary );
- assertStreamGood( 10051 , (string)"couldn't open file: " + file , out );
-
- int num = 0;
- for ( InMemory::iterator i=_cur->begin(); i != _cur->end(); ++i ) {
- ExternalSortDatum p = *i;
- out.write( p.first.objdata() , p.first.objsize() );
- out.write( (char*)(&p.second) , sizeof( DiskLoc ) );
- num++;
- }
-
- _cur->clear();
-
- _files.push_back( file );
- out.close();
-
- LOG(2) << "Added file: " << file << " with " << num << "objects for external sort" << endl;
- }
-
- // ---------------------------------
-
- BSONObjExternalSorter::Iterator::Iterator( BSONObjExternalSorter * sorter ) :
- _cmp( sorter->_cmp), _in(0) {
-
- for ( list<string>::iterator i=sorter->_files.begin(); i!=sorter->_files.end(); i++ ) {
- _files.push_back( new FileIterator( *i ) );
- _stash.push_back( pair<ExternalSortDatum, bool>(
- ExternalSortDatum( BSONObj() , DiskLoc() ) , false ) );
- }
-
- if ( _files.size() == 0 && sorter->_cur ) {
- _in = sorter->_cur;
- _it = sorter->_cur->begin();
- }
- }
-
- BSONObjExternalSorter::Iterator::~Iterator() {
- for ( vector<FileIterator*>::iterator i=_files.begin(); i!=_files.end(); i++ )
- delete *i;
- _files.clear();
- }
-
- bool BSONObjExternalSorter::Iterator::more() {
- if ( _in )
- return _it != _in->end();
-
- for ( vector<FileIterator*>::iterator i=_files.begin(); i!=_files.end(); i++ )
- if ( (*i)->more() )
- return true;
- for (vector< pair<ExternalSortDatum, bool> >::iterator i=_stash.begin();
- i!=_stash.end(); i++ ) {
- if ( i->second ) { return true; }
- }
- return false;
- }
-
- ExternalSortDatum BSONObjExternalSorter::Iterator::next() {
- if ( _in ) {
- ExternalSortDatum& d = *_it;
- ++_it;
- return d;
- }
-
- ExternalSortDatum best;
- int slot = -1;
-
- for ( unsigned i=0; i<_stash.size(); i++ ) {
- if ( ! _stash[i].second ) {
- if ( _files[i]->more() )
- _stash[i] = pair<ExternalSortDatum,bool>( _files[i]->next() , true );
- else
- continue;
- }
-
- if ( slot == -1 || _cmp( best , _stash[i].first ) == 0 ) {
- best = _stash[i].first;
- slot = i;
- }
- }
-
- verify( slot >= 0 );
- _stash[slot].second = false;
-
- return best;
- }
-
- // -----------------------------------
-
- BSONObjExternalSorter::FileIterator::FileIterator( const std::string& file ) {
-#ifdef _WIN32
- _file = ::_open( file.c_str(), _O_BINARY | _O_RDWR | _O_CREAT , _S_IREAD | _S_IWRITE );
-#else
-#ifndef O_NOATIME
-#define O_NOATIME 0
-#endif
- _file = ::open( file.c_str(), O_CREAT | O_RDWR | O_NOATIME , S_IRUSR | S_IWUSR );
-#endif
- massert( 16392,
- str::stream() << "FileIterator can't open file: "
- << file << errnoWithDescription(),
- _file >= 0 );
-
-#ifdef POSIX_FADV_SEQUENTIAL
- int err = posix_fadvise(_file, 0, 0, POSIX_FADV_SEQUENTIAL );
- if ( err )
- log() << "posix_fadvise failed: " << err << endl;
-#endif
-
- _length = (unsigned long long)boost::filesystem::file_size( file );
- _readSoFar = 0;
- }
- BSONObjExternalSorter::FileIterator::~FileIterator() {
- if ( _file >= 0 ) {
-#ifdef _WIN32
- _close( _file );
-#else
- ::close( _file );
-#endif
- }
- }
-
- bool BSONObjExternalSorter::FileIterator::more() {
- return _readSoFar < _length;
- }
-
-
- bool BSONObjExternalSorter::FileIterator::_read( char* buf, long long count ) {
- long long total = 0;
- while ( total < count ) {
-#ifdef _WIN32
- long long now = ::_read( _file, buf, count );
-#else
- long long now = ::read( _file, buf, count );
-#endif
- if ( now < 0 ) {
- log() << "read failed for BSONObjExternalSorter " << errnoWithDescription() << endl;
- return false;
- }
- if ( now == 0 ) {
- return false;
- }
- total += now;
- buf += now;
- }
- return true;
- }
-
- ExternalSortDatum BSONObjExternalSorter::FileIterator::next() {
- // read BSONObj
-
- int size;
- verify( _read( reinterpret_cast<char*>(&size), 4 ) );
- char* buf = reinterpret_cast<char*>( malloc( sizeof(unsigned) + size ) );
- verify( buf );
-
- memset( buf, 0, 4 ); // for Holder
- memcpy( buf+sizeof(unsigned), reinterpret_cast<char*>(&size), sizeof(int) ); // size of doc
- if ( ! _read( buf + sizeof(unsigned) + sizeof(int), size-sizeof(int) ) ) { // doc content
- free( buf );
- msgasserted( 16394, std::string("reading doc for external sort failed:") + errnoWithDescription() );
- }
-
- // read DiskLoc
- DiskLoc l;
- if ( ! _read( reinterpret_cast<char*>(&l), 8 ) ) {
- free( buf );
- msgasserted( 16393, std::string("reading DiskLoc for external sort failed") + errnoWithDescription() );
- }
- _readSoFar += 8 + size;
-
- BSONObj::Holder* h = reinterpret_cast<BSONObj::Holder*>(buf);
- return ExternalSortDatum( BSONObj(h), l );
- }
-}
-#endif
+MONGO_CREATE_SORTER(mongo::BSONObj, mongo::DiskLoc, mongo::ComparatorWithInterruptCheck);
diff --git a/src/mongo/db/extsort.h b/src/mongo/db/extsort.h
index 9471e2f7876..73a3f35f06a 100644
--- a/src/mongo/db/extsort.h
+++ b/src/mongo/db/extsort.h
@@ -30,18 +30,9 @@
#pragma once
-#include "mongo/pch.h"
-
-#include "mongo/db/structure/catalog/index_details.h"
#include "mongo/db/jsobj.h"
-#include "mongo/db/curop-inl.h"
-#include "mongo/util/array.h"
-
-#define MONGO_USE_NEW_SORTER 1
-
-#if MONGO_USE_NEW_SORTER
-# include "mongo/db/sorter/sorter.h"
-#endif
+#include "mongo/db/diskloc.h"
+#include "mongo/db/sorter/sorter.h"
namespace mongo {
@@ -57,7 +48,6 @@ namespace mongo {
virtual int compare(const ExternalSortDatum& l, const ExternalSortDatum& r) const = 0;
};
-#if MONGO_USE_NEW_SORTER
// TODO This class will probably disappear in the future or be replaced with a typedef
class BSONObjExternalSorter : boost::noncopyable {
public:
@@ -82,113 +72,4 @@ namespace mongo {
shared_ptr<bool> _mayInterrupt;
scoped_ptr<Sorter<BSONObj, DiskLoc> > _sorter;
};
-#else
- /**
- for external (disk) sorting by BSONObj and attaching a value
- */
- class BSONObjExternalSorter : boost::noncopyable {
- public:
- BSONObjExternalSorter(const ExternalSortComparison* cmp,
- long maxFileSize = 1024 * 1024 * 100 );
- ~BSONObjExternalSorter();
-
- private:
- static HLMutex _extSortMutex;
-
- static int _compare(const ExternalSortComparison* cmp, const ExternalSortDatum& l,
- const ExternalSortDatum& r);
-
- class MyCmp {
- public:
- MyCmp(const ExternalSortComparison* cmp) : _cmp(cmp) { }
- bool operator()( const ExternalSortDatum &l, const ExternalSortDatum &r ) const {
- return _cmp->compare(l, r) < 0;
- };
- private:
- const ExternalSortComparison* _cmp;
- };
-
- static bool extSortMayInterrupt;
- static int extSortComp( const void *lv, const void *rv );
- static const ExternalSortComparison* staticExtSortCmp;
-
- class FileIterator : boost::noncopyable {
- public:
- FileIterator( const std::string& file );
- ~FileIterator();
- bool more();
- ExternalSortDatum next();
- private:
- bool _read( char* buf, long long count );
-
- int _file;
- unsigned long long _length;
- unsigned long long _readSoFar;
- };
-
- public:
-
- typedef FastArray<ExternalSortDatum> InMemory;
-
- class Iterator : boost::noncopyable {
- public:
-
- Iterator( BSONObjExternalSorter * sorter );
- ~Iterator();
- bool more();
- ExternalSortDatum next();
-
- private:
- MyCmp _cmp;
- vector<FileIterator*> _files;
- vector< pair<ExternalSortDatum,bool> > _stash;
-
- InMemory * _in;
- InMemory::iterator _it;
-
- };
-
- void add( const BSONObj& o, const DiskLoc& loc, bool mayInterrupt );
-
- /* call after adding values, and before fetching the iterator */
- void sort( bool mayInterrupt );
-
- auto_ptr<Iterator> iterator() {
- uassert( 10052 , "not sorted" , _sorted );
- return auto_ptr<Iterator>( new Iterator( this ) );
- }
-
- int numFiles() {
- return _files.size();
- }
-
- long getCurSizeSoFar() { return _curSizeSoFar; }
-
- void hintNumObjects( long long numObjects ) {
- if ( numObjects < _arraySize )
- _arraySize = (int)(numObjects + 100);
- }
-
- private:
-
- void _sortInMem( bool mayInterrupt );
-
- void sort( const std::string& file );
- void finishMap( bool mayInterrupt );
-
- const ExternalSortComparison* _cmp;
- long _maxFilesize;
- boost::filesystem::path _root;
-
- int _arraySize;
- InMemory * _cur;
- long _curSizeSoFar;
-
- list<string> _files;
- bool _sorted;
-
- static unsigned long long _compares;
- static unsigned long long _uniqueNumber;
- };
-#endif
}
diff --git a/src/mongo/db/index/btree_based_access_method.cpp b/src/mongo/db/index/btree_based_access_method.cpp
index e4fd7992009..150e1e35a47 100644
--- a/src/mongo/db/index/btree_based_access_method.cpp
+++ b/src/mongo/db/index/btree_based_access_method.cpp
@@ -31,6 +31,7 @@
#include <vector>
#include "mongo/base/status.h"
+#include "mongo/db/curop.h"
#include "mongo/db/extsort.h"
#include "mongo/db/index/btree_index_cursor.h"
#include "mongo/db/index/btree_interface.h"
@@ -42,6 +43,7 @@
#include "mongo/db/repl/rs.h"
#include "mongo/db/sort_phase_one.h"
#include "mongo/db/structure/btree/btreebuilder.h"
+#include "mongo/util/progress_meter.h"
namespace mongo {