// extsort.h
/**
* Copyright (C) 2008 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see .
*/
#pragma once
#include "mongo/pch.h"
#include "mongo/db/index.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/curop-inl.h"
#include "mongo/util/array.h"
#define MONGO_USE_NEW_SORTER 1
#if MONGO_USE_NEW_SORTER
# include "mongo/db/sorter/sorter.h"
#endif
namespace mongo {
typedef pair ExternalSortDatum;
/**
* To external sort, you provide a pointer to an implementation of this class.
* The compare function follows the usual -1, 0, 1 semantics.
*/
class ExternalSortComparison {
public:
virtual ~ExternalSortComparison() { }
virtual int compare(const ExternalSortDatum& l, const ExternalSortDatum& r) const = 0;
};
#if MONGO_USE_NEW_SORTER
// TODO This class will probably disappear in the future or be replaced with a typedef
class BSONObjExternalSorter : boost::noncopyable {
public:
typedef pair Data;
typedef SortIteratorInterface Iterator;
BSONObjExternalSorter(const ExternalSortComparison* comp, long maxFileSize=100*1024*1024);
void add( const BSONObj& o, const DiskLoc& loc, bool mayInterrupt ) {
*_mayInterrupt = mayInterrupt;
_sorter->add(o.getOwned(), loc);
}
auto_ptr iterator() { return auto_ptr(_sorter->done()); }
void sort( bool mayInterrupt ) { *_mayInterrupt = mayInterrupt; }
int numFiles() { return _sorter->numFiles(); }
long getCurSizeSoFar() { return _sorter->memUsed(); }
void hintNumObjects(long long) {} // unused
private:
shared_ptr _mayInterrupt;
scoped_ptr > _sorter;
};
#else
/**
for external (disk) sorting by BSONObj and attaching a value
*/
class BSONObjExternalSorter : boost::noncopyable {
public:
BSONObjExternalSorter(const ExternalSortComparison* cmp,
long maxFileSize = 1024 * 1024 * 100 );
~BSONObjExternalSorter();
private:
static HLMutex _extSortMutex;
static int _compare(const ExternalSortComparison* cmp, const ExternalSortDatum& l,
const ExternalSortDatum& r);
class MyCmp {
public:
MyCmp(const ExternalSortComparison* cmp) : _cmp(cmp) { }
bool operator()( const ExternalSortDatum &l, const ExternalSortDatum &r ) const {
return _cmp->compare(l, r) < 0;
};
private:
const ExternalSortComparison* _cmp;
};
static bool extSortMayInterrupt;
static int extSortComp( const void *lv, const void *rv );
static const ExternalSortComparison* staticExtSortCmp;
class FileIterator : boost::noncopyable {
public:
FileIterator( const std::string& file );
~FileIterator();
bool more();
ExternalSortDatum next();
private:
bool _read( char* buf, long long count );
int _file;
unsigned long long _length;
unsigned long long _readSoFar;
};
public:
typedef FastArray InMemory;
class Iterator : boost::noncopyable {
public:
Iterator( BSONObjExternalSorter * sorter );
~Iterator();
bool more();
ExternalSortDatum next();
private:
MyCmp _cmp;
vector _files;
vector< pair > _stash;
InMemory * _in;
InMemory::iterator _it;
};
void add( const BSONObj& o, const DiskLoc& loc, bool mayInterrupt );
/* call after adding values, and before fetching the iterator */
void sort( bool mayInterrupt );
auto_ptr iterator() {
uassert( 10052 , "not sorted" , _sorted );
return auto_ptr( new Iterator( this ) );
}
int numFiles() {
return _files.size();
}
long getCurSizeSoFar() { return _curSizeSoFar; }
void hintNumObjects( long long numObjects ) {
if ( numObjects < _arraySize )
_arraySize = (int)(numObjects + 100);
}
private:
void _sortInMem( bool mayInterrupt );
void sort( const std::string& file );
void finishMap( bool mayInterrupt );
const ExternalSortComparison* _cmp;
long _maxFilesize;
boost::filesystem::path _root;
int _arraySize;
InMemory * _cur;
long _curSizeSoFar;
list _files;
bool _sorted;
static unsigned long long _compares;
static unsigned long long _uniqueNumber;
};
#endif
}