/**
* Copyright (C) 2013 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see .
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the GNU Affero General Public License in all respects
* for all of the code used other than as permitted herein. If you modify
* file(s) with this exception, you may extend this exception to your
* version of the file(s), but you are not obligated to do so. If you do not
* wish to do so, delete this exception statement from your version. If you
* delete this exception statement from all source files in the program,
* then also delete it in the license file.
*/
#include "mongo/platform/basic.h"
#include "mongo/db/sorter/sorter.h"
#include
#include "mongo/base/data_type_endian.h"
#include "mongo/base/init.h"
#include "mongo/base/static_assert.h"
#include "mongo/config.h"
#include "mongo/db/service_context.h"
#include "mongo/db/service_context_noop.h"
#include "mongo/stdx/memory.h"
#include "mongo/stdx/thread.h"
#include "mongo/unittest/temp_dir.h"
#include "mongo/unittest/unittest.h"
#include "mongo/util/mongoutils/str.h"
// Need access to internal classes
#include "mongo/db/sorter/sorter.cpp"
namespace mongo {
using namespace mongo::sorter;
using std::make_shared;
using std::pair;
namespace {
// Stub to avoid including the server environment library.
MONGO_INITIALIZER(SetGlobalEnvironment)(InitializerContext* context) {
setGlobalServiceContext(stdx::make_unique());
return Status::OK();
}
} // namespace
//
// Sorter framework testing utilities
//
class IntWrapper {
public:
IntWrapper(int i = 0) : _i(i) {}
operator const int&() const {
return _i;
}
/// members for Sorter
struct SorterDeserializeSettings {}; // unused
void serializeForSorter(BufBuilder& buf) const {
buf.appendNum(_i);
}
static IntWrapper deserializeForSorter(BufReader& buf, const SorterDeserializeSettings&) {
return buf.read>().value;
}
int memUsageForSorter() const {
return sizeof(IntWrapper);
}
IntWrapper getOwned() const {
return *this;
}
private:
int _i;
};
typedef pair IWPair;
typedef SortIteratorInterface IWIterator;
typedef Sorter IWSorter;
enum Direction { ASC = 1, DESC = -1 };
class IWComparator {
public:
IWComparator(Direction dir = ASC) : _dir(dir) {}
int operator()(const IWPair& lhs, const IWPair& rhs) const {
if (lhs.first == rhs.first)
return 0;
if (lhs.first < rhs.first)
return -1 * _dir;
return 1 * _dir;
}
private:
Direction _dir;
};
class IntIterator : public IWIterator {
public:
IntIterator(int start = 0, int stop = INT_MAX, int increment = 1)
: _current(start), _increment(increment), _stop(stop) {}
bool more() {
if (_increment == 0)
return true;
if (_increment > 0)
return _current < _stop;
return _current > _stop;
}
IWPair next() {
IWPair out(_current, -_current);
_current += _increment;
return out;
}
private:
int _current;
int _increment;
int _stop;
};
class EmptyIterator : public IWIterator {
public:
bool more() {
return false;
}
Data next() {
verify(false);
}
};
class LimitIterator : public IWIterator {
public:
LimitIterator(long long limit, std::shared_ptr source)
: _remaining(limit), _source(source) {
verify(limit > 0);
}
bool more() {
return _remaining && _source->more();
}
Data next() {
verify(more());
_remaining--;
return _source->next();
}
private:
long long _remaining;
std::shared_ptr _source;
};
template
void _assertIteratorsEquivalent(It1 it1, It2 it2, int line) {
int iteration;
try {
for (iteration = 0; true; iteration++) {
ASSERT_EQUALS(it1->more(), it2->more());
ASSERT_EQUALS(it1->more(), it2->more()); // make sure more() is safe to call twice
if (!it1->more())
return;
IWPair pair1 = it1->next();
IWPair pair2 = it2->next();
ASSERT_EQUALS(pair1.first, pair2.first);
ASSERT_EQUALS(pair1.second, pair2.second);
}
} catch (...) {
mongo::unittest::log() << "Failure from line " << line << " on iteration " << iteration
<< std::endl;
throw;
}
}
#define ASSERT_ITERATORS_EQUIVALENT(it1, it2) _assertIteratorsEquivalent(it1, it2, __LINE__)
template
std::shared_ptr makeInMemIterator(const int (&array)[N]) {
std::vector vec;
for (int i = 0; i < N; i++)
vec.push_back(IWPair(array[i], -array[i]));
return std::make_shared>(vec);
}
template
std::shared_ptr mergeIterators(IteratorPtr (&array)[N],
Direction Dir = ASC,
const SortOptions& opts = SortOptions()) {
std::vector> vec;
for (int i = 0; i < N; i++)
vec.push_back(std::shared_ptr(array[i]));
return std::shared_ptr(IWIterator::merge(vec, opts, IWComparator(Dir)));
}
//
// Tests for Sorter framework internals
//
class InMemIterTests {
public:
void run() {
{
EmptyIterator empty;
sorter::InMemIterator inMem;
ASSERT_ITERATORS_EQUIVALENT(&inMem, &empty);
}
{
static const int zeroUpTo20[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
ASSERT_ITERATORS_EQUIVALENT(makeInMemIterator(zeroUpTo20),
make_shared(0, 20));
}
{
// make sure InMemIterator doesn't do any reordering on it's own
static const int unsorted[] = {6, 3, 7, 4, 0, 9, 5, 7, 1, 8};
class UnsortedIter : public IWIterator {
public:
UnsortedIter() : _pos(0) {}
bool more() {
return _pos < sizeof(unsorted) / sizeof(unsorted[0]);
}
IWPair next() {
IWPair ret(unsorted[_pos], -unsorted[_pos]);
_pos++;
return ret;
}
size_t _pos;
} unsortedIter;
ASSERT_ITERATORS_EQUIVALENT(makeInMemIterator(unsorted),
static_cast(&unsortedIter));
}
}
};
class SortedFileWriterAndFileIteratorTests {
public:
void run() {
unittest::TempDir tempDir("sortedFileWriterTests");
const SortOptions opts = SortOptions().TempDir(tempDir.path());
{ // small
SortedFileWriter sorter(opts);
sorter.addAlreadySorted(0, 0);
sorter.addAlreadySorted(1, -1);
sorter.addAlreadySorted(2, -2);
sorter.addAlreadySorted(3, -3);
sorter.addAlreadySorted(4, -4);
ASSERT_ITERATORS_EQUIVALENT(std::shared_ptr(sorter.done()),
make_shared(0, 5));
}
{ // big
SortedFileWriter sorter(opts);
for (int i = 0; i < 10 * 1000 * 1000; i++)
sorter.addAlreadySorted(i, -i);
ASSERT_ITERATORS_EQUIVALENT(std::shared_ptr(sorter.done()),
make_shared(0, 10 * 1000 * 1000));
}
ASSERT(boost::filesystem::is_empty(tempDir.path()));
}
};
class MergeIteratorTests {
public:
void run() {
{ // test empty (no inputs)
std::vector> vec;
std::shared_ptr mergeIter(
IWIterator::merge(vec, SortOptions(), IWComparator()));
ASSERT_ITERATORS_EQUIVALENT(mergeIter, make_shared());
}
{ // test empty (only empty inputs)
std::shared_ptr iterators[] = {make_shared(),
make_shared(),
make_shared()};
ASSERT_ITERATORS_EQUIVALENT(mergeIterators(iterators, ASC),
make_shared());
}
{ // test ASC
std::shared_ptr iterators[] = {
make_shared(1, 20, 2) // 1, 3, ... 19
,
make_shared(0, 20, 2) // 0, 2, ... 18
};
ASSERT_ITERATORS_EQUIVALENT(mergeIterators(iterators, ASC),
make_shared(0, 20, 1));
}
{ // test DESC with an empty source
std::shared_ptr iterators[] = {
make_shared(30, 0, -3) // 30, 27, ... 3
,
make_shared(29, 0, -3) // 29, 26, ... 2
,
make_shared(28, 0, -3) // 28, 25, ... 1
,
make_shared()};
ASSERT_ITERATORS_EQUIVALENT(mergeIterators(iterators, DESC),
make_shared(30, 0, -1));
}
{ // test Limit
std::shared_ptr iterators[] = {
make_shared(1, 20, 2) // 1, 3, ... 19
,
make_shared(0, 20, 2) // 0, 2, ... 18
};
ASSERT_ITERATORS_EQUIVALENT(
mergeIterators(iterators, ASC, SortOptions().Limit(10)),
make_shared(10, make_shared(0, 20, 1)));
}
}
};
namespace SorterTests {
class Basic {
public:
virtual ~Basic() {}
void run() {
unittest::TempDir tempDir("sorterTests");
const SortOptions opts = SortOptions().TempDir(tempDir.path());
{ // test empty (no limit)
ASSERT_ITERATORS_EQUIVALENT(done(makeSorter(opts)), make_shared());
}
{ // test empty (limit 1)
ASSERT_ITERATORS_EQUIVALENT(done(makeSorter(SortOptions(opts).Limit(1))),
make_shared());
}
{ // test empty (limit 10)
ASSERT_ITERATORS_EQUIVALENT(done(makeSorter(SortOptions(opts).Limit(10))),
make_shared());
}
{ // test all data ASC
std::shared_ptr sorter = makeSorter(opts, IWComparator(ASC));
addData(sorter);
ASSERT_ITERATORS_EQUIVALENT(done(sorter), correct());
}
{ // test all data DESC
std::shared_ptr sorter = makeSorter(opts, IWComparator(DESC));
addData(sorter);
ASSERT_ITERATORS_EQUIVALENT(done(sorter), correctReverse());
}
// The debug builds are too slow to run these tests.
// Among other things, MSVC++ makes all heap functions O(N) not O(logN).
#if !defined(MONGO_CONFIG_DEBUG_BUILD)
{ // merge all data ASC
std::shared_ptr sorters[] = {makeSorter(opts, IWComparator(ASC)),
makeSorter(opts, IWComparator(ASC))};
addData(sorters[0]);
addData(sorters[1]);
std::shared_ptr iters1[] = {done(sorters[0]), done(sorters[1])};
std::shared_ptr iters2[] = {correct(), correct()};
ASSERT_ITERATORS_EQUIVALENT(mergeIterators(iters1, ASC), mergeIterators(iters2, ASC));
}
{ // merge all data DESC and use multiple threads to insert
std::shared_ptr sorters[] = {makeSorter(opts, IWComparator(DESC)),
makeSorter(opts, IWComparator(DESC))};
stdx::thread inBackground(&Basic::addData, this, sorters[0]);
addData(sorters[1]);
inBackground.join();
std::shared_ptr iters1[] = {done(sorters[0]), done(sorters[1])};
std::shared_ptr iters2[] = {correctReverse(), correctReverse()};
ASSERT_ITERATORS_EQUIVALENT(mergeIterators(iters1, DESC), mergeIterators(iters2, DESC));
}
#endif
ASSERT(boost::filesystem::is_empty(tempDir.path()));
}
// add data to the sorter
virtual void addData(unowned_ptr sorter) {
sorter->add(2, -2);
sorter->add(1, -1);
sorter->add(0, 0);
sorter->add(4, -4);
sorter->add(3, -3);
}
// returns an iterator with the correct results
virtual std::shared_ptr correct() {
return make_shared(0, 5); // 0, 1, ... 4
}
// like correct but with opposite sort direction
virtual std::shared_ptr correctReverse() {
return make_shared(4, -1, -1); // 4, 3, ... 0
}
// It is safe to ignore / overwrite any part of options
virtual SortOptions adjustSortOptions(SortOptions opts) {
return opts;
}
private:
// Make a new sorter with desired opts and comp. Opts may be ignored but not comp
std::shared_ptr makeSorter(SortOptions opts, IWComparator comp = IWComparator(ASC)) {
return std::shared_ptr(IWSorter::make(adjustSortOptions(opts), comp));
}
std::shared_ptr done(unowned_ptr sorter) {
return std::shared_ptr(sorter->done());
}
};
class Limit : public Basic {
virtual SortOptions adjustSortOptions(SortOptions opts) {
return opts.Limit(5);
}
void addData(unowned_ptr sorter) {
sorter->add(0, 0);
sorter->add(3, -3);
sorter->add(4, -4);
sorter->add(2, -2);
sorter->add(1, -1);
sorter->add(-1, 1);
}
virtual std::shared_ptr correct() {
return make_shared(-1, 4);
}
virtual std::shared_ptr correctReverse() {
return make_shared(4, -1, -1);
}
};
class Dupes : public Basic {
void addData(unowned_ptr sorter) {
sorter->add(1, -1);
sorter->add(-1, 1);
sorter->add(1, -1);
sorter->add(-1, 1);
sorter->add(1, -1);
sorter->add(0, 0);
sorter->add(2, -2);
sorter->add(-1, 1);
sorter->add(2, -2);
sorter->add(3, -3);
}
virtual std::shared_ptr correct() {
const int array[] = {-1, -1, -1, 0, 1, 1, 1, 2, 2, 3};
return makeInMemIterator(array);
}
virtual std::shared_ptr correctReverse() {
const int array[] = {3, 2, 2, 1, 1, 1, 0, -1, -1, -1};
return makeInMemIterator(array);
}
};
template
class LotsOfDataLittleMemory : public Basic {
public:
LotsOfDataLittleMemory() : _array(new int[NUM_ITEMS]) {
for (int i = 0; i < NUM_ITEMS; i++)
_array[i] = i;
if (Random)
std::random_shuffle(_array.get(), _array.get() + NUM_ITEMS);
}
SortOptions adjustSortOptions(SortOptions opts) {
// Make sure we use a reasonable number of files when we spill
MONGO_STATIC_ASSERT((NUM_ITEMS * sizeof(IWPair)) / MEM_LIMIT > 50);
MONGO_STATIC_ASSERT((NUM_ITEMS * sizeof(IWPair)) / MEM_LIMIT < 500);
return opts.MaxMemoryUsageBytes(MEM_LIMIT).ExtSortAllowed();
}
void addData(unowned_ptr sorter) {
for (int i = 0; i < NUM_ITEMS; i++)
sorter->add(_array[i], -_array[i]);
if (typeid(*this) == typeid(LotsOfDataLittleMemory)) {
// don't do this check in subclasses since they may set a limit
ASSERT_GREATER_THAN_OR_EQUALS(static_cast(sorter->numFiles()),
(NUM_ITEMS * sizeof(IWPair)) / MEM_LIMIT);
}
}
virtual std::shared_ptr correct() {
return make_shared(0, NUM_ITEMS);
}
virtual std::shared_ptr correctReverse() {
return make_shared(NUM_ITEMS - 1, -1, -1);
}
enum Constants {
NUM_ITEMS = 500 * 1000,
MEM_LIMIT = 64 * 1024,
};
std::unique_ptr _array;
};
template
class LotsOfDataWithLimit : public LotsOfDataLittleMemory {
typedef LotsOfDataLittleMemory Parent;
SortOptions adjustSortOptions(SortOptions opts) {
// Make sure our tests will spill or not as desired
MONGO_STATIC_ASSERT(MEM_LIMIT / 2 > (100 * sizeof(IWPair)));
MONGO_STATIC_ASSERT(MEM_LIMIT < (5000 * sizeof(IWPair)));
MONGO_STATIC_ASSERT(MEM_LIMIT * 2 > (5000 * sizeof(IWPair)));
// Make sure we use a reasonable number of files when we spill
MONGO_STATIC_ASSERT((Parent::NUM_ITEMS * sizeof(IWPair)) / MEM_LIMIT > 100);
MONGO_STATIC_ASSERT((Parent::NUM_ITEMS * sizeof(IWPair)) / MEM_LIMIT < 500);
return opts.MaxMemoryUsageBytes(MEM_LIMIT).ExtSortAllowed().Limit(Limit);
}
virtual std::shared_ptr correct() {
return make_shared(Limit, Parent::correct());
}
virtual std::shared_ptr correctReverse() {
return make_shared(Limit, Parent::correctReverse());
}
enum { MEM_LIMIT = 32 * 1024 };
};
}
class SorterSuite : public mongo::unittest::Suite {
public:
SorterSuite() : Suite("sorter") {}
void setupTests() {
add();
add();
add();
add();
add();
add();
add>();
add>();
add>(); // limit=1 is special case
add>(); // limit=1 is special case
add>(); // fits in mem
add>(); // fits in mem
add>(); // spills
add>(); // spills
}
};
mongo::unittest::SuiteInstance extSortTests;
}