summaryrefslogtreecommitdiff
path: root/src/mongo/dbtests/perf/btreeperf.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/dbtests/perf/btreeperf.cpp')
-rw-r--r--src/mongo/dbtests/perf/btreeperf.cpp442
1 files changed, 442 insertions, 0 deletions
diff --git a/src/mongo/dbtests/perf/btreeperf.cpp b/src/mongo/dbtests/perf/btreeperf.cpp
new file mode 100644
index 00000000000..7d68d8f5cc7
--- /dev/null
+++ b/src/mongo/dbtests/perf/btreeperf.cpp
@@ -0,0 +1,442 @@
+// btreeperf.cpp
+
+/* Copyright 2010 10gen Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Performance timing and space utilization testing for btree indexes.
+ */
+
+#include <iostream>
+
+#include <boost/random/bernoulli_distribution.hpp>
+#include <boost/random/geometric_distribution.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/variate_generator.hpp>
+#include <boost/random/uniform_int.hpp>
+
+#include "client/dbclient.h"
+#include "../../util/timer.h"
+
+using namespace std;
+using namespace mongo;
+using namespace boost;
+
+const char *ns = "test.btreeperf";
+const char *db = "test";
+const char *index_collection = "btreeperf.$_id_";
+
+// This random number generator has a much larger period than the default
+// generator and is half as fast as the default. Given that we intend to
+// generate large numbers of documents and will utilize more than one random
+// sample per document, choosing this generator seems like a worthwhile tradeoff.
+mt19937 randomNumberGenerator;
+
+/**
+ * An interface for generating documents to be inserted and document specs for
+ * remove requests.
+ */
+class InsertAndRemoveStrategy {
+public:
+ virtual ~InsertAndRemoveStrategy() {}
+ virtual BSONObj insertObj() = 0;
+ virtual BSONObj removeObj() = 0;
+protected:
+ /**
+ * Helper functions for converting a sample value to a sample object with
+ * specified _id, to be inserted or removed.
+ */
+
+ template< class T >
+ BSONObj insertObjWithVal( const T &val ) {
+ BSONObjBuilder b;
+ b.append( "_id", val );
+ return b.obj();
+ }
+ template< class T >
+ BSONObj removeObjWithVal( const T &val ) {
+ BSONObjBuilder b;
+ b.append( "_id", val );
+ return b.obj();
+ }
+};
+
+/**
+ * Manages a set of elements of type T. Supports inserting unique elements and
+ * sampling a random element without replacement.
+ *
+ * TODO In the contexts where this class is currently used, duplicate keys are
+ * either impossible or highly unlikely. And an occasional duplicate value will
+ * not much affect the procedure by wich a random element is chosen. We could
+ * stop checking for duplicates in push(), eliminate _set from the implementaiton,
+ * and potentially improve performance and memory requirements somewhat.
+ */
+template< class T >
+class SetSampler {
+public:
+ /** @param val Insert this value in the set if not already present. */
+ void push( const T& val ) {
+ if ( _set.insert( val ).second ) {
+ _vector.push_back( val );
+ }
+ }
+ /** @return a random element removed from the set */
+ T pull() {
+ if ( _vector.size() == 0 ) {
+ return T();
+ }
+ uniform_int< size_t > sizeRange( 0, _vector.size() - 1 );
+ variate_generator< mt19937&, uniform_int< size_t > > sizeGenerator( randomNumberGenerator, sizeRange );
+ size_t toRemove = sizeGenerator();
+ T val = _vector[ toRemove ];
+ // Replace the random element with the last element, then remove the
+ // last element.
+ _vector[ toRemove ] = _vector.back();
+ _vector.pop_back();
+ _set.erase( val );
+ return val;
+ }
+private:
+ vector< T > _vector;
+ set< T > _set;
+};
+
+/**
+ * Tracks values that have been specified for insertion by the derived class's
+ * implementation of insertVal() and selects uniformally from among values that
+ * have been inserted but not yet removed for the next value to remove.
+ *
+ * The implementation is probabilistically sound, but may be resource intensive
+ * and slow due to the use of a SetSampler.
+ */
+template< class T >
+class InsertAndUniformRemoveStrategy : public InsertAndRemoveStrategy {
+public:
+ virtual BSONObj insertObj() {
+ T val = insertVal();
+ _sampler.push( val );
+ return insertObjWithVal( val );
+ }
+ virtual BSONObj removeObj() { return removeObjWithVal( _sampler.pull() ); }
+protected:
+ /** @return value to insert. This is the only function a derived class need implement. */
+ virtual T insertVal() = 0;
+private:
+ SetSampler< T > _sampler;
+};
+
+/**
+ * The derived class supplies keys to be inserted and removed. The key removal
+ * strategy is similar to the strategy for selecting a random element described
+ * in the MongoDB cookbook: the first key in the collection greater than or
+ * equal to the supplied removal key is removed. This allows selecting an
+ * exising key for removal without the overhead required by a SetSampler.
+ *
+ * While this ranged selection strategy can work well for selecting a random
+ * element, there are some theoretical and empirically observed shortcomings
+ * when the strategy is applied to removing nodes for btree performance measurement:
+ * 1 The likelihood that a given key is removed is proportional to the difference
+ * in value between it and the previous key. Because key deletion increases
+ * the difference in value between adjacent keys, neighboring keys will be
+ * more likely to be deleted than they would be in a true uniform distribution.
+ * 2 MongoDB 1.6 uses 'unused' nodes in the btree implementation. With a ranged
+ * removal strategy, those nodes must be traversed to find a node available
+ * for removal.
+ * 3 Ranged removal was observed to be biased against the balancing policy of
+ * MongoDB 1.7 in some cases, in terms of storage size. This may be a
+ * consequence of point 1 above.
+ * 4 Ranged removal was observed to be significantly biased against the btree
+ * implementation in MongoDB 1.6 in terms of performance. This is likely a
+ * consequence of point 2 above.
+ * 5 In some cases the biases described above were not evident in tests lasting
+ * several minutes, but were evident in tests lasting several hours.
+ */
+template< class T >
+class InsertAndRangedRemoveStrategy : public InsertAndRemoveStrategy {
+public:
+ virtual BSONObj insertObj() { return insertObjWithVal( insertVal() ); }
+ virtual BSONObj removeObj() { return rangedRemoveObjWithVal( removeVal() ); }
+protected:
+ /** Small likelihood that this removal spec will not match any document */
+ template< class U >
+ BSONObj rangedRemoveObjWithVal( const U &val ) {
+ BSONObjBuilder b1;
+ BSONObjBuilder b2( b1.subobjStart( "_id" ) );
+ b2.append( "$gte", val );
+ b2.done();
+ return b1.obj();
+ }
+ virtual T insertVal() = 0;
+ virtual T removeVal() = 0;
+};
+
+/**
+ * Integer Keys
+ * Uniform Inserts
+ * Uniform Removes
+ */
+class UniformInsertRangedUniformRemoveInteger : public InsertAndRangedRemoveStrategy< long long > {
+public:
+ UniformInsertRangedUniformRemoveInteger() :
+ _uniform_int( 0ULL, ~0ULL ),
+ _nextLongLong( randomNumberGenerator, _uniform_int ) {
+ }
+ /** Small likelihood of duplicates */
+ virtual long long insertVal() { return _nextLongLong(); }
+ virtual long long removeVal() { return _nextLongLong(); }
+private:
+ uniform_int< unsigned long long > _uniform_int;
+ variate_generator< mt19937&, uniform_int< unsigned long long > > _nextLongLong;
+};
+
+class UniformInsertUniformRemoveInteger : public InsertAndUniformRemoveStrategy< long long > {
+public:
+ virtual long long insertVal() { return _gen.insertVal(); }
+private:
+ UniformInsertRangedUniformRemoveInteger _gen;
+};
+
+/**
+ * String Keys
+ * Uniform Inserts
+ * Uniform Removes
+ */
+class UniformInsertRangedUniformRemoveString : public InsertAndRangedRemoveStrategy< string > {
+public:
+ UniformInsertRangedUniformRemoveString() :
+ _geometric_distribution( 0.9 ),
+ _nextLength( randomNumberGenerator, _geometric_distribution ),
+ _uniform_char( 'a', 'z' ),
+ _nextChar( randomNumberGenerator, _uniform_char ) {
+ }
+ /** Small likelihood of duplicates */
+ virtual string insertVal() { return nextString(); }
+ virtual string removeVal() { return nextString(); }
+private:
+ string nextString() {
+ // The longer the minimum string length, the lower the likelihood of duplicates
+ int len = _nextLength() + 5;
+ len = len > 100 ? 100 : len;
+ string ret( len, 'x' );
+ for( int i = 0; i < len; ++i ) {
+ ret[ i ] = _nextChar();
+ }
+ return ret;
+ }
+ geometric_distribution<> _geometric_distribution;
+ variate_generator< mt19937&, geometric_distribution<> > _nextLength;
+ uniform_int< char > _uniform_char;
+ variate_generator< mt19937&, uniform_int< char > > _nextChar;
+};
+
+class UniformInsertUniformRemoveString : public InsertAndUniformRemoveStrategy< string > {
+public:
+ virtual string insertVal() { return _gen.insertVal(); }
+private:
+ UniformInsertRangedUniformRemoveString _gen;
+};
+
+/**
+ * OID Keys
+ * Increasing Inserts
+ * Uniform Removes
+ */
+class IncreasingInsertRangedUniformRemoveOID : public InsertAndRangedRemoveStrategy< OID > {
+public:
+ IncreasingInsertRangedUniformRemoveOID() :
+ _max( -1 ) {
+ }
+ virtual OID insertVal() { return oidFromULL( ++_max ); }
+ virtual OID removeVal() {
+ uniform_int< unsigned long long > distribution( 0, _max > 0 ? _max : 0 );
+ variate_generator< mt19937&, uniform_int< unsigned long long > > generator( randomNumberGenerator, distribution );
+ return oidFromULL( generator() );
+ }
+private:
+ static OID oidFromULL( unsigned long long val ) {
+ val = __builtin_bswap64( val );
+ OID oid;
+ oid.clear();
+ memcpy( (char*)&oid + 4, &val, 8 );
+ return oid;
+ }
+ long long _max;
+};
+
+class IncreasingInsertUniformRemoveOID : public InsertAndUniformRemoveStrategy< OID > {
+public:
+ virtual OID insertVal() { return _gen.insertVal(); }
+private:
+ IncreasingInsertRangedUniformRemoveOID _gen;
+};
+
+/**
+ * Integer Keys
+ * Increasing Inserts
+ * Increasing Removes (on remove, the lowest key is always removed)
+ */
+class IncreasingInsertIncreasingRemoveInteger : public InsertAndRemoveStrategy {
+public:
+ IncreasingInsertIncreasingRemoveInteger() :
+ // Start with a large value so data type will be preserved if we round
+ // trip through json.
+ _min( 1LL << 32 ),
+ _max( 1LL << 32 ) {
+ }
+ virtual BSONObj insertObj() { return insertObjWithVal( ++_max ); }
+ virtual BSONObj removeObj() { return removeObjWithVal( _min < _max ? ++_min : _min ); }
+private:
+ long long _min;
+ long long _max;
+};
+
+/** Generate a random boolean value. */
+class BernoulliGenerator {
+public:
+ /**
+ * @param excessFalsePercent This specifies the desired rate of false values
+ * vs true values. If we want false to be 5% more likely than true, we
+ * specify 5 for this argument.
+ */
+ BernoulliGenerator( int excessFalsePercent ) :
+ _bernoulli_distribution( 1.0 / ( 2.0 + excessFalsePercent / 100.0 ) ),
+ _generator( randomNumberGenerator, _bernoulli_distribution ) {
+ }
+ bool operator()() { return _generator(); }
+private:
+ bernoulli_distribution<> _bernoulli_distribution;
+ variate_generator< mt19937&, bernoulli_distribution<> > _generator;
+};
+
+/** Runs a strategy on a connection, with specified mix of inserts and removes. */
+class InsertAndRemoveRunner {
+public:
+ InsertAndRemoveRunner( DBClientConnection &conn, InsertAndRemoveStrategy &strategy, int excessInsertPercent ) :
+ _conn( conn ),
+ _strategy( strategy ),
+ _nextOpTypeRemove( excessInsertPercent ) {
+ }
+ void writeOne() {
+ if ( _nextOpTypeRemove() ) {
+ _conn.remove( ns, _strategy.removeObj(), true );
+ }
+ else {
+ _conn.insert( ns, _strategy.insertObj() );
+ }
+ }
+private:
+ DBClientConnection &_conn;
+ InsertAndRemoveStrategy &_strategy;
+ BernoulliGenerator _nextOpTypeRemove;
+};
+
+/**
+ * Writes a test script to cout based on a strategy and specified mix of inserts
+ * and removes. The script can be subsequently executed by InsertAndRemoveRunner.
+ * Script generation is intended for strategies that are memory or cpu intensive
+ * and might either divert resources from a mongod instance being analyzed on the
+ * same machine or fail to generate requests as quickly as the mongod might
+ * accept them.
+ * The script contains one line per operation. Each line begins
+ * with a letter indicating the operation type, followed by a space. Next
+ * follows the json representation of a document for the specified operation
+ * type.
+ */
+class InsertAndRemoveScriptGenerator {
+public:
+ InsertAndRemoveScriptGenerator( InsertAndRemoveStrategy &strategy, int excessInsertPercent ) :
+ _strategy( strategy ),
+ _nextOpTypeRemove( excessInsertPercent ) {
+ }
+ void writeOne() {
+ if ( _nextOpTypeRemove() ) {
+ cout << "r " << _strategy.removeObj().jsonString() << endl;
+ }
+ else {
+ cout << "i " << _strategy.insertObj().jsonString() << endl;
+ }
+ }
+private:
+ InsertAndRemoveStrategy &_strategy;
+ BernoulliGenerator _nextOpTypeRemove;
+};
+
+/**
+ * Run a test script from cin that was generated by
+ * InsertAndRemoveScriptGenerator. Running the script is intended to be
+ * lightweight in terms of memory and cpu usage, and fast.
+ */
+class InsertAndRemoveScriptRunner {
+public:
+ InsertAndRemoveScriptRunner( DBClientConnection &conn ) :
+ _conn( conn ) {
+ }
+ void writeOne() {
+ cin.getline( _buf, 1024 );
+ BSONObj val = fromjson( _buf + 2 );
+ if ( _buf[ 0 ] == 'r' ) {
+ _conn.remove( ns, val, true );
+ }
+ else {
+ _conn.insert( ns, val );
+ }
+ }
+private:
+ DBClientConnection &_conn;
+ char _buf[ 1024 ];
+};
+
+int main( int argc, const char **argv ) {
+
+ DBClientConnection conn;
+ conn.connect( "127.0.0.1:27017" );
+ conn.dropCollection( ns );
+
+// UniformInsertRangedUniformRemoveInteger strategy;
+// UniformInsertUniformRemoveInteger strategy;
+// UniformInsertRangedUniformRemoveString strategy;
+// UniformInsertUniformRemoveString strategy;
+// IncreasingInsertRangedUniformRemoveOID strategy;
+// IncreasingInsertUniformRemoveOID strategy;
+// IncreasingInsertIncreasingRemoveInteger strategy;
+// InsertAndRemoveScriptGenerator runner( strategy, 5 );
+ InsertAndRemoveScriptRunner runner( conn );
+
+ Timer t;
+ BSONObj statsCmd = BSON( "collstats" << index_collection );
+
+ // Print header, unless we are generating a script (in that case, comment this out).
+ cout << "ops,milliseconds,docs,totalBucketSize" << endl;
+
+ long long i = 0;
+ long long n = 10000000000;
+ while( i < n ) {
+ runner.writeOne();
+ // Print statistics, unless we are generating a script (in that case, comment this out).
+ // The stats collection requests below provide regular read operations,
+ // ensuring we are caught up with the progress being made by the mongod
+ // under analysis.
+ if ( ++i % 50000 == 0 ) {
+ // The total number of documents present.
+ long long docs = conn.count( ns );
+ BSONObj result;
+ conn.runCommand( db, statsCmd, result );
+ // The total number of bytes used for all allocated 8K buckets of the
+ // btree.
+ long long totalBucketSize = result.getField( "count" ).numberLong() * 8192;
+ cout << i << ',' << t.millis() << ',' << docs << ',' << totalBucketSize << endl;
+ }
+ }
+}