1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
|
// btreeperf.cpp
/* Copyright 2010 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the GNU Affero General Public License in all respects
* for all of the code used other than as permitted herein. If you modify
* file(s) with this exception, you may extend this exception to your
* version of the file(s), but you are not obligated to do so. If you do not
* wish to do so, delete this exception statement from your version. If you
* delete this exception statement from all source files in the program,
* then also delete it in the license file.
*/
/**
* Performance timing and space utilization testing for btree indexes.
*/
#include <iostream>
#include <boost/random/bernoulli_distribution.hpp>
#include <boost/random/geometric_distribution.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/variate_generator.hpp>
#include <boost/random/uniform_int.hpp>
#include "client/dbclient.h"
#include "mongo/util/timer.h"
using namespace std;
using namespace mongo;
using namespace boost;
const char *ns = "test.btreeperf";
const char *db = "test";
const char *index_collection = "btreeperf.$_id_";
// This random number generator has a much larger period than the default
// generator and is half as fast as the default. Given that we intend to
// generate large numbers of documents and will utilize more than one random
// sample per document, choosing this generator seems like a worthwhile tradeoff.
mt19937 randomNumberGenerator;
/**
* An interface for generating documents to be inserted and document specs for
* remove requests.
*/
class InsertAndRemoveStrategy {
public:
virtual ~InsertAndRemoveStrategy() {}
virtual BSONObj insertObj() = 0;
virtual BSONObj removeObj() = 0;
protected:
/**
* Helper functions for converting a sample value to a sample object with
* specified _id, to be inserted or removed.
*/
template< class T >
BSONObj insertObjWithVal( const T &val ) {
BSONObjBuilder b;
b.append( "_id", val );
return b.obj();
}
template< class T >
BSONObj removeObjWithVal( const T &val ) {
BSONObjBuilder b;
b.append( "_id", val );
return b.obj();
}
};
/**
* Manages a set of elements of type T. Supports inserting unique elements and
* sampling a random element without replacement.
*
* TODO In the contexts where this class is currently used, duplicate keys are
* either impossible or highly unlikely. And an occasional duplicate value will
* not much affect the procedure by which a random element is chosen. We could
* stop checking for duplicates in push(), eliminate _set from the implementation,
* and potentially improve performance and memory requirements somewhat.
*/
template< class T >
class SetSampler {
public:
/** @param val Insert this value in the set if not already present. */
void push( const T& val ) {
if ( _set.insert( val ).second ) {
_vector.push_back( val );
}
}
/** @return a random element removed from the set */
T pull() {
if ( _vector.size() == 0 ) {
return T();
}
uniform_int< size_t > sizeRange( 0, _vector.size() - 1 );
variate_generator< mt19937&, uniform_int< size_t > > sizeGenerator( randomNumberGenerator, sizeRange );
size_t toRemove = sizeGenerator();
T val = _vector[ toRemove ];
// Replace the random element with the last element, then remove the
// last element.
_vector[ toRemove ] = _vector.back();
_vector.pop_back();
_set.erase( val );
return val;
}
private:
vector< T > _vector;
set< T > _set;
};
/**
* Tracks values that have been specified for insertion by the derived class's
* implementation of insertVal() and selects uniformally from among values that
* have been inserted but not yet removed for the next value to remove.
*
* The implementation is probabilistically sound, but may be resource intensive
* and slow due to the use of a SetSampler.
*/
template< class T >
class InsertAndUniformRemoveStrategy : public InsertAndRemoveStrategy {
public:
virtual BSONObj insertObj() {
T val = insertVal();
_sampler.push( val );
return insertObjWithVal( val );
}
virtual BSONObj removeObj() { return removeObjWithVal( _sampler.pull() ); }
protected:
/** @return value to insert. This is the only function a derived class need implement. */
virtual T insertVal() = 0;
private:
SetSampler< T > _sampler;
};
/**
* The derived class supplies keys to be inserted and removed. The key removal
* strategy is similar to the strategy for selecting a random element described
* in the MongoDB cookbook: the first key in the collection greater than or
* equal to the supplied removal key is removed. This allows selecting an
* exising key for removal without the overhead required by a SetSampler.
*
* While this ranged selection strategy can work well for selecting a random
* element, there are some theoretical and empirically observed shortcomings
* when the strategy is applied to removing nodes for btree performance measurement:
* 1 The likelihood that a given key is removed is proportional to the difference
* in value between it and the previous key. Because key deletion increases
* the difference in value between adjacent keys, neighboring keys will be
* more likely to be deleted than they would be in a true uniform distribution.
* 2 MongoDB 1.6 uses 'unused' nodes in the btree implementation. With a ranged
* removal strategy, those nodes must be traversed to find a node available
* for removal.
* 3 Ranged removal was observed to be biased against the balancing policy of
* MongoDB 1.7 in some cases, in terms of storage size. This may be a
* consequence of point 1 above.
* 4 Ranged removal was observed to be significantly biased against the btree
* implementation in MongoDB 1.6 in terms of performance. This is likely a
* consequence of point 2 above.
* 5 In some cases the biases described above were not evident in tests lasting
* several minutes, but were evident in tests lasting several hours.
*/
template< class T >
class InsertAndRangedRemoveStrategy : public InsertAndRemoveStrategy {
public:
virtual BSONObj insertObj() { return insertObjWithVal( insertVal() ); }
virtual BSONObj removeObj() { return rangedRemoveObjWithVal( removeVal() ); }
protected:
/** Small likelihood that this removal spec will not match any document */
template< class U >
BSONObj rangedRemoveObjWithVal( const U &val ) {
BSONObjBuilder b1;
BSONObjBuilder b2( b1.subobjStart( "_id" ) );
b2.append( "$gte", val );
b2.done();
return b1.obj();
}
virtual T insertVal() = 0;
virtual T removeVal() = 0;
};
/**
* Integer Keys
* Uniform Inserts
* Uniform Removes
*/
class UniformInsertRangedUniformRemoveInteger : public InsertAndRangedRemoveStrategy< long long > {
public:
UniformInsertRangedUniformRemoveInteger() :
_uniform_int( 0ULL, ~0ULL ),
_nextLongLong( randomNumberGenerator, _uniform_int ) {
}
/** Small likelihood of duplicates */
virtual long long insertVal() { return _nextLongLong(); }
virtual long long removeVal() { return _nextLongLong(); }
private:
uniform_int< unsigned long long > _uniform_int;
variate_generator< mt19937&, uniform_int< unsigned long long > > _nextLongLong;
};
class UniformInsertUniformRemoveInteger : public InsertAndUniformRemoveStrategy< long long > {
public:
virtual long long insertVal() { return _gen.insertVal(); }
private:
UniformInsertRangedUniformRemoveInteger _gen;
};
/**
* String Keys
* Uniform Inserts
* Uniform Removes
*/
class UniformInsertRangedUniformRemoveString : public InsertAndRangedRemoveStrategy< string > {
public:
UniformInsertRangedUniformRemoveString() :
_geometric_distribution( 0.9 ),
_nextLength( randomNumberGenerator, _geometric_distribution ),
_uniform_char( 'a', 'z' ),
_nextChar( randomNumberGenerator, _uniform_char ) {
}
/** Small likelihood of duplicates */
virtual string insertVal() { return nextString(); }
virtual string removeVal() { return nextString(); }
private:
string nextString() {
// The longer the minimum string length, the lower the likelihood of duplicates
int len = _nextLength() + 5;
len = len > 100 ? 100 : len;
string ret( len, 'x' );
for( int i = 0; i < len; ++i ) {
ret[ i ] = _nextChar();
}
return ret;
}
geometric_distribution<> _geometric_distribution;
variate_generator< mt19937&, geometric_distribution<> > _nextLength;
uniform_int< char > _uniform_char;
variate_generator< mt19937&, uniform_int< char > > _nextChar;
};
class UniformInsertUniformRemoveString : public InsertAndUniformRemoveStrategy< string > {
public:
virtual string insertVal() { return _gen.insertVal(); }
private:
UniformInsertRangedUniformRemoveString _gen;
};
/**
* OID Keys
* Increasing Inserts
* Uniform Removes
*/
class IncreasingInsertRangedUniformRemoveOID : public InsertAndRangedRemoveStrategy< OID > {
public:
IncreasingInsertRangedUniformRemoveOID() :
_max( -1 ) {
}
virtual OID insertVal() { return oidFromULL( ++_max ); }
virtual OID removeVal() {
uniform_int< unsigned long long > distribution( 0, _max > 0 ? _max : 0 );
variate_generator< mt19937&, uniform_int< unsigned long long > > generator( randomNumberGenerator, distribution );
return oidFromULL( generator() );
}
private:
static OID oidFromULL( unsigned long long val ) {
val = __builtin_bswap64( val );
OID oid;
oid.clear();
memcpy( (char*)&oid + 4, &val, 8 );
return oid;
}
long long _max;
};
class IncreasingInsertUniformRemoveOID : public InsertAndUniformRemoveStrategy< OID > {
public:
virtual OID insertVal() { return _gen.insertVal(); }
private:
IncreasingInsertRangedUniformRemoveOID _gen;
};
/**
* Integer Keys
* Increasing Inserts
* Increasing Removes (on remove, the lowest key is always removed)
*/
class IncreasingInsertIncreasingRemoveInteger : public InsertAndRemoveStrategy {
public:
IncreasingInsertIncreasingRemoveInteger() :
// Start with a large value so data type will be preserved if we round
// trip through json.
_min( 1LL << 32 ),
_max( 1LL << 32 ) {
}
virtual BSONObj insertObj() { return insertObjWithVal( ++_max ); }
virtual BSONObj removeObj() { return removeObjWithVal( _min < _max ? ++_min : _min ); }
private:
long long _min;
long long _max;
};
/** Generate a random boolean value. */
class BernoulliGenerator {
public:
/**
* @param excessFalsePercent This specifies the desired rate of false values
* vs true values. If we want false to be 5% more likely than true, we
* specify 5 for this argument.
*/
BernoulliGenerator( int excessFalsePercent ) :
_bernoulli_distribution( 1.0 / ( 2.0 + excessFalsePercent / 100.0 ) ),
_generator( randomNumberGenerator, _bernoulli_distribution ) {
}
bool operator()() { return _generator(); }
private:
bernoulli_distribution<> _bernoulli_distribution;
variate_generator< mt19937&, bernoulli_distribution<> > _generator;
};
/** Runs a strategy on a connection, with specified mix of inserts and removes. */
class InsertAndRemoveRunner {
public:
InsertAndRemoveRunner( DBClientConnection &conn, InsertAndRemoveStrategy &strategy, int excessInsertPercent ) :
_conn( conn ),
_strategy( strategy ),
_nextOpTypeRemove( excessInsertPercent ) {
}
void writeOne() {
if ( _nextOpTypeRemove() ) {
_conn.remove( ns, _strategy.removeObj(), true );
}
else {
_conn.insert( ns, _strategy.insertObj() );
}
}
private:
DBClientConnection &_conn;
InsertAndRemoveStrategy &_strategy;
BernoulliGenerator _nextOpTypeRemove;
};
/**
* Writes a test script to cout based on a strategy and specified mix of inserts
* and removes. The script can be subsequently executed by InsertAndRemoveRunner.
* Script generation is intended for strategies that are memory or cpu intensive
* and might either divert resources from a mongod instance being analyzed on the
* same machine or fail to generate requests as quickly as the mongod might
* accept them.
* The script contains one line per operation. Each line begins
* with a letter indicating the operation type, followed by a space. Next
* follows the json representation of a document for the specified operation
* type.
*/
class InsertAndRemoveScriptGenerator {
public:
InsertAndRemoveScriptGenerator( InsertAndRemoveStrategy &strategy, int excessInsertPercent ) :
_strategy( strategy ),
_nextOpTypeRemove( excessInsertPercent ) {
}
void writeOne() {
if ( _nextOpTypeRemove() ) {
cout << "r " << _strategy.removeObj().jsonString() << endl;
}
else {
cout << "i " << _strategy.insertObj().jsonString() << endl;
}
}
private:
InsertAndRemoveStrategy &_strategy;
BernoulliGenerator _nextOpTypeRemove;
};
/**
* Run a test script from cin that was generated by
* InsertAndRemoveScriptGenerator. Running the script is intended to be
* lightweight in terms of memory and cpu usage, and fast.
*/
class InsertAndRemoveScriptRunner {
public:
InsertAndRemoveScriptRunner( DBClientConnection &conn ) :
_conn( conn ) {
}
void writeOne() {
cin.getline( _buf, 1024 );
BSONObj val = fromjson( _buf + 2 );
if ( _buf[ 0 ] == 'r' ) {
_conn.remove( ns, val, true );
}
else {
_conn.insert( ns, val );
}
}
private:
DBClientConnection &_conn;
char _buf[ 1024 ];
};
int main( int argc, const char **argv ) {
DBClientConnection conn;
conn.connect( "127.0.0.1:27017" );
conn.dropCollection( ns );
// UniformInsertRangedUniformRemoveInteger strategy;
// UniformInsertUniformRemoveInteger strategy;
// UniformInsertRangedUniformRemoveString strategy;
// UniformInsertUniformRemoveString strategy;
// IncreasingInsertRangedUniformRemoveOID strategy;
// IncreasingInsertUniformRemoveOID strategy;
// IncreasingInsertIncreasingRemoveInteger strategy;
// InsertAndRemoveScriptGenerator runner( strategy, 5 );
InsertAndRemoveScriptRunner runner( conn );
Timer t;
BSONObj statsCmd = BSON( "collstats" << index_collection );
// Print header, unless we are generating a script (in that case, comment this out).
cout << "ops,milliseconds,docs,totalBucketSize" << endl;
long long i = 0;
long long n = 10000000000;
while( i < n ) {
runner.writeOne();
// Print statistics, unless we are generating a script (in that case, comment this out).
// The stats collection requests below provide regular read operations,
// ensuring we are caught up with the progress being made by the mongod
// under analysis.
if ( ++i % 50000 == 0 ) {
// The total number of documents present.
long long docs = conn.count( ns );
BSONObj result;
conn.runCommand( db, statsCmd, result );
// The total number of bytes used for all allocated 8K buckets of the
// btree.
long long totalBucketSize = result.getField( "count" ).numberLong() * 8192;
cout << i << ',' << t.millis() << ',' << docs << ',' << totalBucketSize << endl;
}
}
}
|