45 files changed, 3765 insertions, 2597 deletions
diff --git a/SConstruct b/SConstruct
index 88205906ab3..8405ef4bdbd 100644
--- a/SConstruct
+++ b/SConstruct
@@ -354,7 +354,7 @@ if has_option( "asio" ):
     coreServerFiles += [ "util/message_server_asio.cpp" ]
 
 # mongod files - also files used in tools
-serverOnlyFiles = Split( "util/logfile.cpp util/alignedbuilder.cpp db/mongommf.cpp db/dur.cpp db/durop.cpp db/dur_writetodatafiles.cpp db/dur_preplogbuffer.cpp db/dur_commitjob.cpp db/dur_recover.cpp db/dur_journal.cpp db/query.cpp db/update.cpp db/introspect.cpp db/btree.cpp db/clientcursor.cpp db/tests.cpp db/repl.cpp db/repl/rs.cpp db/repl/consensus.cpp db/repl/rs_initiate.cpp db/repl/replset_commands.cpp db/repl/manager.cpp db/repl/health.cpp db/repl/heartbeat.cpp db/repl/rs_config.cpp db/repl/rs_rollback.cpp db/repl/rs_sync.cpp db/repl/rs_initialsync.cpp db/oplog.cpp db/repl_block.cpp db/btreecursor.cpp db/cloner.cpp db/namespace.cpp db/cap.cpp db/matcher_covered.cpp db/dbeval.cpp db/restapi.cpp db/dbhelpers.cpp db/instance.cpp db/client.cpp db/database.cpp db/pdfile.cpp db/cursor.cpp db/security_commands.cpp db/security.cpp db/queryoptimizer.cpp db/extsort.cpp db/cmdline.cpp" )
+serverOnlyFiles = Split( "db/key.cpp db/btreebuilder.cpp util/logfile.cpp util/alignedbuilder.cpp db/mongommf.cpp db/dur.cpp db/durop.cpp db/dur_writetodatafiles.cpp db/dur_preplogbuffer.cpp db/dur_commitjob.cpp db/dur_recover.cpp db/dur_journal.cpp db/query.cpp db/update.cpp db/introspect.cpp db/btree.cpp db/clientcursor.cpp db/tests.cpp db/repl.cpp db/repl/rs.cpp db/repl/consensus.cpp db/repl/rs_initiate.cpp db/repl/replset_commands.cpp db/repl/manager.cpp db/repl/health.cpp db/repl/heartbeat.cpp db/repl/rs_config.cpp db/repl/rs_rollback.cpp db/repl/rs_sync.cpp db/repl/rs_initialsync.cpp db/oplog.cpp db/repl_block.cpp db/btreecursor.cpp db/cloner.cpp db/namespace.cpp db/cap.cpp db/matcher_covered.cpp db/dbeval.cpp db/restapi.cpp db/dbhelpers.cpp db/instance.cpp db/client.cpp db/database.cpp db/pdfile.cpp db/cursor.cpp db/security_commands.cpp db/security.cpp db/queryoptimizer.cpp db/extsort.cpp db/cmdline.cpp" )
 
 serverOnlyFiles += [ "db/index.cpp" ] + Glob( "db/geo/*.cpp" )
diff --git a/bson/bsonobjbuilder.h b/bson/bsonobjbuilder.h
index ae655cbd6da..27d4201600b 100644
--- a/bson/bsonobjbuilder.h
+++ b/bson/bsonobjbuilder.h
@@ -377,12 +377,13 @@ namespace mongo {
             return *this;
         }
 
-        /** Append a string element. len DOES include terminating nul */
-        BSONObjBuilder& append(const StringData& fieldName, const char *str, int len) {
+        /** Append a string element.
+            @param sz size includes terminating null character */
+        BSONObjBuilder& append(const StringData& fieldName, const char *str, int sz) {
             _b.appendNum((char) String);
             _b.appendStr(fieldName);
-            _b.appendNum((int)len);
-            _b.appendBuf(str, len);
+            _b.appendNum((int)sz);
+            _b.appendBuf(str, sz);
             return *this;
         }
         /** Append a string element */
diff --git a/bson/util/builder.h b/bson/util/builder.h
index 6f4ff9eabd3..68994b35399 100644
--- a/bson/util/builder.h
+++ b/bson/util/builder.h
@@ -73,7 +73,10 @@ namespace mongo {
             }
         }
 
-        void reset( int maxSize = 0 ) {
+        void reset() {
+            l = 0;
+        }
+        void reset( int maxSize ) {
             l = 0;
             if ( maxSize && size > maxSize ) {
                 free(data);
@@ -94,6 +97,9 @@ namespace mongo {
         /* assume ownership of the buffer - you must then free() it */
         void decouple() { data = 0; }
 
+        void appendUChar(unsigned char j) {
+            *((unsigned char*)grow(sizeof(unsigned char))) = j;
+        }
         void appendChar(char j) {
             *((char*)grow(sizeof(char))) = j;
         }
diff --git a/client/dbclient.cpp b/client/dbclient.cpp
index 53cd392ae8d..44ff45b2e3e 100644
--- a/client/dbclient.cpp
+++ b/client/dbclient.cpp
@@ -824,7 +824,7 @@ namespace mongo {
         return ss.str();
     }
 
-    bool DBClientWithCommands::ensureIndex( const string &ns , BSONObj keys , bool unique, const string & name , bool cache, bool background ) {
+    bool DBClientWithCommands::ensureIndex( const string &ns , BSONObj keys , bool unique, const string & name , bool cache, bool background, int version ) {
         BSONObjBuilder toSave;
         toSave.append( "ns" , ns );
         toSave.append( "key" , keys );
@@ -842,6 +842,9 @@ namespace mongo {
             cacheKey += nn;
         }
 
+        if( version >= 0 )
+            toSave.append("v", version);
+
         if ( unique )
             toSave.appendBool( "unique", unique );
 
diff --git a/client/dbclient.h b/client/dbclient.h
index 03030a3c6be..e219f7127f3 100644
--- a/client/dbclient.h
+++ b/client/dbclient.h
@@ -647,11 +647,12 @@ namespace mongo {
            @param name if not specified, it will be created from the keys automatically (which is recommended)
            @param cache if set to false, the index cache for the connection won't remember this call
            @param background build index in the background (see mongodb docs/wiki for details)
+           @param v index version. leave at default value. (unit tests set this parameter.)
            @return whether or not sent message to db. should be true on first call, false on subsequent unless resetIndexCache was called
         */
         virtual bool ensureIndex( const string &ns , BSONObj keys , bool unique = false, const string &name = "",
-                                  bool cache = true, bool background = false );
+                                  bool cache = true, bool background = false, int v = -1 );
 
         /** clears the index cache, so the subsequent call to ensureIndex for any index will go to the server
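A note on the client-side change above: ensureIndex now forwards an optional index version, written into the index spec as "v" only when non-negative. A hedged usage sketch (not part of the commit; the namespace and host are made up for illustration):

    #include "client/dbclient.h"
    using namespace mongo;

    void indexExample() {
        DBClientConnection c;
        string errmsg;
        if ( !c.connect( "localhost", errmsg ) )
            return;
        // default: v == -1, so no "v" field is sent and the server decides
        c.ensureIndex( "test.people", BSON( "name" << 1 ) );
        // explicit index version, as the unit tests do
        c.ensureIndex( "test.people", BSON( "name" << 1 ), /*unique*/ false,
                       /*name*/ "", /*cache*/ true, /*background*/ false, /*v*/ 0 );
    }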
diff --git a/db/btree.cpp b/db/btree.cpp
index 4162d6a381e..cd8b2f5d715 100644
--- a/db/btree.cpp
+++ b/db/btree.cpp
@@ -27,33 +27,38 @@
 #include "curop-inl.h"
 #include "stats/counters.h"
 #include "dur_commitjob.h"
+#include "btreebuilder.h"
 
 namespace mongo {
 
-#define VERIFYTHISLOC dassert( thisLoc.btree() == this );
+    BOOST_STATIC_ASSERT( Record::HeaderSize == 16 );
+    BOOST_STATIC_ASSERT( Record::HeaderSize + BtreeData_V1::BucketSize == 8192 );
+
+#define VERIFYTHISLOC dassert( thisLoc.btree<V>() == this );
 
     /**
      * give us a writable version of the btree bucket (declares write intent).
      * note it is likely more efficient to declare write intent on something smaller when you can.
      */
-    BtreeBucket* DiskLoc::btreemod() const {
+    template< class V >
+    BtreeBucket<V> * DiskLoc::btreemod() const {
         assert( _a != -1 );
-        BtreeBucket *b = const_cast< BtreeBucket * >( btree() );
-        return static_cast< BtreeBucket* >( getDur().writingPtr( b, BucketSize ) );
+        BtreeBucket<V> *b = const_cast< BtreeBucket<V> * >( btree<V>() );
+        return static_cast< BtreeBucket<V>* >( getDur().writingPtr( b, V::BucketSize ) );
     }
 
     _KeyNode& _KeyNode::writing() const {
         return *getDur().writing( const_cast< _KeyNode* >( this ) );
     }
 
-    KeyNode::KeyNode(const BucketBasics& bb, const _KeyNode &k) :
+    template< class V >
+    BucketBasics<V>::KeyNode::KeyNode(const BucketBasics<V>& bb, const _KeyNode &k) :
         prevChildBucket(k.prevChildBucket),
         recordLoc(k.recordLoc), key(bb.data+k.keyDataOfs()) {
     }
 
-    // largest key size we allow.  note we very much need to support bigger keys (somehow) in the future.
-    static const int KeyMax = BucketSize / 10;
-
+    // BucketBasics::lowWaterMark()
+    //
     // We define this value as the maximum number of bytes such that, if we have
     // fewer than this many bytes, we must be able to either merge with or receive
     // keys from any neighboring node.  If our utilization goes below this value we
@@ -65,12 +70,11 @@ namespace mongo {
     // rebalancedSeparatorPos().  The conditions for lowWaterMark - 1 are as
    // follows:  We know we cannot merge with the neighbor, so the total data size
     // for us, the neighbor, and the separator must be at least
-    // BtreeBucket::bodySize() + 1.  We must be able to accept one key of any
+    // BtreeBucket<V>::bodySize() + 1.  We must be able to accept one key of any
     // allowed size, so our size plus storage for that additional key must be
-    // <= BtreeBucket::bodySize() / 2.  This way, with the extra key we'll have a
+    // <= BtreeBucket<V>::bodySize() / 2.  This way, with the extra key we'll have a
     // new bucket data size < half the total data size and by the implementation
     // of rebalancedSeparatorPos() the key must be added.
-    static const int lowWaterMark = BtreeBucket::bodySize() / 2 - KeyMax - sizeof( _KeyNode ) + 1;
 
     static const int split_debug = 0;
     static const int insert_debug = 0;
@@ -88,47 +92,57 @@ namespace mongo {
 
     /* BucketBasics --------------------------------------------------- */
 
-    void BucketBasics::assertWritable() {
+    template< class V >
+    void BucketBasics<V>::assertWritable() {
         if( cmdLine.dur )
-            dur::assertAlreadyDeclared(this, sizeof(*this));
+            dur::assertAlreadyDeclared(this, V::BucketSize);
     }
 
-    string BtreeBucket::bucketSummary() const {
+    template< class V >
+    string BtreeBucket<V>::bucketSummary() const {
         stringstream ss;
         ss << "  Bucket info:" << endl;
-        ss << "    n: " << n << endl;
-        ss << "    parent: " << parent.toString() << endl;
-        ss << "    nextChild: " << parent.toString() << endl;
-        ss << "    flags:" << flags << endl;
-        ss << "    emptySize: " << emptySize << " topSize: " << topSize << endl;
+        ss << "    n: " << this->n << endl;
+        ss << "    parent: " << this->parent.toString() << endl;
+        ss << "    nextChild: " << this->parent.toString() << endl;
+        ss << "    flags:" << this->flags << endl;
+        ss << "    emptySize: " << this->emptySize << " topSize: " << this->topSize << endl;
         return ss.str();
     }
 
-    int BucketBasics::Size() const {
-        assert( _wasSize == BucketSize );
-        return BucketSize;
+    template< class V >
+    int BucketBasics<V>::Size() const {
+        return V::BucketSize;
     }
 
-    void BucketBasics::_shape(int level, stringstream& ss) const {
+    template< class V >
+    void BucketBasics<V>::_shape(int level, stringstream& ss) const {
         for ( int i = 0; i < level; i++ ) ss << ' ';
         ss << "*\n";
-        for ( int i = 0; i < n; i++ )
-            if ( !k(i).prevChildBucket.isNull() )
-                k(i).prevChildBucket.btree()->_shape(level+1,ss);
-        if ( !nextChild.isNull() )
-            nextChild.btree()->_shape(level+1,ss);
+        for ( int i = 0; i < this->n; i++ ) {
+            if ( !k(i).prevChildBucket.isNull() ) {
+                DiskLoc ll = k(i).prevChildBucket;
+                ll.btree<V>()->_shape(level+1,ss);
+            }
+        }
+        if ( !this->nextChild.isNull() ) {
+            DiskLoc ll = this->nextChild;
+            ll.btree<V>()->_shape(level+1,ss);
+        }
     }
 
     int bt_fv=0;
     int bt_dmp=0;
 
-    void BtreeBucket::dumpTree(const DiskLoc &thisLoc, const BSONObj &order) const {
+    template< class V >
+    void BtreeBucket<V>::dumpTree(const DiskLoc &thisLoc, const BSONObj &order) const {
         bt_dmp=1;
         fullValidate(thisLoc, order);
         bt_dmp=0;
     }
 
-    long long BtreeBucket::fullValidate(const DiskLoc& thisLoc, const BSONObj &order, long long *unusedCount, bool strict) const {
+    template< class V >
+    long long BtreeBucket<V>::fullValidate(const DiskLoc& thisLoc, const BSONObj &order, long long *unusedCount, bool strict) const {
         {
             bool f = false;
             assert( f = true );
@@ -136,7 +150,7 @@ namespace mongo {
         }
 
         killCurrentOp.checkForInterrupt();
-        assertValid(order, true);
+        this->assertValid(order, true);
 
         if ( bt_dmp ) {
             out() << thisLoc.toString() << ' ';
@@ -146,8 +160,8 @@ namespace mongo {
 
         // keycount
         long long kc = 0;
 
-        for ( int i = 0; i < n; i++ ) {
-            const _KeyNode& kn = k(i);
+        for ( int i = 0; i < this->n; i++ ) {
+            const _KeyNode& kn = this->k(i);
 
             if ( kn.isUsed() ) {
                 kc++;
@@ -159,7 +173,7 @@ namespace mongo {
             }
             if ( !kn.prevChildBucket.isNull() ) {
                 DiskLoc left = kn.prevChildBucket;
-                const BtreeBucket *b = left.btree();
+                const BtreeBucket *b = left.btree<V>();
                 if ( strict ) {
                     assert( b->parent == thisLoc );
                 }
@@ -169,15 +183,16 @@ namespace mongo {
                 kc += b->fullValidate(kn.prevChildBucket, order, unusedCount, strict);
             }
         }
-        if ( !nextChild.isNull() ) {
-            const BtreeBucket *b = nextChild.btree();
+        if ( !this->nextChild.isNull() ) {
+            DiskLoc ll = this->nextChild;
+            const BtreeBucket *b = ll.btree<V>();
             if ( strict ) {
                 assert( b->parent == thisLoc );
             }
             else {
                 wassert( b->parent == thisLoc );
             }
-            kc += b->fullValidate(nextChild, order, unusedCount, strict);
+            kc += b->fullValidate(this->nextChild, order, unusedCount, strict);
         }
 
         return kc;
     }
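Most of the churn in these hunks (and the rest of the file) is one mechanical fix: once BucketBasics/BtreeBucket become templates over the bucket-format parameter V, members inherited from a dependent base are no longer found by unqualified lookup, so every n, parent, nextChild access gains a this->. A minimal standalone illustration (toy names, not the mongo types):

    // members of a dependent base must be reached via this-> (or a
    // using-declaration) inside a template; unqualified 'n' will not compile
    template< class V >
    struct BucketData { int n; };

    template< class V >
    struct Bucket : BucketData<V> {
        int keyCount() const {
            // return n;       // error: 'n' was not declared in this scope
            return this->n;    // ok: lookup deferred until instantiation
        }
    };

    int main() {
        Bucket<int> b;
        b.n = 3;
        return b.keyCount() == 3 ? 0 : 1;
    }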
@@ -185,12 +200,13 @@ namespace mongo {
 
     int nDumped = 0;
 
-    void BucketBasics::assertValid(const Ordering &order, bool force) const {
+    template< class V >
+    void BucketBasics<V>::assertValid(const Ordering &order, bool force) const {
         if ( !debug && !force )
             return;
-        wassert( n >= 0 && n < Size() );
-        wassert( emptySize >= 0 && emptySize < BucketSize );
-        wassert( topSize >= n && topSize <= BucketSize );
+        wassert( this->n >= 0 && this->n < Size() );
+        wassert( this->emptySize >= 0 && this->emptySize < V::BucketSize );
+        wassert( this->topSize >= this->n && this->topSize <= V::BucketSize );
 
         // this is very slow so don't do often
         {
@@ -201,17 +217,17 @@ namespace mongo {
 
         DEV {
             // slow:
-            for ( int i = 0; i < n-1; i++ ) {
-                BSONObj k1 = keyNode(i).key;
-                BSONObj k2 = keyNode(i+1).key;
+            for ( int i = 0; i < this->n-1; i++ ) {
+                Key k1 = keyNode(i).key;
+                Key k2 = keyNode(i+1).key;
                 int z = k1.woCompare(k2, order); //OK
                 if ( z > 0 ) {
                     out() << "ERROR: btree key order corrupt.  Keys:" << endl;
                     if ( ++nDumped < 5 ) {
-                        for ( int j = 0; j < n; j++ ) {
+                        for ( int j = 0; j < this->n; j++ ) {
                             out() << "  " << keyNode(j).key.toString() << endl;
                         }
-                        ((BtreeBucket *) this)->dump();
+                        ((BtreeBucket<V> *) this)->dump();
                     }
                     wassert(false);
                     break;
@@ -228,15 +244,15 @@ namespace mongo {
         }
         else {
             //faster:
-            if ( n > 1 ) {
-                BSONObj k1 = keyNode(0).key;
-                BSONObj k2 = keyNode(n-1).key;
+            if ( this->n > 1 ) {
+                Key k1 = keyNode(0).key;
+                Key k2 = keyNode(this->n-1).key;
                 int z = k1.woCompare(k2, order);
                 //wassert( z <= 0 );
                 if ( z > 0 ) {
                     problem() << "btree keys out of order" << '\n';
                     ONCE {
-                        ((BtreeBucket *) this)->dump();
+                        ((BtreeBucket<V> *) this)->dump();
                     }
                     assert(false);
                 }
@@ -244,54 +260,59 @@ namespace mongo {
             }
         }
     }
 
-    inline void BucketBasics::markUnused(int keypos) {
-        assert( keypos >= 0 && keypos < n );
+    template< class V >
+    inline void BucketBasics<V>::markUnused(int keypos) {
+        assert( keypos >= 0 && keypos < this->n );
         k(keypos).setUnused();
     }
 
-    inline int BucketBasics::totalDataSize() const {
-        return (int) (Size() - (data-(char*)this));
+    template< class V >
+    inline int BucketBasics<V>::totalDataSize() const {
+        return (int) (Size() - (this->data-(char*)this));
     }
 
-    void BucketBasics::init() {
-        parent.Null();
-        nextChild.Null();
-        _wasSize = BucketSize;
-        _reserved1 = 0;
-        flags = Packed;
-        n = 0;
-        emptySize = totalDataSize();
-        topSize = 0;
-        reserved = 0;
+    template< class V >
+    void BucketBasics<V>::init() {
+        this->_init();
+        this->parent.Null();
+        this->nextChild.Null();
+        this->flags = Packed;
+        this->n = 0;
+        this->emptySize = totalDataSize();
+        this->topSize = 0;
     }
 
     /** see _alloc */
-    inline void BucketBasics::_unalloc(int bytes) {
-        topSize -= bytes;
-        emptySize += bytes;
+    template< class V >
+    inline void BucketBasics<V>::_unalloc(int bytes) {
+        this->topSize -= bytes;
+        this->emptySize += bytes;
     }
 
     /**
      * we allocate space from the end of the buffer for data.
      * the keynodes grow from the front.
      */
-    inline int BucketBasics::_alloc(int bytes) {
-        topSize += bytes;
-        emptySize -= bytes;
-        int ofs = totalDataSize() - topSize;
+    template< class V >
+    inline int BucketBasics<V>::_alloc(int bytes) {
+        this->topSize += bytes;
+        this->emptySize -= bytes;
+        dassert( this->emptySize >= 0 );
+        int ofs = totalDataSize() - this->topSize;
         assert( ofs > 0 );
         return ofs;
     }
 
-    void BucketBasics::_delKeyAtPos(int keypos, bool mayEmpty) {
+    template< class V >
+    void BucketBasics<V>::_delKeyAtPos(int keypos, bool mayEmpty) {
         // TODO This should be keypos < n
-        assert( keypos >= 0 && keypos <= n );
+        assert( keypos >= 0 && keypos <= this->n );
         assert( childForPos(keypos).isNull() );
         // TODO audit cases where nextChild is null
-        assert( ( mayEmpty && n > 0 ) || n > 1 || nextChild.isNull() );
-        emptySize += sizeof(_KeyNode);
-        n--;
-        for ( int j = keypos; j < n; j++ )
+        assert( ( mayEmpty && this->n > 0 ) || this->n > 1 || this->nextChild.isNull() );
+        this->emptySize += sizeof(_KeyNode);
+        this->n--;
+        for ( int j = keypos; j < this->n; j++ )
             k(j) = k(j+1);
         setNotPacked();
     }
@@ -300,41 +321,53 @@ namespace mongo {
 
     /**
      * pull rightmost key from the bucket.  this version requires its right child to be null so it
     * does not bother returning that value.
     */
-    void BucketBasics::popBack(DiskLoc& recLoc, BSONObj& key) {
-        massert( 10282 ,  "n==0 in btree popBack()", n > 0 );
-        assert( k(n-1).isUsed() ); // no unused skipping in this function at this point - btreebuilder doesn't require that
-        KeyNode kn = keyNode(n-1);
+    template< class V >
+    void BucketBasics<V>::popBack(DiskLoc& recLoc, Key& key) {
+        massert( 10282 ,  "n==0 in btree popBack()", this->n > 0 );
+        assert( k(this->n-1).isUsed() ); // no unused skipping in this function at this point - btreebuilder doesn't require that
+        KeyNode kn = keyNode(this->n-1);
         recLoc = kn.recordLoc;
         key = kn.key;
-        int keysize = kn.key.objsize();
+        int keysize = kn.key.dataSize();
 
-        massert( 10283 , "rchild not null in btree popBack()", nextChild.isNull());
+        massert( 10283 , "rchild not null in btree popBack()", this->nextChild.isNull());
 
         // weirdly, we also put the rightmost down pointer in nextchild, even when bucket isn't full.
-        nextChild = kn.prevChildBucket;
+        this->nextChild = kn.prevChildBucket;
 
-        n--;
+        this->n--;
         // This is risky because the key we are returning points to this unalloc'ed memory,
         // and we are assuming that the last key points to the last allocated
         // bson region.
-        emptySize += sizeof(_KeyNode);
+        this->emptySize += sizeof(_KeyNode);
         _unalloc(keysize);
     }
 
     /** add a key.  must be > all existing.  be careful to set next ptr right. */
-    bool BucketBasics::_pushBack(const DiskLoc recordLoc, const BSONObj& key, const Ordering &order, const DiskLoc prevChild) {
-        int bytesNeeded = key.objsize() + sizeof(_KeyNode);
-        if ( bytesNeeded > emptySize )
+    template< class V >
+    bool BucketBasics<V>::_pushBack(const DiskLoc recordLoc, const Key& key, const Ordering &order, const DiskLoc prevChild) {
+        int bytesNeeded = key.dataSize() + sizeof(_KeyNode);
+        if ( bytesNeeded > this->emptySize )
             return false;
-        assert( bytesNeeded <= emptySize );
-        assert( n == 0 || keyNode(n-1).key.woCompare(key, order) <= 0 );
-        emptySize -= sizeof(_KeyNode);
-        _KeyNode& kn = k(n++);
+        assert( bytesNeeded <= this->emptySize );
+        if( this->n ) {
+            const KeyNode klast = keyNode(this->n-1);
+            if( klast.key.woCompare(key, order) > 0 ) {
+                log() << "btree bucket corrupt? consider reindexing or running validate command" << endl;
+                //cout << keyNode(n-1).key.toString() << endl;
+                //cout << key.toString() << endl;
+                assert(false);
+            }
+        }
+        this->emptySize -= sizeof(_KeyNode);
+        _KeyNode& kn = k(this->n++);
         kn.prevChildBucket = prevChild;
         kn.recordLoc = recordLoc;
-        kn.setKeyDataOfs( (short) _alloc(key.objsize()) );
-        char *p = dataAt(kn.keyDataOfs());
-        memcpy(p, key.objdata(), key.objsize());
+        kn.setKeyDataOfs( (short) _alloc(key.dataSize()) );
+        short ofs = kn.keyDataOfs();
+        char *p = dataAt(ofs);
+        memcpy(p, key.data(), key.dataSize());
+
         return true;
     }
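The _alloc/_unalloc comment above describes the bucket's dual-ended layout: fixed-width _KeyNode entries grow from the front while variable-length key data is carved from the back, with emptySize tracking the gap between them. A toy model of just that bookkeeping (illustrative, not the mongo structures):

    #include <cassert>

    struct ToyBucket {
        enum { BufSize = 8192 };
        int n, topSize, emptySize;     // counts mirror BucketBasics' fields
        char data[BufSize];

        ToyBucket() : n(0), topSize(0), emptySize(BufSize) {}

        // reserve key-data space at the back; returns its offset
        int alloc(int bytes) {
            topSize   += bytes;
            emptySize -= bytes;
            assert( emptySize >= 0 );
            return BufSize - topSize;  // data region grows downward
        }
    };

    int main() {
        ToyBucket b;
        int ofs = b.alloc(100);
        assert( ofs == ToyBucket::BufSize - 100 );
        return 0;
    }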
@@ -346,19 +379,20 @@ namespace mongo {
 
     /** insert a key in a bucket with no complexity -- no splits required
         @return false if a split is required.
     */
-    bool BucketBasics::basicInsert(const DiskLoc thisLoc, int &keypos, const DiskLoc recordLoc, const BSONObj& key, const Ordering &order) const {
-        assert( keypos >= 0 && keypos <= n );
-        int bytesNeeded = key.objsize() + sizeof(_KeyNode);
-        if ( bytesNeeded > emptySize ) {
+    template< class V >
+    bool BucketBasics<V>::basicInsert(const DiskLoc thisLoc, int &keypos, const DiskLoc recordLoc, const Key& key, const Ordering &order) const {
+        assert( keypos >= 0 && keypos <= this->n );
+        int bytesNeeded = key.dataSize() + sizeof(_KeyNode);
+        if ( bytesNeeded > this->emptySize ) {
             _pack(thisLoc, order, keypos);
-            if ( bytesNeeded > emptySize )
+            if ( bytesNeeded > this->emptySize )
                 return false;
         }
 
         BucketBasics *b;
         {
             const char *p = (const char *) &k(keypos);
-            const char *q = (const char *) &k(n+1);
+            const char *q = (const char *) &k(this->n+1);
             // declare that we will write to [k(keypos),k(n)]
             // todo: this writes a medium amount to the journal.  we may want to add a verb "shift" to the redo log so
             //       we can log a very small amount.
@@ -368,11 +402,11 @@ namespace mongo {
             //  1 4 9
             // ->
             //  1 4 _ 9
-            for ( int j = n; j > keypos; j-- ) // make room
+            for ( int j = this->n; j > keypos; j-- ) // make room
                 b->k(j) = b->k(j-1);
         }
 
-        getDur().declareWriteIntent(&b->emptySize, 12); // [b->emptySize..b->n] is 12 bytes and we are going to write those
+        getDur().declareWriteIntent(&b->emptySize, sizeof(this->emptySize)+sizeof(this->topSize)+sizeof(this->n)); // [b->emptySize..b->n] is 12 bytes and we are going to write those
         b->emptySize -= sizeof(_KeyNode);
         b->n++;
 
@@ -380,10 +414,10 @@ namespace mongo {
         _KeyNode& kn = b->k(keypos);
         kn.prevChildBucket.Null();
         kn.recordLoc = recordLoc;
-        kn.setKeyDataOfs((short) b->_alloc(key.objsize()) );
+        kn.setKeyDataOfs((short) b->_alloc(key.dataSize()) );
         char *p = b->dataAt(kn.keyDataOfs());
-        getDur().declareWriteIntent(p, key.objsize());
-        memcpy(p, key.objdata(), key.objsize());
+        getDur().declareWriteIntent(p, key.dataSize());
+        memcpy(p, key.data(), key.dataSize());
         return true;
     }
 
     /**
     * With this implementation, refPos == 0 disregards effect of refPos.
     * index > 0 prevents creation of an empty bucket.
     */
-    bool BucketBasics::mayDropKey( int index, int refPos ) const {
+    template< class V >
+    bool BucketBasics<V>::mayDropKey( int index, int refPos ) const {
         return index > 0 && ( index != refPos ) && k( index ).isUnused() && k( index ).prevChildBucket.isNull();
     }
 
-    int BucketBasics::packedDataSize( int refPos ) const {
-        if ( flags & Packed ) {
-            return BucketSize - emptySize - headerSize();
+    template< class V >
+    int BucketBasics<V>::packedDataSize( int refPos ) const {
+        if ( this->flags & Packed ) {
+            return V::BucketSize - this->emptySize - headerSize();
         }
         int size = 0;
-        for( int j = 0; j < n; ++j ) {
+        for( int j = 0; j < this->n; ++j ) {
             if ( mayDropKey( j, refPos ) ) {
                 continue;
             }
-            size += keyNode( j ).key.objsize() + sizeof( _KeyNode );
+            size += keyNode( j ).key.dataSize() + sizeof( _KeyNode );
         }
         return size;
     }
@@ -413,8 +449,9 @@ namespace mongo {
     /**
     * when we delete things we just leave empty space until the node is
     * full and then we repack it.
     */
-    void BucketBasics::_pack(const DiskLoc thisLoc, const Ordering &order, int &refPos) const {
-        if ( flags & Packed )
+    template< class V >
+    void BucketBasics<V>::_pack(const DiskLoc thisLoc, const Ordering &order, int &refPos) const {
+        if ( this->flags & Packed )
             return;
 
         VERIFYTHISLOC
@@ -424,22 +461,23 @@ namespace mongo {
           declaration anyway within the group commit interval, in which case we would just be adding
          code and complexity without benefit.
         */
-        thisLoc.btreemod()->_packReadyForMod(order, refPos);
+        thisLoc.btreemod<V>()->_packReadyForMod(order, refPos);
     }
 
     /** version when write intent already declared */
-    void BucketBasics::_packReadyForMod( const Ordering &order, int &refPos ) {
+    template< class V >
+    void BucketBasics<V>::_packReadyForMod( const Ordering &order, int &refPos ) {
         assertWritable();
 
-        if ( flags & Packed )
+        if ( this->flags & Packed )
             return;
 
         int tdz = totalDataSize();
-        char temp[BucketSize];
+        char temp[V::BucketSize];
         int ofs = tdz;
-        topSize = 0;
+        this->topSize = 0;
         int i = 0;
-        for ( int j = 0; j < n; j++ ) {
+        for ( int j = 0; j < this->n; j++ ) {
             if( mayDropKey( j, refPos ) ) {
                 continue; // key is unused and has no children - drop it
             }
@@ -450,36 +488,37 @@ namespace mongo {
                 k( i ) = k( j );
             }
             short ofsold = k(i).keyDataOfs();
-            int sz = keyNode(i).key.objsize();
+            int sz = keyNode(i).key.dataSize();
             ofs -= sz;
-            topSize += sz;
+            this->topSize += sz;
             memcpy(temp+ofs, dataAt(ofsold), sz);
             k(i).setKeyDataOfsSavingUse( ofs );
             ++i;
         }
-        if ( refPos == n ) {
+        if ( refPos == this->n ) {
             refPos = i;
         }
-        n = i;
+        this->n = i;
         int dataUsed = tdz - ofs;
-        memcpy(data + ofs, temp + ofs, dataUsed);
+        memcpy(this->data + ofs, temp + ofs, dataUsed);
 
         // assertWritable();
         // TEMP TEST
         getDur().declareWriteIntent(this, sizeof(*this));
 
-        emptySize = tdz - dataUsed - n * sizeof(_KeyNode);
-        assert( emptySize >= 0 );
+        this->emptySize = tdz - dataUsed - this->n * sizeof(_KeyNode);
+        assert( this->emptySize >= 0 );
 
         setPacked();
 
         assertValid( order );
     }
 
-    inline void BucketBasics::truncateTo(int N, const Ordering &order, int &refPos) {
+    template< class V >
+    inline void BucketBasics<V>::truncateTo(int N, const Ordering &order, int &refPos) {
         dbMutex.assertWriteLocked();
         assertWritable();
 
-        n = N;
+        this->n = N;
         setNotPacked();
         _packReadyForMod( order, refPos );
     }
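_pack() above implements lazy compaction: deletions merely leave dead space (or unused-marked keys) behind, and a single repacking pass runs only when an insert no longer fits. Roughly, in toy in-memory form (simplified; not the on-disk code):

    #include <vector>
    #include <string>

    struct Slot { bool used; std::string key; };

    void lazyErase(std::vector<Slot>& node, size_t i) {
        node[i].used = false;               // cheap delete: just leave a hole
    }

    void repack(std::vector<Slot>& node) {  // run only when an insert won't fit
        size_t w = 0;
        for (size_t r = 0; r < node.size(); ++r)
            if (node[r].used)
                node[w++] = node[r];        // compact live entries to the front
        node.resize(w);
    }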
@@ -501,16 +540,17 @@ namespace mongo {
     *
     * This function is expected to be called on a packed bucket.
     */
-    int BucketBasics::splitPos( int keypos ) const {
-        assert( n > 2 );
+    template< class V >
+    int BucketBasics<V>::splitPos( int keypos ) const {
+        assert( this->n > 2 );
         int split = 0;
         int rightSize = 0;
         // when splitting a btree node, if the new key is greater than all the other keys, we should not do an even split, but a 90/10 split.
         // see SERVER-983
         // TODO I think we only want to do the 90% split on the rhs node of the tree.
-        int rightSizeLimit = ( topSize + sizeof( _KeyNode ) * n ) / ( keypos == n ? 10 : 2 );
-        for( int i = n - 1; i > -1; --i ) {
-            rightSize += keyNode( i ).key.objsize() + sizeof( _KeyNode );
+        int rightSizeLimit = ( this->topSize + sizeof( _KeyNode ) * this->n ) / ( keypos == this->n ? 10 : 2 );
+        for( int i = this->n - 1; i > -1; --i ) {
+            rightSize += keyNode( i ).key.dataSize() + sizeof( _KeyNode );
             if ( rightSize > rightSizeLimit ) {
                 split = i;
                 break;
@@ -520,37 +560,40 @@ namespace mongo {
         if ( split < 1 ) {
             split = 1;
         }
-        else if ( split > n - 2 ) {
-            split = n - 2;
+        else if ( split > this->n - 2 ) {
+            split = this->n - 2;
         }
 
         return split;
     }
 
-    void BucketBasics::reserveKeysFront( int nAdd ) {
-        assert( emptySize >= int( sizeof( _KeyNode ) * nAdd ) );
-        emptySize -= sizeof( _KeyNode ) * nAdd;
-        for( int i = n - 1; i > -1; --i ) {
+    template< class V >
+    void BucketBasics<V>::reserveKeysFront( int nAdd ) {
+        assert( this->emptySize >= int( sizeof( _KeyNode ) * nAdd ) );
+        this->emptySize -= sizeof( _KeyNode ) * nAdd;
+        for( int i = this->n - 1; i > -1; --i ) {
             k( i + nAdd ) = k( i );
         }
-        n += nAdd;
+        this->n += nAdd;
     }
 
-    void BucketBasics::setKey( int i, const DiskLoc recordLoc, const BSONObj &key, const DiskLoc prevChildBucket ) {
+    template< class V >
+    void BucketBasics<V>::setKey( int i, const DiskLoc recordLoc, const Key &key, const DiskLoc prevChildBucket ) {
         _KeyNode &kn = k( i );
         kn.recordLoc = recordLoc;
         kn.prevChildBucket = prevChildBucket;
-        short ofs = (short) _alloc( key.objsize() );
+        short ofs = (short) _alloc( key.dataSize() );
         kn.setKeyDataOfs( ofs );
         char *p = dataAt( ofs );
-        memcpy( p, key.objdata(), key.objsize() );
+        memcpy( p, key.data(), key.dataSize() );
     }
 
-    void BucketBasics::dropFront( int nDrop, const Ordering &order, int &refpos ) {
-        for( int i = nDrop; i < n; ++i ) {
+    template< class V >
+    void BucketBasics<V>::dropFront( int nDrop, const Ordering &order, int &refpos ) {
+        for( int i = nDrop; i < this->n; ++i ) {
             k( i - nDrop ) = k( i );
         }
-        n -= nDrop;
+        this->n -= nDrop;
         setNotPacked();
         _packReadyForMod( order, refpos );
     }
@@ -558,10 +601,11 @@ namespace mongo {
 
     /* - BtreeBucket --------------------------------------------------- */
 
     /** @return largest key in the subtree. */
-    void BtreeBucket::findLargestKey(const DiskLoc& thisLoc, DiskLoc& largestLoc, int& largestKey) {
+    template< class V >
+    void BtreeBucket<V>::findLargestKey(const DiskLoc& thisLoc, DiskLoc& largestLoc, int& largestKey) {
         DiskLoc loc = thisLoc;
         while ( 1 ) {
-            const BtreeBucket *b = loc.btree();
+            const BtreeBucket *b = loc.btree<V>();
             if ( !b->nextChild.isNull() ) {
                 loc = b->nextChild;
                 continue;
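The 90/10 rule in splitPos() (SERVER-983) is worth restating: on a right-edge insert (keypos == n, typical of ascending keys), giving the new right bucket only about a tenth of the data keeps such workloads from leaving every left-hand bucket half empty. A simplified rendition of the size walk (illustrative only; key sizes as plain ints):

    #include <vector>

    int pickSplit(const std::vector<int>& keySizes, int keypos) {
        int n = (int)keySizes.size();
        int total = 0;
        for (int s : keySizes) total += s;
        // even split normally; only ~10% goes right when appending at the far end
        int rightSizeLimit = total / (keypos == n ? 10 : 2);
        int rightSize = 0;
        for (int i = n - 1; i >= 0; --i) {
            rightSize += keySizes[i];
            if (rightSize > rightSizeLimit)
                return i < 1 ? 1 : (i > n - 2 ? n - 2 : i);  // clamp like splitPos
        }
        return n / 2;
    }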
@@ -580,8 +624,16 @@ namespace mongo {
     * not have more keys than an unsigned variable has bits.  The same
     * assumption is used in the implementation below with respect to the 'mask'
     * variable.
+     *
+     * @param l a regular bsonobj
+     * @param rBegin composed partly of an existing bsonobj, and the remaining keys are taken from a vector of elements that frequently changes
+     *
+     * see
+     *   jstests/index_check6.js
+     *   https://jira.mongodb.org/browse/SERVER-371
     */
-    int BtreeBucket::customBSONCmp( const BSONObj &l, const BSONObj &rBegin, int rBeginLen, bool rSup, const vector< const BSONElement * > &rEnd, const vector< bool > &rEndInclusive, const Ordering &o, int direction ) {
+    template< class V >
+    int BtreeBucket<V>::customBSONCmp( const BSONObj &l, const BSONObj &rBegin, int rBeginLen, bool rSup, const vector< const BSONElement * > &rEnd, const vector< bool > &rEndInclusive, const Ordering &o, int direction ) {
         BSONObjIterator ll( l );
         BSONObjIterator rr( rBegin );
         vector< const BSONElement * >::const_iterator rr2 = rEnd.begin();
@@ -619,27 +671,29 @@ namespace mongo {
         return 0;
     }
 
-    bool BtreeBucket::exists(const IndexDetails& idx, const DiskLoc &thisLoc, const BSONObj& key, const Ordering& order) const {
-        int pos;
-        bool found;
-        DiskLoc b = locate(idx, thisLoc, key, order, pos, found, minDiskLoc);
+    template< class V >
+    bool BtreeBucket<V>::exists(const IndexDetails& idx, const DiskLoc &thisLoc, const Key& key, const Ordering& order) const {
+        int pos;
+        bool found;
+        DiskLoc b = locate(idx, thisLoc, key, order, pos, found, minDiskLoc);
 
-        // skip unused keys
-        while ( 1 ) {
-            if( b.isNull() )
-                break;
-            const BtreeBucket *bucket = b.btree();
-            const _KeyNode& kn = bucket->k(pos);
-            if ( kn.isUsed() )
-                return bucket->keyAt(pos).woEqual(key);
-            b = bucket->advance(b, pos, 1, "BtreeBucket::exists");
+        // skip unused keys
+        while ( 1 ) {
+            if( b.isNull() )
+                break;
+            const BtreeBucket *bucket = b.btree<V>();
+            const _KeyNode& kn = bucket->k(pos);
+            if ( kn.isUsed() )
+                return bucket->keyAt(pos).woEqual(key);
+            b = bucket->advance(b, pos, 1, "BtreeBucket<V>::exists");
         }
         return false;
     }
 
-    bool BtreeBucket::wouldCreateDup(
+    template< class V >
+    bool BtreeBucket<V>::wouldCreateDup(
         const IndexDetails& idx, const DiskLoc &thisLoc,
-        const BSONObj& key, const Ordering& order,
+        const Key& key, const Ordering& order,
         const DiskLoc &self) const {
         int pos;
         bool found;
@@ -647,24 +701,25 @@ namespace mongo {
 
         while ( !b.isNull() ) {
             // we skip unused keys
-            const BtreeBucket *bucket = b.btree();
+            const BtreeBucket *bucket = b.btree<V>();
             const _KeyNode& kn = bucket->k(pos);
             if ( kn.isUsed() ) {
                 if( bucket->keyAt(pos).woEqual(key) )
                     return kn.recordLoc != self;
                 break;
             }
-            b = bucket->advance(b, pos, 1, "BtreeBucket::dupCheck");
+            b = bucket->advance(b, pos, 1, "BtreeBucket<V>::dupCheck");
         }
 
         return false;
     }
 
-    string BtreeBucket::dupKeyError( const IndexDetails& idx , const BSONObj& key ) {
+    template< class V >
+    string BtreeBucket<V>::dupKeyError( const IndexDetails& idx , const Key& key ) {
         stringstream ss;
         ss << "E11000 duplicate key error ";
         ss << "index: " << idx.indexNamespace() << "  ";
-        ss << "dup key: " << key;
+        ss << "dup key: " << key.toString();
         return ss.str();
     }
@@ -682,30 +737,18 @@ namespace mongo {
     * returns n if it goes after the last existing key.
     * note result might be an Unused location!
     */
-    char foo;
-    bool BtreeBucket::find(const IndexDetails& idx, const BSONObj& key, const DiskLoc &recordLoc, const Ordering &order, int& pos, bool assertIfDup) const {
-#if defined(_EXPERIMENT1)
-        {
-            char *z = (char *) this;
-            int i = 0;
-            while( 1 ) {
-                i += 4096;
-                if( i >= BucketSize )
-                    break;
-                foo += z[i];
-            }
-        }
-#endif
-
+    template< class V >
+    bool BtreeBucket<V>::find(const IndexDetails& idx, const Key& key, const DiskLoc &recordLoc,
+                              const Ordering &order, int& pos, bool assertIfDup) const {
         globalIndexCounters.btree( (char*)this );
 
         // binary search for this key
         bool dupsChecked = false;
         int l=0;
-        int h=n-1;
+        int h=this->n-1;
         while ( l <= h ) {
             int m = (l+h)/2;
-            KeyNode M = keyNode(m);
+            KeyNode M = this->keyNode(m);
             int x = key.woCompare(M.key, order);
             if ( x == 0 ) {
                 if( assertIfDup ) {
@@ -715,8 +758,8 @@ namespace mongo {
                     // coding effort in here to make this particularly fast
                     if( !dupsChecked ) {
                         dupsChecked = true;
-                        if( idx.head.btree()->exists(idx, idx.head, key, order) ) {
-                            if( idx.head.btree()->wouldCreateDup(idx, idx.head, key, order, recordLoc) )
+                        if( idx.head.btree<V>()->exists(idx, idx.head, key, order) ) {
+                            if( idx.head.btree<V>()->wouldCreateDup(idx, idx.head, key, order, recordLoc) )
                                 uasserted( ASSERT_ID_DUPKEY , dupKeyError( idx , key ) );
                             else
                                 alreadyInIndex();
@@ -747,8 +790,8 @@ namespace mongo {
         }
         // not found
         pos = l;
-        if ( pos != n ) {
-            BSONObj keyatpos = keyNode(pos).key;
+        if ( pos != this->n ) {
+            Key keyatpos = keyNode(pos).key;
             wassert( key.woCompare(keyatpos, order) <= 0 );
             if ( pos > 0 ) {
                 wassert( keyNode(pos-1).key.woCompare(key, order) <= 0 );
@@ -758,38 +801,42 @@ namespace mongo {
 
        return false;
     }
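Within a single bucket, find() above is a plain binary search that either reports an exact hit or leaves pos at the slot whose left child should be descended. Stripped of BSON, duplicate checking, and durability concerns, the shape is (generic sketch, ints for keys):

    #include <vector>

    bool bucketFind(const std::vector<int>& keys, int key, int& pos) {
        int l = 0, h = (int)keys.size() - 1;
        while (l <= h) {
            int m = (l + h) / 2;
            if (keys[m] == key) { pos = m; return true; }
            if (keys[m] < key) l = m + 1;
            else               h = m - 1;
        }
        pos = l;        // not found: index where the key would be inserted
        return false;
    }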
-    void BtreeBucket::delBucket(const DiskLoc thisLoc, const IndexDetails& id) {
+    template< class V >
+    void BtreeBucket<V>::delBucket(const DiskLoc thisLoc, const IndexDetails& id) {
         ClientCursor::informAboutToDeleteBucket(thisLoc); // slow...
         assert( !isHead() );
 
-        const BtreeBucket *p = parent.btree();
+        DiskLoc ll = this->parent;
+        const BtreeBucket *p = ll.btree<V>();
         int parentIdx = indexInParent( thisLoc );
         p->childForPos( parentIdx ).writing().Null();
         deallocBucket( thisLoc, id );
     }
 
-    void BtreeBucket::deallocBucket(const DiskLoc thisLoc, const IndexDetails &id) {
+    template< class V >
+    void BtreeBucket<V>::deallocBucket(const DiskLoc thisLoc, const IndexDetails &id) {
 #if 0
         // as a temporary defensive measure, we zap the whole bucket, AND don't truly delete
         // it (meaning it is ineligible for reuse).
         memset(this, 0, Size());
#else
         // defensive:
-        n = -1;
-        parent.Null();
+        this->n = -1;
+        this->parent.Null();
         string ns = id.indexNamespace();
         theDataFileMgr._deleteRecord(nsdetails(ns.c_str()), ns.c_str(), thisLoc.rec(), thisLoc);
#endif
     }
 
     /** note: may delete the entire bucket!  this invalid upon return sometimes. */
-    void BtreeBucket::delKeyAtPos( const DiskLoc thisLoc, IndexDetails& id, int p, const Ordering &order) {
-        assert(n>0);
-        DiskLoc left = childForPos(p);
-
-        if ( n == 1 ) {
-            if ( left.isNull() && nextChild.isNull() ) {
-                _delKeyAtPos(p);
+    template< class V >
+    void BtreeBucket<V>::delKeyAtPos( const DiskLoc thisLoc, IndexDetails& id, int p, const Ordering &order) {
+        assert(this->n>0);
+        DiskLoc left = this->childForPos(p);
+
+        if ( this->n == 1 ) {
+            if ( left.isNull() && this->nextChild.isNull() ) {
+                this->_delKeyAtPos(p);
                 if ( isHead() ) {
                     // we don't delete the top bucket ever
                 }
@@ -808,7 +855,7 @@ namespace mongo {
         }
 
         if ( left.isNull() ) {
-            _delKeyAtPos(p);
+            this->_delKeyAtPos(p);
             mayBalanceWithNeighbors( thisLoc, id, order );
         }
         else {
@@ -838,9 +885,10 @@ namespace mongo {
     * k by k', preserving the key's unused marking.  This function is only
     * expected to mark a key as unused when handling a legacy btree.
     */
-    void BtreeBucket::deleteInternalKey( const DiskLoc thisLoc, int keypos, IndexDetails &id, const Ordering &order ) {
-        DiskLoc lchild = childForPos( keypos );
-        DiskLoc rchild = childForPos( keypos + 1 );
+    template< class V >
+    void BtreeBucket<V>::deleteInternalKey( const DiskLoc thisLoc, int keypos, IndexDetails &id, const Ordering &order ) {
+        DiskLoc lchild = this->childForPos( keypos );
+        DiskLoc rchild = this->childForPos( keypos + 1 );
         assert( !lchild.isNull() || !rchild.isNull() );
         int advanceDirection = lchild.isNull() ? 1 : -1;
         int advanceKeyOfs = keypos;
@@ -849,48 +897,55 @@ namespace mongo {
         // child in the proper direction and all descendants of thisLoc must be
         // nonempty because they are not the root.
 
-        if ( !advanceLoc.btree()->childForPos( advanceKeyOfs ).isNull() ||
-                !advanceLoc.btree()->childForPos( advanceKeyOfs + 1 ).isNull() ) {
+        if ( !advanceLoc.btree<V>()->childForPos( advanceKeyOfs ).isNull() ||
+                !advanceLoc.btree<V>()->childForPos( advanceKeyOfs + 1 ).isNull() ) {
             // only expected with legacy btrees, see note above
-            markUnused( keypos );
+            this->markUnused( keypos );
             return;
         }
 
-        KeyNode kn = advanceLoc.btree()->keyNode( advanceKeyOfs );
+        KeyNode kn = advanceLoc.btree<V>()->keyNode( advanceKeyOfs );
         // Because advanceLoc is a descendant of thisLoc, updating thisLoc will
         // not affect packing or keys of advanceLoc and kn will be stable
         // during the following setInternalKey()
-        setInternalKey( thisLoc, keypos, kn.recordLoc, kn.key, order, childForPos( keypos ), childForPos( keypos + 1 ), id );
-        advanceLoc.btreemod()->delKeyAtPos( advanceLoc, id, advanceKeyOfs, order );
+        setInternalKey( thisLoc, keypos, kn.recordLoc, kn.key, order, this->childForPos( keypos ), this->childForPos( keypos + 1 ), id );
+        advanceLoc.btreemod<V>()->delKeyAtPos( advanceLoc, id, advanceKeyOfs, order );
     }
 
-    void BtreeBucket::replaceWithNextChild( const DiskLoc thisLoc, IndexDetails &id ) {
-        assert( n == 0 && !nextChild.isNull() );
-        if ( parent.isNull() ) {
+#define BTREE(loc) (static_cast<DiskLoc>(loc).btree<V>())
+#define BTREEMOD(loc) (static_cast<DiskLoc>(loc).btreemod<V>())
 
+    template< class V >
+    void BtreeBucket<V>::replaceWithNextChild( const DiskLoc thisLoc, IndexDetails &id ) {
+        assert( this->n == 0 && !this->nextChild.isNull() );
+        if ( this->parent.isNull() ) {
             assert( id.head == thisLoc );
-            id.head.writing() = nextChild;
+            id.head.writing() = this->nextChild;
         }
         else {
-            parent.btree()->childForPos( indexInParent( thisLoc ) ).writing() = nextChild;
+            DiskLoc ll = this->parent;
+            ll.btree<V>()->childForPos( indexInParent( thisLoc ) ).writing() = this->nextChild;
         }
-        nextChild.btree()->parent.writing() = parent;
+        BTREE(this->nextChild)->parent.writing() = this->parent;
+        (static_cast<DiskLoc>(this->nextChild).btree<V>())->parent.writing() = this->parent;
         ClientCursor::informAboutToDeleteBucket( thisLoc );
         deallocBucket( thisLoc, id );
     }
 
-    bool BtreeBucket::canMergeChildren( const DiskLoc &thisLoc, int leftIndex ) const {
-        assert( leftIndex >= 0 && leftIndex < n );
-        DiskLoc leftNodeLoc = childForPos( leftIndex );
-        DiskLoc rightNodeLoc = childForPos( leftIndex + 1 );
+    template< class V >
+    bool BtreeBucket<V>::canMergeChildren( const DiskLoc &thisLoc, int leftIndex ) const {
+        assert( leftIndex >= 0 && leftIndex < this->n );
+        DiskLoc leftNodeLoc = this->childForPos( leftIndex );
+        DiskLoc rightNodeLoc = this->childForPos( leftIndex + 1 );
         if ( leftNodeLoc.isNull() || rightNodeLoc.isNull() ) {
             // TODO if this situation is possible in long term implementation, maybe we should compact somehow anyway
             return false;
         }
         int pos = 0;
         {
-            const BtreeBucket *l = leftNodeLoc.btree();
-            const BtreeBucket *r = rightNodeLoc.btree();
-            if ( ( headerSize() + l->packedDataSize( pos ) + r->packedDataSize( pos ) + keyNode( leftIndex ).key.objsize() + sizeof(_KeyNode) > unsigned( BucketSize ) ) ) {
+            const BtreeBucket *l = leftNodeLoc.btree<V>();
+            const BtreeBucket *r = rightNodeLoc.btree<V>();
+            if ( ( this->headerSize() + l->packedDataSize( pos ) + r->packedDataSize( pos ) + keyNode( leftIndex ).key.dataSize() + sizeof(_KeyNode) > unsigned( V::BucketSize ) ) ) {
                 return false;
             }
         }
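canMergeChildren() above is pure arithmetic: a merge is legal only if both packed children plus the separator key (and its _KeyNode slot) fit under one bucket's size. Restated with illustrative parameters (not the real constants):

    // back-of-envelope form of the canMergeChildren() feasibility test
    bool canMerge(int headerSize, int leftPacked, int rightPacked,
                  int separatorKeySize, int keyNodeSize, int bucketSize) {
        return headerSize + leftPacked + rightPacked
             + separatorKeySize + keyNodeSize <= bucketSize;
    }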
@@ -901,33 +956,34 @@ namespace mongo {
     /**
     * This implementation must respect the meaning and value of lowWaterMark.
     * Also see comments in splitPos().
     */
-    int BtreeBucket::rebalancedSeparatorPos( const DiskLoc &thisLoc, int leftIndex ) const {
+    template< class V >
+    int BtreeBucket<V>::rebalancedSeparatorPos( const DiskLoc &thisLoc, int leftIndex ) const {
         int split = -1;
         int rightSize = 0;
-        const BtreeBucket *l = childForPos( leftIndex ).btree();
-        const BtreeBucket *r = childForPos( leftIndex + 1 ).btree();
+        const BtreeBucket *l = BTREE(this->childForPos( leftIndex ));
+        const BtreeBucket *r = BTREE(this->childForPos( leftIndex + 1 ));
 
         int KNS = sizeof( _KeyNode );
-        int rightSizeLimit = ( l->topSize + l->n * KNS + keyNode( leftIndex ).key.objsize() + KNS + r->topSize + r->n * KNS ) / 2;
+        int rightSizeLimit = ( l->topSize + l->n * KNS + keyNode( leftIndex ).key.dataSize() + KNS + r->topSize + r->n * KNS ) / 2;
         // This constraint should be ensured by only calling this function
         // if we go below the low water mark.
-        assert( rightSizeLimit < BtreeBucket::bodySize() );
+        assert( rightSizeLimit < BtreeBucket<V>::bodySize() );
         for( int i = r->n - 1; i > -1; --i ) {
-            rightSize += r->keyNode( i ).key.objsize() + KNS;
+            rightSize += r->keyNode( i ).key.dataSize() + KNS;
             if ( rightSize > rightSizeLimit ) {
                 split = l->n + 1 + i;
                 break;
             }
         }
         if ( split == -1 ) {
-            rightSize += keyNode( leftIndex ).key.objsize() + KNS;
+            rightSize += keyNode( leftIndex ).key.dataSize() + KNS;
             if ( rightSize > rightSizeLimit ) {
                 split = l->n;
             }
         }
         if ( split == -1 ) {
             for( int i = l->n - 1; i > -1; --i ) {
-                rightSize += l->keyNode( i ).key.objsize() + KNS;
+                rightSize += l->keyNode( i ).key.dataSize() + KNS;
                 if ( rightSize > rightSizeLimit ) {
                     split = i;
                     break;
@@ -945,11 +1001,12 @@ namespace mongo {
 
         return split;
     }
 
-    void BtreeBucket::doMergeChildren( const DiskLoc thisLoc, int leftIndex, IndexDetails &id, const Ordering &order ) {
-        DiskLoc leftNodeLoc = childForPos( leftIndex );
-        DiskLoc rightNodeLoc = childForPos( leftIndex + 1 );
-        BtreeBucket *l = leftNodeLoc.btreemod();
-        BtreeBucket *r = rightNodeLoc.btreemod();
+    template< class V >
+    void BtreeBucket<V>::doMergeChildren( const DiskLoc thisLoc, int leftIndex, IndexDetails &id, const Ordering &order ) {
+        DiskLoc leftNodeLoc = this->childForPos( leftIndex );
+        DiskLoc rightNodeLoc = this->childForPos( leftIndex + 1 );
+        BtreeBucket *l = leftNodeLoc.btreemod<V>();
+        BtreeBucket *r = rightNodeLoc.btreemod<V>();
         int pos = 0;
         l->_packReadyForMod( order, pos );
         r->_packReadyForMod( order, pos ); // pack r in case there are droppable keys
@@ -968,10 +1025,10 @@ namespace mongo {
         l->nextChild = r->nextChild;
         l->fixParentPtrs( leftNodeLoc, oldLNum );
         r->delBucket( rightNodeLoc, id );
-        childForPos( leftIndex + 1 ) = leftNodeLoc;
-        childForPos( leftIndex ) = DiskLoc();
-        _delKeyAtPos( leftIndex, true );
-        if ( n == 0 ) {
+        this->childForPos( leftIndex + 1 ) = leftNodeLoc;
+        this->childForPos( leftIndex ) = DiskLoc();
+        this->_delKeyAtPos( leftIndex, true );
+        if ( this->n == 0 ) {
             // will trash this and thisLoc
             // TODO To ensure all leaves are of equal height, we should ensure
             // this is only called on the root.
@@ -983,9 +1040,10 @@ namespace mongo {
         }
     }
 
-    int BtreeBucket::indexInParent( const DiskLoc &thisLoc ) const {
-        assert( !parent.isNull() );
-        const BtreeBucket *p = parent.btree();
+    template< class V >
+    int BtreeBucket<V>::indexInParent( const DiskLoc &thisLoc ) const {
+        assert( !this->parent.isNull() );
+        const BtreeBucket *p = BTREE(this->parent);
         if ( p->nextChild == thisLoc ) {
             return p->n;
         }
@@ -999,23 +1057,25 @@ namespace mongo {
         out() << "ERROR: can't find ref to child bucket.\n";
         out() << "child: " << thisLoc << "\n";
         dump();
-        out() << "Parent: " << parent << "\n";
+        out() << "Parent: " << this->parent << "\n";
         p->dump();
         assert(false);
         return -1; // just to compile
     }
 
-    bool BtreeBucket::tryBalanceChildren( const DiskLoc thisLoc, int leftIndex, IndexDetails &id, const Ordering &order ) const {
+    template< class V >
+    bool BtreeBucket<V>::tryBalanceChildren( const DiskLoc thisLoc, int leftIndex, IndexDetails &id, const Ordering &order ) const {
         // If we can merge, then we must merge rather than balance to preserve
         // bucket utilization constraints.
         if ( canMergeChildren( thisLoc, leftIndex ) ) {
             return false;
         }
-        thisLoc.btreemod()->doBalanceChildren( thisLoc, leftIndex, id, order );
+        thisLoc.btreemod<V>()->doBalanceChildren( thisLoc, leftIndex, id, order );
         return true;
     }
 
-    void BtreeBucket::doBalanceLeftToRight( const DiskLoc thisLoc, int leftIndex, int split,
+    template< class V >
+    void BtreeBucket<V>::doBalanceLeftToRight( const DiskLoc thisLoc, int leftIndex, int split,
                                             BtreeBucket *l, const DiskLoc lchild,
                                             BtreeBucket *r, const DiskLoc rchild,
                                             IndexDetails &id, const Ordering &order ) {
@@ -1049,7 +1109,8 @@ namespace mongo {
         l->truncateTo( split, order, zeropos );
     }
 
-    void BtreeBucket::doBalanceRightToLeft( const DiskLoc thisLoc, int leftIndex, int split,
+    template< class V >
+    void BtreeBucket<V>::doBalanceRightToLeft( const DiskLoc thisLoc, int leftIndex, int split,
                                             BtreeBucket *l, const DiskLoc lchild,
                                             BtreeBucket *r, const DiskLoc rchild,
                                             IndexDetails &id, const Ordering &order ) {
@@ -1082,13 +1143,14 @@ namespace mongo {
         r->dropFront( split - lN, order, zeropos );
     }
 
-    void BtreeBucket::doBalanceChildren( const DiskLoc thisLoc, int leftIndex, IndexDetails &id, const Ordering &order ) {
-        DiskLoc lchild = childForPos( leftIndex );
-        DiskLoc rchild = childForPos( leftIndex + 1 );
+    template< class V >
+    void BtreeBucket<V>::doBalanceChildren( const DiskLoc thisLoc, int leftIndex, IndexDetails &id, const Ordering &order ) {
+        DiskLoc lchild = this->childForPos( leftIndex );
+        DiskLoc rchild = this->childForPos( leftIndex + 1 );
         int zeropos = 0;
-        BtreeBucket *l = lchild.btreemod();
+        BtreeBucket *l = lchild.btreemod<V>();
         l->_packReadyForMod( order, zeropos );
-        BtreeBucket *r = rchild.btreemod();
+        BtreeBucket *r = rchild.btreemod<V>();
         r->_packReadyForMod( order, zeropos );
         int split = rebalancedSeparatorPos( thisLoc, leftIndex );
@@ -1103,16 +1165,17 @@ namespace mongo {
         }
     }
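The balancing routines above and mayBalanceWithNeighbors() just below encode the underflow policy: try to rebalance with the right sibling, then the left, and only merge when neither balance is possible, since a merge may force a costlier re-split later. A condensed restatement (hypothetical callback signatures, not the real interfaces):

    // policy skeleton: balance before merge, right neighbor before left
    bool onUnderflow(bool hasRight, bool hasLeft,
                     bool (*tryBalance)(bool right), void (*merge)(bool right)) {
        if (hasRight && tryBalance(true))  return true;   // shift keys from right
        if (hasLeft  && tryBalance(false)) return true;   // shift keys from left
        if (hasRight) { merge(true);  return true; }      // else merge with right
        if (hasLeft)  { merge(false); return true; }      // or with left
        return false;                                     // no neighbors: root
    }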
-    bool BtreeBucket::mayBalanceWithNeighbors( const DiskLoc thisLoc, IndexDetails &id, const Ordering &order ) const {
-        if ( parent.isNull() ) { // we are root, there are no neighbors
+    template< class V >
+    bool BtreeBucket<V>::mayBalanceWithNeighbors( const DiskLoc thisLoc, IndexDetails &id, const Ordering &order ) const {
+        if ( this->parent.isNull() ) { // we are root, there are no neighbors
             return false;
         }
 
-        if ( packedDataSize( 0 ) >= lowWaterMark ) {
+        if ( this->packedDataSize( 0 ) >= this->lowWaterMark() ) {
             return false;
         }
 
-        const BtreeBucket *p = parent.btree();
+        const BtreeBucket *p = BTREE(this->parent);
         int parentIdx = indexInParent( thisLoc );
 
         // TODO will missing neighbor case be possible long term?  Should we try to merge/balance somehow in that case if so?
@@ -1123,21 +1186,21 @@ namespace mongo {
         // to preserve btree bucket utilization constraints since that's a more
         // heavy duty operation (especially if we must re-split later).
         if ( mayBalanceRight &&
-                p->tryBalanceChildren( parent, parentIdx, id, order ) ) {
+                p->tryBalanceChildren( this->parent, parentIdx, id, order ) ) {
             return true;
         }
         if ( mayBalanceLeft &&
-                p->tryBalanceChildren( parent, parentIdx - 1, id, order ) ) {
+                p->tryBalanceChildren( this->parent, parentIdx - 1, id, order ) ) {
             return true;
         }
 
-        BtreeBucket *pm = parent.btreemod();
+        BtreeBucket *pm = BTREEMOD(this->parent);
         if ( mayBalanceRight ) {
-            pm->doMergeChildren( parent, parentIdx, id, order );
+            pm->doMergeChildren( this->parent, parentIdx, id, order );
             return true;
         }
         else if ( mayBalanceLeft ) {
-            pm->doMergeChildren( parent, parentIdx - 1, id, order );
+            pm->doMergeChildren( this->parent, parentIdx - 1, id, order );
             return true;
         }
 
@@ -1145,7 +1208,8 @@ namespace mongo {
         return false;
     }
 
     /** remove a key from the index */
-    bool BtreeBucket::unindex(const DiskLoc thisLoc, IndexDetails& id, const BSONObj& key, const DiskLoc recordLoc ) const {
+    template< class V >
+    bool BtreeBucket<V>::unindex(const DiskLoc thisLoc, IndexDetails& id, const BSONObj& key, const DiskLoc recordLoc ) const {
         int pos;
         bool found;
         DiskLoc loc = locate(id, thisLoc, key, Ordering::make(id.keyPattern()), pos, found, recordLoc, 1);
@@ -1155,24 +1219,26 @@ namespace mongo {
                 OCCASIONALLY problem() << "unindex: key too large to index but was found for " << id.indexNamespace() << " reIndex suggested" << endl;
             }
-            loc.btreemod()->delKeyAtPos(loc, id, pos, Ordering::make(id.keyPattern()));
+            loc.btreemod<V>()->delKeyAtPos(loc, id, pos, Ordering::make(id.keyPattern()));
             return true;
         }
         return false;
     }
 
-    BtreeBucket* BtreeBucket::allocTemp() {
-        BtreeBucket *b = (BtreeBucket*) malloc(BucketSize);
+    template< class V >
+    BtreeBucket<V> * BtreeBucket<V>::allocTemp() {
+        BtreeBucket *b = (BtreeBucket*) malloc(V::BucketSize);
         b->init();
         return b;
     }
 
-    inline void BtreeBucket::fix(const DiskLoc thisLoc, const DiskLoc child) {
+    template< class V >
+    inline void BtreeBucket<V>::fix(const DiskLoc thisLoc, const DiskLoc child) {
         if ( !child.isNull() ) {
             if ( insert_debug )
                 out() << "     fix " << child.toString() << ".parent=" << thisLoc.toString() << endl;
-            child.btree()->parent.writing() = thisLoc;
+            child.btree<V>()->parent.writing() = thisLoc;
         }
     }
 
@@ -1180,32 +1246,34 @@ namespace mongo {
     * This can cause a lot of additional page writes when we assign buckets to
     * different parents.  Maybe get rid of parent ptrs?
     */
-    void BtreeBucket::fixParentPtrs(const DiskLoc thisLoc, int firstIndex, int lastIndex) const {
+    template< class V >
+    void BtreeBucket<V>::fixParentPtrs(const DiskLoc thisLoc, int firstIndex, int lastIndex) const {
         VERIFYTHISLOC
         if ( lastIndex == -1 ) {
-            lastIndex = n;
+            lastIndex = this->n;
         }
         for ( int i = firstIndex; i <= lastIndex; i++ ) {
-            fix(thisLoc, childForPos(i));
+            fix(thisLoc, this->childForPos(i));
         }
     }
 
-    void BtreeBucket::setInternalKey( const DiskLoc thisLoc, int keypos,
-                                      const DiskLoc recordLoc, const BSONObj &key, const Ordering &order,
+    template< class V >
+    void BtreeBucket<V>::setInternalKey( const DiskLoc thisLoc, int keypos,
+                                         const DiskLoc recordLoc, const Key &key, const Ordering &order,
                                          const DiskLoc lchild, const DiskLoc rchild, IndexDetails &idx ) {
-        childForPos( keypos ).Null();
+        this->childForPos( keypos ).Null();
 
         // This may leave the bucket empty (n == 0) which is ok only as a
         // transient state.  In the instant case, the implementation of
         // insertHere behaves correctly when n == 0 and as a side effect
         // increments n.
-        _delKeyAtPos( keypos, true );
+        this->_delKeyAtPos( keypos, true );
 
         // Ensure we do not orphan neighbor's old child.
-        assert( childForPos( keypos ) == rchild );
+        assert( this->childForPos( keypos ) == rchild );
 
         // Just set temporarily - required to pass validation in insertHere()
-        childForPos( keypos ) = lchild;
+        this->childForPos( keypos ) = lchild;
 
         insertHere( thisLoc, keypos, recordLoc, key, order, lchild, rchild, idx );
     }
@@ -1219,8 +1287,9 @@ namespace mongo {
     * Some of the write intent signaling below relies on the implementation of
     * the optimized write intent code in basicInsert().
     */
-    void BtreeBucket::insertHere( const DiskLoc thisLoc, int keypos,
-                                  const DiskLoc recordLoc, const BSONObj& key, const Ordering& order,
+    template< class V >
+    void BtreeBucket<V>::insertHere( const DiskLoc thisLoc, int keypos,
+                                     const DiskLoc recordLoc, const Key& key, const Ordering& order,
                                      const DiskLoc lchild, const DiskLoc rchild, IndexDetails& idx) const {
         if ( insert_debug )
             out() << "   " << thisLoc.toString() << ".insertHere " << key.toString() << '/' << recordLoc.toString() << ' '
@@ -1228,36 +1297,36 @@ namespace mongo {
 
         if ( !basicInsert(thisLoc, keypos, recordLoc, key, order) ) {
             // If basicInsert() fails, the bucket will be packed as required by split().
-            thisLoc.btreemod()->split(thisLoc, keypos, recordLoc, key, order, lchild, rchild, idx);
+            thisLoc.btreemod<V>()->split(thisLoc, keypos, recordLoc, key, order, lchild, rchild, idx);
             return;
         }
 
         {
             const _KeyNode *_kn = &k(keypos);
             _KeyNode *kn = (_KeyNode *) getDur().alreadyDeclared((_KeyNode*) _kn); // already declared intent in basicInsert()
-            if ( keypos+1 == n ) { // last key
-                if ( nextChild != lchild ) {
+            if ( keypos+1 == this->n ) { // last key
+                if ( this->nextChild != lchild ) {
                     out() << "ERROR nextChild != lchild" << endl;
                     out() << "  thisLoc: " << thisLoc.toString() << ' ' << idx.indexNamespace() << endl;
-                    out() << "  keyPos: " << keypos << " n:" << n << endl;
-                    out() << "  nextChild: " << nextChild.toString() << " lchild: " << lchild.toString() << endl;
+                    out() << "  keyPos: " << keypos << " n:" << this->n << endl;
+                    out() << "  nextChild: " << this->nextChild.toString() << " lchild: " << lchild.toString() << endl;
                     out() << "  recordLoc: " << recordLoc.toString() << " rchild: " << rchild.toString() << endl;
                     out() << "  key: " << key.toString() << endl;
                     dump();
                     assert(false);
                 }
-                kn->prevChildBucket = nextChild;
+                kn->prevChildBucket = this->nextChild;
                 assert( kn->prevChildBucket == lchild );
-                nextChild.writing() = rchild;
+                this->nextChild.writing() = rchild;
                 if ( !rchild.isNull() )
-                    rchild.btree()->parent.writing() = thisLoc;
+                    BTREE(rchild)->parent.writing() = thisLoc;
             }
             else {
                 kn->prevChildBucket = lchild;
                 if ( k(keypos+1).prevChildBucket != lchild ) {
                     out() << "ERROR k(keypos+1).prevChildBucket != lchild" << endl;
                     out() << "  thisLoc: " << thisLoc.toString() << ' ' << idx.indexNamespace() << endl;
-                    out() << "  keyPos: " << keypos << " n:" << n << endl;
+                    out() << "  keyPos: " << keypos << " n:" << this->n << endl;
                     out() << "  k(keypos+1).pcb: " << k(keypos+1).prevChildBucket.toString() << " lchild: " << lchild.toString() << endl;
                     out() << "  recordLoc: " << recordLoc.toString() << " rchild: " << rchild.toString() << endl;
                     out() << "  key: " << key.toString() << endl;
@@ -1267,83 +1336,84 @@ namespace mongo {
                    dump();
                    assert(false);
                }
                const DiskLoc *pc = &k(keypos+1).prevChildBucket;
                *getDur().alreadyDeclared((DiskLoc*) pc) = rchild; // declared in basicInsert()
                if ( !rchild.isNull() )
-                    rchild.btree()->parent.writing() = thisLoc;
+                    rchild.btree<V>()->parent.writing() = thisLoc;
            }
            return;
        }
    }
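split(), next, is the other half of insertion: the upper keys move into a freshly allocated right bucket and the separator key at position split is promoted into the parent, with a new root created when the node being split was the root. An in-memory toy of the key movement (vector-based, not the on-disk logic):

    #include <vector>

    struct ToyNode { std::vector<int> keys; };

    // move keys (splitPos+1 .. end) into 'right'; return the separator the
    // caller must insert into the parent (or into a brand-new root)
    int splitNode(ToyNode& left, ToyNode& right, int splitPos) {
        int separator = left.keys[splitPos];
        right.keys.assign(left.keys.begin() + splitPos + 1, left.keys.end());
        left.keys.resize(splitPos);
        return separator;
    }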
-    void BtreeBucket::split(const DiskLoc thisLoc, int keypos, const DiskLoc recordLoc, const BSONObj& key, const Ordering& order, const DiskLoc lchild, const DiskLoc rchild, IndexDetails& idx) {
-        assertWritable();
+    template< class V >
+    void BtreeBucket<V>::split(const DiskLoc thisLoc, int keypos, const DiskLoc recordLoc, const Key& key, const Ordering& order, const DiskLoc lchild, const DiskLoc rchild, IndexDetails& idx) {
+        this->assertWritable();
 
         if ( split_debug )
             out() << "    " << thisLoc.toString() << ".split" << endl;
 
-        int split = splitPos( keypos );
+        int split = this->splitPos( keypos );
         DiskLoc rLoc = addBucket(idx);
-        BtreeBucket *r = rLoc.btreemod();
+        BtreeBucket *r = rLoc.btreemod<V>();
         if ( split_debug )
-            out() << "     split:" << split << ' ' << keyNode(split).key.toString() << " n:" << n << endl;
-        for ( int i = split+1; i < n; i++ ) {
+            out() << "     split:" << split << ' ' << keyNode(split).key.toString() << " this->n:" << this->n << endl;
+        for ( int i = split+1; i < this->n; i++ ) {
             KeyNode kn = keyNode(i);
             r->pushBack(kn.recordLoc, kn.key, order, kn.prevChildBucket);
         }
-        r->nextChild = nextChild;
+        r->nextChild = this->nextChild;
         r->assertValid( order );
 
         if ( split_debug )
-            out() << "     new rLoc:" << rLoc.toString() << endl;
+            out() << "     this->new rLoc:" << rLoc.toString() << endl;
         r = 0;
-        rLoc.btree()->fixParentPtrs(rLoc);
+        rLoc.btree<V>()->fixParentPtrs(rLoc);
 
         {
             KeyNode splitkey = keyNode(split);
-            nextChild = splitkey.prevChildBucket; // splitkey key gets promoted, its children will be thisLoc (l) and rLoc (r)
+            this->nextChild = splitkey.prevChildBucket; // splitkey key gets promoted, its children will be thisLoc (l) and rLoc (r)
             if ( split_debug ) {
                 out() << "    splitkey key:" << splitkey.key.toString() << endl;
             }
 
             // Because thisLoc is a descendant of parent, updating parent will
-            // not not affect packing or keys of thisLoc and splitkey will be stable
+            // not affect packing or keys of thisLoc and splitkey will be stable
             // during the following:
 
-            // promote splitkey to a parent node
-            if ( parent.isNull() ) {
-                // make a new parent if we were the root
+            // promote splitkey to a parent this->node
+            if ( this->parent.isNull() ) {
+                // make a this->new this->parent if we were the root
                 DiskLoc L = addBucket(idx);
-                BtreeBucket *p = L.btreemod();
+                BtreeBucket *p = L.btreemod<V>();
                 p->pushBack(splitkey.recordLoc, splitkey.key, order, thisLoc);
                 p->nextChild = rLoc;
                 p->assertValid( order );
-                parent = idx.head.writing() = L;
+                this->parent = idx.head.writing() = L;
                 if ( split_debug )
-                    out() << "    we were root, making new root:" << hex << parent.getOfs() << dec << endl;
-                rLoc.btree()->parent.writing() = parent;
+                    out() << "    we were root, making this->new root:" << hex << this->parent.getOfs() << dec << endl;
+                rLoc.btree<V>()->parent.writing() = this->parent;
             }
             else {
                 // set this before calling _insert - if it splits it will do fixParent() logic and change the value.
-                rLoc.btree()->parent.writing() = parent;
+                rLoc.btree<V>()->parent.writing() = this->parent;
                 if ( split_debug )
                     out() << "    promoting splitkey key " << splitkey.key.toString() << endl;
-                parent.btree()->_insert(parent, splitkey.recordLoc, splitkey.key, order, /*dupsallowed*/true, thisLoc, rLoc, idx);
+                BTREE(this->parent)->_insert(this->parent, splitkey.recordLoc, splitkey.key, order, /*dupsallowed*/true, thisLoc, rLoc, idx);
            }
        }
 
         int newpos = keypos;
         // note this may trash splitkey.key.  thus we had to promote it before finishing up here.
-        truncateTo(split, order, newpos);
+        this->truncateTo(split, order, newpos);
 
-        // add our new key, there is room now
+        // add our this->new key, there is room this->now
         {
             if ( keypos <= split ) {
                 if ( split_debug )
-                    out() << "  keypos<split, insertHere() the new key" << endl;
+                    out() << "  keypos<split, insertHere() the this->new key" << endl;
                 insertHere(thisLoc, newpos, recordLoc, key, order, lchild, rchild, idx);
             }
             else {
                 int kp = keypos-split-1;
                 assert(kp>=0);
-                rLoc.btree()->insertHere(rLoc, kp, recordLoc, key, order, lchild, rchild, idx);
+                BTREE(rLoc)->insertHere(rLoc, kp, recordLoc, key, order, lchild, rchild, idx);
             }
         }
 
@@ -1351,41 +1421,44 @@ namespace mongo {
             out() << "     split end " << hex << thisLoc.getOfs() << dec << endl;
     }
 
-    /** start a new index off, empty */
-    DiskLoc BtreeBucket::addBucket(const IndexDetails& id) {
+    /** start a this->new index off, empty */
+    template< class V >
+    DiskLoc BtreeBucket<V>::addBucket(const IndexDetails& id) {
         string ns = id.indexNamespace();
-        DiskLoc loc = theDataFileMgr.insert(ns.c_str(), 0, BucketSize, true);
-        BtreeBucket *b = loc.btreemod();
+        DiskLoc loc = theDataFileMgr.insert(ns.c_str(), 0, V::BucketSize, true);
+        BtreeBucket *b = BTREEMOD(loc);
         b->init();
         return loc;
     }
 
-    void BtreeBucket::renameIndexNamespace(const char *oldNs, const char *newNs) {
+    void renameIndexNamespace(const char *oldNs, const char *newNs) {
         renameNamespace( oldNs, newNs );
     }
 
-    const DiskLoc BtreeBucket::getHead(const DiskLoc& thisLoc) const {
+    template< class V >
+    const DiskLoc BtreeBucket<V>::getHead(const DiskLoc& thisLoc) const {
         DiskLoc p = thisLoc;
-        while ( !p.btree()->isHead() )
-            p = p.btree()->parent;
+        while ( !BTREE(p)->isHead() )
+            p = BTREE(p)->parent;
         return p;
     }
DiskLoc childLoc = thisLoc; - DiskLoc ancestor = parent; + DiskLoc ancestor = this->parent; while ( 1 ) { if ( ancestor.isNull() ) break; - const BtreeBucket *an = ancestor.btree(); + const BtreeBucket *an = BTREE(ancestor); for ( int i = 0; i < an->n; i++ ) { if ( an->childForPos(i+adj) == childLoc ) { keyOfs = i; @@ -1412,7 +1485,7 @@ namespace mongo { } } assert( direction<0 || an->nextChild == childLoc ); - // parent exhausted also, keep going up + // parent exhausted also, keep going up childLoc = ancestor; ancestor = an->parent; } @@ -1420,7 +1493,13 @@ namespace mongo { return DiskLoc(); } - DiskLoc BtreeBucket::locate(const IndexDetails& idx, const DiskLoc& thisLoc, const BSONObj& key, const Ordering &order, int& pos, bool& found, const DiskLoc &recordLoc, int direction) const { + template< class V > + DiskLoc BtreeBucket<V>::locate(const IndexDetails& idx, const DiskLoc& thisLoc, const BSONObj& key, const Ordering &order, int& pos, bool& found, const DiskLoc &recordLoc, int direction) const { + return locate(idx, thisLoc, KeyOwned(key), order, pos, found, recordLoc, direction); + } + + template< class V > + DiskLoc BtreeBucket<V>::locate(const IndexDetails& idx, const DiskLoc& thisLoc, const Key& key, const Ordering &order, int& pos, bool& found, const DiskLoc &recordLoc, int direction) const { int p; found = find(idx, key, recordLoc, order, p, /*assertIfDup*/ false); if ( found ) { @@ -1428,10 +1507,10 @@ namespace mongo { return thisLoc; } - DiskLoc child = childForPos(p); + DiskLoc child = this->childForPos(p); if ( !child.isNull() ) { - DiskLoc l = child.btree()->locate(idx, child, key, order, pos, found, recordLoc, direction); + DiskLoc l = BTREE(child)->locate(idx, child, key, order, pos, found, recordLoc, direction); if ( !l.isNull() ) return l; } @@ -1440,14 +1519,15 @@ namespace mongo { if ( direction < 0 ) return --pos == -1 ? DiskLoc() /*theend*/ : thisLoc; else - return pos == n ? DiskLoc() /*theend*/ : thisLoc; + return pos == this->n ? DiskLoc() /*theend*/ : thisLoc; } - bool BtreeBucket::customFind( int l, int h, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction, DiskLoc &thisLoc, int &keyOfs, pair< DiskLoc, int > &bestParent ) const { + template< class V > + bool BtreeBucket<V>::customFind( int l, int h, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction, DiskLoc &thisLoc, int &keyOfs, pair< DiskLoc, int > &bestParent ) const { while( 1 ) { if ( l + 1 == h ) { keyOfs = ( direction > 0 ) ? 
h : l; - DiskLoc next = thisLoc.btree()->k( h ).prevChildBucket; + DiskLoc next = BTREE(thisLoc)->k( h ).prevChildBucket; if ( !next.isNull() ) { bestParent = make_pair( thisLoc, keyOfs ); thisLoc = next; @@ -1458,7 +1538,7 @@ } } int m = l + ( h - l ) / 2; - int cmp = customBSONCmp( thisLoc.btree()->keyNode( m ).key, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ); + int cmp = customBSONCmp( BTREE(thisLoc)->keyNode( m ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ); if ( cmp < 0 ) { l = m; } @@ -1481,18 +1561,19 @@ * starting thisLoc + keyOfs will be strictly less than/strictly greater than keyBegin/keyBeginLen/keyEnd * All the direction checks below allowed me to refactor the code, but possibly separate forward and reverse implementations would be more efficient */ - void BtreeBucket::advanceTo(DiskLoc &thisLoc, int &keyOfs, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction ) const { + template< class V > + void BtreeBucket<V>::advanceTo(DiskLoc &thisLoc, int &keyOfs, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction ) const { int l,h; bool dontGoUp; if ( direction > 0 ) { l = keyOfs; - h = n - 1; - dontGoUp = ( customBSONCmp( keyNode( h ).key, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) >= 0 ); + h = this->n - 1; + dontGoUp = ( customBSONCmp( keyNode( h ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) >= 0 ); } else { l = 0; h = keyOfs; - dontGoUp = ( customBSONCmp( keyNode( l ).key, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) <= 0 ); + dontGoUp = ( customBSONCmp( keyNode( l ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) <= 0 ); } pair< DiskLoc, int > bestParent; if ( dontGoUp ) { @@ -1502,16 +1583,16 @@ } } else { - // go up parents until rightmost/leftmost node is >=/<= target or at top - while( !thisLoc.btree()->parent.isNull() ) { - thisLoc = thisLoc.btree()->parent; + // go up parents until rightmost/leftmost node is >=/<= target or at top + while( !BTREE(thisLoc)->parent.isNull() ) { + thisLoc = BTREE(thisLoc)->parent; if ( direction > 0 ) { - if ( customBSONCmp( thisLoc.btree()->keyNode( thisLoc.btree()->n - 1 ).key, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) >= 0 ) { + if ( customBSONCmp( BTREE(thisLoc)->keyNode( BTREE(thisLoc)->n - 1 ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) >= 0 ) { break; } } else { - if ( customBSONCmp( thisLoc.btree()->keyNode( 0 ).key, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) <= 0 ) { + if ( customBSONCmp( BTREE(thisLoc)->keyNode( 0 ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) <= 0 ) { break; } } @@ -1520,31 +1601,32 @@ customLocate( thisLoc, keyOfs, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction, bestParent ); } - void BtreeBucket::customLocate(DiskLoc &thisLoc, int &keyOfs, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > 
&keyEndInclusive, const Ordering &order, int direction, pair< DiskLoc, int > &bestParent ) const { - if ( thisLoc.btree()->n == 0 ) { + template< class V > + void BtreeBucket<V>::customLocate(DiskLoc &thisLoc, int &keyOfs, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction, pair< DiskLoc, int > &bestParent ) const { + if ( BTREE(thisLoc)->n == 0 ) { thisLoc = DiskLoc(); return; } // go down until find smallest/biggest >=/<= target while( 1 ) { int l = 0; - int h = thisLoc.btree()->n - 1; + int h = BTREE(thisLoc)->n - 1; // leftmost/rightmost key may possibly be >=/<= search key bool firstCheck; if ( direction > 0 ) { - firstCheck = ( customBSONCmp( thisLoc.btree()->keyNode( 0 ).key, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) >= 0 ); + firstCheck = ( customBSONCmp( BTREE(thisLoc)->keyNode( 0 ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) >= 0 ); } else { - firstCheck = ( customBSONCmp( thisLoc.btree()->keyNode( h ).key, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) <= 0 ); + firstCheck = ( customBSONCmp( BTREE(thisLoc)->keyNode( h ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) <= 0 ); } if ( firstCheck ) { DiskLoc next; if ( direction > 0 ) { - next = thisLoc.btree()->k( 0 ).prevChildBucket; + next = BTREE(thisLoc)->k( 0 ).prevChildBucket; keyOfs = 0; } else { - next = thisLoc.btree()->nextChild; + next = BTREE(thisLoc)->nextChild; keyOfs = h; } if ( !next.isNull() ) { @@ -1558,21 +1640,21 @@ namespace mongo { } bool secondCheck; if ( direction > 0 ) { - secondCheck = ( customBSONCmp( thisLoc.btree()->keyNode( h ).key, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) < 0 ); + secondCheck = ( customBSONCmp( BTREE(thisLoc)->keyNode( h ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) < 0 ); } else { - secondCheck = ( customBSONCmp( thisLoc.btree()->keyNode( 0 ).key, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) > 0 ); + secondCheck = ( customBSONCmp( BTREE(thisLoc)->keyNode( 0 ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) > 0 ); } if ( secondCheck ) { DiskLoc next; if ( direction > 0 ) { - next = thisLoc.btree()->nextChild; + next = BTREE(thisLoc)->nextChild; } else { - next = thisLoc.btree()->k( 0 ).prevChildBucket; + next = BTREE(thisLoc)->k( 0 ).prevChildBucket; } if ( next.isNull() ) { - // if bestParent is null, we've hit the end and thisLoc gets set to DiskLoc() + // if bestParent is null, we've hit the end and thisLoc gets set to DiskLoc() thisLoc = bestParent.first; keyOfs = bestParent.second; return; @@ -1590,14 +1672,15 @@ namespace mongo { /** @thisLoc disk location of *this */ - int BtreeBucket::_insert(const DiskLoc thisLoc, const DiskLoc recordLoc, - const BSONObj& key, const Ordering &order, bool dupsAllowed, + template< class V > + int BtreeBucket<V>::_insert(const DiskLoc thisLoc, const DiskLoc recordLoc, + const Key& key, const Ordering &order, bool dupsAllowed, const DiskLoc lChild, const DiskLoc rChild, IndexDetails& idx) const { - if ( key.objsize() > KeyMax ) { - problem() << "ERROR: key too large len:" << key.objsize() << " max:" << KeyMax << ' ' << key.objsize() << ' ' << idx.indexNamespace() << endl; + if ( key.dataSize() > KeyMax ) { + 
problem() << "ERROR: key too large len:" << key.dataSize() << " max:" << KeyMax << ' ' << key.dataSize() << ' ' << idx.indexNamespace() << endl; return 2; } - assert( key.objsize() > 0 ); + assert( key.dataSize() > 0 ); int pos; bool found = find(idx, key, recordLoc, order, pos, !dupsAllowed); @@ -1605,15 +1688,15 @@ namespace mongo { out() << " " << thisLoc.toString() << '.' << "_insert " << key.toString() << '/' << recordLoc.toString() << " l:" << lChild.toString() << " r:" << rChild.toString() << endl; - out() << " found:" << found << " pos:" << pos << " n:" << n << endl; + out() << " found:" << found << " pos:" << pos << " this->n:" << this->n << endl; } if ( found ) { const _KeyNode& kn = k(pos); if ( kn.isUnused() ) { log(4) << "btree _insert: reusing unused key" << endl; - massert( 10285 , "_insert: reuse key but lchild is not null", lChild.isNull()); - massert( 10286 , "_insert: reuse key but rchild is not null", rChild.isNull()); + massert( 10285 , "_insert: reuse key but lchild is not this->null", lChild.isNull()); + massert( 10286 , "_insert: reuse key but rchild is not this->null", rChild.isNull()); kn.writing().setUsed(); return 0; } @@ -1623,32 +1706,33 @@ namespace mongo { log() << " " << idx.indexNamespace() << " thisLoc:" << thisLoc.toString() << '\n'; log() << " " << key.toString() << '\n'; log() << " " << "recordLoc:" << recordLoc.toString() << " pos:" << pos << endl; - log() << " old l r: " << childForPos(pos).toString() << ' ' << childForPos(pos+1).toString() << endl; - log() << " new l r: " << lChild.toString() << ' ' << rChild.toString() << endl; + log() << " old l r: " << this->childForPos(pos).toString() << ' ' << this->childForPos(pos+1).toString() << endl; + log() << " this->new l r: " << lChild.toString() << ' ' << rChild.toString() << endl; } alreadyInIndex(); } DEBUGGING out() << "TEMP: key: " << key.toString() << endl; - DiskLoc child = childForPos(pos); + DiskLoc child = this->childForPos(pos); if ( insert_debug ) out() << " getChild(" << pos << "): " << child.toString() << endl; - // In current usage, rChild isNull() for a new key and false when we are + // In current usage, rChild isNull() for a this->new key and false when we are // promoting a split key. These are the only two cases where _insert() // is called currently. if ( child.isNull() || !rChild.isNull() ) { - // A new key will be inserted at the same tree height as an adjacent existing key. + // A this->new key will be inserted at the same tree height as an adjacent existing key. 
insertHere(thisLoc, pos, recordLoc, key, order, lChild, rChild, idx); return 0; } - return child.btree()->bt_insert(child, recordLoc, key, order, dupsAllowed, idx, /*toplevel*/false); + return child.btree<V>()->_insert(child, recordLoc, key, order, dupsAllowed, /*lchild*/DiskLoc(), /*rchild*/DiskLoc(), idx); } - void BtreeBucket::dump() const { - out() << "DUMP btreebucket n:" << n; - out() << " parent:" << hex << parent.getOfs() << dec; - for ( int i = 0; i < n; i++ ) { + template< class V > + void BtreeBucket<V>::dump() const { + out() << "DUMP btreebucket n:" << this->n; + out() << " parent:" << hex << this->parent.getOfs() << dec; + for ( int i = 0; i < this->n; i++ ) { out() << '\n'; KeyNode k = keyNode(i); out() << '\t' << i << '\t' << k.key.toString() << "\tleft:" << hex << @@ -1656,48 +1740,52 @@ namespace mongo { if ( this->k(i).isUnused() ) out() << " UNUSED"; } - out() << " right:" << hex << nextChild.getOfs() << dec << endl; + out() << " right:" << hex << this->nextChild.getOfs() << dec << endl; } /** todo: meaning of return code unclear clean up */ - int BtreeBucket::bt_insert(const DiskLoc thisLoc, const DiskLoc recordLoc, - const BSONObj& key, const Ordering &order, bool dupsAllowed, - IndexDetails& idx, bool toplevel) const { + template< class V > + int BtreeBucket<V>::bt_insert(const DiskLoc thisLoc, const DiskLoc recordLoc, + const BSONObj& _key, const Ordering &order, bool dupsAllowed, + IndexDetails& idx, bool toplevel) const + { + KeyOwned key(_key); + if ( toplevel ) { - if ( key.objsize() > KeyMax ) { - problem() << "Btree::insert: key too large to index, skipping " << idx.indexNamespace() << ' ' << key.objsize() << ' ' << key.toString() << endl; + if ( key.dataSize() > KeyMax ) { + problem() << "Btree::insert: key too large to index, skipping " << idx.indexNamespace() << ' ' << key.dataSize() << ' ' << key.toString() << endl; return 3; } } - int x = _insert(thisLoc, recordLoc, key, order, dupsAllowed, DiskLoc(), DiskLoc(), idx); - assertValid( order ); + int x = _insert(thisLoc, recordLoc, Key(key), order, dupsAllowed, DiskLoc(), DiskLoc(), idx); + this->assertValid( order ); return x; } - void BtreeBucket::shape(stringstream& ss) const { - _shape(0, ss); + template< class V > + void BtreeBucket<V>::shape(stringstream& ss) const { + this->_shape(0, ss); } - int BtreeBucket::getLowWaterMark() { - return lowWaterMark; - } - - int BtreeBucket::getKeyMax() { + template< class V > + int BtreeBucket<V>::getKeyMax() { return KeyMax; } - DiskLoc BtreeBucket::findSingle( const IndexDetails& indexdetails , const DiskLoc& thisLoc, const BSONObj& key ) const { + template< class V > + DiskLoc BtreeBucket<V>::findSingle( const IndexDetails& indexdetails , const DiskLoc& thisLoc, const BSONObj& key ) const { int pos; bool found; - // TODO: is it really ok here that the order is a default? + // TODO: is it really ok here that the order is a default? + // for findById() use, yes. for checkNoIndexConflicts, no? 
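        // Note (assumption, not in the original): Ordering::make(BSONObj()) yields an
        // all-ascending ordering, which is fine for a single-field _id lookup; a compound
        // index with a descending component could compare in the wrong order here, matching
        // the "may not be compound index safe" warning added to findSingle() in btree.h.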
Ordering o = Ordering::make(BSONObj()); DiskLoc bucket = locate( indexdetails , indexdetails.head , key , o , pos , found , minDiskLoc ); if ( bucket.isNull() ) return bucket; - const BtreeBucket *b = bucket.btree(); + const BtreeBucket<V> *b = bucket.btree<V>(); while ( 1 ) { const _KeyNode& knraw = b->k(pos); if ( knraw.isUsed() ) @@ -1705,23 +1793,24 @@ namespace mongo { bucket = b->advance( bucket , pos , 1 , "findSingle" ); if ( bucket.isNull() ) return bucket; - b = bucket.btree(); + b = bucket.btree<V>(); } KeyNode kn = b->keyNode( pos ); - if ( key.woCompare( kn.key ) != 0 ) + if ( KeyOwned(key).woCompare( kn.key, o ) != 0 ) return DiskLoc(); return kn.recordLoc; } -} // namespace mongo +} // namespace mongo #include "db.h" #include "dbhelpers.h" namespace mongo { - void BtreeBucket::a_test(IndexDetails& id) { - BtreeBucket *b = id.head.btreemod(); + template< class V > + void BtreeBucket<V>::a_test(IndexDetails& id) { + BtreeBucket *b = id.head.btreemod<V>(); // record locs for testing DiskLoc A(1, 20); @@ -1765,139 +1854,9 @@ namespace mongo { // b->dumpTree(id.head, order); } - /* --- BtreeBuilder --- */ - - BtreeBuilder::BtreeBuilder(bool _dupsAllowed, IndexDetails& _idx) : - dupsAllowed(_dupsAllowed), - idx(_idx), - n(0), - order( idx.keyPattern() ), - ordering( Ordering::make(idx.keyPattern()) ) { - first = cur = BtreeBucket::addBucket(idx); - b = cur.btreemod(); - committed = false; - } - - void BtreeBuilder::newBucket() { - DiskLoc L = BtreeBucket::addBucket(idx); - b->tempNext() = L; - cur = L; - b = cur.btreemod(); - } - - void BtreeBuilder::mayCommitProgressDurably() { - if ( getDur().commitIfNeeded() ) { - b = cur.btreemod(); - } - } - - void BtreeBuilder::addKey(BSONObj& key, DiskLoc loc) { - if ( key.objsize() > KeyMax ) { - problem() << "Btree::insert: key too large to index, skipping " << idx.indexNamespace() - << ' ' << key.objsize() << ' ' << key.toString() << endl; - return; - } - - if( !dupsAllowed ) { - if( n > 0 ) { - int cmp = keyLast.woCompare(key, order); - massert( 10288 , "bad key order in BtreeBuilder - server internal error", cmp <= 0 ); - if( cmp == 0 ) { - //if( !dupsAllowed ) - uasserted( ASSERT_ID_DUPKEY , BtreeBucket::dupKeyError( idx , keyLast ) ); - } - } - keyLast = key; - } - - if ( ! b->_pushBack(loc, key, ordering, DiskLoc()) ) { - // bucket was full - newBucket(); - b->pushBack(loc, key, ordering, DiskLoc()); - } - n++; - mayCommitProgressDurably(); - } - - void BtreeBuilder::buildNextLevel(DiskLoc loc) { - int levels = 1; - while( 1 ) { - if( loc.btree()->tempNext().isNull() ) { - // only 1 bucket at this level. we are done. - getDur().writingDiskLoc(idx.head) = loc; - break; - } - levels++; - - DiskLoc upLoc = BtreeBucket::addBucket(idx); - DiskLoc upStart = upLoc; - BtreeBucket *up = upLoc.btreemod(); - - DiskLoc xloc = loc; - while( !xloc.isNull() ) { - if ( getDur().commitIfNeeded() ) { - b = cur.btreemod(); - up = upLoc.btreemod(); - } - - BtreeBucket *x = xloc.btreemod(); - BSONObj k; - DiskLoc r; - x->popBack(r,k); - bool keepX = ( x->n != 0 ); - DiskLoc keepLoc = keepX ? xloc : x->nextChild; - - if ( ! 
up->_pushBack(r, k, ordering, keepLoc) ) { - // current bucket full - DiskLoc n = BtreeBucket::addBucket(idx); - up->tempNext() = n; - upLoc = n; - up = upLoc.btreemod(); - up->pushBack(r, k, ordering, keepLoc); - } - - DiskLoc nextLoc = x->tempNext(); // get next in chain at current level - if ( keepX ) { - x->parent = upLoc; - } - else { - if ( !x->nextChild.isNull() ) - x->nextChild.btreemod()->parent = upLoc; - x->deallocBucket( xloc, idx ); - } - xloc = nextLoc; - } - - loc = upStart; - mayCommitProgressDurably(); - } - - if( levels > 1 ) - log(2) << "btree levels: " << levels << endl; - } - - /** when all addKeys are done, we then build the higher levels of the tree */ - void BtreeBuilder::commit() { - buildNextLevel(first); - committed = true; - } - - BtreeBuilder::~BtreeBuilder() { - DESTRUCTOR_GUARD( - if( !committed ) { - log(2) << "Rolling back partially built index space" << endl; - DiskLoc x = first; - while( !x.isNull() ) { - DiskLoc next = x.btree()->tempNext(); - string ns = idx.indexNamespace(); - theDataFileMgr._deleteRecord(nsdetails(ns.c_str()), ns.c_str(), x.rec(), x); - x = next; - getDur().commitIfNeeded(); - } - assert( idx.head.isNull() ); - log(2) << "done rollback" << endl; - } - ) - } + template class BucketBasics<V0>; + template class BucketBasics<V1>; + template class BtreeBucket<V0>; + template class BtreeBucket<V1>; } diff --git a/db/btree.h b/db/btree.h index 816f2698344..e8d592b2592 100644 --- a/db/btree.h +++ b/db/btree.h @@ -22,6 +22,7 @@ #include "jsobj.h" #include "diskloc.h" #include "pdfile.h" +#include "key.h" namespace mongo { @@ -62,9 +63,14 @@ namespace mongo { * below as legacy btrees. */ - const int BucketSize = 8192; + const int OldBucketSize = 8192; + + // largest key size we allow. note we very much need to support bigger keys (somehow) in the future. + const int KeyMax = OldBucketSize / 10; #pragma pack(1) + template< class Version > class BucketBasics; + /** * This is the fixed width data component for storage of a key within a * bucket. It contains an offset pointer to the variable width bson @@ -119,26 +125,7 @@ return !isUnused(); } }; -#pragma pack() - - class BucketBasics; - - /** - * This is an in memory wrapper for a _KeyNode, and not itself part of btree - * storage. This object and its BSONObj 'key' will become invalid if the - * _KeyNode data that generated it is moved within the btree. In general, - * a KeyNode should not be expected to be valid after a write. - */ - class KeyNode { - public: - KeyNode(const BucketBasics& bb, const _KeyNode &k); - const DiskLoc& prevChildBucket; - const DiskLoc& recordLoc; - /* Points to the bson key storage for a _KeyNode */ - BSONObj key; - }; -#pragma pack(1) /** * This structure represents header data for a btree bucket. An object of * this type is typically allocated inside of a buffer of size BucketSize, @@ -156,7 +143,7 @@ * b = bson key data * u = unused (old) bson key data, that may be garbage collected */ - class BtreeData { + class BtreeData_V0 { protected: /** Parent bucket of this bucket, which isNull() for the root bucket. */ DiskLoc parent; @@ -168,6 +155,12 @@ unsigned short _reserved1; int flags; + void _init() { + _reserved1 = 0; + _wasSize = BucketSize; + reserved = 0; + } + /** basicInsert() assumes the next three members are consecutive and in this order: */ /** Size of the empty region. 
*/ @@ -180,8 +173,45 @@ namespace mongo { int reserved; /* Beginning of the bucket's body */ char data[4]; + + public: + typedef KeyBson Key; + typedef KeyBson KeyOwned; + enum { BucketSize = 8192 }; }; + class BtreeData_V1 { + protected: + /** Parent bucket of this bucket, which isNull() for the root bucket. */ + DiskLoc parent; + /** Given that there are n keys, this is the n index child. */ + DiskLoc nextChild; + + unsigned short flags; + + /** basicInsert() assumes the next three members are consecutive and in this order: */ + + /** Size of the empty region. */ + unsigned short emptySize; + /** Size used for bson storage, including storage of old keys. */ + unsigned short topSize; + /* Number of keys in the bucket. */ + unsigned short n; + + /* Beginning of the bucket's body */ + char data[4]; + + void _init() { } + + public: + typedef KeyV1 Key; + typedef KeyV1Owned KeyOwned; + enum { BucketSize = 8192-16 }; // leave room for Record header + }; + + typedef BtreeData_V0 V0; + typedef BtreeData_V1 V1; + /** * This class adds functionality to BtreeData for managing a single bucket. * The following policies are used in an attempt to encourage simplicity: @@ -209,10 +239,28 @@ namespace mongo { * A number of functions below require a thisLoc argument, which must be the * disk location of the bucket mapped to 'this'. */ - class BucketBasics : public BtreeData { - friend class BtreeBuilder; - friend class KeyNode; + template< class Version > + class BucketBasics : public Version { public: + template <class U> friend class BtreeBuilder; + typedef typename Version::Key Key; + + /** + * This is an in memory wrapper for a _KeyNode, and not itself part of btree + * storage. This object and its BSONObj 'key' will become invalid if the + * _KeyNode data that generated it is moved within the btree. In general, + * a KeyNode should not be expected to be valid after a write. + */ + class KeyNode { + public: + KeyNode(const BucketBasics<Version>& bb, const _KeyNode &k); + const DiskLoc& prevChildBucket; + const DiskLoc& recordLoc; + /* Points to the bson key storage for a _KeyNode */ + Key key; + }; + friend class KeyNode; + /** Assert write intent declared for this bucket already. */ void assertWritable(); @@ -225,8 +273,8 @@ namespace mongo { * a KeyNode should not be expected to be valid after a write. */ const KeyNode keyNode(int i) const { - if ( i >= n ) { - massert( 13000 , (string)"invalid keyNode: " + BSON( "i" << i << "n" << n ).jsonString() , i < n ); + if ( i >= this->n ) { + massert( 13000 , (string)"invalid keyNode: " + BSON( "i" << i << "n" << this->n ).jsonString() , i < this->n ); } return KeyNode(*this, k(i)); } @@ -235,14 +283,15 @@ namespace mongo { const BucketBasics *d = 0; return (char*)&(d->data) - (char*)&(d->parent); } - static int bodySize() { return BucketSize - headerSize(); } + static int bodySize() { return Version::BucketSize - headerSize(); } + static int lowWaterMark() { return bodySize() / 2 - KeyMax - sizeof( _KeyNode ) + 1; } // see comment in btree.cpp // for testing - int nKeys() const { return n; } - const DiskLoc getNextChild() const { return nextChild; } + int nKeys() const { return this->n; } + const DiskLoc getNextChild() const { return this->nextChild; } protected: - char * dataAt(short ofs) { return data + ofs; } + char * dataAt(short ofs) { return this->data + ofs; } /** Initialize the header for a new node. 
*/ void init(); @@ -263,7 +312,7 @@ namespace mongo { * Although this function is marked const, it modifies the underlying * btree representation through an optimized write intent mechanism. */ - bool basicInsert(const DiskLoc thisLoc, int &keypos, const DiskLoc recordLoc, const BSONObj& key, const Ordering &order) const; + bool basicInsert(const DiskLoc thisLoc, int &keypos, const DiskLoc recordLoc, const Key& key, const Ordering &order) const; /** * Preconditions: @@ -276,8 +325,8 @@ namespace mongo { * Importantly, nextChild is not updated! * - Otherwise false is returned and there is no change. */ - bool _pushBack(const DiskLoc recordLoc, const BSONObj& key, const Ordering &order, const DiskLoc prevChild); - void pushBack(const DiskLoc recordLoc, const BSONObj& key, const Ordering &order, const DiskLoc prevChild) { + bool _pushBack(const DiskLoc recordLoc, const Key& key, const Ordering &order, const DiskLoc prevChild); + void pushBack(const DiskLoc recordLoc, const Key& key, const Ordering &order, const DiskLoc prevChild) { bool ok = _pushBack( recordLoc , key , order , prevChild ); assert(ok); } @@ -302,7 +351,7 @@ namespace mongo { * - The last key of the bucket is removed, and its key and recLoc are * returned. As mentioned above, the key points to unallocated memory. */ - void popBack(DiskLoc& recLoc, BSONObj& key); + void popBack(DiskLoc& recLoc, Key& key); /** * Preconditions: @@ -324,8 +373,8 @@ namespace mongo { enum Flags { Packed=1 }; /** n == 0 is ok */ - const DiskLoc& childForPos(int p) const { return p == n ? nextChild : k(p).prevChildBucket; } - DiskLoc& childForPos(int p) { return p == n ? nextChild : k(p).prevChildBucket; } + const DiskLoc& childForPos(int p) const { return p == this->n ? this->nextChild : k(p).prevChildBucket; } + DiskLoc& childForPos(int p) { return p == this->n ? this->nextChild : k(p).prevChildBucket; } /** Same as bodySize(). */ int totalDataSize() const; @@ -356,8 +405,8 @@ namespace mongo { /** @return the size the bucket's body would have if we were to call pack() */ int packedDataSize( int refPos ) const; - void setNotPacked() { flags &= ~Packed; } - void setPacked() { flags |= Packed; } + void setNotPacked() { this->flags &= ~Packed; } + void setPacked() { this->flags |= Packed; } /** * Preconditions: 'bytes' is <= emptySize * Postconditions: A buffer of size 'bytes' is allocated on the top side, @@ -399,15 +448,18 @@ namespace mongo { * BtreeBuilder uses the parent var as a temp place to maintain a linked list chain. * we use tempNext() when we do that to be less confusing. (one might have written a union in C) */ - const DiskLoc& tempNext() const { return parent; } - DiskLoc& tempNext() { return parent; } + const DiskLoc& tempNext() const { return this->parent; } + DiskLoc& tempNext() { return this->parent; } void _shape(int level, stringstream&) const; int Size() const; /** @return i-indexed _KeyNode, without bounds checking */ - const _KeyNode& k(int i) const { return ((const _KeyNode*)data)[i]; } - _KeyNode& k(int i) { return ((_KeyNode*)data)[i]; } + public: + const _KeyNode& k(int i) const { return ((const _KeyNode*)this->data)[i]; } + _KeyNode& _k(int i) { return ((_KeyNode*)this->data)[i]; } + protected: + _KeyNode& k(int i) { return ((_KeyNode*)this->data)[i]; } /** * Preconditions: 'this' is packed @@ -437,7 +489,7 @@ namespace mongo { * - The specified key is set at index i, replacing the existing * _KeyNode data and without shifting any other _KeyNode objects. 
*/ - void setKey( int i, const DiskLoc recordLoc, const BSONObj &key, const DiskLoc prevChildBucket ); + void setKey( int i, const DiskLoc recordLoc, const Key& key, const DiskLoc prevChildBucket ); }; /** @@ -468,14 +520,25 @@ namespace mongo { * behave correctly if the 'this' pointer is suddenly invalidated by a * callee. */ - class BtreeBucket : public BucketBasics { + template< class V > + class BtreeBucket : public BucketBasics<V> { friend class BtreeCursor; public: - bool isHead() const { return parent.isNull(); } + // make compiler happy: + typedef typename V::Key Key; + typedef typename V::KeyOwned KeyOwned; + typedef typename BucketBasics<V>::KeyNode KeyNode; + const _KeyNode& k(int i) const { return static_cast< const BucketBasics<V> * >(this)->k(i); } + protected: + _KeyNode& k(int i) { return static_cast< BucketBasics<V> * >(this)->_k(i); } + public: + const KeyNode keyNode(int i) const { return static_cast< const BucketBasics<V> * >(this)->keyNode(i); } + + bool isHead() const { return this->parent.isNull(); } void dumpTree(const DiskLoc &thisLoc, const BSONObj &order) const; long long fullValidate(const DiskLoc& thisLoc, const BSONObj &order, long long *unusedCount = 0, bool strict = false) const; /* traverses everything */ - bool isUsed( int i ) const { return k(i).isUsed(); } + bool isUsed( int i ) const { return this->k(i).isUsed(); } string bucketSummary() const; void dump() const; @@ -486,7 +549,9 @@ namespace mongo { * BSONObj order = ((IndexDetails&)idx).keyPattern(); * likewise below in bt_insert() etc. */ - bool exists(const IndexDetails& idx, const DiskLoc &thisLoc, const BSONObj& key, const Ordering& order) const; + private: + bool exists(const IndexDetails& idx, const DiskLoc &thisLoc, const Key& key, const Ordering& order) const; + public: /** * @param self - Don't complain about ourself already being in the index case. @@ -494,7 +559,7 @@ namespace mongo { */ bool wouldCreateDup( const IndexDetails& idx, const DiskLoc &thisLoc, - const BSONObj& key, const Ordering& order, + const Key& key, const Ordering& order, const DiskLoc &self) const; /** @@ -514,9 +579,6 @@ namespace mongo { */ void deallocBucket(const DiskLoc thisLoc, const IndexDetails &id); - /** Renames the index namespace for this btree's index. */ - static void renameIndexNamespace(const char *oldNs, const char *newNs); - /** * Preconditions: * - 'key' has a valid schema for this index. @@ -555,11 +617,14 @@ namespace mongo { */ DiskLoc locate(const IndexDetails &idx , const DiskLoc& thisLoc, const BSONObj& key, const Ordering &order, int& pos, bool& found, const DiskLoc &recordLoc, int direction=1) const; + DiskLoc locate(const IndexDetails &idx , const DiskLoc& thisLoc, const Key& key, const Ordering &order, + int& pos, bool& found, const DiskLoc &recordLoc, int direction=1) const; /** * find the first instance of the key * does not handle dups - * returned DiskLoc isNull if can't find anything with that + * WARNING: findSingle may not be compound index safe. this may need to change. see notes in + * findSingle code. * @return the record location of the first match */ DiskLoc findSingle( const IndexDetails &indexdetails , const DiskLoc& thisLoc, const BSONObj& key ) const; @@ -584,7 +649,6 @@ namespace mongo { static void a_test(IndexDetails&); - static int getLowWaterMark(); static int getKeyMax(); protected: @@ -668,8 +732,8 @@ namespace mongo { * indexes greater than split in lchild are moved to rchild. 
*/ void doBalanceLeftToRight( const DiskLoc thisLoc, int leftIndex, int split, - BtreeBucket *l, const DiskLoc lchild, - BtreeBucket *r, const DiskLoc rchild, + BtreeBucket<V> *l, const DiskLoc lchild, + BtreeBucket<V> *r, const DiskLoc rchild, IndexDetails &id, const Ordering &order ); /** * Preconditions: @@ -685,8 +749,8 @@ namespace mongo { * lchild. */ void doBalanceRightToLeft( const DiskLoc thisLoc, int leftIndex, int split, - BtreeBucket *l, const DiskLoc lchild, - BtreeBucket *r, const DiskLoc rchild, + BtreeBucket<V> *l, const DiskLoc lchild, + BtreeBucket<V> *r, const DiskLoc rchild, IndexDetails &id, const Ordering &order ); /** @@ -735,15 +799,15 @@ namespace mongo { */ int indexInParent( const DiskLoc &thisLoc ) const; - BSONObj keyAt(int keyOfs) const { - return keyOfs >= n ? BSONObj() : keyNode(keyOfs).key; + Key keyAt(int keyOfs) const { + return keyOfs >= this->n ? Key() : this->keyNode(keyOfs).key; } /** * Allocate a temporary btree bucket in ram rather than in memory mapped * storage. The caller must release this bucket with free(). */ - static BtreeBucket* allocTemp(); + static BtreeBucket<V> * allocTemp(); /** * Preconditions: @@ -759,7 +823,7 @@ namespace mongo { * Splitting may occur recursively, possibly changing the tree head. */ void split(const DiskLoc thisLoc, int keypos, - const DiskLoc recordLoc, const BSONObj& key, + const DiskLoc recordLoc, const Key& key, const Ordering& order, const DiskLoc lchild, const DiskLoc rchild, IndexDetails& idx); /** @@ -779,15 +843,15 @@ namespace mongo { * it commonly relies on the specialized write intent mechanism of basicInsert(). */ void insertHere(const DiskLoc thisLoc, int keypos, - const DiskLoc recordLoc, const BSONObj& key, const Ordering &order, + const DiskLoc recordLoc, const Key& key, const Ordering &order, const DiskLoc lchild, const DiskLoc rchild, IndexDetails &idx) const; /** bt_insert() is basically just a wrapper around this. */ int _insert(const DiskLoc thisLoc, const DiskLoc recordLoc, - const BSONObj& key, const Ordering &order, bool dupsAllowed, + const Key& key, const Ordering &order, bool dupsAllowed, const DiskLoc lChild, const DiskLoc rChild, IndexDetails &idx) const; - bool find(const IndexDetails& idx, const BSONObj& key, const DiskLoc &recordLoc, const Ordering &order, int& pos, bool assertIfDup) const; + bool find(const IndexDetails& idx, const Key& key, const DiskLoc &recordLoc, const Ordering &order, int& pos, bool assertIfDup) const; bool customFind( int l, int h, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction, DiskLoc &thisLoc, int &keyOfs, pair< DiskLoc, int > &bestParent ) const; static void findLargestKey(const DiskLoc& thisLoc, DiskLoc& largestLoc, int& largestKey); static int customBSONCmp( const BSONObj &l, const BSONObj &rBegin, int rBeginLen, bool rSup, const vector< const BSONElement * > &rEnd, const vector< bool > &rEndInclusive, const Ordering &o, int direction ); @@ -810,7 +874,7 @@ namespace mongo { * - childForPos( keypos ) will be orphaned. 
*/ void setInternalKey( const DiskLoc thisLoc, int keypos, - const DiskLoc recordLoc, const BSONObj &key, const Ordering &order, + const DiskLoc recordLoc, const Key &key, const Ordering &order, const DiskLoc lchild, const DiskLoc rchild, IndexDetails &idx); /** @@ -829,7 +893,7 @@ namespace mongo { void deleteInternalKey( const DiskLoc thisLoc, int keypos, IndexDetails &id, const Ordering &order ); public: /** simply builds and returns a dup key error message string */ - static string dupKeyError( const IndexDetails& idx , const BSONObj& key ); + static string dupKeyError( const IndexDetails& idx , const Key& key ); }; #pragma pack() @@ -837,10 +901,15 @@ namespace mongo { class FieldRangeVectorIterator; class BtreeCursor : public Cursor { - public: + protected: BtreeCursor( NamespaceDetails *_d, int _idxNo, const IndexDetails&, const BSONObj &startKey, const BSONObj &endKey, bool endKeyInclusive, int direction ); BtreeCursor( NamespaceDetails *_d, int _idxNo, const IndexDetails& _id, const shared_ptr< FieldRangeVector > &_bounds, int _direction ); + public: virtual ~BtreeCursor(); + /** makes an appropriate subclass depending on the index version */ + static BtreeCursor* make( NamespaceDetails *_d, int _idxNo, const IndexDetails&, const BSONObj &startKey, const BSONObj &endKey, bool endKeyInclusive, int direction ); + static BtreeCursor* make( NamespaceDetails *_d, int _idxNo, const IndexDetails& _id, const shared_ptr< FieldRangeVector > &_bounds, int _direction ); + virtual bool ok() { return !bucket.isNull(); } virtual bool advance(); virtual void noteLocation(); // updates keyAtKeyOfs... @@ -868,16 +937,18 @@ namespace mongo { const _KeyNode& _currKeyNode() const { assert( !bucket.isNull() ); - const _KeyNode& kn = bucket.btree()->k(keyOfs); + const _KeyNode& kn = keyNode(keyOfs); assert( kn.isUsed() ); return kn; } - const KeyNode currKeyNode() const { + +// const BSONObj currKey() const; + /*const KeyNode currKeyNode() const { assert( !bucket.isNull() ); return bucket.btree()->keyNode(keyOfs); - } + }*/ - virtual BSONObj currKey() const { return currKeyNode().key; } + virtual BSONObj currKey() const = 0; // { return currKeyNode().key.toBson(); } virtual BSONObj indexKeyPattern() { return indexDetails.keyPattern(); } virtual void aboutToDeleteBucket(const DiskLoc& b) { @@ -885,7 +956,7 @@ namespace mongo { keyOfs = -1; } - virtual DiskLoc currLoc() { return !bucket.isNull() ? _currKeyNode().recordLoc : DiskLoc(); } + virtual DiskLoc currLoc() = 0; // { return !bucket.isNull() ? _currKeyNode().recordLoc : DiskLoc(); } virtual DiskLoc refLoc() { return currLoc(); } virtual Record* _current() { return currLoc().rec(); } virtual BSONObj current() { return BSONObj(_current()); } @@ -908,7 +979,7 @@ namespace mongo { /** for debugging only */ const DiskLoc getBucket() const { return bucket; } - private: + protected: /** * Our btrees may (rarely) have "unused" keys when items are deleted. * Skip past them. 
@@ -921,14 +992,18 @@ namespace mongo { /** selective audits on construction */ void audit(); + virtual void _audit() = 0; + virtual DiskLoc _locate(const BSONObj& key, const DiskLoc& loc) = 0; + virtual const _KeyNode& keyNode(int keyOfs) const = 0; + virtual DiskLoc _advance(const DiskLoc& thisLoc, int& keyOfs, int direction, const char *caller) = 0; + virtual void _advanceTo(DiskLoc &thisLoc, int &keyOfs, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction ) = 0; + /** set initial bucket */ void init(); /** if afterKey is true, we want the first key with values of the keyBegin fields greater than keyBegin */ void advanceTo( const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive ); - friend class BtreeBucket; - set<DiskLoc> _dups; NamespaceDetails * const d; const int idxNo; @@ -952,55 +1027,16 @@ namespace mongo { long long _nscanned; }; + /** Renames the index namespace for this btree's index. */ + void renameIndexNamespace(const char *oldNs, const char *newNs); - inline bool IndexDetails::hasKey(const BSONObj& key) { + /*inline bool IndexDetails::hasKey(const BSONObj& key) { return head.btree()->exists(*this, head, key, Ordering::make(keyPattern())); - } - inline bool IndexDetails::wouldCreateDup(const BSONObj& key, DiskLoc self) { - return head.btree()->wouldCreateDup(*this, head, key, Ordering::make(keyPattern()), self); - } - - /** - * build btree from the bottom up - * _ TODO dropDups - */ - class BtreeBuilder { - bool dupsAllowed; - IndexDetails& idx; - /** Number of keys added to btree. */ - unsigned long long n; - /** Last key passed to addKey(). */ - BSONObj keyLast; - BSONObj order; - Ordering ordering; - /** true iff commit() completed successfully. */ - bool committed; - - DiskLoc cur, first; - BtreeBucket *b; - - void newBucket(); - void buildNextLevel(DiskLoc); - void mayCommitProgressDurably(); - - public: - ~BtreeBuilder(); - - BtreeBuilder(bool _dupsAllowed, IndexDetails& _idx); - - /** - * Preconditions: 'key' is > or >= last key passed to this function (depends on _dupsAllowed) - * Postconditions: 'key' is added to intermediate storage. - */ - void addKey(BSONObj& key, DiskLoc loc); + }*/ - /** - * commit work. if not called, destructor will clean up partially completed work - * (in case exception has happened). - */ - void commit(); - - unsigned long long getn() { return n; } - }; + /** @param self location of our own record. self doesn't count as a dup. */ + /*inline bool IndexDetails::wouldCreateDup(const BSONObj& key, DiskLoc self) { + return head.btree()->wouldCreateDup(*this, head, KeyOwned(key), Ordering::make(keyPattern()), self); + }*/ } // namespace mongo; diff --git a/db/btreebuilder.cpp b/db/btreebuilder.cpp new file mode 100644 index 00000000000..38678b01d95 --- /dev/null +++ b/db/btreebuilder.cpp @@ -0,0 +1,184 @@ +// btreebuilder.cpp + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. 
+* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "pch.h" +#include "db.h" +#include "btree.h" +#include "pdfile.h" +#include "json.h" +#include "clientcursor.h" +#include "client.h" +#include "dbhelpers.h" +#include "curop-inl.h" +#include "stats/counters.h" +#include "dur_commitjob.h" +#include "btreebuilder.h" + +namespace mongo { + + /* --- BtreeBuilder --- */ + + template<class V> + BtreeBuilder<V>::BtreeBuilder(bool _dupsAllowed, IndexDetails& _idx) : + dupsAllowed(_dupsAllowed), + idx(_idx), + n(0), + order( idx.keyPattern() ), + ordering( Ordering::make(idx.keyPattern()) ) { + first = cur = BtreeBucket<V>::addBucket(idx); + b = cur.btreemod<V>(); + committed = false; + } + + template<class V> + void BtreeBuilder<V>::newBucket() { + DiskLoc L = BtreeBucket<V>::addBucket(idx); + b->tempNext() = L; + cur = L; + b = cur.btreemod<V>(); + } + + template<class V> + void BtreeBuilder<V>::mayCommitProgressDurably() { + if ( getDur().commitIfNeeded() ) { + b = cur.btreemod<V>(); + } + } + + template<class V> + void BtreeBuilder<V>::addKey(BSONObj& _key, DiskLoc loc) { + KeyOwned key(_key); + + if ( key.dataSize() > KeyMax ) { + problem() << "Btree::insert: key too large to index, skipping " << idx.indexNamespace() + << ' ' << key.dataSize() << ' ' << key.toString() << endl; + return; + } + + if( !dupsAllowed ) { + if( n > 0 ) { + int cmp = keyLast.woCompare(key, ordering); + massert( 10288 , "bad key order in BtreeBuilder - server internal error", cmp <= 0 ); + if( cmp == 0 ) { + //if( !dupsAllowed ) + uasserted( ASSERT_ID_DUPKEY , BtreeBucket<V>::dupKeyError( idx , keyLast ) ); + } + } + keyLast = key; + } + + if ( ! b->_pushBack(loc, key, ordering, DiskLoc()) ) { + // bucket was full + newBucket(); + b->pushBack(loc, key, ordering, DiskLoc()); + } + n++; + mayCommitProgressDurably(); + } + + template<class V> + void BtreeBuilder<V>::buildNextLevel(DiskLoc loc) { + int levels = 1; + while( 1 ) { + if( loc.btree<V>()->tempNext().isNull() ) { + // only 1 bucket at this level. we are done. + getDur().writingDiskLoc(idx.head) = loc; + break; + } + levels++; + + DiskLoc upLoc = BtreeBucket<V>::addBucket(idx); + DiskLoc upStart = upLoc; + BtreeBucket<V> *up = upLoc.btreemod<V>(); + + DiskLoc xloc = loc; + while( !xloc.isNull() ) { + if ( getDur().commitIfNeeded() ) { + b = cur.btreemod<V>(); + up = upLoc.btreemod<V>(); + } + + BtreeBucket<V> *x = xloc.btreemod<V>(); + Key k; + DiskLoc r; + x->popBack(r,k); + bool keepX = ( x->n != 0 ); + DiskLoc keepLoc = keepX ? xloc : x->nextChild; + + if ( ! 
up->_pushBack(r, k, ordering, keepLoc) ) { + // current bucket full + DiskLoc n = BtreeBucket<V>::addBucket(idx); + up->tempNext() = n; + upLoc = n; + up = upLoc.btreemod<V>(); + up->pushBack(r, k, ordering, keepLoc); + } + + DiskLoc nextLoc = x->tempNext(); // get next in chain at current level + if ( keepX ) { + x->parent = upLoc; + } + else { + if ( !x->nextChild.isNull() ) { + DiskLoc ll = x->nextChild; + ll.btreemod<V>()->parent = upLoc; + //(x->nextChild.btreemod<V>())->parent = upLoc; + } + x->deallocBucket( xloc, idx ); + } + xloc = nextLoc; + } + + loc = upStart; + mayCommitProgressDurably(); + } + + if( levels > 1 ) + log(2) << "btree levels: " << levels << endl; + } + + /** when all addKeys are done, we then build the higher levels of the tree */ + template<class V> + void BtreeBuilder<V>::commit() { + buildNextLevel(first); + committed = true; + } + + template<class V> + BtreeBuilder<V>::~BtreeBuilder() { + DESTRUCTOR_GUARD( + if( !committed ) { + log(2) << "Rolling back partially built index space" << endl; + DiskLoc x = first; + while( !x.isNull() ) { + DiskLoc next = x.btree<V>()->tempNext(); + string ns = idx.indexNamespace(); + theDataFileMgr._deleteRecord(nsdetails(ns.c_str()), ns.c_str(), x.rec(), x); + x = next; + getDur().commitIfNeeded(); + } + assert( idx.head.isNull() ); + log(2) << "done rollback" << endl; + } + ) + } + + template class BtreeBuilder<V0>; + template class BtreeBuilder<V1>; + +} diff --git a/db/btreebuilder.h b/db/btreebuilder.h new file mode 100644 index 00000000000..9a7f4bfb808 --- /dev/null +++ b/db/btreebuilder.h @@ -0,0 +1,53 @@ +#pragma once + +#include "btree.h" + +namespace mongo { + + /** + * build btree from the bottom up + */ + template< class V > + class BtreeBuilder { + typedef typename V::KeyOwned KeyOwned; + typedef typename V::Key Key; + + bool dupsAllowed; + IndexDetails& idx; + /** Number of keys added to btree. */ + unsigned long long n; + /** Last key passed to addKey(). */ + typename V::Key keyLast; + BSONObj order; + Ordering ordering; + /** true iff commit() completed successfully. */ + bool committed; + + DiskLoc cur, first; + BtreeBucket<V> *b; + + void newBucket(); + void buildNextLevel(DiskLoc); + void mayCommitProgressDurably(); + + public: + ~BtreeBuilder(); + + BtreeBuilder(bool _dupsAllowed, IndexDetails& _idx); + + /** + * Preconditions: 'key' is > or >= last key passed to this function (depends on _dupsAllowed) + * Postconditions: 'key' is added to intermediate storage. + */ + void addKey(BSONObj& key, DiskLoc loc); + + /** + * commit work. if not called, destructor will clean up partially completed work + * (in case exception has happened). 
+ */ + void commit(); + + unsigned long long getn() { return n; } + }; + +} diff --git a/db/btreecursor.cpp b/db/btreecursor.cpp index e1423fca7d7..50686d346b5 100644 --- a/db/btreecursor.cpp +++ b/db/btreecursor.cpp @@ -27,6 +27,161 @@ namespace mongo { extern int otherTraceLevel; + template< class V > + class BtreeCursorImpl : public BtreeCursor { + public: + typedef typename BucketBasics<V>::KeyNode KeyNode; + typedef typename V::Key Key; + + BtreeCursorImpl(NamespaceDetails *a, int b, const IndexDetails& c, const BSONObj &d, const BSONObj &e, bool f, int g) : + BtreeCursor(a,b,c,d,e,f,g) { } + BtreeCursorImpl(NamespaceDetails *_d, int _idxNo, const IndexDetails& _id, const shared_ptr< FieldRangeVector > &_bounds, int _direction) : + BtreeCursor(_d,_idxNo,_id,_bounds,_direction) + { + pair< DiskLoc, int > noBestParent; + indexDetails.head.btree<V>()->customLocate( bucket, keyOfs, startKey, 0, false, _boundsIterator->cmp(), _boundsIterator->inc(), _ordering, _direction, noBestParent ); + skipAndCheck(); + dassert( _dups.size() == 0 ); + } + + virtual DiskLoc currLoc() { + if( bucket.isNull() ) return DiskLoc(); + return currKeyNode().recordLoc; + } + + virtual BSONObj currKey() const { + assert( !bucket.isNull() ); + return bucket.btree<V>()->keyNode(keyOfs).key.toBson(); + } + + protected: + virtual void _advanceTo(DiskLoc &thisLoc, int &keyOfs, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction ) { + thisLoc.btree<V>()->advanceTo(thisLoc, keyOfs, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction); + } + virtual DiskLoc _advance(const DiskLoc& thisLoc, int& keyOfs, int direction, const char *caller) { + return thisLoc.btree<V>()->advance(thisLoc, keyOfs, direction, caller); + } + virtual void _audit() { + if ( otherTraceLevel >= 200 ) { + out() << "BtreeCursor() qtl>200. validating entire index." << endl; + indexDetails.head.btree<V>()->fullValidate(indexDetails.head, _order); + } + else { + out() << "BtreeCursor(). 
dumping head bucket" << endl; + indexDetails.head.btree<V>()->dump(); + } + } + virtual DiskLoc _locate(const BSONObj& key, const DiskLoc& loc) { + bool found; + return indexDetails.head.btree<V>()-> + locate(indexDetails, indexDetails.head, key, _ordering, keyOfs, found, loc, _direction); + } + + const _KeyNode& keyNode(int keyOfs) const { + return bucket.btree<V>()->k(keyOfs); + } + + private: + const KeyNode currKeyNode() const { + assert( !bucket.isNull() ); + const BtreeBucket<V> *b = bucket.btree<V>(); + return b->keyNode(keyOfs); + } + }; + + template class BtreeCursorImpl<V0>; + template class BtreeCursorImpl<V1>; + + /* + class BtreeCursorV1 : public BtreeCursor { + public: + typedef BucketBasics<V1>::KeyNode KeyNode; + typedef V1::Key Key; + + BtreeCursorV1(NamespaceDetails *a, int b, const IndexDetails& c, const BSONObj &d, const BSONObj &e, bool f, int g) : + BtreeCursor(a,b,c,d,e,f,g) { } + BtreeCursorV1(NamespaceDetails *_d, int _idxNo, const IndexDetails& _id, const shared_ptr< FieldRangeVector > &_bounds, int _direction) : + BtreeCursor(_d,_idxNo,_id,_bounds,_direction) + { + pair< DiskLoc, int > noBestParent; + indexDetails.head.btree<V1>()->customLocate( bucket, keyOfs, startKey, 0, false, _boundsIterator->cmp(), _boundsIterator->inc(), _ordering, _direction, noBestParent ); + skipAndCheck(); + dassert( _dups.size() == 0 ); + } + + virtual DiskLoc currLoc() { + if( bucket.isNull() ) return DiskLoc(); + return currKeyNode().recordLoc; + } + + virtual BSONObj currKey() const { + assert( !bucket.isNull() ); + return bucket.btree<V1>()->keyNode(keyOfs).key.toBson(); + } + + protected: + virtual void _advanceTo(DiskLoc &thisLoc, int &keyOfs, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction ) { + thisLoc.btree<V1>()->advanceTo(thisLoc, keyOfs, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction); + } + virtual DiskLoc _advance(const DiskLoc& thisLoc, int& keyOfs, int direction, const char *caller) { + return thisLoc.btree<V1>()->advance(thisLoc, keyOfs, direction, caller); + } + virtual void _audit() { + if ( otherTraceLevel >= 200 ) { + out() << "BtreeCursor() qtl>200. validating entire index." << endl; + indexDetails.head.btree<V1>()->fullValidate(indexDetails.head, _order); + } + else { + out() << "BtreeCursor(). 
dumping head bucket" << endl; + indexDetails.head.btree<V1>()->dump(); + } + } + virtual DiskLoc _locate(const BSONObj& key, const DiskLoc& loc); + virtual const _KeyNode& keyNode(int keyOfs) { + return bucket.btree<V1>()->k(keyOfs); + } + + private: + const KeyNode currKeyNode() const { + assert( !bucket.isNull() ); + const BtreeBucket<V1> *b = bucket.btree<V1>(); + return b->keyNode(keyOfs); + } + };*/ + + BtreeCursor* BtreeCursor::make( + NamespaceDetails *_d, int _idxNo, const IndexDetails& _id, + const BSONObj &startKey, const BSONObj &endKey, bool endKeyInclusive, int direction) + { + int v = _id.version(); + BtreeCursor *c = 0; + if( v == 1 ) { + c = new BtreeCursorImpl<V1>(_d,_idxNo,_id,startKey,endKey,endKeyInclusive,direction); + } + else if( v == 0 ) { + c = new BtreeCursorImpl<V0>(_d,_idxNo,_id,startKey,endKey,endKeyInclusive,direction); + } + else { + uasserted(14800, str::stream() << "unsupported index version " << v); + } + c->init(); + dassert( c->_dups.size() == 0 ); + return c; + } + + BtreeCursor* BtreeCursor::make( + NamespaceDetails *_d, int _idxNo, const IndexDetails& _id, + const shared_ptr< FieldRangeVector > &_bounds, int _direction ) + { + int v = _id.version(); + if( v == 1 ) + return new BtreeCursorImpl<V1>(_d,_idxNo,_id,_bounds,_direction); + if( v == 0 ) + return new BtreeCursorImpl<V0>(_d,_idxNo,_id,_bounds,_direction); + uasserted(14801, str::stream() << "unsupported index version " << v); + return 0; + } + BtreeCursor::BtreeCursor( NamespaceDetails *_d, int _idxNo, const IndexDetails &_id, const BSONObj &_startKey, const BSONObj &_endKey, bool endKeyInclusive, int _direction ) : d(_d), idxNo(_idxNo), @@ -42,8 +197,6 @@ namespace mongo { _independentFieldRanges( false ), _nscanned( 0 ) { audit(); - init(); - dassert( _dups.size() == 0 ); } BtreeCursor::BtreeCursor( NamespaceDetails *_d, int _idxNo, const IndexDetails& _id, const shared_ptr< FieldRangeVector > &_bounds, int _direction ) @@ -65,12 +218,8 @@ namespace mongo { startKey = _bounds->startKey(); _boundsIterator->advance( startKey ); // handles initialization _boundsIterator->prepDive(); - pair< DiskLoc, int > noBestParent; bucket = indexDetails.head; keyOfs = 0; - indexDetails.head.btree()->customLocate( bucket, keyOfs, startKey, 0, false, _boundsIterator->cmp(), _boundsIterator->inc(), _ordering, _direction, noBestParent ); - skipAndCheck(); - dassert( _dups.size() == 0 ); } /** Properly destroy forward declared class members. */ @@ -78,16 +227,8 @@ namespace mongo { void BtreeCursor::audit() { dassert( d->idxNo((IndexDetails&) indexDetails) == idxNo ); - if ( otherTraceLevel >= 12 ) { - if ( otherTraceLevel >= 200 ) { - out() << "::BtreeCursor() qtl>200. validating entire index." << endl; - indexDetails.head.btree()->fullValidate(indexDetails.head, _order); - } - else { - out() << "BTreeCursor(). dumping head bucket" << endl; - indexDetails.head.btree()->dump(); - } + _audit(); } } @@ -96,9 +237,7 @@ namespace mongo { startKey = _spec.getType()->fixKey( startKey ); endKey = _spec.getType()->fixKey( endKey ); } - bool found; - bucket = indexDetails.head.btree()-> - locate(indexDetails, indexDetails.head, startKey, _ordering, keyOfs, found, _direction > 0 ? minDiskLoc : maxDiskLoc, _direction); + bucket = _locate(startKey, _direction > 0 ? 
minDiskLoc : maxDiskLoc); if ( ok() ) { _nscanned = 1; } @@ -123,7 +262,7 @@ if ( !ok() ) { return false; } - int ret = _boundsIterator->advance( currKeyNode().key ); + int ret = _boundsIterator->advance( currKey() ); if ( ret == -2 ) { bucket = DiskLoc(); return false; @@ -133,7 +272,7 @@ return false; } ++_nscanned; - advanceTo( currKeyNode().key, ret, _boundsIterator->after(), _boundsIterator->cmp(), _boundsIterator->inc() ); + advanceTo( currKey(), ret, _boundsIterator->after(), _boundsIterator->cmp(), _boundsIterator->inc() ); return true; } @@ -143,11 +282,10 @@ while ( 1 ) { if ( !ok() ) break; - const BtreeBucket *b = bucket.btree(); - const _KeyNode& kn = b->k(keyOfs); + const _KeyNode& kn = keyNode(keyOfs); if ( kn.isUsed() ) break; - bucket = b->advance(bucket, keyOfs, _direction, "skipUnusedKeys"); + bucket = _advance(bucket, keyOfs, _direction, "skipUnusedKeys"); u++; //don't include unused keys in nscanned //++_nscanned; @@ -180,7 +318,7 @@ } void BtreeCursor::advanceTo( const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive) { - bucket.btree()->advanceTo( bucket, keyOfs, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, _ordering, _direction ); + _advanceTo( bucket, keyOfs, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, _ordering, _direction ); } bool BtreeCursor::advance() { @@ -188,7 +326,7 @@ if ( bucket.isNull() ) return false; - bucket = bucket.btree()->advance(bucket, keyOfs, _direction, "BtreeCursor::advance"); + bucket = _advance(bucket, keyOfs, _direction, "BtreeCursor::advance"); if ( !_independentFieldRanges ) { skipUnusedKeys( false ); @@ -205,9 +343,9 @@ void BtreeCursor::noteLocation() { if ( !eof() ) { - BSONObj o = bucket.btree()->keyAt(keyOfs).copy(); + BSONObj o = currKey().getOwned(); keyAtKeyOfs = o; - locAtKeyOfs = bucket.btree()->k(keyOfs).recordLoc; + locAtKeyOfs = currLoc(); } } @@ -226,18 +364,18 @@ _multikey = d->isMultikey(idxNo); - if ( keyOfs >= 0 ) { - const BtreeBucket *b = bucket.btree(); + BSONObj _keyAtKeyOfs(keyAtKeyOfs); + if ( keyOfs >= 0 ) { assert( !keyAtKeyOfs.isEmpty() ); // Note keyAt() returns an empty BSONObj if keyOfs is now out of range, // which is possible as keys may have been deleted. int x = 0; while( 1 ) { - if ( b->keyAt(keyOfs).woEqual(keyAtKeyOfs) && - b->k(keyOfs).recordLoc == locAtKeyOfs ) { - if ( !b->k(keyOfs).isUsed() ) { + if( currKey().woEqual(keyAtKeyOfs) && currLoc() == locAtKeyOfs ) { + + if ( !keyNode(keyOfs).isUsed() ) { /* we were deleted but still exist as an unused marker key. advance. */ @@ -259,11 +397,9 @@ valid and we must refind where we left off (which is expensive) */ - bool found; - /* TODO: Switch to keep indexdetails and do idx.head! */ - bucket = indexDetails.head.btree()->locate(indexDetails, indexDetails.head, keyAtKeyOfs, _ordering, keyOfs, found, locAtKeyOfs, _direction); - RARELY log() << " key seems to have moved in the index, refinding. found:" << found << endl; + bucket = _locate(_keyAtKeyOfs, locAtKeyOfs); + RARELY log() << "key seems to have moved in the index, refinding. " << bucket.toString() << endl; if ( ! 
bucket.isNull() ) skipUnusedKeys( false ); diff --git a/db/cap.cpp b/db/cap.cpp index 3e4d6a22860..92f97a494b9 100644 --- a/db/cap.cpp +++ b/db/cap.cpp @@ -442,7 +442,7 @@ namespace mongo { for( DiskLoc ext = firstExtent; !ext.isNull(); ext = ext.ext()->xnext ) { DiskLoc prev = ext.ext()->xprev; DiskLoc next = ext.ext()->xnext; - DiskLoc empty = ext.ext()->reuse( ns ); + DiskLoc empty = ext.ext()->reuse( ns, true ); ext.ext()->xprev.writing() = prev; ext.ext()->xnext.writing() = next; addDeletedRec( empty.drec(), empty ); diff --git a/db/db.vcxproj b/db/db.vcxproj index 42e7d52c9c7..e4801a4b707 100644 --- a/db/db.vcxproj +++ b/db/db.vcxproj @@ -475,6 +475,7 @@ <ClCompile Include="..\util\stringutils.cpp" />
<ClCompile Include="..\util\text.cpp" />
<ClCompile Include="..\util\version.cpp" />
+ <ClCompile Include="btreebuilder.cpp" />
<ClCompile Include="cap.cpp" />
<ClCompile Include="commands\distinct.cpp" />
<ClCompile Include="commands\group.cpp" />
@@ -491,6 +492,7 @@ <ClCompile Include="dur_writetodatafiles.cpp" />
<ClCompile Include="geo\2d.cpp" />
<ClCompile Include="geo\haystack.cpp" />
+ <ClCompile Include="key.cpp" />
<ClCompile Include="mongommf.cpp" />
<ClCompile Include="oplog.cpp" />
<ClCompile Include="projection.cpp" />
@@ -627,6 +629,11 @@ <None Include="repl\notes.txt" />
</ItemGroup>
<ItemGroup>
+ <ClInclude Include="..\bson\bson-inl.h" />
+ <ClInclude Include="..\bson\bson.h" />
+ <ClInclude Include="..\bson\bson_db.h" />
+ <ClInclude Include="..\bson\inline_decls.h" />
+ <ClInclude Include="..\bson\stringdata.h" />
<ClInclude Include="..\client\dbclientcursor.h" />
<ClInclude Include="..\client\distlock.h" />
<ClInclude Include="..\client\gridfs.h" />
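Aside on the db/btree.cpp hunk above: the BtreeCursor::make overloads dispatch on IndexDetails::version() at runtime, construct the matching BtreeCursorImpl<V0> or BtreeCursorImpl<V1>, and only then call init(), because a virtual call made inside the base constructor would never reach the derived override. A minimal standalone sketch of that factory pattern, using hypothetical Cursor/CursorImpl names rather than the real mongo classes:

    #include <iostream>
    #include <stdexcept>

    // Version-agnostic cursor interface, loosely analogous to BtreeCursor.
    class Cursor {
    public:
        virtual ~Cursor() { }
        virtual const char* versionName() const = 0;
        static Cursor* make(int v);   // factory: picks the template instantiation
    protected:
        virtual void init() = 0;      // must run after construction completes
    };

    struct V0 { static const char* name() { return "V0"; } };
    struct V1 { static const char* name() { return "V1"; } };

    template< class V >
    class CursorImpl : public Cursor {
    public:
        virtual const char* versionName() const { return V::name(); }
    protected:
        virtual void init() { /* per-version positioning would happen here */ }
    };

    Cursor* Cursor::make(int v) {
        Cursor *c = 0;
        if( v == 1 )
            c = new CursorImpl<V1>();
        else if( v == 0 )
            c = new CursorImpl<V0>();
        else
            throw std::runtime_error("unsupported index version");
        c->init();   // safe here: the dynamic type is CursorImpl<V>, so the override runs
        return c;
    }

    int main() {
        Cursor *c = Cursor::make(1);
        std::cout << c->versionName() << std::endl;   // prints V1
        delete c;
        return 0;
    }

Moving construction behind make() is also why callers elsewhere in this patch (dbcommands.cpp, dbhelpers.cpp, the geo code) switch from stack-allocated BtreeCursor instances to auto_ptr/scoped_ptr around the factory result.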
diff --git a/db/db.vcxproj.filters b/db/db.vcxproj.filters index df17f803a63..50788598e6b 100755 --- a/db/db.vcxproj.filters +++ b/db/db.vcxproj.filters @@ -1,4 +1,4 @@ -<?xml version="1.0" encoding="utf-8"?>
+<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<ClCompile Include="..\bson\oid.cpp" />
@@ -157,6 +157,8 @@ <ClCompile Include="..\util\file_allocator.cpp" />
<ClCompile Include="querypattern.cpp" />
<ClCompile Include="..\util\ramlog.cpp" />
+ <ClCompile Include="key.cpp" />
+ <ClCompile Include="btreebuilder.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\client\dbclientcursor.h" />
@@ -296,6 +298,11 @@ <ClInclude Include="..\util\concurrency\race.h" />
<ClInclude Include="..\util\alignedbuilder.h" />
<ClInclude Include="queryutil.h" />
+ <ClInclude Include="..\bson\bson.h" />
+ <ClInclude Include="..\bson\bson_db.h" />
+ <ClInclude Include="..\bson\bson-inl.h" />
+ <ClInclude Include="..\bson\inline_decls.h" />
+ <ClInclude Include="..\bson\stringdata.h" />
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="db.rc" />
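Several hunks below (dbcommands_admin.cpp, dbhelpers.cpp, pdfile.cpp, update.cpp) stop calling id.head.btree()->... directly and instead go through id.idxInterface(), the small abstract IndexInterface introduced later in this patch in db/index.h, with one IndexInterfaceImpl<V> singleton per on-disk format. A condensed sketch of that indirection with stand-in members; only the V1 bucket size (8192 minus the 16-byte record header) is pinned down by the static asserts in this patch, the V0 figure is illustrative:

    #include <cassert>
    #include <iostream>

    // Stand-ins for the two on-disk bucket layouts.
    struct V0 { enum { BucketSize = 8192 }; };        // illustrative only
    struct V1 { enum { BucketSize = 8192 - 16 }; };   // matches the static asserts

    // Abstract interface in the spirit of mongo::IndexInterface.
    class IndexInterface {
    protected:
        virtual ~IndexInterface() { }
    public:
        virtual int bucketSize() const = 0;
    };

    // One concrete implementation per btree version.
    template< class V >
    class IndexInterfaceImpl : public IndexInterface {
    public:
        virtual int bucketSize() const { return V::BucketSize; }
    };

    // Singletons indexed by version number, as in IndexDetails::iis[].
    static IndexInterfaceImpl<V0> iii_v0;
    static IndexInterfaceImpl<V1> iii_v1;
    static IndexInterface *iis[] = { &iii_v0, &iii_v1 };

    // Like IndexDetails::idxInterface(): resolve the version once, then all
    // callers work through the interface and never mention V0/V1 again.
    IndexInterface& idxInterface(int version) {
        assert( version == 0 || version == 1 );   // cf. isASupportedIndexVersionNumber()
        return *iis[version & 1];
    }

    int main() {
        std::cout << idxInterface(0).bucketSize() << " "
                  << idxInterface(1).bucketSize() << std::endl;
        return 0;
    }

The v&1 indexing mirrors IndexDetails::idxInterface(); it is only safe because the version is checked against the supported set first.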
diff --git a/db/dbcommands.cpp b/db/dbcommands.cpp index 0fcedd63555..30eb068bcca 100644 --- a/db/dbcommands.cpp +++ b/db/dbcommands.cpp @@ -15,6 +15,11 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +/* SHARDING: + I believe this file is for mongod only. + See s/commands_public.cpp for mongos. +*/ + #include "pch.h" #include "query.h" #include "pdfile.h" @@ -1105,7 +1110,7 @@ namespace mongo { if ( idx == 0 ) return false; - c.reset( new BtreeCursor( d, d->idxNo(*idx), *idx, min, max, false, 1 ) ); + c.reset( BtreeCursor::make( d, d->idxNo(*idx), *idx, min, max, false, 1 ) ); } long long avgObjSize = d->stats.datasize / d->stats.nrecords; @@ -1180,7 +1185,8 @@ virtual bool slaveOk() const { return true; } virtual LockType locktype() const { return READ; } virtual void help( stringstream &help ) const { - help << "{ collStats:\"blog.posts\" , scale : 1 } scale divides sizes e.g. for KB use 1024"; + help << "{ collStats:\"blog.posts\" , scale : 1 } scale divides sizes e.g. for KB use 1024\n" + " avgObjSize - in bytes"; } bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { string ns = dbname + "." + jsobj.firstElement().valuestr(); @@ -1520,7 +1526,7 @@ int idNum = nsd->findIdIndex(); if ( idNum >= 0 ) { - cursor.reset( new BtreeCursor( nsd , idNum , nsd->idx( idNum ) , BSONObj() , BSONObj() , false , 1 ) ); + cursor.reset( BtreeCursor::make( nsd , idNum , nsd->idx( idNum ) , BSONObj() , BSONObj() , false , 1 ) ); } else if ( c.find( ".system." ) != string::npos ) { continue; @@ -1574,16 +1580,13 @@ virtual bool slaveOk() const { return true; } virtual void help( stringstream& help ) const { help << "internal testing command. Makes db block (in a read lock) for 100 seconds\n"; - help << "w:true write lock"; + help << "w:true write lock. 
secs:<seconds>"; } CmdSleep() : Command("sleep") { } bool run(const string& ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - - int secs = 100; if ( cmdObj["secs"].isNumber() ) secs = cmdObj["secs"].numberInt(); - if( cmdObj.getBoolField("w") ) { writelock lk(""); sleepsecs(secs); @@ -1592,7 +1595,6 @@ namespace mongo { readlock lk(""); sleepsecs(secs); } - return true; } } cmdSleep; diff --git a/db/dbcommands_admin.cpp b/db/dbcommands_admin.cpp index c8e421c0080..31c02ddd572 100644 --- a/db/dbcommands_admin.cpp +++ b/db/dbcommands_admin.cpp @@ -356,7 +356,7 @@ namespace mongo { NamespaceDetails::IndexIterator i = d->ii(); while( i.more() ) { IndexDetails& id = i.next(); - long long keys = id.head.btree()->fullValidate(id.head, id.keyPattern()); + long long keys = id.idxInterface().fullValidate(id.head, id.keyPattern()); indexes.appendNumber(id.indexNamespace(), keys); } result.append("keysPerIndex", indexes.done()); diff --git a/db/dbhelpers.cpp b/db/dbhelpers.cpp index 5fc908e9c0b..fa51773f30e 100644 --- a/db/dbhelpers.cpp +++ b/db/dbhelpers.cpp @@ -148,7 +148,7 @@ namespace mongo { BSONObj key = i.getKeyFromQuery( query ); - DiskLoc loc = i.head.btree()->findSingle( i , i.head , key ); + DiskLoc loc = i.idxInterface().findSingle(i , i.head , key); if ( loc.isNull() ) return false; result = loc.obj(); @@ -160,7 +160,7 @@ namespace mongo { uassert(13430, "no _id index", idxNo>=0); IndexDetails& i = d->idx( idxNo ); BSONObj key = i.getKeyFromQuery( idquery ); - return i.head.btree()->findSingle( i , i.head , key ); + return i.idxInterface().findSingle(i , i.head , key); } bool Helpers::isEmpty(const char *ns, bool doAuth) { @@ -253,7 +253,7 @@ namespace mongo { IndexDetails& i = nsd->idx( ii ); - shared_ptr<Cursor> c( new BtreeCursor( nsd , ii , i , minClean , maxClean , maxInclusive, 1 ) ); + shared_ptr<Cursor> c( BtreeCursor::make( nsd , ii , i , minClean , maxClean , maxInclusive, 1 ) ); auto_ptr<ClientCursor> cc( new ClientCursor( QueryOption_NoCursorTimeout , c , ns ) ); cc->setDoingDeletes( true ); diff --git a/db/diskloc.h b/db/diskloc.h index f356c73c64b..7f2320ec452 100644 --- a/db/diskloc.h +++ b/db/diskloc.h @@ -29,9 +29,10 @@ namespace mongo { class Record; class DeletedRecord; class Extent; - class BtreeBucket; class MongoDataFile; + template< class Version > class BtreeBucket; + #pragma pack(1) /** represents a disk location/offset on disk in a database. 64 bits. it is assumed these will be passed around by value a lot so don't do anything to make them large @@ -139,9 +140,13 @@ namespace mongo { Record* rec() const; DeletedRecord* drec() const; Extent* ext() const; - const BtreeBucket* btree() const; + + template< class V > + const BtreeBucket<V> * btree() const; + // Explicitly signals we are writing and casts away const - BtreeBucket* btreemod() const; + template< class V > + BtreeBucket<V> * btreemod() const; /*MongoDataFile& pdf() const;*/ }; @@ -158,7 +158,7 @@ namespace mongo { */ Record* writing(Record* r); /** Intentionally unimplemented method. BtreeBuckets are allocated in buffers larger than sizeof( BtreeBucket ). */ - BtreeBucket* writing( BtreeBucket* ); +// BtreeBucket* writing( BtreeBucket* ); /** Intentionally unimplemented method. NamespaceDetails may be based on references to 'Extra' objects. 
*/ NamespaceDetails* writing( NamespaceDetails* ); diff --git a/db/geo/2d.cpp b/db/geo/2d.cpp index 8246057756c..38833ce71e4 100644 --- a/db/geo/2d.cpp +++ b/db/geo/2d.cpp @@ -31,6 +31,12 @@ namespace mongo { + // just use old indexes for geo for now. todo. + typedef BtreeBucket<V0> GeoBtreeBucket; + typedef GeoBtreeBucket::KeyNode GeoKeyNode; + +#define BTREE btree<V0> + #if 0 # define GEODEBUGGING # define GEODEBUG(x) cout << x << endl; @@ -966,14 +972,14 @@ namespace mongo { //// Distance not used //// - GeoPoint( const KeyNode& node ) - : _key( node.key ) , _loc( node.recordLoc ) , _o( node.recordLoc.obj() ) , _exactDistance( -1 ), _exactWithin( false ) { + GeoPoint( const GeoKeyNode& node ) + : _key( node.key.toBson() ) , _loc( node.recordLoc ) , _o( node.recordLoc.obj() ) , _exactDistance( -1 ), _exactWithin( false ) { } //// Immediate initialization of exact distance //// - GeoPoint( const KeyNode& node , double exactDistance, bool exactWithin ) - : _key( node.key ) , _loc( node.recordLoc ) , _o( node.recordLoc.obj() ), _exactDistance( exactDistance ), _exactWithin( exactWithin ) { + GeoPoint( const GeoKeyNode& node , double exactDistance, bool exactWithin ) + : _key( node.key.toBson() ) , _loc( node.recordLoc ) , _o( node.recordLoc.obj() ), _exactDistance( exactDistance ), _exactWithin( exactWithin ) { } bool operator<( const GeoPoint& other ) const { @@ -995,7 +1001,7 @@ namespace mongo { double _exactDistance; bool _exactWithin; - }; + }; class GeoAccumulator { public: @@ -1009,12 +1015,12 @@ namespace mongo { virtual ~GeoAccumulator() { } - virtual void add( const KeyNode& node ) { + virtual void add( const GeoKeyNode& node ) { GEODEBUG( "\t\t\t\t checking key " << node.key.toString() ) // when looking at other boxes, don't want to look at some key/object pair twice - pair<set<pair<const char*, DiskLoc> >::iterator,bool> seenBefore = _seen.insert( make_pair(node.key.objdata(), node.recordLoc) ); + pair<set<pair<const char*, DiskLoc> >::iterator,bool> seenBefore = _seen.insert( make_pair(node.key.data(), node.recordLoc) ); if ( ! seenBefore.second ) { GEODEBUG( "\t\t\t\t already seen : " << node.key.toString() << " @ " << Point( _g, GeoHash( node.key.firstElement() ) ).toString() << " with " << node.recordLoc.obj()["_id"] ); return; @@ -1037,7 +1043,7 @@ namespace mongo { // matcher MatchDetails details; if ( _matcher.get() ) { - bool good = _matcher->matchesWithSingleKeyIndex( node.key , node.recordLoc , &details ); + bool good = _matcher->matchesWithSingleKeyIndex( node.key.toBson() , node.recordLoc , &details ); if ( details.loadedObject ) _objectsLoaded++; @@ -1063,8 +1069,8 @@ namespace mongo { _found++; } - virtual void addSpecific( const KeyNode& node , double d, bool newDoc ) = 0; - virtual bool checkDistance( const KeyNode& node , double& d ) = 0; + virtual void addSpecific( const GeoKeyNode& node , double d, bool newDoc ) = 0; + virtual bool checkDistance( const GeoKeyNode& node , double& d ) = 0; long long found() const { return _found; @@ -1088,7 +1094,7 @@ namespace mongo { : GeoAccumulator( g , filter ) , _max( max ) , _near( n ), _maxDistance( maxDistance ), _type( type ), _distError( type == GEO_PLAIN ? 
g->_error : g->_errorSphere ), _farthest(0) {} - virtual bool checkDistance( const KeyNode& node, double& d ) { + virtual bool checkDistance( const GeoKeyNode& node, double& d ) { // Always check approximate distance, since it lets us avoid doing // checks of the rest of the object if it succeeds @@ -1109,8 +1115,8 @@ namespace mongo { return good; } - double approxDistance( const KeyNode& node ) { - return approxDistance( GeoHash( node.key.firstElement() ) ); + double approxDistance( const GeoKeyNode& node ) { + return approxDistance( GeoHash( node.key._firstElement() ) ); } double approxDistance( const GeoHash& h ) { @@ -1129,7 +1135,7 @@ namespace mongo { return approxDistance; } - double exactDistances( const KeyNode& node ) { + double exactDistances( const GeoKeyNode& node ) { GEODEBUG( "Finding exact distance for " << node.key.toString() << " and " << node.recordLoc.obj().toString() ); @@ -1141,7 +1147,7 @@ namespace mongo { // Find the particular location we want BSONObj loc; - GeoHash keyHash( node.key.firstElement(), _g->_bits ); + GeoHash keyHash( node.key._firstElement(), _g->_bits ); for( vector< BSONObj >::iterator i = locs.begin(); i != locs.end(); ++i ) { loc = *i; @@ -1190,7 +1196,7 @@ namespace mongo { return approxD >= _maxDistance - _distError && approxD <= _maxDistance + _distError; } - virtual void addSpecific( const KeyNode& node , double d, bool newDoc ) { + virtual void addSpecific( const GeoKeyNode& node , double d, bool newDoc ) { GEODEBUG( "\t\t" << GeoHash( node.key.firstElement() ) << "\t" << node.recordLoc.obj() << "\t" << d ); @@ -1227,7 +1233,7 @@ namespace mongo { BSONObj key() { if ( bucket.isNull() ) return BSONObj(); - return bucket.btree()->keyNode( pos ).key; + return bucket.BTREE()->keyNode( pos ).key.toBson(); } bool hasPrefix( const GeoHash& hash ) { @@ -1241,7 +1247,7 @@ namespace mongo { if ( bucket.isNull() ) return false; - bucket = bucket.btree()->advance( bucket , pos , direction , "btreelocation" ); + bucket = bucket.BTREE()->advance( bucket , pos , direction , "btreelocation" ); if ( all ) return checkCur( totalFound , all ); @@ -1253,9 +1259,9 @@ namespace mongo { if ( bucket.isNull() ) return false; - if ( bucket.btree()->isUsed(pos) ) { + if ( bucket.BTREE()->isUsed(pos) ) { totalFound++; - all->add( bucket.btree()->keyNode( pos ) ); + all->add( bucket.BTREE()->keyNode( pos ) ); } else { GEODEBUG( "\t\t\t\t not used: " << key() ); @@ -1278,9 +1284,11 @@ namespace mongo { GeoHash start , int & found , GeoAccumulator * hopper ) { + assert( id.version() == 0 ); // see note at top of this file + Ordering ordering = Ordering::make(spec->_order); - min.bucket = id.head.btree()->locate( id , id.head , start.wrap() , + min.bucket = id.head.BTREE()->locate( id , id.head , start.wrap() , ordering , min.pos , min.found , minDiskLoc, -1 ); if (hopper) min.checkCur( found , hopper ); @@ -1288,7 +1296,7 @@ namespace mongo { // TODO: Might be able to avoid doing a full lookup in some cases here, // but would add complexity and we're hitting pretty much the exact same data. // Cannot set this = min in general, however. 
- max.bucket = id.head.btree()->locate( id , id.head , start.wrap() , + max.bucket = id.head.BTREE()->locate( id , id.head , start.wrap() , ordering , max.pos , max.found , minDiskLoc, 1 ); if (hopper) max.checkCur( found , hopper ); @@ -1327,7 +1335,7 @@ namespace mongo { void exec() { const IndexDetails& id = *_spec->getDetails(); - const BtreeBucket * head = id.head.btree(); + const GeoBtreeBucket * head = id.head.BTREE(); assert( head ); /* * Search algorithm @@ -1455,7 +1463,7 @@ namespace mongo { long long myscanned = 0; BtreeLocation loc; - loc.bucket = id.head.btree()->locate( id , id.head , toscan.wrap() , Ordering::make(_spec->_order) , + loc.bucket = id.head.BTREE()->locate( id , id.head , toscan.wrap() , Ordering::make(_spec->_order) , loc.pos , loc.found , minDiskLoc ); loc.checkCur( _found , _hopper.get() ); while ( loc.hasPrefix( toscan ) && loc.advance( 1 , _found , _hopper.get() ) ) { @@ -1833,7 +1841,7 @@ namespace mongo { // Whether the current box overlaps our search area virtual bool intersectsBox( Box& cur ) = 0; - virtual void addSpecific( const KeyNode& node , double d, bool newDoc ) { + virtual void addSpecific( const GeoKeyNode& node , double d, bool newDoc ) { if( ! newDoc ) return; @@ -1943,9 +1951,9 @@ namespace mongo { return _bBox.intersects( cur ); } - virtual bool checkDistance( const KeyNode& node, double& d ) { + virtual bool checkDistance( const GeoKeyNode& node, double& d ) { - GeoHash h( node.key.firstElement(), _g->_bits ); + GeoHash h( node.key._firstElement(), _g->_bits ); // Inexact hash distance checks. double error = 0; @@ -2069,9 +2077,9 @@ namespace mongo { return _want.intersects( cur ); } - virtual bool checkDistance( const KeyNode& node, double& d ) { + virtual bool checkDistance( const GeoKeyNode& node, double& d ) { - GeoHash h( node.key.firstElement() ); + GeoHash h( node.key._firstElement() ); Point approxPt( _g, h ); bool approxInside = _want.inside( approxPt, _fudge ); @@ -2151,9 +2159,9 @@ namespace mongo { return _bounds.intersects( cur ); } - virtual bool checkDistance( const KeyNode& node, double& d ) { + virtual bool checkDistance( const GeoKeyNode& node, double& d ) { - GeoHash h( node.key.firstElement(), _g->_bits ); + GeoHash h( node.key._firstElement(), _g->_bits ); Point p( _g, h ); int in = _poly.contains( p, _g->_error ); @@ -2442,7 +2450,8 @@ namespace mongo { int max = 100000; - BtreeCursor c( d , geoIdx , id , BSONObj() , BSONObj() , true , 1 ); + auto_ptr<BtreeCursor> bc( BtreeCursor::make( d , geoIdx , id , BSONObj() , BSONObj() , true , 1 ) ); + BtreeCursor &c = *bc; while ( c.ok() && max-- ) { GeoHash h( c.currKey().firstElement() ); int len; diff --git a/db/geo/haystack.cpp b/db/geo/haystack.cpp index 7f278cafa23..a6810639487 100644 --- a/db/geo/haystack.cpp +++ b/db/geo/haystack.cpp @@ -207,15 +207,15 @@ namespace mongo { GEOQUADDEBUG( "KEY: " << key ); set<DiskLoc> thisPass; - BtreeCursor cursor( nsd , idxNo , *getDetails() , key , key , true , 1 ); - while ( cursor.ok() ) { - pair<set<DiskLoc>::iterator, bool> p = thisPass.insert( cursor.currLoc() ); + scoped_ptr<BtreeCursor> cursor( BtreeCursor::make( nsd , idxNo , *getDetails() , key , key , true , 1 ) ); + while ( cursor->ok() ) { + pair<set<DiskLoc>::iterator, bool> p = thisPass.insert( cursor->currLoc() ); if ( p.second ) { - hopper.got( cursor.currLoc() ); - GEOQUADDEBUG( "\t" << cursor.current() ); + hopper.got( cursor->currLoc() ); + GEOQUADDEBUG( "\t" << cursor->current() ); btreeMatches++; } - cursor.advance(); + cursor->advance(); } } diff --git 
a/db/index.cpp b/db/index.cpp index d7d5b71480a..faec4e4519c 100644 --- a/db/index.cpp +++ b/db/index.cpp @@ -26,6 +26,45 @@ namespace mongo { + /** 0 is the old (<= v1.8) index version; + 1 is the provisional new version, which might move to 2 for the real release (TBD). + */ + const int DefaultIndexVersionNumber = 0; + + template< class V > + class IndexInterfaceImpl : public IndexInterface { + public: + typedef typename V::KeyOwned KeyOwned; + virtual long long fullValidate(const DiskLoc& thisLoc, const BSONObj &order) { + return thisLoc.btree<V>()->fullValidate(thisLoc, order); + } + virtual DiskLoc findSingle(const IndexDetails &indexdetails , const DiskLoc& thisLoc, const BSONObj& key) const { + return thisLoc.btree<V>()->findSingle(indexdetails,thisLoc,key); + } + virtual bool unindex(const DiskLoc thisLoc, IndexDetails& id, const BSONObj& key, const DiskLoc recordLoc) const { + return thisLoc.btree<V>()->unindex(thisLoc, id, key, recordLoc); + } + virtual int bt_insert(const DiskLoc thisLoc, const DiskLoc recordLoc, + const BSONObj& key, const Ordering &order, bool dupsAllowed, + IndexDetails& idx, bool toplevel = true) const { + return thisLoc.btree<V>()->bt_insert(thisLoc, recordLoc, key, order, dupsAllowed, idx, toplevel); + } + virtual DiskLoc addBucket(const IndexDetails& id) { + return BtreeBucket<V>::addBucket(id); + } + virtual void uassertIfDups(IndexDetails& idx, vector<BSONObj*>& addedKeys, DiskLoc head, DiskLoc self, const Ordering& ordering) { + const BtreeBucket<V> *h = head.btree<V>(); + for( vector<BSONObj*>::iterator i = addedKeys.begin(); i != addedKeys.end(); i++ ) { + bool dup = h->wouldCreateDup(idx, head, KeyOwned(**i), ordering, self); + uassert( 11001 , "E11001 duplicate key on update", !dup); + } + } + }; + + IndexInterfaceImpl<V0> iii_v0; + IndexInterfaceImpl<V1> iii_v1; + IndexInterface *IndexDetails::iis[] = { &iii_v0, &iii_v1 }; + int removeFromSysIndexes(const char *ns, const char *idxName) { string system_indexes = cc().database()->name + ".system.indexes"; BSONObjBuilder b; @@ -189,7 +228,6 @@ namespace mongo { uassert(10097, "bad table to index name on add index attempt", cc().database()->name == nsToDatabase(sourceNS.c_str())); - BSONObj key = io.getObjectField("key"); uassert(12524, "index key pattern too large", key.objsize() <= 2048); if( !validKeyPattern(key) ) { @@ -260,16 +298,31 @@ string pluginName = IndexPlugin::findPluginName( key ); IndexPlugin * plugin = pluginName.size() ? 
IndexPlugin::get( pluginName ) : 0; - if ( plugin ) { - fixedIndexObject = plugin->adjustIndexSpec( io ); - } - else if ( io["v"].eoo() ) { - // add "v" if it doesn't exist - // if it does - leave whatever value was there - // this is for testing and replication - BSONObjBuilder b( io.objsize() + 32 ); - b.appendElements( io ); - b.append( "v" , 0 ); + + { + BSONObj o = io; + if ( plugin ) { + o = plugin->adjustIndexSpec(o); + } + BSONObjBuilder b; + int v = DefaultIndexVersionNumber; + if( o.hasElement("_id") ) + b.append( o["_id"] ); + if( !o["v"].eoo() ) { + double vv = o["v"].Number(); + // note (one day) we may be able to freshly build fewer versions than we can use; + // isASupportedIndexVersionNumber() is what we can use + uassert(14803, str::stream() << "this version of mongod cannot build new indexes of version number " << vv, + vv == 0 || vv == 1); + v = (int) vv; + } + // idea is to put things we use a lot earlier + b.append("v", v); + b.append(o["key"]); + if( o["unique"].trueValue() ) + b.appendBool("unique", true); // normalize to bool true in case it was int 1 or something... + b.append(o["ns"]); + b.appendElementsUnique(o); fixedIndexObject = b.obj(); } diff --git a/db/index.h b/db/index.h index 8578ed381c9..51416d8f51b 100644 --- a/db/index.h +++ b/db/index.h @@ -25,6 +25,21 @@ namespace mongo { + class IndexInterface { + protected: + virtual ~IndexInterface() { } + public: + virtual long long fullValidate(const DiskLoc& thisLoc, const BSONObj &order) = 0; + virtual DiskLoc findSingle(const IndexDetails &indexdetails , const DiskLoc& thisLoc, const BSONObj& key) const = 0; + virtual bool unindex(const DiskLoc thisLoc, IndexDetails& id, const BSONObj& key, const DiskLoc recordLoc) const = 0; + virtual int bt_insert(const DiskLoc thisLoc, const DiskLoc recordLoc, + const BSONObj& key, const Ordering &order, bool dupsAllowed, + IndexDetails& idx, bool toplevel = true) const = 0; + virtual DiskLoc addBucket(const IndexDetails&) = 0; + virtual void uassertIfDups(IndexDetails& idx, vector<BSONObj*>& addedKeys, DiskLoc head, + DiskLoc self, const Ordering& ordering) = 0; + }; + /* Details about a particular index. There is one of these effectively for each object in system.namespaces (although this also includes the head pointer, which is not in that collection). @@ -45,7 +60,7 @@ namespace mongo { /* Location of index info object. Format: { name:"nameofindex", ns:"parentnsname", key: {keypattobject} - [, unique: <bool>, background: <bool>] + [, unique: <bool>, background: <bool>, v:<version>] } This object is in the system.indexes collection. Note that since we @@ -86,7 +101,6 @@ /* true if the specified key is in the index */ bool hasKey(const BSONObj& key); - bool wouldCreateDup(const BSONObj& key, DiskLoc self); // returns name of this index's storage area // database.table.$index @@ -126,6 +140,16 @@ return io.getStringField("ns"); } + int version() const { + BSONElement e = info.obj()["v"]; + if( e.type() == NumberInt ) + return e._numberInt(); + // should normally be an int. this is for backward compatibility + int v = e.numberInt(); + uassert(14802, "index v field should be Integer type", v == 0); + return v; + } + bool unique() const { BSONObj io = info.obj(); return io["unique"].trueValue() || @@ -148,6 +172,21 @@ string toString() const { return info.obj().toString(); } + + /** @return true if supported. supported means we can use the index, including adding new keys. 
+ it may not mean we can build the index version in question: we may not maintain building + of indexes in old formats in the future. + */ + static bool isASupportedIndexVersionNumber(int v) { return v == 0 || v == 1; } + + IndexInterface& idxInterface() { + int v = version(); + dassert( isASupportedIndexVersionNumber(v) ); + return *iis[v&1]; + } + + private: + static IndexInterface *iis[]; }; struct IndexChanges { /*on an update*/ @@ -162,10 +201,8 @@ namespace mongo { void dupCheck(IndexDetails& idx, DiskLoc curObjLoc) { if( added.empty() || !idx.unique() ) return; - for( vector<BSONObj*>::iterator i = added.begin(); i != added.end(); i++ ) { - bool dup = idx.wouldCreateDup(**i, curObjLoc); - uassert( 11001 , "E11001 duplicate key on update", !dup); - } + const Ordering ordering = Ordering::make(idx.keyPattern()); + idx.idxInterface().uassertIfDups(idx, added, idx.head, curObjLoc, ordering); // "E11001 duplicate key on update" } }; diff --git a/db/indexkey.cpp b/db/indexkey.cpp index 34f30fa45ab..7dc52730070 100644 --- a/db/indexkey.cpp +++ b/db/indexkey.cpp @@ -118,7 +118,7 @@ namespace mongo { void IndexSpec::getKeys( const BSONObj &obj, BSONObjSetDefaultOrder &keys ) const { - if ( _indexType.get() ) { + if ( _indexType.get() ) { //plugin (eg geo) _indexType->getKeys( obj , keys ); return; } diff --git a/db/key.cpp b/db/key.cpp new file mode 100644 index 00000000000..0d8696a17a2 --- /dev/null +++ b/db/key.cpp @@ -0,0 +1,370 @@ +// @file key.cpp + +/** +* Copyright (C) 2011 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "pch.h" +#include "key.h" + +namespace mongo { + + // [ISKEY][HASMORE][x][y][canontype_4bits] + + /* warning: don't do BinData here unless you are careful with Geo, as geo + uses bindata. you would want to perf test it on the change as + the geo code doesn't use Key's but rather BSONObj's for its + key manipulation. + */ + + enum CanonicalsEtc { + cminkey=1, + cnull=2, + cdouble=4, + cstring=6, + coid=8, + cfalse=10, + ctrue=11, + cdate=12, + cmaxkey=14, + cCANONTYPEMASK = 0xf, + cY = 0x10, + cint = cY | cdouble, + cX = 0x20, + clong = cX | cdouble, + cHASMORE = 0x40, + cNOTUSED = 0x80 + }; + + /** object cannot be represented in compact format. so store in traditional bson format + with a leading sentinel byte IsBSON to indicate it's in that format. + + Given that the KeyV1Owned constructor already grabbed a bufbuilder, we reuse it here + so that we don't have to do an extra malloc. 
+ */ + void KeyV1Owned::traditional(BufBuilder& b, const BSONObj& obj) { + b.reset(); + b.appendUChar(IsBSON); + b.appendBuf(obj.objdata(), obj.objsize()); + _toFree = b.buf(); + _keyData = (const unsigned char *) _toFree; + b.decouple(); + } + + // fromBSON to Key format + KeyV1Owned::KeyV1Owned(const BSONObj& obj) { + BufBuilder b(512); + BSONObj::iterator i(obj); + assert( i.more() ); + unsigned char bits = 0; + while( 1 ) { + BSONElement e = i.next(); + if( i.more() ) + bits |= cHASMORE; + switch( e.type() ) { + case MinKey: + b.appendUChar(cminkey|bits); + break; + case jstNULL: + b.appendUChar(cnull|bits); + break; + case MaxKey: + b.appendUChar(cmaxkey|bits); + break; + case Bool: + b.appendUChar( (e.boolean()?ctrue:cfalse) | bits ); + break; + case jstOID: + b.appendUChar(coid|bits); + b.appendBuf(&e.__oid(), sizeof(OID)); + break; + case Date: + b.appendUChar(cdate|bits); + b.appendStruct(e.date()); + break; + case String: + { + b.appendUChar(cstring|bits); + // should we do e.valuestrsize()-1? last char currently will always be null. + unsigned x = (unsigned) e.valuestrsize(); + if( x > 255 ) { + traditional(b, obj); + return; + } + b.appendUChar(x); + b.appendBuf(e.valuestr(), x); + break; + } + case NumberInt: + b.appendUChar(cint|bits); + b.appendNum((double) e._numberInt()); + break; + case NumberLong: + { + long long n = e._numberLong(); + double d = (double) n; + if( d != n ) { + traditional(b, obj); + return; + } + b.appendUChar(clong|bits); + b.appendNum(d); + break; + } + case NumberDouble: + { + double d = e._numberDouble(); + bool nan = !( d <= numeric_limits< double >::max() && + d >= -numeric_limits< double >::max() ); + if( !nan ) { + b.appendUChar(cdouble|bits); + b.appendNum(d); + break; + } + // else fall through and return a traditional BSON obj so our compressed keys need not check for nan + } + default: + // if other types involved, store as traditional BSON + traditional(b, obj); + return; + } + if( !i.more() ) + break; + bits = 0; + } + _toFree = b.buf(); + _keyData = (const unsigned char *) _toFree; + dassert( b.len() == dataSize() ); // check datasize method is correct + dassert( (*_keyData & cNOTUSED) == 0 ); + b.decouple(); + } + + BSONObj KeyV1::toBson() const { + if( !isCompactFormat() ) + return bson(); + + BSONObjBuilder b(512); + const unsigned char *p = _keyData; + while( 1 ) { + unsigned bits = *p++; + + switch( bits & 0x3f ) { + case cminkey: b.appendMinKey(""); break; + case cnull: b.appendNull(""); break; + case cfalse: b.appendBool("", false); break; + case ctrue: b.appendBool("", true); break; + case cmaxkey: + b.appendMaxKey(""); + break; + case cstring: + { + unsigned sz = *p++; + b.append("", (const char *) p, sz); + p += sz; + break; + } + case coid: + b.appendOID("", (OID *) p); + p += sizeof(OID); + break; + case cdate: + b.appendDate("", (Date_t&) *p); + p += 8; + break; + case cdouble: + b.append("", (double&) *p); + p += sizeof(double); + break; + case cint: + b.append("", (int) ((double&) *p)); + p += sizeof(double); + break; + case clong: + b.append("", (long long) ((double&) *p)); + p += sizeof(double); + break; + default: + assert(false); + } + + if( (bits & cHASMORE) == 0 ) + break; + } + return b.obj(); + } + + static int compare(const unsigned char *&l, const unsigned char *&r) { + int lt = (*l & cCANONTYPEMASK); + int rt = (*r & cCANONTYPEMASK); + int x = lt - rt; + if( x ) + return x; + + l++; r++; + + // same type + switch( lt ) { + case cdouble: + { + double L = *((double *) l); + double R = *((double *) r); + if( L < R ) 
+ return -1; + if( L > R ) + return 1; + l += 8; r += 8; + break; + } + case cstring: + { + l++; r++; // skip the size byte + // todo: see https://jira.mongodb.org/browse/SERVER-1300 + int res = strcmp((const char *) l, (const char *) r); + if( res ) + return res; + unsigned sz = l[-1]; + l += sz; r += sz; + break; + } + case coid: + { + int res = memcmp(l, r, sizeof(OID)); + if( res ) + return res; + l += 12; r += 12; + break; + } + case cdate: + { + long long L = *((long long *) l); + long long R = *((long long *) r); + if( L < R ) + return -1; + if( L > R ) + return 1; + l += 8; r += 8; + break; + } + default: + // all the others are a match -- e.g. null == null + ; + } + + return 0; + } + + // at least one of this and right are traditional BSON format + int NOINLINE_DECL KeyV1::compareHybrid(const KeyV1& right, const Ordering& order) const { + BSONObj L = toBson(); + BSONObj R = right.toBson(); + return L.woCompare(R, order, /*considerfieldname*/false); + } + + int KeyV1::woCompare(const KeyV1& right, const Ordering &order) const { + const unsigned char *l = _keyData; + const unsigned char *r = right._keyData; + + if( (*l|*r) == IsBSON ) // only can do this if cNOTUSED maintained + return compareHybrid(right, order); + + unsigned mask = 1; + while( 1 ) { + char lval = *l; + char rval = *r; + { + int x = compare(l, r); // updates l and r pointers + if( x ) { + if( order.descending(mask) ) + x = -x; + return x; + } + } + + { + int x = ((int)(lval & cHASMORE)) - ((int)(rval & cHASMORE)); + if( x ) + return x; + if( (lval & cHASMORE) == 0 ) + break; + } + + mask <<= 1; + } + + return 0; + } + + bool KeyV1::woEqual(const KeyV1& right) const { + const unsigned char *l = _keyData; + const unsigned char *r = right._keyData; + + if( (*l|*r) == IsBSON ) { + return toBson().woEqual(right.toBson()); + } + + while( 1 ) { + char lval = *l; + char rval = *r; + if( compare(l, r) ) // updates l and r pointers + return false; + if( (lval&cHASMORE)^(rval&cHASMORE) ) + return false; + if( (lval&cHASMORE) == 0 ) + break; + } + + return true; + } + + static unsigned sizes[] = { + 0, + 1, //cminkey=1, + 1, //cnull=2, + 0, + 9, //cdouble=4, + 0, + 0, //cstring=6, + 0, + 13, //coid=8, + 0, + 1, //cfalse=10, + 1, //ctrue=11, + 9, //cdate=12, + 0, + 1, //cmaxkey=14, + 0 + }; + + int KeyV1::dataSize() const { + const unsigned char *p = _keyData; + if( !isCompactFormat() ) { + return bson().objsize() + 1; + } + + bool more; + do { + unsigned type = *p & cCANONTYPEMASK; + unsigned z = sizes[type]; + if( z == 0 ) { + assert( type == cstring ); + z = ((unsigned) p[1]) + 2; + } + more = (*p & cHASMORE) != 0; + p += z; + } while( more ); + return p - _keyData; + } + +} diff --git a/db/key.h b/db/key.h new file mode 100644 index 00000000000..99aacc5fc70 --- /dev/null +++ b/db/key.h @@ -0,0 +1,101 @@ +// @file key.h + +/** +* Copyright (C) 2011 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#pragma once + +#include "jsobj.h" + +namespace mongo { + + /** Key class for precomputing a small format index key that is denser than a traditional BSONObj. + + KeyBson is a legacy wrapper implementation for old BSONObj style keys for v:0 indexes. + + KeyV1 is the new implementation. + */ + class KeyBson { + public: + KeyBson() { } + explicit KeyBson(const char *keyData) : _o(keyData) { } + explicit KeyBson(const BSONObj& obj) : _o(obj) { } + int woCompare(const KeyBson& r, const Ordering &o) const { return _o.woCompare(r._o, o); } + bool woEqual(const KeyBson& r) const { return _o.woEqual(r._o); } + BSONObj toBson() const { return _o; } + string toString() const { return _o.toString(); } + int dataSize() const { return _o.objsize(); } + const char * data() const { return _o.objdata(); } + BSONElement _firstElement() const { return _o.firstElement(); } + bool isCompactFormat() const { return false; } + private: + BSONObj _o; + }; + + class KeyV1 { + public: + KeyV1() { _keyData = 0; } + ~KeyV1() { DEV _keyData = (const unsigned char *) 1; } + + /** @param keyData can be a buffer containing data in either BSON format, OR in KeyV1 format. + when BSON, we are just a wrapper + */ + explicit KeyV1(const char *keyData) : _keyData((unsigned char *) keyData) { } + int woCompare(const KeyV1& r, const Ordering &o) const; + bool woEqual(const KeyV1& r) const; + BSONObj toBson() const; + string toString() const { return toBson().toString(); } + + /** get the key data we want to store in the btree bucket */ + const char * data() const { return (const char *) _keyData; } + + /** @return size of data() */ + int dataSize() const; + + /** only used by geo, which always has bson keys */ + BSONElement _firstElement() const { return bson().firstElement(); } + bool isCompactFormat() const { return *_keyData != IsBSON; } + protected: + enum { IsBSON = 0xff }; + const unsigned char *_keyData; + BSONObj bson() const { + dassert( !isCompactFormat() ); + return BSONObj((const char *) _keyData+1); + } + private: + int compareHybrid(const KeyV1& right, const Ordering& order) const; + }; + + class KeyV1Owned : public KeyV1 { + public: + /** @param obj a BSON object to be translated to KeyV1 format. If the object isn't + representable in KeyV1 format (which happens, intentionally, at times) + it will stay as bson herein. + */ + KeyV1Owned(const BSONObj& obj); + ~KeyV1Owned() { free((void*) _toFree); } + private: + KeyV1Owned(const KeyV1Owned&); //not copyable + const char *_toFree; + void traditional(BufBuilder& b, const BSONObj& obj); // store as traditional bson not as compact format + }; + + //typedef KeyBson Key; + //typedef KeyBson KeyOwned; + //typedef KeyV1 Key; + //typedef KeyV1Owned KeyOwned; + +}; diff --git a/db/namespace-inl.h b/db/namespace-inl.h index 374a94e2492..0a5f559e4b6 100644 --- a/db/namespace-inl.h +++ b/db/namespace-inl.h @@ -71,8 +71,10 @@ } inline IndexDetails& NamespaceDetails::idx(int idxNo, bool missingExpected ) { - if( idxNo < NIndexesBase ) - return _indexes[idxNo]; + if( idxNo < NIndexesBase ) { + IndexDetails& id = _indexes[idxNo]; + return id; + } Extra *e = extra(); if ( ! 
e ) { if ( missingExpected ) diff --git a/db/namespace.cpp b/db/namespace.cpp index 028ddf250cb..e8fb88430b6 100644 --- a/db/namespace.cpp +++ b/db/namespace.cpp @@ -714,7 +714,7 @@ namespace mongo { indexDetails.info = newIndexSpecLoc; string newIndexNs = indexDetails.indexNamespace(); - BtreeBucket::renameIndexNamespace( oldIndexNs.c_str(), newIndexNs.c_str() ); + renameIndexNamespace( oldIndexNs.c_str(), newIndexNs.c_str() ); deleteObjects( s.c_str(), oldIndexSpec.getOwned(), true, false, true ); } } diff --git a/db/pdfile.cpp b/db/pdfile.cpp index 9ad7099934e..fd9e4c2b2a2 100644 --- a/db/pdfile.cpp +++ b/db/pdfile.cpp @@ -32,6 +32,7 @@ _ disallow system* manipulations from the database. #include "../util/processinfo.h" #include "../util/file.h" #include "btree.h" +#include "btreebuilder.h" #include <algorithm> #include <list> #include "query.h" @@ -46,6 +47,9 @@ _ disallow system* manipulations from the database. namespace mongo { + BOOST_STATIC_ASSERT( sizeof(Extent)-4 == 48+128 ); + BOOST_STATIC_ASSERT( sizeof(DataFileHeader)-4 == 8192 ); + bool inDBRepair = false; struct doingRepair { doingRepair() { @@ -327,13 +331,13 @@ } } - void MongoDataFile::badOfs2(int ofs) const { + NOINLINE_DECL void MongoDataFile::badOfs2(int ofs) const { stringstream ss; ss << "bad offset:" << ofs << " accessing file: " << mmf.filename() << " - consider repairing database"; uasserted(13441, ss.str()); } - void MongoDataFile::badOfs(int ofs) const { + NOINLINE_DECL void MongoDataFile::badOfs(int ofs) const { stringstream ss; ss << "bad offset:" << ofs << " accessing file: " << mmf.filename() << " - consider repairing database"; uasserted(13440, ss.str()); @@ -436,13 +440,20 @@ } Extent* MongoDataFile::createExtent(const char *ns, int approxSize, bool newCapped, int loops) { + { + // make sizes align with VM page size + int newSize = (approxSize + 0xfff) & 0xfffff000; + assert( newSize >= 0 ); + if( newSize < Extent::maxSize() ) + approxSize = newSize; + } massert( 10357 , "shutdown in progress", ! inShutdown() ); massert( 10358 , "bad new extent size", approxSize >= Extent::minSize() && approxSize <= Extent::maxSize() ); massert( 10359 , "header==0 on new extent: 32 bit mmap space exceeded?", header() ); // null if file open failed - int ExtentSize = approxSize <= header()->unusedLength ? approxSize : header()->unusedLength; + int ExtentSize = min(header()->unusedLength, approxSize); DiskLoc loc; if ( ExtentSize < Extent::minSize() ) { - /* not there could be a lot of looping here is db just started and + /* note there could be a lot of looping here if db just started and no files are open yet. we might want to do something about that. 
*/ if ( loops > 8 ) { assert( loops < 10000 ); @@ -453,12 +464,12 @@ namespace mongo { } int offset = header()->unused.getOfs(); - DataFileHeader *h = getDur().writing(header()); - h->unused.set( fileNo, offset + ExtentSize ); - h->unusedLength -= ExtentSize; + DataFileHeader *h = header(); + h->unused.writing().set( fileNo, offset + ExtentSize ); + getDur().writingInt(h->unusedLength) = h->unusedLength - ExtentSize; loc.set(fileNo, offset); Extent *e = _getExtent(loc); - DiskLoc emptyLoc = getDur().writing(e)->init(ns, ExtentSize, fileNo, offset); + DiskLoc emptyLoc = getDur().writing(e)->init(ns, ExtentSize, fileNo, offset, newCapped); addNewExtentToNamespace(ns, e, loc, emptyLoc, newCapped); @@ -542,7 +553,7 @@ namespace mongo { // use it OCCASIONALLY if( n > 512 ) log() << "warning: newExtent " << n << " scanned\n"; - DiskLoc emptyLoc = e->reuse(ns); + DiskLoc emptyLoc = e->reuse(ns, capped); addNewExtentToNamespace(ns, e, e->myLoc, emptyLoc, capped); return e; } @@ -561,21 +572,36 @@ namespace mongo { lastRecord.Null(); } - DiskLoc Extent::reuse(const char *nsname) { - return getDur().writing(this)->_reuse(nsname); + DiskLoc Extent::reuse(const char *nsname, bool capped) { + return getDur().writing(this)->_reuse(nsname, capped); } - DiskLoc Extent::_reuse(const char *nsname) { - log(3) << "reset extent was:" << nsDiagnostic.toString() << " now:" << nsname << '\n'; + + void getEmptyLoc(const char *ns, const DiskLoc extentLoc, int extentLength, bool capped, /*out*/DiskLoc& emptyLoc, /*out*/int& delRecLength) { + emptyLoc = extentLoc; + emptyLoc.inc( Extent::HeaderSize() ); + delRecLength = extentLength - Extent::HeaderSize(); + if( delRecLength >= 32*1024 && str::contains(ns, '$') && !capped ) { + // probably an index. so skip forward to keep its records page aligned + int& ofs = emptyLoc.GETOFS(); + int newOfs = (ofs + 0xfff) & ~0xfff; + delRecLength -= (newOfs-ofs); + dassert( delRecLength > 0 ); + ofs = newOfs; + } + } + + DiskLoc Extent::_reuse(const char *nsname, bool capped) { + LOG(3) << "reset extent was:" << nsDiagnostic.toString() << " now:" << nsname << '\n'; massert( 10360 , "Extent::reset bad magic value", magic == 0x41424344 ); nsDiagnostic = nsname; markEmpty(); - DiskLoc emptyLoc = myLoc; - emptyLoc.inc( (int) (_extentData-(char*)this) ); + DiskLoc emptyLoc; + int delRecLength; + getEmptyLoc(nsname, myLoc, length, capped, emptyLoc, delRecLength); - int delRecLength = length - (_extentData - (char *) this); - - DeletedRecord *empty = DataFileMgr::makeDeletedRecord(emptyLoc, delRecLength);//(DeletedRecord *) getRecord(emptyLoc); + // todo: some dup code here and below in Extent::init + DeletedRecord *empty = DataFileMgr::makeDeletedRecord(emptyLoc, delRecLength); empty = getDur().writing(empty); empty->lengthWithHeaders = delRecLength; empty->extentOfs = myLoc.getOfs(); @@ -585,7 +611,7 @@ namespace mongo { } /* assumes already zeroed -- insufficient for block 'reuse' perhaps */ - DiskLoc Extent::init(const char *nsname, int _length, int _fileNo, int _offset) { + DiskLoc Extent::init(const char *nsname, int _length, int _fileNo, int _offset, bool capped) { magic = 0x41424344; myLoc.set(_fileNo, _offset); xnext.Null(); @@ -595,12 +621,12 @@ namespace mongo { firstRecord.Null(); lastRecord.Null(); - DiskLoc emptyLoc = myLoc; - emptyLoc.inc( (int) (_extentData-(char*)this) ); + DiskLoc emptyLoc; + int delRecLength; + getEmptyLoc(nsname, myLoc, _length, capped, emptyLoc, delRecLength); - int l = _length - (_extentData - (char *) this); - DeletedRecord *empty = 
getDur().writing( DataFileMgr::makeDeletedRecord(emptyLoc, l) ); - empty->lengthWithHeaders = l; + DeletedRecord *empty = getDur().writing( DataFileMgr::makeDeletedRecord(emptyLoc, delRecLength) ); + empty->lengthWithHeaders = delRecLength; empty->extentOfs = myLoc.getOfs(); return emptyLoc; } @@ -857,16 +883,17 @@ static void _unindexRecord(IndexDetails& id, BSONObj& obj, const DiskLoc& dl, bool logMissing = true) { BSONObjSetDefaultOrder keys; id.getKeysFromObject(obj, keys); + IndexInterface& ii = id.idxInterface(); for ( BSONObjSetDefaultOrder::iterator i=keys.begin(); i != keys.end(); i++ ) { BSONObj j = *i; if ( otherTraceLevel >= 5 ) { - out() << "_unindexRecord() " << obj.toString(); - out() << "\n unindex:" << j.toString() << endl; + out() << "_unindexRecord() " << obj.toString() << endl; + out() << " unindex:" << j.toString() << endl; } bool ok = false; try { - ok = id.head.btree()->unindex(id.head, id, j, dl); + ok = ii.unindex(id.head, id, j, dl); } catch (AssertionException& e) { problem() << "Assertion failure: _unindex failed " << id.indexNamespace() << endl; @@ -1028,9 +1055,10 @@ int z = d->nIndexesBeingBuilt(); for ( int x = 0; x < z; x++ ) { IndexDetails& idx = d->idx(x); + IndexInterface& ii = idx.idxInterface(); for ( unsigned i = 0; i < changes[x].removed.size(); i++ ) { try { - bool found = idx.head.btree()->unindex(idx.head, idx, *changes[x].removed[i], dl); + bool found = ii.unindex(idx.head, idx, *changes[x].removed[i], dl); if ( ! found ) { RARELY warning() << "ns: " << ns << " couldn't unindex key: " << *changes[x].removed[i] << " for doc: " << objOld["_id"] << endl; @@ -1048,7 +1076,7 @@ for ( unsigned i = 0; i < changes[x].added.size(); i++ ) { try { /* we did the dupCheck() above. so we don't have to worry about it here. */ - idx.head.btree()->bt_insert( + ii.bt_insert( idx.head, dl, *changes[x].added[i], ordering, /*dupsAllowed*/true, idx); } @@ -1094,7 +1122,10 @@ IndexDetails& idx = d->idx(idxNo); BSONObjSetDefaultOrder keys; idx.getKeysFromObject(obj, keys); + if( keys.empty() ) + return; BSONObj order = idx.keyPattern(); + IndexInterface& ii = idx.idxInterface(); Ordering ordering = Ordering::make(order); int n = 0; for ( BSONObjSetDefaultOrder::iterator i=keys.begin(); i != keys.end(); i++ ) { @@ -1103,8 +1134,7 @@ } assert( !recordLoc.isNull() ); try { - idx.head.btree()->bt_insert(idx.head, recordLoc, - *i, ordering, dupsAllowed, idx); + ii.bt_insert(idx.head, recordLoc, *i, ordering, dupsAllowed, idx); } catch (AssertionException& e) { if( e.getCode() == 10287 && idxNo == d->nIndexes ) { @@ -1146,6 +1176,52 @@ SortPhaseOne *precalced = 0; + template< class V > + void buildBottomUpPhases2And3(bool dupsAllowed, IndexDetails& idx, BSONObjExternalSorter& sorter, + bool dropDups, list<DiskLoc> &dupsToDrop, CurOp * op, SortPhaseOne *phase1, ProgressMeterHolder &pm, + Timer& t + ) + { + BtreeBuilder<V> btBuilder(dupsAllowed, idx); + BSONObj keyLast; + auto_ptr<BSONObjExternalSorter::Iterator> i = sorter.iterator(); + assert( pm == op->setMessage( "index: (2/3) btree bottom up" , phase1->nkeys , 10 ) ); + while( i->more() ) { + RARELY killCurrentOp.checkForInterrupt(); + BSONObjExternalSorter::Data d = i->next(); + + try { + btBuilder.addKey(d.first, d.second); + } + catch( AssertionException& e ) { + if ( dupsAllowed ) { + // unknown exception?? + throw; + } + + if( e.interrupted() ) + throw; + + if ( ! 
dropDups ) + throw; + + /* we could queue these on disk, but normally there are very few dups, so instead we + keep in ram and have a limit. + */ + dupsToDrop.push_back(d.second); + uassert( 10092 , "too many dups on index build with dropDups=true", dupsToDrop.size() < 1000000 ); + } + pm.hit(); + } + pm.finished(); + op->setMessage( "index: (3/3) btree-middle" ); + log(t.seconds() > 10 ? 0 : 1 ) << "\t done building bottom layer, going to commit" << endl; + btBuilder.commit(); + if ( btBuilder.getn() != phase1->nkeys && ! dropDups ) { + warning() << "not all entries were added to the index, probably some keys were too large" << endl; + } + } + // throws DBException unsigned long long fastBuildIndex(const char *ns, NamespaceDetails *d, IndexDetails& idx, int idxNo) { CurOp * op = cc().curop(); @@ -1200,48 +1276,12 @@ list<DiskLoc> dupsToDrop; /* build index --- */ - { - BtreeBuilder btBuilder(dupsAllowed, idx); - BSONObj keyLast; - auto_ptr<BSONObjExternalSorter::Iterator> i = sorter.iterator(); - assert( pm == op->setMessage( "index: (2/3) btree bottom up" , phase1->nkeys , 10 ) ); - while( i->more() ) { - RARELY killCurrentOp.checkForInterrupt(); - BSONObjExternalSorter::Data d = i->next(); - - try { - btBuilder.addKey(d.first, d.second); - } - catch( AssertionException& e ) { - if ( dupsAllowed ) { - // unknow exception?? - throw; - } - - if( e.interrupted() ) - throw; - - if ( ! dropDups ) - throw; - - /* we could queue these on disk, but normally there are very few dups, so instead we - keep in ram and have a limit. - */ - // so we get error # script checking: uasserted(14046, "asdf"); - raiseError(14046, "dups were encountered and dropped"); - dupsToDrop.push_back(d.second); - uassert( 10092 , "too may dups on index build with dropDups=true", dupsToDrop.size() < 1000000 ); - } - pm.hit(); - } - pm.finished(); - op->setMessage( "index: (3/3) btree-middle" ); - log(t.seconds() > 10 ? 0 : 1 ) << "\t done building bottom layer, going to commit" << endl; - btBuilder.commit(); - if ( btBuilder.getn() != phase1->nkeys && ! dropDups ) { - warning() << "not all entries were added to the index, probably some keys were too large" << endl; - } - } + if( idx.version() == 0 ) + buildBottomUpPhases2And3<V0>(dupsAllowed, idx, sorter, dropDups, dupsToDrop, op, phase1, pm, t); + else if( idx.version() == 1 ) + buildBottomUpPhases2And3<V1>(dupsAllowed, idx, sorter, dropDups, dupsToDrop, op, phase1, pm, t); + else + assert(false); log(1) << "\t fastBuildIndex dupsToDrop:" << dupsToDrop.size() << endl; @@ -1341,7 +1381,7 @@ prep(ns.c_str(), d); assert( idxNo == d->nIndexes ); try { - idx.head.writing() = BtreeBucket::addBucket(idx); + idx.head.writing() = idx.idxInterface().addBucket(idx); n = addExistingToIndex(ns.c_str(), d, idx, idxNo); } catch(...) { @@ -1513,9 +1553,12 @@ BSONObjSetDefaultOrder keys; idx.getKeysFromObject(obj, keys); BSONObj order = idx.keyPattern(); + IndexInterface& ii = idx.idxInterface(); for ( BSONObjSetDefaultOrder::iterator i=keys.begin(); i != keys.end(); i++ ) { + // WARNING: findSingle may not be compound index safe. this may need to change. see notes in + // findSingle code. uassert( 12582, "duplicate key insert for unique index of capped collection", - idx.head.btree()->findSingle(idx, idx.head, *i ).isNull() ); + ii.findSingle(idx, idx.head, *i ).isNull() ); } } } @@ -1608,7 +1651,13 @@ also if this is an addIndex, those checks should happen before this! 
*/ // This may create first file in the database. - cc().database()->allocExtent(ns, Extent::initialSize(len), false); + int ies = Extent::initialSize(len); + if( str::contains(ns, '$') && len + Record::HeaderSize >= BtreeData_V1::BucketSize - 256 && len + Record::HeaderSize <= BtreeData_V1::BucketSize + 256 ) { + // probably an index. so we pick a value here for the first extent instead of using initialExtentSize() which is more + // for user collections. TODO: we could look at the # of records in the parent collection to be smarter here. + ies = (32+4) * 1024; + } + cc().database()->allocExtent(ns, ies, false); d = nsdetails(ns); if ( !god ) ensureIdIndexForNewNs(ns); @@ -1622,8 +1671,11 @@ namespace mongo { if ( addIndex ) { assert( obuf ); BSONObj io((const char *) obuf); - if( !prepareToBuildIndex(io, god, tabletoidxns, tableToIndex, fixedIndexObject ) ) + if( !prepareToBuildIndex(io, god, tabletoidxns, tableToIndex, fixedIndexObject ) ) { + // prepare creates _id itself, or this indicates to fail the build silently (such + // as if index already exists) return DiskLoc(); + } if ( ! fixedIndexObject.isEmpty() ) { obuf = fixedIndexObject.objdata(); diff --git a/db/pdfile.h b/db/pdfile.h index 3421e345a46..fdfb51f3027 100644 --- a/db/pdfile.h +++ b/db/pdfile.h @@ -260,10 +260,10 @@ namespace mongo { Returns a DeletedRecord location which is the data in the extent ready for us. Caller will need to add that to the freelist structure in namespacedetail. */ - DiskLoc init(const char *nsname, int _length, int _fileNo, int _offset); + DiskLoc init(const char *nsname, int _length, int _fileNo, int _offset, bool capped); /* like init(), but for a reuse case */ - DiskLoc reuse(const char *nsname); + DiskLoc reuse(const char *nsname, bool newUseIsAsCapped); bool isOk() const { return magic == 0x41424344; } void assertOk() const { assert(isOk()); } @@ -289,8 +289,8 @@ namespace mongo { */ static int followupSize(int len, int lastExtentLen); - /** - * @param len lengt of record we need + /** get a suggested size for the first extent in a namespace + * @param len length of record we need to insert */ static int initialSize(int len); @@ -306,7 +306,7 @@ namespace mongo { /** caller must declare write intent first */ void markEmpty(); private: - DiskLoc _reuse(const char *nsname); // recycle an extent and reuse it for a different ns + DiskLoc _reuse(const char *nsname, bool newUseIsAsCapped); // recycle an extent and reuse it for a different ns }; /* a datafile - i.e. 
the "dbname.<#>" files : @@ -331,7 +331,7 @@ namespace mongo { int unusedLength; char reserved[8192 - 4*4 - 8]; - char data[4]; + char data[4]; // first extent starts here enum { HeaderSize = 8192 }; @@ -436,9 +436,12 @@ namespace mongo { inline Extent* DiskLoc::ext() const { return DataFileMgr::getExtent(*this); } - inline const BtreeBucket* DiskLoc::btree() const { + + template< class V > + inline + const BtreeBucket<V> * DiskLoc::btree() const { assert( _a != -1 ); - return (const BtreeBucket *) rec()->data; + return (const BtreeBucket<V> *) rec()->data; } } // namespace mongo diff --git a/db/query.cpp b/db/query.cpp index b34b4f32971..00b706ccc45 100644 --- a/db/query.cpp +++ b/db/query.cpp @@ -468,7 +468,7 @@ namespace mongo { _nscanned = _c->nscanned(); if ( _bc ) { if ( _firstMatch.isEmpty() ) { - _firstMatch = _bc->currKeyNode().key.copy(); + _firstMatch = _bc->currKey().getOwned(); // if not match if ( _query.woCompare( _firstMatch, BSONObj(), false ) ) { setComplete(); @@ -477,7 +477,7 @@ namespace mongo { _gotOne(); } else { - if ( ! _firstMatch.woEqual( _bc->currKeyNode().key ) ) { + if ( ! _firstMatch.woEqual( _bc->currKey() ) ) { setComplete(); return; } diff --git a/db/queryoptimizer.cpp b/db/queryoptimizer.cpp index 15c7807c9bf..8dc38c7c56b 100644 --- a/db/queryoptimizer.cpp +++ b/db/queryoptimizer.cpp @@ -209,13 +209,13 @@ doneCheckOrder: if ( _startOrEndSpec ) { // we are sure to spec _endKeyInclusive - return shared_ptr<Cursor>( new BtreeCursor( _d, _idxNo, *_index, _startKey, _endKey, _endKeyInclusive, _direction >= 0 ? 1 : -1 ) ); + return shared_ptr<Cursor>( BtreeCursor::make( _d, _idxNo, *_index, _startKey, _endKey, _endKeyInclusive, _direction >= 0 ? 1 : -1 ) ); } else if ( _index->getSpec().getType() ) { - return shared_ptr<Cursor>( new BtreeCursor( _d, _idxNo, *_index, _frv->startKey(), _frv->endKey(), true, _direction >= 0 ? 1 : -1 ) ); + return shared_ptr<Cursor>( BtreeCursor::make( _d, _idxNo, *_index, _frv->startKey(), _frv->endKey(), true, _direction >= 0 ? 1 : -1 ) ); } else { - return shared_ptr<Cursor>( new BtreeCursor( _d, _idxNo, *_index, _frv, _direction >= 0 ? 1 : -1 ) ); + return shared_ptr<Cursor>( BtreeCursor::make( _d, _idxNo, *_index, _frv, _direction >= 0 ? 1 : -1 ) ); } } diff --git a/db/update.cpp b/db/update.cpp index 97c0d9c7cb9..d190e7e7d0e 100644 --- a/db/update.cpp +++ b/db/update.cpp @@ -966,8 +966,8 @@ namespace mongo { DiskLoc loc; { IndexDetails& i = d->idx(idIdxNo); - BSONObj key = i.getKeyFromQuery( patternOrig ); - loc = i.head.btree()->findSingle(i, i.head, key); + BSONObj key = i.getKeyFromQuery( patternOrig ); + loc = i.idxInterface().findSingle(i, i.head, key); if( loc.isNull() ) { // no upsert support in _updateById yet, so we are done. 
return UpdateResult(0, 0, 0); diff --git a/dbtests/btreetests.cpp b/dbtests/btreetests.cpp index 7c17c3d7195..44c5474bb63 100644 --- a/dbtests/btreetests.cpp +++ b/dbtests/btreetests.cpp @@ -24,1688 +24,26 @@ #include "dbtests.h" -namespace BtreeTests { - - const char* ns() { - return "unittests.btreetests"; - } - - // dummy, valid record loc - const DiskLoc recordLoc() { - return DiskLoc( 0, 2 ); - } - - class Ensure { - public: - Ensure() { - _c.ensureIndex( ns(), BSON( "a" << 1 ), false, "testIndex" ); - } - ~Ensure() { - _c.dropIndexes( ns() ); - } - private: - DBDirectClient _c; - }; - - class Base : public Ensure { - public: - Base() : - _context( ns() ) { - { - bool f = false; - assert( f = true ); - massert( 10402 , "assert is misdefined", f); - } - } - virtual ~Base() {} - static string bigNumString( long long n, int len = 800 ) { - char sub[17]; - sprintf( sub, "%.16llx", n ); - string val( len, ' ' ); - for( int i = 0; i < len; ++i ) { - val[ i ] = sub[ i % 16 ]; - } - return val; - } - protected: - const BtreeBucket* bt() { - return id().head.btree(); - } - DiskLoc dl() { - return id().head; - } - IndexDetails& id() { - NamespaceDetails *nsd = nsdetails( ns() ); - assert( nsd ); - return nsd->idx( 1 ); - } - void checkValid( int nKeys ) { - ASSERT( bt() ); - ASSERT( bt()->isHead() ); - bt()->assertValid( order(), true ); - ASSERT_EQUALS( nKeys, bt()->fullValidate( dl(), order(), 0, true ) ); - } - void dump() { - bt()->dumpTree( dl(), order() ); - } - void insert( BSONObj &key ) { - bt()->bt_insert( dl(), recordLoc(), key, Ordering::make(order()), true, id(), true ); - getDur().commitIfNeeded(); - } - bool unindex( BSONObj &key ) { - getDur().commitIfNeeded(); - return bt()->unindex( dl(), id(), key, recordLoc() ); - } - static BSONObj simpleKey( char c, int n = 1 ) { - BSONObjBuilder builder; - string val( n, c ); - builder.append( "a", val ); - return builder.obj(); - } - void locate( BSONObj &key, int expectedPos, - bool expectedFound, const DiskLoc &expectedLocation, - int direction = 1 ) { - int pos; - bool found; - DiskLoc location = - bt()->locate( id(), dl(), key, Ordering::make(order()), pos, found, recordLoc(), direction ); - ASSERT_EQUALS( expectedFound, found ); - ASSERT( location == expectedLocation ); - ASSERT_EQUALS( expectedPos, pos ); - } - bool present( BSONObj &key, int direction ) { - int pos; - bool found; - bt()->locate( id(), dl(), key, Ordering::make(order()), pos, found, recordLoc(), direction ); - return found; - } - BSONObj order() { - return id().keyPattern(); - } - const BtreeBucket *child( const BtreeBucket *b, int i ) { - assert( i <= b->nKeys() ); - DiskLoc d; - if ( i == b->nKeys() ) { - d = b->getNextChild(); - } - else { - d = const_cast< DiskLoc& >( b->keyNode( i ).prevChildBucket ); - } - assert( !d.isNull() ); - return d.btree(); - } - void checkKey( char i ) { - stringstream ss; - ss << i; - checkKey( ss.str() ); - } - void checkKey( const string &k ) { - BSONObj key = BSON( "" << k ); -// log() << "key: " << key << endl; - ASSERT( present( key, 1 ) ); - ASSERT( present( key, -1 ) ); - } - private: - dblock lk_; - Client::Context _context; - }; - - class Create : public Base { - public: - void run() { - checkValid( 0 ); - } - }; - - class SimpleInsertDelete : public Base { - public: - void run() { - BSONObj key = simpleKey( 'z' ); - insert( key ); - - checkValid( 1 ); - locate( key, 0, true, dl() ); - - unindex( key ); - - checkValid( 0 ); - locate( key, 0, false, DiskLoc() ); - } - }; - - class SplitUnevenBucketBase : public Base { - 
public: - virtual ~SplitUnevenBucketBase() {} - void run() { - for ( int i = 0; i < 10; ++i ) { - BSONObj shortKey = simpleKey( shortToken( i ), 1 ); - insert( shortKey ); - BSONObj longKey = simpleKey( longToken( i ), 800 ); - insert( longKey ); - } - checkValid( 20 ); - ASSERT_EQUALS( 1, bt()->nKeys() ); - checkSplit(); - } - protected: - virtual char shortToken( int i ) const = 0; - virtual char longToken( int i ) const = 0; - static char leftToken( int i ) { - return 'a' + i; - } - static char rightToken( int i ) { - return 'z' - i; - } - virtual void checkSplit() = 0; - }; - - class SplitRightHeavyBucket : public SplitUnevenBucketBase { - private: - virtual char shortToken( int i ) const { - return leftToken( i ); - } - virtual char longToken( int i ) const { - return rightToken( i ); - } - virtual void checkSplit() { - ASSERT_EQUALS( 15, child( bt(), 0 )->nKeys() ); - ASSERT_EQUALS( 4, child( bt(), 1 )->nKeys() ); - } - }; - - class SplitLeftHeavyBucket : public SplitUnevenBucketBase { - private: - virtual char shortToken( int i ) const { - return rightToken( i ); - } - virtual char longToken( int i ) const { - return leftToken( i ); - } - virtual void checkSplit() { - ASSERT_EQUALS( 4, child( bt(), 0 )->nKeys() ); - ASSERT_EQUALS( 15, child( bt(), 1 )->nKeys() ); - } - }; - - class MissingLocate : public Base { - public: - void run() { - for ( int i = 0; i < 3; ++i ) { - BSONObj k = simpleKey( 'b' + 2 * i ); - insert( k ); - } - - locate( 1, 'a', 'b', dl() ); - locate( 1, 'c', 'd', dl() ); - locate( 1, 'e', 'f', dl() ); - locate( 1, 'g', 'g' + 1, DiskLoc() ); // of course, 'h' isn't in the index. - - // old behavior - // locate( -1, 'a', 'b', dl() ); - // locate( -1, 'c', 'd', dl() ); - // locate( -1, 'e', 'f', dl() ); - // locate( -1, 'g', 'f', dl() ); - - locate( -1, 'a', 'a' - 1, DiskLoc() ); // of course, 'a' - 1 isn't in the index - locate( -1, 'c', 'b', dl() ); - locate( -1, 'e', 'd', dl() ); - locate( -1, 'g', 'f', dl() ); - } - private: - void locate( int direction, char token, char expectedMatch, - DiskLoc expectedLocation ) { - BSONObj k = simpleKey( token ); - int expectedPos = ( expectedMatch - 'b' ) / 2; - Base::locate( k, expectedPos, false, expectedLocation, direction ); - } - }; - - class MissingLocateMultiBucket : public Base { - public: - void run() { - for ( int i = 0; i < 8; ++i ) { - insert( i ); - } - insert( 9 ); - insert( 8 ); -// dump(); - BSONObj straddle = key( 'i' ); - locate( straddle, 0, false, dl(), 1 ); - straddle = key( 'k' ); - locate( straddle, 0, false, dl(), -1 ); - } - private: - BSONObj key( char c ) { - return simpleKey( c, 800 ); - } - void insert( int i ) { - BSONObj k = key( 'b' + 2 * i ); - Base::insert( k ); - } - }; - - class SERVER983 : public Base { - public: - void run() { - for ( int i = 0; i < 10; ++i ) { - insert( i ); - } -// dump(); - BSONObj straddle = key( 'o' ); - locate( straddle, 0, false, dl(), 1 ); - straddle = key( 'q' ); - locate( straddle, 0, false, dl(), -1 ); - } - private: - BSONObj key( char c ) { - return simpleKey( c, 800 ); - } - void insert( int i ) { - BSONObj k = key( 'b' + 2 * i ); - Base::insert( k ); - } - }; - - class DontReuseUnused : public Base { - public: - void run() { - for ( int i = 0; i < 10; ++i ) { - insert( i ); - } -// dump(); - BSONObj root = key( 'p' ); - unindex( root ); - Base::insert( root ); - locate( root, 0, true, bt()->getNextChild(), 1 ); - } - private: - BSONObj key( char c ) { - return simpleKey( c, 800 ); - } - void insert( int i ) { - BSONObj k = key( 'b' + 2 * i ); - 
Base::insert( k ); - } - }; - - class PackUnused : public Base { - public: - void run() { - for ( long long i = 0; i < 1000000; i += 1000 ) { - insert( i ); - } -// dump(); - string orig, after; - { - stringstream ss; - bt()->shape( ss ); - orig = ss.str(); - } - vector< string > toDel; - vector< string > other; - BSONObjBuilder start; - start.appendMinKey( "a" ); - BSONObjBuilder end; - end.appendMaxKey( "a" ); - auto_ptr< BtreeCursor > c( new BtreeCursor( nsdetails( ns() ), 1, id(), start.done(), end.done(), false, 1 ) ); - while( c->ok() ) { - if ( !c->currKeyNode().prevChildBucket.isNull() ) { - toDel.push_back( c->currKey().firstElement().valuestr() ); - } - else { - other.push_back( c->currKey().firstElement().valuestr() ); - } - c->advance(); - } - ASSERT( toDel.size() > 0 ); - for( vector< string >::const_iterator i = toDel.begin(); i != toDel.end(); ++i ) { - BSONObj o = BSON( "a" << *i ); - unindex( o ); - } - ASSERT( other.size() > 0 ); - for( vector< string >::const_iterator i = other.begin(); i != other.end(); ++i ) { - BSONObj o = BSON( "a" << *i ); - unindex( o ); - } - - long long unused = 0; - ASSERT_EQUALS( 0, bt()->fullValidate( dl(), order(), &unused, true ) ); - - for ( long long i = 50000; i < 50100; ++i ) { - insert( i ); - } - - long long unused2 = 0; - ASSERT_EQUALS( 100, bt()->fullValidate( dl(), order(), &unused2, true ) ); - -// log() << "old unused: " << unused << ", new unused: " << unused2 << endl; -// - ASSERT( unused2 <= unused ); - } - protected: - void insert( long long n ) { - string val = bigNumString( n ); - BSONObj k = BSON( "a" << val ); - Base::insert( k ); - } - }; - - class DontDropReferenceKey : public PackUnused { - public: - void run() { - // with 80 root node is full - for ( long long i = 0; i < 80; i += 1 ) { - insert( i ); - } - - BSONObjBuilder start; - start.appendMinKey( "a" ); - BSONObjBuilder end; - end.appendMaxKey( "a" ); - BSONObj l = bt()->keyNode( 0 ).key; - string toInsert; - auto_ptr< BtreeCursor > c( new BtreeCursor( nsdetails( ns() ), 1, id(), start.done(), end.done(), false, 1 ) ); - while( c->ok() ) { - if ( c->currKey().woCompare( l ) > 0 ) { - toInsert = c->currKey().firstElement().valuestr(); - break; - } - c->advance(); - } - // too much work to try to make this happen through inserts and deletes - // we are intentionally manipulating the btree bucket directly here - getDur().writingDiskLoc( const_cast< DiskLoc& >( bt()->keyNode( 1 ).prevChildBucket ) ) = DiskLoc(); - getDur().writingInt( const_cast< DiskLoc& >( bt()->keyNode( 1 ).recordLoc ).GETOFS() ) |= 1; // make unused - BSONObj k = BSON( "a" << toInsert ); - Base::insert( k ); - } - }; - - class MergeBuckets : public Base { - public: - virtual ~MergeBuckets() {} - void run() { - for ( int i = 0; i < 10; ++i ) { - insert( i ); - } -// dump(); - string ns = id().indexNamespace(); - ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); - int expectedCount = 10 - unindexKeys(); -// dump(); - ASSERT_EQUALS( 1, nsdetails( ns.c_str() )->stats.nrecords ); - long long unused = 0; - ASSERT_EQUALS( expectedCount, bt()->fullValidate( dl(), order(), &unused, true ) ); - ASSERT_EQUALS( 0, unused ); - } - protected: - BSONObj key( char c ) { - return simpleKey( c, 800 ); - } - void insert( int i ) { - BSONObj k = key( 'b' + 2 * i ); - Base::insert( k ); - } - virtual int unindexKeys() = 0; - }; - - class MergeBucketsLeft : public MergeBuckets { - virtual int unindexKeys() { - BSONObj k = key( 'b' ); - unindex( k ); - k = key( 'b' + 2 ); - unindex( k ); - k = key( 'b' + 4 ); 
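// MergeBucketsLeft drains the left sibling key by key ( 'b', 'd', 'f', 'h'
// under the 800-byte simpleKey scheme ) so coalescing is triggered from the
// left; MergeBuckets::run() then checks that nrecords drops from 3 buckets
// to 1 and that fullValidate() still counts every remaining key with none
// left marked unused.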
- unindex( k ); - k = key( 'b' + 6 ); - unindex( k ); - return 4; - } - }; - - class MergeBucketsRight : public MergeBuckets { - virtual int unindexKeys() { - BSONObj k = key( 'b' + 2 * 9 ); - unindex( k ); - return 1; - } - }; - - // deleting from head won't coalesce yet -// class MergeBucketsHead : public MergeBuckets { -// virtual BSONObj unindexKey() { return key( 'p' ); } -// }; - - class MergeBucketsDontReplaceHead : public Base { - public: - void run() { - for ( int i = 0; i < 18; ++i ) { - insert( i ); - } - // dump(); - string ns = id().indexNamespace(); - ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); - BSONObj k = key( 'a' + 17 ); - unindex( k ); - ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); - long long unused = 0; - ASSERT_EQUALS( 17, bt()->fullValidate( dl(), order(), &unused, true ) ); - ASSERT_EQUALS( 0, unused ); - } - private: - BSONObj key( char c ) { - return simpleKey( c, 800 ); - } - void insert( int i ) { - BSONObj k = key( 'a' + i ); - Base::insert( k ); - } - }; - - // Tool to construct custom trees for tests. - class ArtificialTree : public BtreeBucket { - public: - void push( const BSONObj &key, const DiskLoc &child ) { - pushBack( dummyDiskLoc(), key, Ordering::make( BSON( "a" << 1 ) ), child ); - } - void setNext( const DiskLoc &child ) { - nextChild = child; - } - static DiskLoc make( IndexDetails &id ) { - DiskLoc ret = addBucket( id ); - is( ret )->init(); - getDur().commitIfNeeded(); - return ret; - } - static ArtificialTree *is( const DiskLoc &l ) { - return static_cast< ArtificialTree * >( l.btreemod() ); - } - static DiskLoc makeTree( const string &spec, IndexDetails &id ) { - return makeTree( fromjson( spec ), id ); - } - static DiskLoc makeTree( const BSONObj &spec, IndexDetails &id ) { - DiskLoc node = make( id ); - ArtificialTree *n = ArtificialTree::is( node ); - BSONObjIterator i( spec ); - while( i.more() ) { - BSONElement e = i.next(); - DiskLoc child; - if ( e.type() == Object ) { - child = makeTree( e.embeddedObject(), id ); - } - if ( e.fieldName() == string( "_" ) ) { - n->setNext( child ); - } - else { - n->push( BSON( "" << expectedKey( e.fieldName() ) ), child ); - } - } - n->fixParentPtrs( node ); - return node; - } - static void setTree( const string &spec, IndexDetails &id ) { - set( makeTree( spec, id ), id ); - } - static void set( const DiskLoc &l, IndexDetails &id ) { - ArtificialTree::is( id.head )->deallocBucket( id.head, id ); - getDur().writingDiskLoc(id.head) = l; - } - static string expectedKey( const char *spec ) { - if ( spec[ 0 ] != '$' ) { - return spec; - } - char *endPtr; - // parsing a long long is a pain, so just allow shorter keys for now - unsigned long long num = strtol( spec + 1, &endPtr, 16 ); - int len = 800; - if( *endPtr == '$' ) { - len = strtol( endPtr + 1, 0, 16 ); - } - return Base::bigNumString( num, len ); - } - static void checkStructure( const BSONObj &spec, const IndexDetails &id, const DiskLoc node ) { - ArtificialTree *n = ArtificialTree::is( node ); - BSONObjIterator j( spec ); - for( int i = 0; i < n->n; ++i ) { - ASSERT( j.more() ); - BSONElement e = j.next(); - KeyNode kn = n->keyNode( i ); - string expected = expectedKey( e.fieldName() ); - ASSERT( present( id, BSON( "" << expected ), 1 ) ); - ASSERT( present( id, BSON( "" << expected ), -1 ) ); - ASSERT_EQUALS( expected, kn.key.firstElement().valuestr() ); - if ( kn.prevChildBucket.isNull() ) { - ASSERT( e.type() == jstNULL ); - } - else { - ASSERT( e.type() == Object ); - checkStructure( e.embeddedObject(), id, 
kn.prevChildBucket ); - } - } - if ( n->nextChild.isNull() ) { - // maybe should allow '_' field with null value? - ASSERT( !j.more() ); - } - else { - BSONElement e = j.next(); - ASSERT_EQUALS( string( "_" ), e.fieldName() ); - ASSERT( e.type() == Object ); - checkStructure( e.embeddedObject(), id, n->nextChild ); - } - ASSERT( !j.more() ); - } - static void checkStructure( const string &spec, const IndexDetails &id ) { - checkStructure( fromjson( spec ), id, id.head ); - } - static bool present( const IndexDetails &id, const BSONObj &key, int direction ) { - int pos; - bool found; - id.head.btree()->locate( id, id.head, key, Ordering::make(id.keyPattern()), pos, found, recordLoc(), direction ); - return found; - } - int headerSize() const { return BtreeBucket::headerSize(); } - int packedDataSize( int pos ) const { return BtreeBucket::packedDataSize( pos ); } - void fixParentPtrs( const DiskLoc &thisLoc ) { BtreeBucket::fixParentPtrs( thisLoc ); } - void forcePack() { - topSize += emptySize; - emptySize = 0; - setNotPacked(); - } - private: - DiskLoc dummyDiskLoc() const { return DiskLoc( 0, 2 ); } - }; - - /** - * We could probably refactor the following tests, but it's easier to debug - * them in the present state. - */ - - class MergeBucketsDelInternal : public Base { - public: - void run() { - ArtificialTree::setTree( "{d:{b:{a:null},bb:null,_:{c:null}},_:{f:{e:null},_:{g:null}}}", id() ); -// dump(); - string ns = id().indexNamespace(); - ASSERT_EQUALS( 8, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 7, nsdetails( ns.c_str() )->stats.nrecords ); - - BSONObj k = BSON( "" << "bb" ); - assert( unindex( k ) ); -// dump(); - ASSERT_EQUALS( 7, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 5, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{b:{a:null},d:{c:null},f:{e:null},_:{g:null}}", id() ); - } - }; - - class MergeBucketsRightNull : public Base { - public: - void run() { - ArtificialTree::setTree( "{d:{b:{a:null},bb:null,cc:{c:null}},_:{f:{e:null},h:{g:null}}}", id() ); -// dump(); - string ns = id().indexNamespace(); - ASSERT_EQUALS( 10, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 7, nsdetails( ns.c_str() )->stats.nrecords ); - - BSONObj k = BSON( "" << "bb" ); - assert( unindex( k ) ); -// dump(); - ASSERT_EQUALS( 9, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 5, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{b:{a:null},cc:{c:null},d:null,f:{e:null},h:{g:null}}", id() ); - } - }; - - // not yet handling this case - class DontMergeSingleBucket : public Base { - public: - void run() { - ArtificialTree::setTree( "{d:{b:{a:null},c:null}}", id() ); -// dump(); - string ns = id().indexNamespace(); - ASSERT_EQUALS( 4, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); - BSONObj k = BSON( "" << "c" ); - assert( unindex( k ) ); -// dump(); - ASSERT_EQUALS( 3, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{d:{b:{a:null}}}", id() ); - } - }; - - class ParentMergeNonRightToLeft : public Base { - public: - void run() { - ArtificialTree::setTree( "{d:{b:{a:null},bb:null,cc:{c:null}},i:{f:{e:null},h:{g:null}}}", id() ); -// dump(); - string ns = id().indexNamespace(); - ASSERT_EQUALS( 11, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 7, nsdetails( ns.c_str() )->stats.nrecords ); - - 
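// Tree specs such as "{d:{b:{a:null},bb:null,cc:{c:null}},_:{...}}" are
// ArtificialTree shorthand: each field becomes a key in the bucket, a
// nested object is that key's prevChildBucket subtree, the special "_"
// field supplies the bucket's rightmost nextChild subtree, and "$hex" /
// "$hex$len" field names expand via expectedKey() into long bigNumString
// keys ( length in hex, default 800 ).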
BSONObj k = BSON( "" << "bb" ); - assert( unindex( k ) ); -// dump(); - ASSERT_EQUALS( 10, bt()->fullValidate( dl(), order(), 0, true ) ); - // child does not currently replace parent in this case - ASSERT_EQUALS( 6, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{i:{b:{a:null},cc:{c:null},d:null,f:{e:null},h:{g:null}}}", id() ); - } - }; - - class ParentMergeNonRightToRight : public Base { - public: - void run() { - ArtificialTree::setTree( "{d:{b:{a:null},cc:{c:null}},i:{f:{e:null},ff:null,h:{g:null}}}", id() ); -// dump(); - string ns = id().indexNamespace(); - ASSERT_EQUALS( 11, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 7, nsdetails( ns.c_str() )->stats.nrecords ); - - BSONObj k = BSON( "" << "ff" ); - assert( unindex( k ) ); -// dump(); - ASSERT_EQUALS( 10, bt()->fullValidate( dl(), order(), 0, true ) ); - // child does not currently replace parent in this case - ASSERT_EQUALS( 6, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{i:{b:{a:null},cc:{c:null},d:null,f:{e:null},h:{g:null}}}", id() ); - } - }; - - class CantMergeRightNoMerge : public Base { - public: - void run() { - ArtificialTree::setTree( "{d:{b:{a:null},bb:null,cc:{c:null}},dd:null,_:{f:{e:null},h:{g:null}}}", id() ); -// dump(); - string ns = id().indexNamespace(); - ASSERT_EQUALS( 11, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 7, nsdetails( ns.c_str() )->stats.nrecords ); - - BSONObj k = BSON( "" << "bb" ); - assert( unindex( k ) ); -// dump(); - ASSERT_EQUALS( 10, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 7, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{d:{b:{a:null},cc:{c:null}},dd:null,_:{f:{e:null},h:{g:null}}}", id() ); - } - }; - - class CantMergeLeftNoMerge : public Base { - public: - void run() { - ArtificialTree::setTree( "{c:{b:{a:null}},d:null,_:{f:{e:null},g:null}}", id() ); -// dump(); - string ns = id().indexNamespace(); - ASSERT_EQUALS( 7, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 5, nsdetails( ns.c_str() )->stats.nrecords ); - - BSONObj k = BSON( "" << "g" ); - assert( unindex( k ) ); -// dump(); - ASSERT_EQUALS( 6, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 5, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{c:{b:{a:null}},d:null,_:{f:{e:null}}}", id() ); - } - }; - - class MergeOption : public Base { - public: - void run() { - ArtificialTree::setTree( "{c:{b:{a:null}},f:{e:{d:null},ee:null},_:{h:{g:null}}}", id() ); -// dump(); - string ns = id().indexNamespace(); - ASSERT_EQUALS( 9, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 7, nsdetails( ns.c_str() )->stats.nrecords ); - - BSONObj k = BSON( "" << "ee" ); - assert( unindex( k ) ); -// dump(); - ASSERT_EQUALS( 8, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 6, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{c:{b:{a:null}},_:{e:{d:null},f:null,h:{g:null}}}", id() ); - } - }; - - class ForceMergeLeft : public Base { - public: - void run() { - ArtificialTree::setTree( "{c:{b:{a:null}},f:{e:{d:null},ee:null},ff:null,_:{h:{g:null}}}", id() ); -// dump(); - string ns = id().indexNamespace(); - ASSERT_EQUALS( 10, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 7, nsdetails( ns.c_str() )->stats.nrecords ); - - BSONObj k = BSON( "" << "ee" ); - assert( unindex( k ) ); -// dump(); - ASSERT_EQUALS( 9, bt()->fullValidate( dl(), order(), 0, 
true ) ); - ASSERT_EQUALS( 6, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{f:{b:{a:null},c:null,e:{d:null}},ff:null,_:{h:{g:null}}}", id() ); - } - }; - - class ForceMergeRight : public Base { - public: - void run() { - ArtificialTree::setTree( "{c:{b:{a:null}},cc:null,f:{e:{d:null},ee:null},_:{h:{g:null}}}", id() ); -// dump(); - string ns = id().indexNamespace(); - ASSERT_EQUALS( 10, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 7, nsdetails( ns.c_str() )->stats.nrecords ); - - BSONObj k = BSON( "" << "ee" ); - assert( unindex( k ) ); -// dump(); - ASSERT_EQUALS( 9, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 6, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{c:{b:{a:null}},cc:null,_:{e:{d:null},f:null,h:{g:null}}}", id() ); - } - }; - - class RecursiveMerge : public Base { - public: - void run() { - ArtificialTree::setTree( "{h:{e:{b:{a:null},c:null,d:null},g:{f:null}},j:{i:null}}", id() ); -// dump(); - string ns = id().indexNamespace(); - ASSERT_EQUALS( 10, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 6, nsdetails( ns.c_str() )->stats.nrecords ); - - BSONObj k = BSON( "" << "c" ); - assert( unindex( k ) ); -// dump(); - ASSERT_EQUALS( 9, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); - // height is not currently reduced in this case - ArtificialTree::checkStructure( "{j:{g:{b:{a:null},d:null,e:null,f:null},h:null,i:null}}", id() ); - } - }; - - class RecursiveMergeRightBucket : public Base { - public: - void run() { - ArtificialTree::setTree( "{h:{e:{b:{a:null},c:null,d:null},g:{f:null}},_:{i:null}}", id() ); -// dump(); - string ns = id().indexNamespace(); - ASSERT_EQUALS( 9, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 6, nsdetails( ns.c_str() )->stats.nrecords ); - - BSONObj k = BSON( "" << "c" ); - assert( unindex( k ) ); -// dump(); - ASSERT_EQUALS( 8, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{g:{b:{a:null},d:null,e:null,f:null},h:null,i:null}", id() ); - } - }; - - class RecursiveMergeDoubleRightBucket : public Base { - public: - void run() { - ArtificialTree::setTree( "{h:{e:{b:{a:null},c:null,d:null},_:{f:null}},_:{i:null}}", id() ); -// dump(); - string ns = id().indexNamespace(); - ASSERT_EQUALS( 8, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 6, nsdetails( ns.c_str() )->stats.nrecords ); - - BSONObj k = BSON( "" << "c" ); - assert( unindex( k ) ); -// dump(); - ASSERT_EQUALS( 7, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); - // no recursion currently in this case - ArtificialTree::checkStructure( "{h:{b:{a:null},d:null,e:null,f:null},_:{i:null}}", id() ); - } - }; - - class MergeSizeBase : public Base { - public: - MergeSizeBase() : _count() {} - virtual ~MergeSizeBase() {} - void run() { - typedef ArtificialTree A; - A::set( A::make( id() ), id() ); - A* root = A::is( dl() ); - DiskLoc left = A::make( id() ); - root->push( biggestKey( 'm' ), left ); - _count = 1; - A* l = A::is( left ); - DiskLoc right = A::make( id() ); - root->setNext( right ); - A* r = A::is( right ); - root->fixParentPtrs( dl() ); - - ASSERT_EQUALS( bigSize(), bigSize() / 2 * 2 ); - fillToExactSize( l, leftSize(), 'a' ); - fillToExactSize( r, rightSize(), 'n' ); - ASSERT( leftAdditional() <= 2 ); - if ( 
leftAdditional() >= 2 ) { - l->push( bigKey( 'k' ), DiskLoc() ); - } - if ( leftAdditional() >= 1 ) { - l->push( bigKey( 'l' ), DiskLoc() ); - } - ASSERT( rightAdditional() <= 2 ); - if ( rightAdditional() >= 2 ) { - r->push( bigKey( 'y' ), DiskLoc() ); - } - if ( rightAdditional() >= 1 ) { - r->push( bigKey( 'z' ), DiskLoc() ); - } - _count += leftAdditional() + rightAdditional(); - -// dump(); - - initCheck(); - string ns = id().indexNamespace(); - const char *keys = delKeys(); - for( const char *i = keys; *i; ++i ) { - long long unused = 0; - ASSERT_EQUALS( _count, bt()->fullValidate( dl(), order(), &unused, true ) ); - ASSERT_EQUALS( 0, unused ); - ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); - BSONObj k = bigKey( *i ); - unindex( k ); -// dump(); - --_count; - } - -// dump(); - - long long unused = 0; - ASSERT_EQUALS( _count, bt()->fullValidate( dl(), order(), &unused, true ) ); - ASSERT_EQUALS( 0, unused ); - validate(); - if ( !merge() ) { - ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); - } - else { - ASSERT_EQUALS( 1, nsdetails( ns.c_str() )->stats.nrecords ); - } - } - protected: - virtual int leftAdditional() const { return 2; } - virtual int rightAdditional() const { return 2; } - virtual void initCheck() {} - virtual void validate() {} - virtual int leftSize() const = 0; - virtual int rightSize() const = 0; - virtual const char * delKeys() const { return "klyz"; } - virtual bool merge() const { return true; } - void fillToExactSize( ArtificialTree *t, int targetSize, char startKey ) { - int size = 0; - while( size < targetSize ) { - int space = targetSize - size; - int nextSize = space - sizeof( _KeyNode ); - assert( nextSize > 0 ); - BSONObj newKey = key( startKey++, nextSize ); - t->push( newKey, DiskLoc() ); - size += newKey.objsize() + sizeof( _KeyNode ); - _count += 1; - } - ASSERT_EQUALS( t->packedDataSize( 0 ), targetSize ); - } - static BSONObj key( char a, int size ) { - if ( size >= bigSize() ) { - return bigKey( a ); - } - return simpleKey( a, size - ( bigSize() - 801 ) ); - } - static BSONObj bigKey( char a ) { - return simpleKey( a, 801 ); - } - static BSONObj biggestKey( char a ) { - int size = BtreeBucket::getKeyMax() - bigSize() + 801; - return simpleKey( a, size ); - } - static int bigSize() { - return bigKey( 'a' ).objsize(); - } - static int biggestSize() { - return biggestKey( 'a' ).objsize(); - } - int _count; - }; - - class MergeSizeJustRightRight : public MergeSizeBase { - protected: - virtual int rightSize() const { return BtreeBucket::getLowWaterMark() - 1; } - virtual int leftSize() const { return BtreeBucket::bodySize() - biggestSize() - sizeof( _KeyNode ) - ( BtreeBucket::getLowWaterMark() - 1 ); } - }; - - class MergeSizeJustRightLeft : public MergeSizeBase { - protected: - virtual int leftSize() const { return BtreeBucket::getLowWaterMark() - 1; } - virtual int rightSize() const { return BtreeBucket::bodySize() - biggestSize() - sizeof( _KeyNode ) - ( BtreeBucket::getLowWaterMark() - 1 ); } - virtual const char * delKeys() const { return "yzkl"; } - }; - - class MergeSizeRight : public MergeSizeJustRightRight { - virtual int rightSize() const { return MergeSizeJustRightRight::rightSize() - 1; } - virtual int leftSize() const { return MergeSizeJustRightRight::leftSize() + 1; } - }; - - class MergeSizeLeft : public MergeSizeJustRightLeft { - virtual int rightSize() const { return MergeSizeJustRightLeft::rightSize() + 1; } - virtual int leftSize() const { return MergeSizeJustRightLeft::leftSize() - 1; } - }; - - class 
NoMergeBelowMarkRight : public MergeSizeJustRightRight { - virtual int rightSize() const { return MergeSizeJustRightRight::rightSize() + 1; } - virtual int leftSize() const { return MergeSizeJustRightRight::leftSize() - 1; } - virtual bool merge() const { return false; } - }; - - class NoMergeBelowMarkLeft : public MergeSizeJustRightLeft { - virtual int rightSize() const { return MergeSizeJustRightLeft::rightSize() - 1; } - virtual int leftSize() const { return MergeSizeJustRightLeft::leftSize() + 1; } - virtual bool merge() const { return false; } - }; - - class MergeSizeRightTooBig : public MergeSizeJustRightLeft { - virtual int rightSize() const { return MergeSizeJustRightLeft::rightSize() + 1; } - virtual bool merge() const { return false; } - }; - - class MergeSizeLeftTooBig : public MergeSizeJustRightRight { - virtual int leftSize() const { return MergeSizeJustRightRight::leftSize() + 1; } - virtual bool merge() const { return false; } - }; - - class BalanceOneLeftToRight : public Base { - public: - void run() { - string ns = id().indexNamespace(); - ArtificialTree::setTree( "{$10:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null},b:{$20:null,$30:null,$40:null,$50:null,a:null},_:{c:null}}", id() ); - ASSERT_EQUALS( 14, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); - BSONObj k = BSON( "" << bigNumString( 0x40 ) ); -// dump(); - ASSERT( unindex( k ) ); -// dump(); - ASSERT_EQUALS( 13, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{$6:{$1:null,$2:null,$3:null,$4:null,$5:null},b:{$10:null,$20:null,$30:null,$50:null,a:null},_:{c:null}}", id() ); - } - }; - - class BalanceOneRightToLeft : public Base { - public: - void run() { - string ns = id().indexNamespace(); - ArtificialTree::setTree( "{$10:{$1:null,$2:null,$3:null,$4:null},b:{$20:null,$30:null,$40:null,$50:null,$60:null,$70:null},_:{c:null}}", id() ); - ASSERT_EQUALS( 13, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); - BSONObj k = BSON( "" << bigNumString( 0x3 ) ); -// dump(); - ASSERT( unindex( k ) ); -// dump(); - ASSERT_EQUALS( 12, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{$20:{$1:null,$2:null,$4:null,$10:null},b:{$30:null,$40:null,$50:null,$60:null,$70:null},_:{c:null}}", id() ); - } - }; - - class BalanceThreeLeftToRight : public Base { - public: - void run() { - string ns = id().indexNamespace(); - ArtificialTree::setTree( "{$20:{$1:{$0:null},$3:{$2:null},$5:{$4:null},$7:{$6:null},$9:{$8:null},$11:{$10:null},$13:{$12:null},_:{$14:null}},b:{$30:null,$40:{$35:null},$50:{$45:null}},_:{c:null}}", id() ); - ASSERT_EQUALS( 23, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 14, nsdetails( ns.c_str() )->stats.nrecords ); - BSONObj k = BSON( "" << bigNumString( 0x30 ) ); - // dump(); - ASSERT( unindex( k ) ); - // dump(); - ASSERT_EQUALS( 22, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 14, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{$9:{$1:{$0:null},$3:{$2:null},$5:{$4:null},$7:{$6:null},_:{$8:null}},b:{$11:{$10:null},$13:{$12:null},$20:{$14:null},$40:{$35:null},$50:{$45:null}},_:{c:null}}", id() ); - } - }; - - class BalanceThreeRightToLeft : public Base { - public: - void run() { - string ns = id().indexNamespace(); - 
ArtificialTree::setTree( "{$20:{$1:{$0:null},$3:{$2:null},$5:null,_:{$14:null}},b:{$30:{$25:null},$40:{$35:null},$50:{$45:null},$60:{$55:null},$70:{$65:null},$80:{$75:null},$90:{$85:null},$100:{$95:null}},_:{c:null}}", id() ); - ASSERT_EQUALS( 25, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 15, nsdetails( ns.c_str() )->stats.nrecords ); - BSONObj k = BSON( "" << bigNumString( 0x5 ) ); -// dump(); - ASSERT( unindex( k ) ); -// dump(); - ASSERT_EQUALS( 24, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 15, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{$50:{$1:{$0:null},$3:{$2:null},$20:{$14:null},$30:{$25:null},$40:{$35:null},_:{$45:null}},b:{$60:{$55:null},$70:{$65:null},$80:{$75:null},$90:{$85:null},$100:{$95:null}},_:{c:null}}", id() ); - } - }; - - class BalanceSingleParentKey : public Base { - public: - void run() { - string ns = id().indexNamespace(); - ArtificialTree::setTree( "{$10:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null},_:{$20:null,$30:null,$40:null,$50:null,a:null}}", id() ); - ASSERT_EQUALS( 12, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); - BSONObj k = BSON( "" << bigNumString( 0x40 ) ); -// dump(); - ASSERT( unindex( k ) ); -// dump(); - ASSERT_EQUALS( 11, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{$6:{$1:null,$2:null,$3:null,$4:null,$5:null},_:{$10:null,$20:null,$30:null,$50:null,a:null}}", id() ); - } - }; - - class PackEmpty : public Base { - public: - void run() { - string ns = id().indexNamespace(); - ArtificialTree::setTree( "{a:null}", id() ); - BSONObj k = BSON( "" << "a" ); - ASSERT( unindex( k ) ); - ArtificialTree *t = ArtificialTree::is( dl() ); - t->forcePack(); - Tester::checkEmpty( t, id() ); - } - class Tester : public ArtificialTree { - public: - static void checkEmpty( ArtificialTree *a, const IndexDetails &id ) { - Tester *t = static_cast< Tester * >( a ); - ASSERT_EQUALS( 0, t->n ); - ASSERT( !( t->flags & Packed ) ); - Ordering o = Ordering::make( id.keyPattern() ); - int zero = 0; - t->_packReadyForMod( o, zero ); - ASSERT_EQUALS( 0, t->n ); - ASSERT_EQUALS( 0, t->topSize ); - ASSERT_EQUALS( BtreeBucket::bodySize(), t->emptySize ); - ASSERT( t->flags & Packed ); - } - }; - }; - - class PackedDataSizeEmpty : public Base { - public: - void run() { - string ns = id().indexNamespace(); - ArtificialTree::setTree( "{a:null}", id() ); - BSONObj k = BSON( "" << "a" ); - ASSERT( unindex( k ) ); - ArtificialTree *t = ArtificialTree::is( dl() ); - t->forcePack(); - Tester::checkEmpty( t, id() ); - } - class Tester : public ArtificialTree { - public: - static void checkEmpty( ArtificialTree *a, const IndexDetails &id ) { - Tester *t = static_cast< Tester * >( a ); - ASSERT_EQUALS( 0, t->n ); - ASSERT( !( t->flags & Packed ) ); - int zero = 0; - ASSERT_EQUALS( 0, t->packedDataSize( zero ) ); - ASSERT( !( t->flags & Packed ) ); - } - }; - }; - - class BalanceSingleParentKeyPackParent : public Base { - public: - void run() { - string ns = id().indexNamespace(); - ArtificialTree::setTree( "{$10:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null},_:{$20:null,$30:null,$40:null,$50:null,a:null}}", id() ); - ASSERT_EQUALS( 12, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); - // force parent pack - ArtificialTree::is( dl() )->forcePack(); - BSONObj k = BSON( "" << 
bigNumString( 0x40 ) ); -// dump(); - ASSERT( unindex( k ) ); -// dump(); - ASSERT_EQUALS( 11, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{$6:{$1:null,$2:null,$3:null,$4:null,$5:null},_:{$10:null,$20:null,$30:null,$50:null,a:null}}", id() ); - } - }; - - class BalanceSplitParent : public Base { - public: - void run() { - string ns = id().indexNamespace(); - ArtificialTree::setTree( "{$10$10:{$1:null,$2:null,$3:null,$4:null},$100:{$20:null,$30:null,$40:null,$50:null,$60:null,$70:null,$80:null},$200:null,$300:null,$400:null,$500:null,$600:null,$700:null,$800:null,$900:null,_:{c:null}}", id() ); - ASSERT_EQUALS( 22, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); - BSONObj k = BSON( "" << bigNumString( 0x3 ) ); -// dump(); - ASSERT( unindex( k ) ); -// dump(); - ASSERT_EQUALS( 21, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 6, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{$500:{$30:{$1:null,$2:null,$4:null,$10$10:null,$20:null},$100:{$40:null,$50:null,$60:null,$70:null,$80:null},$200:null,$300:null,$400:null},_:{$600:null,$700:null,$800:null,$900:null,_:{c:null}}}", id() ); - } - }; - - class RebalancedSeparatorBase : public Base { - public: - void run() { - ArtificialTree::setTree( treeSpec(), id() ); - modTree(); - Tester::checkSeparator( id(), expectedSeparator() ); - } - virtual string treeSpec() const = 0; - virtual int expectedSeparator() const = 0; - virtual void modTree() {} - struct Tester : public ArtificialTree { - static void checkSeparator( const IndexDetails& id, int expected ) { - ASSERT_EQUALS( expected, static_cast< Tester * >( id.head.btreemod() )->rebalancedSeparatorPos( id.head, 0 ) ); - } - }; - }; - - class EvenRebalanceLeft : public RebalancedSeparatorBase { - virtual string treeSpec() const { return "{$7:{$1:null,$2$31f:null,$3:null,$4$31f:null,$5:null,$6:null},_:{$8:null,$9:null,$10$31e:null}}"; } - virtual int expectedSeparator() const { return 4; } - }; - - class EvenRebalanceLeftCusp : public RebalancedSeparatorBase { - virtual string treeSpec() const { return "{$6:{$1:null,$2$31f:null,$3:null,$4$31f:null,$5:null},_:{$7:null,$8:null,$9$31e:null,$10:null}}"; } - virtual int expectedSeparator() const { return 4; } - }; - - class EvenRebalanceRight : public RebalancedSeparatorBase { - virtual string treeSpec() const { return "{$3:{$1:null,$2$31f:null},_:{$4$31f:null,$5:null,$6:null,$7:null,$8$31e:null,$9:null,$10:null}}"; } - virtual int expectedSeparator() const { return 4; } - }; - - class EvenRebalanceRightCusp : public RebalancedSeparatorBase { - virtual string treeSpec() const { return "{$4$31f:{$1:null,$2$31f:null,$3:null},_:{$5:null,$6:null,$7$31e:null,$8:null,$9:null,$10:null}}"; } - virtual int expectedSeparator() const { return 4; } - }; - - class EvenRebalanceCenter : public RebalancedSeparatorBase { - virtual string treeSpec() const { return "{$5:{$1:null,$2$31f:null,$3:null,$4$31f:null},_:{$6:null,$7$31e:null,$8:null,$9:null,$10:null}}"; } - virtual int expectedSeparator() const { return 4; } - }; - - class OddRebalanceLeft : public RebalancedSeparatorBase { - virtual string treeSpec() const { return "{$6$31f:{$1:null,$2:null,$3:null,$4:null,$5:null},_:{$7:null,$8:null,$9:null,$10:null}}"; } - virtual int expectedSeparator() const { return 4; } - }; - - class OddRebalanceRight : public RebalancedSeparatorBase { - virtual string treeSpec() 
const { return "{$4:{$1:null,$2:null,$3:null},_:{$5:null,$6:null,$7:null,$8$31f:null,$9:null,$10:null}}"; } - virtual int expectedSeparator() const { return 4; } - }; - - class OddRebalanceCenter : public RebalancedSeparatorBase { - virtual string treeSpec() const { return "{$5:{$1:null,$2:null,$3:null,$4:null},_:{$6:null,$7:null,$8:null,$9:null,$10$31f:null}}"; } - virtual int expectedSeparator() const { return 4; } - }; - - class RebalanceEmptyRight : public RebalancedSeparatorBase { - virtual string treeSpec() const { return "{$a:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null,$7:null,$8:null,$9:null},_:{$b:null}}"; } - virtual void modTree() { - BSONObj k = BSON( "" << bigNumString( 0xb ) ); - ASSERT( unindex( k ) ); - } - virtual int expectedSeparator() const { return 4; } - }; - - class RebalanceEmptyLeft : public RebalancedSeparatorBase { - virtual string treeSpec() const { return "{$a:{$1:null},_:{$11:null,$12:null,$13:null,$14:null,$15:null,$16:null,$17:null,$18:null,$19:null}}"; } - virtual void modTree() { - BSONObj k = BSON( "" << bigNumString( 0x1 ) ); - ASSERT( unindex( k ) ); - } - virtual int expectedSeparator() const { return 4; } - }; - - class NoMoveAtLowWaterMarkRight : public MergeSizeJustRightRight { - virtual int rightSize() const { return MergeSizeJustRightRight::rightSize() + 1; } - virtual void initCheck() { _oldTop = bt()->keyNode( 0 ).key; } - virtual void validate() { ASSERT_EQUALS( _oldTop, bt()->keyNode( 0 ).key ); } - virtual bool merge() const { return false; } - protected: - BSONObj _oldTop; - }; - - class MoveBelowLowWaterMarkRight : public NoMoveAtLowWaterMarkRight { - virtual int rightSize() const { return MergeSizeJustRightRight::rightSize(); } - virtual int leftSize() const { return MergeSizeJustRightRight::leftSize() + 1; } - // different top means we rebalanced - virtual void validate() { ASSERT( !( _oldTop == bt()->keyNode( 0 ).key ) ); } - }; - - class NoMoveAtLowWaterMarkLeft : public MergeSizeJustRightLeft { - virtual int leftSize() const { return MergeSizeJustRightLeft::leftSize() + 1; } - virtual void initCheck() { _oldTop = bt()->keyNode( 0 ).key; } - virtual void validate() { ASSERT_EQUALS( _oldTop, bt()->keyNode( 0 ).key ); } - virtual bool merge() const { return false; } - protected: - BSONObj _oldTop; - }; - - class MoveBelowLowWaterMarkLeft : public NoMoveAtLowWaterMarkLeft { - virtual int leftSize() const { return MergeSizeJustRightLeft::leftSize(); } - virtual int rightSize() const { return MergeSizeJustRightLeft::rightSize() + 1; } - // different top means we rebalanced - virtual void validate() { ASSERT( !( _oldTop == bt()->keyNode( 0 ).key ) ); } - }; - - class PreferBalanceLeft : public Base { - public: - void run() { - string ns = id().indexNamespace(); - ArtificialTree::setTree( "{$10:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null},$20:{$11:null,$12:null,$13:null,$14:null},_:{$30:null}}", id() ); - ASSERT_EQUALS( 13, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); - BSONObj k = BSON( "" << bigNumString( 0x12 ) ); -// dump(); - ASSERT( unindex( k ) ); -// dump(); - ASSERT_EQUALS( 12, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{$5:{$1:null,$2:null,$3:null,$4:null},$20:{$6:null,$10:null,$11:null,$13:null,$14:null},_:{$30:null}}", id() ); - } - }; - - class PreferBalanceRight : public Base { - public: - void run() { - string ns = id().indexNamespace(); - 
ArtificialTree::setTree( "{$10:{$1:null},$20:{$11:null,$12:null,$13:null,$14:null},_:{$31:null,$32:null,$33:null,$34:null,$35:null,$36:null}}", id() ); - ASSERT_EQUALS( 13, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); - BSONObj k = BSON( "" << bigNumString( 0x12 ) ); - // dump(); - ASSERT( unindex( k ) ); - // dump(); - ASSERT_EQUALS( 12, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{$10:{$1:null},$31:{$11:null,$13:null,$14:null,$20:null},_:{$32:null,$33:null,$34:null,$35:null,$36:null}}", id() ); - } - }; - - class RecursiveMergeThenBalance : public Base { - public: - void run() { - string ns = id().indexNamespace(); - ArtificialTree::setTree( "{$10:{$5:{$1:null,$2:null},$8:{$6:null,$7:null}},_:{$20:null,$30:null,$40:null,$50:null,$60:null,$70:null,$80:null,$90:null}}", id() ); - ASSERT_EQUALS( 15, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 5, nsdetails( ns.c_str() )->stats.nrecords ); - BSONObj k = BSON( "" << bigNumString( 0x7 ) ); - // dump(); - ASSERT( unindex( k ) ); - // dump(); - ASSERT_EQUALS( 14, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{$40:{$8:{$1:null,$2:null,$5:null,$6:null},$10:null,$20:null,$30:null},_:{$50:null,$60:null,$70:null,$80:null,$90:null}}", id() ); - } - }; - - class MergeRightEmpty : public MergeSizeBase { - protected: - virtual int rightAdditional() const { return 1; } - virtual int leftAdditional() const { return 1; } - virtual const char * delKeys() const { return "lz"; } - virtual int rightSize() const { return 0; } - virtual int leftSize() const { return BtreeBucket::bodySize() - biggestSize() - sizeof( _KeyNode ); } - }; - - class MergeMinRightEmpty : public MergeSizeBase { - protected: - virtual int rightAdditional() const { return 1; } - virtual int leftAdditional() const { return 0; } - virtual const char * delKeys() const { return "z"; } - virtual int rightSize() const { return 0; } - virtual int leftSize() const { return bigSize() + sizeof( _KeyNode ); } - }; - - class MergeLeftEmpty : public MergeSizeBase { - protected: - virtual int rightAdditional() const { return 1; } - virtual int leftAdditional() const { return 1; } - virtual const char * delKeys() const { return "zl"; } - virtual int leftSize() const { return 0; } - virtual int rightSize() const { return BtreeBucket::bodySize() - biggestSize() - sizeof( _KeyNode ); } - }; - - class MergeMinLeftEmpty : public MergeSizeBase { - protected: - virtual int leftAdditional() const { return 1; } - virtual int rightAdditional() const { return 0; } - virtual const char * delKeys() const { return "l"; } - virtual int leftSize() const { return 0; } - virtual int rightSize() const { return bigSize() + sizeof( _KeyNode ); } - }; - - class BalanceRightEmpty : public MergeRightEmpty { - protected: - virtual int leftSize() const { return BtreeBucket::bodySize() - biggestSize() - sizeof( _KeyNode ) + 1; } - virtual bool merge() const { return false; } - virtual void initCheck() { _oldTop = bt()->keyNode( 0 ).key; } - virtual void validate() { ASSERT( !( _oldTop == bt()->keyNode( 0 ).key ) ); } - private: - BSONObj _oldTop; - }; - - class BalanceLeftEmpty : public MergeLeftEmpty { - protected: - virtual int rightSize() const { return BtreeBucket::bodySize() - biggestSize() - sizeof( _KeyNode ) + 1; } - virtual bool merge() 
const { return false; } - virtual void initCheck() { _oldTop = bt()->keyNode( 0 ).key; } - virtual void validate() { ASSERT( !( _oldTop == bt()->keyNode( 0 ).key ) ); } - private: - BSONObj _oldTop; - }; - - class DelEmptyNoNeighbors : public Base { - public: - void run() { - string ns = id().indexNamespace(); - ArtificialTree::setTree( "{b:{a:null}}", id() ); - ASSERT_EQUALS( 2, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 2, nsdetails( ns.c_str() )->stats.nrecords ); - BSONObj k = BSON( "" << "a" ); - // dump(); - ASSERT( unindex( k ) ); - // dump(); - ASSERT_EQUALS( 1, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 1, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{b:null}", id() ); - } - }; - - class DelEmptyEmptyNeighbors : public Base { - public: - void run() { - string ns = id().indexNamespace(); - ArtificialTree::setTree( "{a:null,c:{b:null},d:null}", id() ); - ASSERT_EQUALS( 4, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 2, nsdetails( ns.c_str() )->stats.nrecords ); - BSONObj k = BSON( "" << "b" ); - // dump(); - ASSERT( unindex( k ) ); - // dump(); - ASSERT_EQUALS( 3, bt()->fullValidate( dl(), order(), 0, true ) ); - ASSERT_EQUALS( 1, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{a:null,c:null,d:null}", id() ); - } - }; - - class DelInternal : public Base { - public: - void run() { - string ns = id().indexNamespace(); - ArtificialTree::setTree( "{a:null,c:{b:null},d:null}", id() ); - long long unused = 0; - ASSERT_EQUALS( 4, bt()->fullValidate( dl(), order(), &unused, true ) ); - ASSERT_EQUALS( 0, unused ); - ASSERT_EQUALS( 2, nsdetails( ns.c_str() )->stats.nrecords ); - BSONObj k = BSON( "" << "c" ); -// dump(); - ASSERT( unindex( k ) ); -// dump(); - ASSERT_EQUALS( 3, bt()->fullValidate( dl(), order(), &unused, true ) ); - ASSERT_EQUALS( 0, unused ); - ASSERT_EQUALS( 1, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{a:null,b:null,d:null}", id() ); - } - }; - - class DelInternalReplaceWithUnused : public Base { - public: - void run() { - string ns = id().indexNamespace(); - ArtificialTree::setTree( "{a:null,c:{b:null},d:null}", id() ); - getDur().writingInt( const_cast< DiskLoc& >( bt()->keyNode( 1 ).prevChildBucket.btree()->keyNode( 0 ).recordLoc ).GETOFS() ) |= 1; // make unused - long long unused = 0; - ASSERT_EQUALS( 3, bt()->fullValidate( dl(), order(), &unused, true ) ); - ASSERT_EQUALS( 1, unused ); - ASSERT_EQUALS( 2, nsdetails( ns.c_str() )->stats.nrecords ); - BSONObj k = BSON( "" << "c" ); -// dump(); - ASSERT( unindex( k ) ); -// dump(); - unused = 0; - ASSERT_EQUALS( 2, bt()->fullValidate( dl(), order(), &unused, true ) ); - ASSERT_EQUALS( 1, unused ); - ASSERT_EQUALS( 1, nsdetails( ns.c_str() )->stats.nrecords ); - // doesn't discriminate between used and unused - ArtificialTree::checkStructure( "{a:null,b:null,d:null}", id() ); - } - }; - - class DelInternalReplaceRight : public Base { - public: - void run() { - string ns = id().indexNamespace(); - ArtificialTree::setTree( "{a:null,_:{b:null}}", id() ); - long long unused = 0; - ASSERT_EQUALS( 2, bt()->fullValidate( dl(), order(), &unused, true ) ); - ASSERT_EQUALS( 0, unused ); - ASSERT_EQUALS( 2, nsdetails( ns.c_str() )->stats.nrecords ); - BSONObj k = BSON( "" << "a" ); -// dump(); - ASSERT( unindex( k ) ); -// dump(); - unused = 0; - ASSERT_EQUALS( 1, bt()->fullValidate( dl(), order(), &unused, true ) ); - ASSERT_EQUALS( 0, unused ); - ASSERT_EQUALS( 1, 
nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{b:null}", id() ); - } - }; - - class DelInternalPromoteKey : public Base { - public: - void run() { - string ns = id().indexNamespace(); - ArtificialTree::setTree( "{a:null,y:{d:{c:{b:null}},_:{e:null}},z:null}", id() ); - long long unused = 0; - ASSERT_EQUALS( 7, bt()->fullValidate( dl(), order(), &unused, true ) ); - ASSERT_EQUALS( 0, unused ); - ASSERT_EQUALS( 5, nsdetails( ns.c_str() )->stats.nrecords ); - BSONObj k = BSON( "" << "y" ); -// dump(); - ASSERT( unindex( k ) ); -// dump(); - unused = 0; - ASSERT_EQUALS( 6, bt()->fullValidate( dl(), order(), &unused, true ) ); - ASSERT_EQUALS( 0, unused ); - ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{a:null,e:{c:{b:null},d:null},z:null}", id() ); - } - }; - - class DelInternalPromoteRightKey : public Base { - public: - void run() { - string ns = id().indexNamespace(); - ArtificialTree::setTree( "{a:null,_:{e:{c:null},_:{f:null}}}", id() ); - long long unused = 0; - ASSERT_EQUALS( 4, bt()->fullValidate( dl(), order(), &unused, true ) ); - ASSERT_EQUALS( 0, unused ); - ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); - BSONObj k = BSON( "" << "a" ); -// dump(); - ASSERT( unindex( k ) ); -// dump(); - unused = 0; - ASSERT_EQUALS( 3, bt()->fullValidate( dl(), order(), &unused, true ) ); - ASSERT_EQUALS( 0, unused ); - ASSERT_EQUALS( 2, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{c:null,_:{e:null,f:null}}", id() ); - } - }; - - class DelInternalReplacementPrevNonNull : public Base { - public: - void run() { - string ns = id().indexNamespace(); - ArtificialTree::setTree( "{a:null,d:{c:{b:null}},e:null}", id() ); - long long unused = 0; - ASSERT_EQUALS( 5, bt()->fullValidate( dl(), order(), &unused, true ) ); - ASSERT_EQUALS( 0, unused ); - ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); - BSONObj k = BSON( "" << "d" ); - // dump(); - ASSERT( unindex( k ) ); - // dump(); - ASSERT_EQUALS( 4, bt()->fullValidate( dl(), order(), &unused, true ) ); - ASSERT_EQUALS( 1, unused ); - ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{a:null,d:{c:{b:null}},e:null}", id() ); - ASSERT( bt()->keyNode( 1 ).recordLoc.getOfs() & 1 ); // check 'unused' key - } - }; - - class DelInternalReplacementNextNonNull : public Base { - public: - void run() { - string ns = id().indexNamespace(); - ArtificialTree::setTree( "{a:null,_:{c:null,_:{d:null}}}", id() ); - long long unused = 0; - ASSERT_EQUALS( 3, bt()->fullValidate( dl(), order(), &unused, true ) ); - ASSERT_EQUALS( 0, unused ); - ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); - BSONObj k = BSON( "" << "a" ); - // dump(); - ASSERT( unindex( k ) ); - // dump(); - ASSERT_EQUALS( 2, bt()->fullValidate( dl(), order(), &unused, true ) ); - ASSERT_EQUALS( 1, unused ); - ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{a:null,_:{c:null,_:{d:null}}}", id() ); - ASSERT( bt()->keyNode( 0 ).recordLoc.getOfs() & 1 ); // check 'unused' key - } - }; - - class DelInternalSplitPromoteLeft : public Base { - public: - void run() { - string ns = id().indexNamespace(); - ArtificialTree::setTree( "{$10:null,$20:null,$30$10:{$25:{$23:null},_:{$27:null}},$40:null,$50:null,$60:null,$70:null,$80:null,$90:null,$100:null}", id() ); - long long unused = 0; - ASSERT_EQUALS( 13, bt()->fullValidate( dl(), order(), &unused, true ) ); - ASSERT_EQUALS( 
0, unused ); - ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); - BSONObj k = BSON( "" << bigNumString( 0x30, 0x10 ) ); -// dump(); - ASSERT( unindex( k ) ); -// dump(); - ASSERT_EQUALS( 12, bt()->fullValidate( dl(), order(), &unused, true ) ); - ASSERT_EQUALS( 0, unused ); - ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{$60:{$10:null,$20:null,$27:{$23:null,$25:null},$40:null,$50:null},_:{$70:null,$80:null,$90:null,$100:null}}", id() ); - } - }; - - class DelInternalSplitPromoteRight : public Base { - public: - void run() { - string ns = id().indexNamespace(); - ArtificialTree::setTree( "{$10:null,$20:null,$30:null,$40:null,$50:null,$60:null,$70:null,$80:null,$90:null,$100$10:{$95:{$93:null},_:{$97:null}}}", id() ); - long long unused = 0; - ASSERT_EQUALS( 13, bt()->fullValidate( dl(), order(), &unused, true ) ); - ASSERT_EQUALS( 0, unused ); - ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); - BSONObj k = BSON( "" << bigNumString( 0x100, 0x10 ) ); -// dump(); - ASSERT( unindex( k ) ); -// dump(); - ASSERT_EQUALS( 12, bt()->fullValidate( dl(), order(), &unused, true ) ); - ASSERT_EQUALS( 0, unused ); - ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); - ArtificialTree::checkStructure( "{$80:{$10:null,$20:null,$30:null,$40:null,$50:null,$60:null,$70:null},_:{$90:null,$97:{$93:null,$95:null}}}", id() ); - } - }; - - class All : public Suite { - public: - All() : Suite( "btree" ) { - } +#define BtreeBucket BtreeBucket<V0> +#define btree btree<V0> +#define btreemod btreemod<V0> +#define testName "btree" +#define BTVERSION 0 +namespace BtreeTests0 { + #include "btreetests.inl" +} - void setupTests() { - add< Create >(); - add< SimpleInsertDelete >(); - add< SplitRightHeavyBucket >(); - add< SplitLeftHeavyBucket >(); - add< MissingLocate >(); - add< MissingLocateMultiBucket >(); - add< SERVER983 >(); - add< DontReuseUnused >(); - add< PackUnused >(); - add< DontDropReferenceKey >(); - add< MergeBucketsLeft >(); - add< MergeBucketsRight >(); -// add< MergeBucketsHead >(); - add< MergeBucketsDontReplaceHead >(); - add< MergeBucketsDelInternal >(); - add< MergeBucketsRightNull >(); - add< DontMergeSingleBucket >(); - add< ParentMergeNonRightToLeft >(); - add< ParentMergeNonRightToRight >(); - add< CantMergeRightNoMerge >(); - add< CantMergeLeftNoMerge >(); - add< MergeOption >(); - add< ForceMergeLeft >(); - add< ForceMergeRight >(); - add< RecursiveMerge >(); - add< RecursiveMergeRightBucket >(); - add< RecursiveMergeDoubleRightBucket >(); - add< MergeSizeJustRightRight >(); - add< MergeSizeJustRightLeft >(); - add< MergeSizeRight >(); - add< MergeSizeLeft >(); - add< NoMergeBelowMarkRight >(); - add< NoMergeBelowMarkLeft >(); - add< MergeSizeRightTooBig >(); - add< MergeSizeLeftTooBig >(); - add< BalanceOneLeftToRight >(); - add< BalanceOneRightToLeft >(); - add< BalanceThreeLeftToRight >(); - add< BalanceThreeRightToLeft >(); - add< BalanceSingleParentKey >(); - add< PackEmpty >(); - add< PackedDataSizeEmpty >(); - add< BalanceSingleParentKeyPackParent >(); - add< BalanceSplitParent >(); - add< EvenRebalanceLeft >(); - add< EvenRebalanceLeftCusp >(); - add< EvenRebalanceRight >(); - add< EvenRebalanceRightCusp >(); - add< EvenRebalanceCenter >(); - add< OddRebalanceLeft >(); - add< OddRebalanceRight >(); - add< OddRebalanceCenter >(); - add< RebalanceEmptyRight >(); - add< RebalanceEmptyLeft >(); - add< NoMoveAtLowWaterMarkRight >(); - add< MoveBelowLowWaterMarkRight >(); - add< NoMoveAtLowWaterMarkLeft >(); - 
add< MoveBelowLowWaterMarkLeft >(); - add< PreferBalanceLeft >(); - add< PreferBalanceRight >(); - add< RecursiveMergeThenBalance >(); - add< MergeRightEmpty >(); - add< MergeMinRightEmpty >(); - add< MergeLeftEmpty >(); - add< MergeMinLeftEmpty >(); - add< BalanceRightEmpty >(); - add< BalanceLeftEmpty >(); - add< DelEmptyNoNeighbors >(); - add< DelEmptyEmptyNeighbors >(); - add< DelInternal >(); - add< DelInternalReplaceWithUnused >(); - add< DelInternalReplaceRight >(); - add< DelInternalPromoteKey >(); - add< DelInternalPromoteRightKey >(); - add< DelInternalReplacementPrevNonNull >(); - add< DelInternalReplacementNextNonNull >(); - add< DelInternalSplitPromoteLeft >(); - add< DelInternalSplitPromoteRight >(); - } - } myall; +#undef BtreeBucket +#undef btree +#undef btreemod +#define BtreeBucket BtreeBucket<V1> +#define btree btree<V1> +#define btreemod btreemod<V1> +#undef testName +#define testName "btree1" +#undef BTVERSION +#define BTVERSION 1 +namespace BtreeTests1 { + #include "btreetests.inl" } diff --git a/dbtests/btreetests.inl b/dbtests/btreetests.inl new file mode 100644 index 00000000000..b9c208f0be5 --- /dev/null +++ b/dbtests/btreetests.inl @@ -0,0 +1,1689 @@ + const char* ns() { + return "unittests.btreetests"; + } + + // dummy, valid record loc + const DiskLoc recordLoc() { + return DiskLoc( 0, 2 ); + } + + class Ensure { + public: + Ensure() { + _c.ensureIndex( ns(), BSON( "a" << 1 ), false, "testIndex", + false, // given two versions not sure if cache true would mess us up... + false, BTVERSION); + } + ~Ensure() { + _c.dropIndexes( ns() ); + } + private: + DBDirectClient _c; + }; + + class Base : public Ensure { + public: + Base() : + _context( ns() ) { + { + bool f = false; + assert( f = true ); + massert( 10402 , "assert is misdefined", f); + } + } + virtual ~Base() {} + static string bigNumString( long long n, int len = 800 ) { + char sub[17]; + sprintf( sub, "%.16llx", n ); + string val( len, ' ' ); + for( int i = 0; i < len; ++i ) { + val[ i ] = sub[ i % 16 ]; + } + return val; + } + protected: + const BtreeBucket* bt() { + return id().head.btree(); + } + DiskLoc dl() { + return id().head; + } + IndexDetails& id() { + NamespaceDetails *nsd = nsdetails( ns() ); + assert( nsd ); + return nsd->idx( 1 ); + } + void checkValid( int nKeys ) { + ASSERT( bt() ); + ASSERT( bt()->isHead() ); + bt()->assertValid( order(), true ); + ASSERT_EQUALS( nKeys, bt()->fullValidate( dl(), order(), 0, true ) ); + } + void dump() { + bt()->dumpTree( dl(), order() ); + } + void insert( BSONObj &key ) { + const BtreeBucket *b = bt(); + b->bt_insert( dl(), recordLoc(), key, Ordering::make(order()), true, id(), true ); + getDur().commitIfNeeded(); + } + bool unindex( BSONObj &key ) { + getDur().commitIfNeeded(); + return bt()->unindex( dl(), id(), key, recordLoc() ); + } + static BSONObj simpleKey( char c, int n = 1 ) { + BSONObjBuilder builder; + string val( n, c ); + builder.append( "a", val ); + return builder.obj(); + } + void locate( BSONObj &key, int expectedPos, + bool expectedFound, const DiskLoc &expectedLocation, + int direction = 1 ) { + int pos; + bool found; + DiskLoc location = + bt()->locate( id(), dl(), key, Ordering::make(order()), pos, found, recordLoc(), direction ); + ASSERT_EQUALS( expectedFound, found ); + ASSERT( location == expectedLocation ); + ASSERT_EQUALS( expectedPos, pos ); + } + bool present( BSONObj &key, int direction ) { + int pos; + bool found; + bt()->locate( id(), dl(), key, Ordering::make(order()), pos, found, recordLoc(), direction ); + return found; 
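// Note: within this .inl, BtreeBucket, btree and btreemod are macros
// (see btreetests.cpp above), so Base and helpers like present() compile
// once against BtreeBucket<V0> in namespace BtreeTests0 and once against
// BtreeBucket<V1> in namespace BtreeTests1, while BTVERSION steers the
// "v" value Ensure passes to ensureIndex.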
+ } + BSONObj order() { + return id().keyPattern(); + } + const BtreeBucket *child( const BtreeBucket *b, int i ) { + assert( i <= b->nKeys() ); + DiskLoc d; + if ( i == b->nKeys() ) { + d = b->getNextChild(); + } + else { + d = const_cast< DiskLoc& >( b->keyNode( i ).prevChildBucket ); + } + assert( !d.isNull() ); + return d.btree(); + } + void checkKey( char i ) { + stringstream ss; + ss << i; + checkKey( ss.str() ); + } + void checkKey( const string &k ) { + BSONObj key = BSON( "" << k ); +// log() << "key: " << key << endl; + ASSERT( present( key, 1 ) ); + ASSERT( present( key, -1 ) ); + } + private: + dblock lk_; + Client::Context _context; + }; + + class Create : public Base { + public: + void run() { + checkValid( 0 ); + } + }; + + class SimpleInsertDelete : public Base { + public: + void run() { + BSONObj key = simpleKey( 'z' ); + insert( key ); + + checkValid( 1 ); + locate( key, 0, true, dl() ); + + unindex( key ); + + checkValid( 0 ); + locate( key, 0, false, DiskLoc() ); + } + }; + + class SplitUnevenBucketBase : public Base { + public: + virtual ~SplitUnevenBucketBase() {} + void run() { + for ( int i = 0; i < 10; ++i ) { + BSONObj shortKey = simpleKey( shortToken( i ), 1 ); + insert( shortKey ); + BSONObj longKey = simpleKey( longToken( i ), 800 ); + insert( longKey ); + } + checkValid( 20 ); + ASSERT_EQUALS( 1, bt()->nKeys() ); + checkSplit(); + } + protected: + virtual char shortToken( int i ) const = 0; + virtual char longToken( int i ) const = 0; + static char leftToken( int i ) { + return 'a' + i; + } + static char rightToken( int i ) { + return 'z' - i; + } + virtual void checkSplit() = 0; + }; + + class SplitRightHeavyBucket : public SplitUnevenBucketBase { + private: + virtual char shortToken( int i ) const { + return leftToken( i ); + } + virtual char longToken( int i ) const { + return rightToken( i ); + } + virtual void checkSplit() { + ASSERT_EQUALS( 15, child( bt(), 0 )->nKeys() ); + ASSERT_EQUALS( 4, child( bt(), 1 )->nKeys() ); + } + }; + + class SplitLeftHeavyBucket : public SplitUnevenBucketBase { + private: + virtual char shortToken( int i ) const { + return rightToken( i ); + } + virtual char longToken( int i ) const { + return leftToken( i ); + } + virtual void checkSplit() { + ASSERT_EQUALS( 4, child( bt(), 0 )->nKeys() ); + ASSERT_EQUALS( 15, child( bt(), 1 )->nKeys() ); + } + }; + + class MissingLocate : public Base { + public: + void run() { + for ( int i = 0; i < 3; ++i ) { + BSONObj k = simpleKey( 'b' + 2 * i ); + insert( k ); + } + + locate( 1, 'a', 'b', dl() ); + locate( 1, 'c', 'd', dl() ); + locate( 1, 'e', 'f', dl() ); + locate( 1, 'g', 'g' + 1, DiskLoc() ); // of course, 'h' isn't in the index. 
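// For absent keys, locate() reports found == false and parks the cursor
// on the neighbouring key in the requested direction: scanning forward
// 'a' lands on 'b' and 'e' on 'f', scanning backward 'c' lands on 'b';
// past either end it yields a null DiskLoc.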
+ + // old behavior + // locate( -1, 'a', 'b', dl() ); + // locate( -1, 'c', 'd', dl() ); + // locate( -1, 'e', 'f', dl() ); + // locate( -1, 'g', 'f', dl() ); + + locate( -1, 'a', 'a' - 1, DiskLoc() ); // of course, 'a' - 1 isn't in the index + locate( -1, 'c', 'b', dl() ); + locate( -1, 'e', 'd', dl() ); + locate( -1, 'g', 'f', dl() ); + } + private: + void locate( int direction, char token, char expectedMatch, + DiskLoc expectedLocation ) { + BSONObj k = simpleKey( token ); + int expectedPos = ( expectedMatch - 'b' ) / 2; + Base::locate( k, expectedPos, false, expectedLocation, direction ); + } + }; + + class MissingLocateMultiBucket : public Base { + public: + void run() { + for ( int i = 0; i < 8; ++i ) { + insert( i ); + } + insert( 9 ); + insert( 8 ); +// dump(); + BSONObj straddle = key( 'i' ); + locate( straddle, 0, false, dl(), 1 ); + straddle = key( 'k' ); + locate( straddle, 0, false, dl(), -1 ); + } + private: + BSONObj key( char c ) { + return simpleKey( c, 800 ); + } + void insert( int i ) { + BSONObj k = key( 'b' + 2 * i ); + Base::insert( k ); + } + }; + + class SERVER983 : public Base { + public: + void run() { + for ( int i = 0; i < 10; ++i ) { + insert( i ); + } +// dump(); + BSONObj straddle = key( 'o' ); + locate( straddle, 0, false, dl(), 1 ); + straddle = key( 'q' ); + locate( straddle, 0, false, dl(), -1 ); + } + private: + BSONObj key( char c ) { + return simpleKey( c, 800 ); + } + void insert( int i ) { + BSONObj k = key( 'b' + 2 * i ); + Base::insert( k ); + } + }; + + class DontReuseUnused : public Base { + public: + void run() { + for ( int i = 0; i < 10; ++i ) { + insert( i ); + } +// dump(); + BSONObj root = key( 'p' ); + unindex( root ); + Base::insert( root ); + locate( root, 0, true, bt()->getNextChild(), 1 ); + } + private: + BSONObj key( char c ) { + return simpleKey( c, 800 ); + } + void insert( int i ) { + BSONObj k = key( 'b' + 2 * i ); + Base::insert( k ); + } + }; + + class PackUnused : public Base { + public: + void run() { + for ( long long i = 0; i < 1000000; i += 1000 ) { + insert( i ); + } +// dump(); + string orig, after; + { + NamespaceDetails *nsd = nsdetails( ns() ); + + stringstream ss; + bt()->shape( ss ); + orig = ss.str(); + cout << orig << endl; + } + vector< string > toDel; + vector< string > other; + BSONObjBuilder start; + start.appendMinKey( "a" ); + BSONObjBuilder end; + end.appendMaxKey( "a" ); + auto_ptr< BtreeCursor > c( BtreeCursor::make( nsdetails( ns() ), 1, id(), start.done(), end.done(), false, 1 ) ); + while( c->ok() ) { + if ( !c->_currKeyNode().prevChildBucket.isNull() ) { + toDel.push_back( c->currKey().firstElement().valuestr() ); + } + else { + other.push_back( c->currKey().firstElement().valuestr() ); + } + c->advance(); + } + ASSERT( toDel.size() > 0 ); + for( vector< string >::const_iterator i = toDel.begin(); i != toDel.end(); ++i ) { + BSONObj o = BSON( "a" << *i ); + unindex( o ); + } + ASSERT( other.size() > 0 ); + for( vector< string >::const_iterator i = other.begin(); i != other.end(); ++i ) { + BSONObj o = BSON( "a" << *i ); + unindex( o ); + } + + long long unused = 0; + ASSERT_EQUALS( 0, bt()->fullValidate( dl(), order(), &unused, true ) ); + + for ( long long i = 50000; i < 50100; ++i ) { + insert( i ); + } + + long long unused2 = 0; + ASSERT_EQUALS( 100, bt()->fullValidate( dl(), order(), &unused2, true ) ); + +// log() << "old unused: " << unused << ", new unused: " << unused2 << endl; +// + ASSERT( unused2 <= unused ); + } + protected: + void insert( long long n ) { + string val = bigNumString( n ); 
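+            // bigNumString( n ) (see Base) tiles the 16-digit hex form of n out to
+            // 800 chars; e.g. with a shortened len of 20, bigNumString( 0x3e8, 20 )
+            // would be "00000000000003e8" + "0000". Since '0'-'9' sort before
+            // 'a'-'f' in ASCII, these keys compare in the numeric order of n.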
+ BSONObj k = BSON( "a" << val ); + Base::insert( k ); + } + }; + + class DontDropReferenceKey : public PackUnused { + public: + void run() { + // with 80 root node is full + for ( long long i = 0; i < 80; i += 1 ) { + insert( i ); + } + + BSONObjBuilder start; + start.appendMinKey( "a" ); + BSONObjBuilder end; + end.appendMaxKey( "a" ); + BSONObj l = bt()->keyNode( 0 ).key.toBson(); + string toInsert; + auto_ptr< BtreeCursor > c( BtreeCursor::make( nsdetails( ns() ), 1, id(), start.done(), end.done(), false, 1 ) ); + while( c->ok() ) { + if ( c->currKey().woCompare( l ) > 0 ) { + toInsert = c->currKey().firstElement().valuestr(); + break; + } + c->advance(); + } + // too much work to try to make this happen through inserts and deletes + // we are intentionally manipulating the btree bucket directly here + getDur().writingDiskLoc( const_cast< DiskLoc& >( bt()->keyNode( 1 ).prevChildBucket ) ) = DiskLoc(); + getDur().writingInt( const_cast< DiskLoc& >( bt()->keyNode( 1 ).recordLoc ).GETOFS() ) |= 1; // make unused + BSONObj k = BSON( "a" << toInsert ); + Base::insert( k ); + } + }; + + class MergeBuckets : public Base { + public: + virtual ~MergeBuckets() {} + void run() { + for ( int i = 0; i < 10; ++i ) { + insert( i ); + } +// dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + int expectedCount = 10 - unindexKeys(); +// dump(); + ASSERT_EQUALS( 1, nsdetails( ns.c_str() )->stats.nrecords ); + long long unused = 0; + ASSERT_EQUALS( expectedCount, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + } + protected: + BSONObj key( char c ) { + return simpleKey( c, 800 ); + } + void insert( int i ) { + BSONObj k = key( 'b' + 2 * i ); + Base::insert( k ); + } + virtual int unindexKeys() = 0; + }; + + class MergeBucketsLeft : public MergeBuckets { + virtual int unindexKeys() { + BSONObj k = key( 'b' ); + unindex( k ); + k = key( 'b' + 2 ); + unindex( k ); + k = key( 'b' + 4 ); + unindex( k ); + k = key( 'b' + 6 ); + unindex( k ); + return 4; + } + }; + + class MergeBucketsRight : public MergeBuckets { + virtual int unindexKeys() { + BSONObj k = key( 'b' + 2 * 9 ); + unindex( k ); + return 1; + } + }; + + // deleting from head won't coalesce yet +// class MergeBucketsHead : public MergeBuckets { +// virtual BSONObj unindexKey() { return key( 'p' ); } +// }; + + class MergeBucketsDontReplaceHead : public Base { + public: + void run() { + for ( int i = 0; i < 18; ++i ) { + insert( i ); + } + // dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = key( 'a' + 17 ); + unindex( k ); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + long long unused = 0; + ASSERT_EQUALS( 17, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + } + private: + BSONObj key( char c ) { + return simpleKey( c, 800 ); + } + void insert( int i ) { + BSONObj k = key( 'a' + i ); + Base::insert( k ); + } + }; + + // Tool to construct custom trees for tests. 
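+    //
+    // A spec is JSON (inferred from makeTree()/expectedKey() below): each field
+    // name is a key -- literal text, or "$<hex>" / "$<hex>$<hexlen>", expanded as
+    // bigNumString( 0x<hex>, 0x<hexlen> ), the length defaulting to 800. A field's
+    // value is the subtree hanging left of that key (null for none), and a
+    // trailing "_" field names the bucket's right child. For example,
+    //     ArtificialTree::setTree( "{b:{a:null},_:{c:null}}", id() );
+    // builds a root holding 'b', with left child bucket {a} and right child {c}.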
+ class ArtificialTree : public BtreeBucket { + public: + void push( const BSONObj &key, const DiskLoc &child ) { + pushBack( dummyDiskLoc(), KeyOwned(key), Ordering::make( BSON( "a" << 1 ) ), child ); + } + void setNext( const DiskLoc &child ) { + nextChild = child; + } + static DiskLoc make( IndexDetails &id ) { + DiskLoc ret = addBucket( id ); + is( ret )->init(); + getDur().commitIfNeeded(); + return ret; + } + static ArtificialTree *is( const DiskLoc &l ) { + return static_cast< ArtificialTree * >( l.btreemod() ); + } + static DiskLoc makeTree( const string &spec, IndexDetails &id ) { + return makeTree( fromjson( spec ), id ); + } + static DiskLoc makeTree( const BSONObj &spec, IndexDetails &id ) { + DiskLoc node = make( id ); + ArtificialTree *n = ArtificialTree::is( node ); + BSONObjIterator i( spec ); + while( i.more() ) { + BSONElement e = i.next(); + DiskLoc child; + if ( e.type() == Object ) { + child = makeTree( e.embeddedObject(), id ); + } + if ( e.fieldName() == string( "_" ) ) { + n->setNext( child ); + } + else { + n->push( BSON( "" << expectedKey( e.fieldName() ) ), child ); + } + } + n->fixParentPtrs( node ); + return node; + } + static void setTree( const string &spec, IndexDetails &id ) { + set( makeTree( spec, id ), id ); + } + static void set( const DiskLoc &l, IndexDetails &id ) { + ArtificialTree::is( id.head )->deallocBucket( id.head, id ); + getDur().writingDiskLoc(id.head) = l; + } + static string expectedKey( const char *spec ) { + if ( spec[ 0 ] != '$' ) { + return spec; + } + char *endPtr; + // parsing a long long is a pain, so just allow shorter keys for now + unsigned long long num = strtol( spec + 1, &endPtr, 16 ); + int len = 800; + if( *endPtr == '$' ) { + len = strtol( endPtr + 1, 0, 16 ); + } + return Base::bigNumString( num, len ); + } + static void checkStructure( const BSONObj &spec, const IndexDetails &id, const DiskLoc node ) { + ArtificialTree *n = ArtificialTree::is( node ); + BSONObjIterator j( spec ); + for( int i = 0; i < n->n; ++i ) { + ASSERT( j.more() ); + BSONElement e = j.next(); + KeyNode kn = n->keyNode( i ); + string expected = expectedKey( e.fieldName() ); + ASSERT( present( id, BSON( "" << expected ), 1 ) ); + ASSERT( present( id, BSON( "" << expected ), -1 ) ); + ASSERT_EQUALS( expected, kn.key.toBson().firstElement().valuestr() ); + if ( kn.prevChildBucket.isNull() ) { + ASSERT( e.type() == jstNULL ); + } + else { + ASSERT( e.type() == Object ); + checkStructure( e.embeddedObject(), id, kn.prevChildBucket ); + } + } + if ( n->nextChild.isNull() ) { + // maybe should allow '_' field with null value? 
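+                // as written, a spec must contain an '_' entry exactly when the
+                // bucket's nextChild is non-null, and must omit it otherwise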
+ ASSERT( !j.more() ); + } + else { + BSONElement e = j.next(); + ASSERT_EQUALS( string( "_" ), e.fieldName() ); + ASSERT( e.type() == Object ); + checkStructure( e.embeddedObject(), id, n->nextChild ); + } + ASSERT( !j.more() ); + } + static void checkStructure( const string &spec, const IndexDetails &id ) { + checkStructure( fromjson( spec ), id, id.head ); + } + static bool present( const IndexDetails &id, const BSONObj &key, int direction ) { + int pos; + bool found; + id.head.btree()->locate( id, id.head, key, Ordering::make(id.keyPattern()), pos, found, recordLoc(), direction ); + return found; + } + int headerSize() const { return BtreeBucket::headerSize(); } + int packedDataSize( int pos ) const { return BtreeBucket::packedDataSize( pos ); } + void fixParentPtrs( const DiskLoc &thisLoc ) { BtreeBucket::fixParentPtrs( thisLoc ); } + void forcePack() { + topSize += emptySize; + emptySize = 0; + setNotPacked(); + } + private: + DiskLoc dummyDiskLoc() const { return DiskLoc( 0, 2 ); } + }; + + /** + * We could probably refactor the following tests, but it's easier to debug + * them in the present state. + */ + + class MergeBucketsDelInternal : public Base { + public: + void run() { + ArtificialTree::setTree( "{d:{b:{a:null},bb:null,_:{c:null}},_:{f:{e:null},_:{g:null}}}", id() ); +// dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 8, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 7, nsdetails( ns.c_str() )->stats.nrecords ); + + BSONObj k = BSON( "" << "bb" ); + assert( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 7, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 5, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{b:{a:null},d:{c:null},f:{e:null},_:{g:null}}", id() ); + } + }; + + class MergeBucketsRightNull : public Base { + public: + void run() { + ArtificialTree::setTree( "{d:{b:{a:null},bb:null,cc:{c:null}},_:{f:{e:null},h:{g:null}}}", id() ); +// dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 10, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 7, nsdetails( ns.c_str() )->stats.nrecords ); + + BSONObj k = BSON( "" << "bb" ); + assert( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 9, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 5, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{b:{a:null},cc:{c:null},d:null,f:{e:null},h:{g:null}}", id() ); + } + }; + + // not yet handling this case + class DontMergeSingleBucket : public Base { + public: + void run() { + ArtificialTree::setTree( "{d:{b:{a:null},c:null}}", id() ); +// dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 4, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << "c" ); + assert( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 3, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{d:{b:{a:null}}}", id() ); + } + }; + + class ParentMergeNonRightToLeft : public Base { + public: + void run() { + ArtificialTree::setTree( "{d:{b:{a:null},bb:null,cc:{c:null}},i:{f:{e:null},h:{g:null}}}", id() ); +// dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 11, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 7, nsdetails( ns.c_str() )->stats.nrecords ); + + BSONObj k = BSON( "" << "bb" ); + assert( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 10, bt()->fullValidate( 
dl(), order(), 0, true ) ); + // child does not currently replace parent in this case + ASSERT_EQUALS( 6, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{i:{b:{a:null},cc:{c:null},d:null,f:{e:null},h:{g:null}}}", id() ); + } + }; + + class ParentMergeNonRightToRight : public Base { + public: + void run() { + ArtificialTree::setTree( "{d:{b:{a:null},cc:{c:null}},i:{f:{e:null},ff:null,h:{g:null}}}", id() ); +// dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 11, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 7, nsdetails( ns.c_str() )->stats.nrecords ); + + BSONObj k = BSON( "" << "ff" ); + assert( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 10, bt()->fullValidate( dl(), order(), 0, true ) ); + // child does not currently replace parent in this case + ASSERT_EQUALS( 6, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{i:{b:{a:null},cc:{c:null},d:null,f:{e:null},h:{g:null}}}", id() ); + } + }; + + class CantMergeRightNoMerge : public Base { + public: + void run() { + ArtificialTree::setTree( "{d:{b:{a:null},bb:null,cc:{c:null}},dd:null,_:{f:{e:null},h:{g:null}}}", id() ); +// dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 11, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 7, nsdetails( ns.c_str() )->stats.nrecords ); + + BSONObj k = BSON( "" << "bb" ); + assert( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 10, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 7, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{d:{b:{a:null},cc:{c:null}},dd:null,_:{f:{e:null},h:{g:null}}}", id() ); + } + }; + + class CantMergeLeftNoMerge : public Base { + public: + void run() { + ArtificialTree::setTree( "{c:{b:{a:null}},d:null,_:{f:{e:null},g:null}}", id() ); +// dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 7, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 5, nsdetails( ns.c_str() )->stats.nrecords ); + + BSONObj k = BSON( "" << "g" ); + assert( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 6, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 5, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{c:{b:{a:null}},d:null,_:{f:{e:null}}}", id() ); + } + }; + + class MergeOption : public Base { + public: + void run() { + ArtificialTree::setTree( "{c:{b:{a:null}},f:{e:{d:null},ee:null},_:{h:{g:null}}}", id() ); +// dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 9, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 7, nsdetails( ns.c_str() )->stats.nrecords ); + + BSONObj k = BSON( "" << "ee" ); + assert( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 8, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 6, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{c:{b:{a:null}},_:{e:{d:null},f:null,h:{g:null}}}", id() ); + } + }; + + class ForceMergeLeft : public Base { + public: + void run() { + ArtificialTree::setTree( "{c:{b:{a:null}},f:{e:{d:null},ee:null},ff:null,_:{h:{g:null}}}", id() ); +// dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 10, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 7, nsdetails( ns.c_str() )->stats.nrecords ); + + BSONObj k = BSON( "" << "ee" ); + assert( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 9, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 6, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( 
"{f:{b:{a:null},c:null,e:{d:null}},ff:null,_:{h:{g:null}}}", id() ); + } + }; + + class ForceMergeRight : public Base { + public: + void run() { + ArtificialTree::setTree( "{c:{b:{a:null}},cc:null,f:{e:{d:null},ee:null},_:{h:{g:null}}}", id() ); +// dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 10, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 7, nsdetails( ns.c_str() )->stats.nrecords ); + + BSONObj k = BSON( "" << "ee" ); + assert( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 9, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 6, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{c:{b:{a:null}},cc:null,_:{e:{d:null},f:null,h:{g:null}}}", id() ); + } + }; + + class RecursiveMerge : public Base { + public: + void run() { + ArtificialTree::setTree( "{h:{e:{b:{a:null},c:null,d:null},g:{f:null}},j:{i:null}}", id() ); +// dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 10, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 6, nsdetails( ns.c_str() )->stats.nrecords ); + + BSONObj k = BSON( "" << "c" ); + assert( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 9, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + // height is not currently reduced in this case + ArtificialTree::checkStructure( "{j:{g:{b:{a:null},d:null,e:null,f:null},h:null,i:null}}", id() ); + } + }; + + class RecursiveMergeRightBucket : public Base { + public: + void run() { + ArtificialTree::setTree( "{h:{e:{b:{a:null},c:null,d:null},g:{f:null}},_:{i:null}}", id() ); +// dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 9, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 6, nsdetails( ns.c_str() )->stats.nrecords ); + + BSONObj k = BSON( "" << "c" ); + assert( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 8, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{g:{b:{a:null},d:null,e:null,f:null},h:null,i:null}", id() ); + } + }; + + class RecursiveMergeDoubleRightBucket : public Base { + public: + void run() { + ArtificialTree::setTree( "{h:{e:{b:{a:null},c:null,d:null},_:{f:null}},_:{i:null}}", id() ); +// dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 8, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 6, nsdetails( ns.c_str() )->stats.nrecords ); + + BSONObj k = BSON( "" << "c" ); + assert( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 7, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + // no recursion currently in this case + ArtificialTree::checkStructure( "{h:{b:{a:null},d:null,e:null,f:null},_:{i:null}}", id() ); + } + }; + + class MergeSizeBase : public Base { + public: + MergeSizeBase() : _count() {} + virtual ~MergeSizeBase() {} + void run() { + typedef ArtificialTree A; + A::set( A::make( id() ), id() ); + A* root = A::is( dl() ); + DiskLoc left = A::make( id() ); + root->push( biggestKey( 'm' ), left ); + _count = 1; + A* l = A::is( left ); + DiskLoc right = A::make( id() ); + root->setNext( right ); + A* r = A::is( right ); + root->fixParentPtrs( dl() ); + + //ASSERT_EQUALS( bigSize(), bigSize() / 2 * 2 ); + fillToExactSize( l, leftSize(), 'a' ); + fillToExactSize( r, rightSize(), 'n' ); + ASSERT( leftAdditional() <= 2 ); + if ( leftAdditional() >= 2 ) { + l->push( bigKey( 'k' ), DiskLoc() ); + } + if ( leftAdditional() >= 1 ) { + 
l->push( bigKey( 'l' ), DiskLoc() ); + } + ASSERT( rightAdditional() <= 2 ); + if ( rightAdditional() >= 2 ) { + r->push( bigKey( 'y' ), DiskLoc() ); + } + if ( rightAdditional() >= 1 ) { + r->push( bigKey( 'z' ), DiskLoc() ); + } + _count += leftAdditional() + rightAdditional(); + +// dump(); + + initCheck(); + string ns = id().indexNamespace(); + const char *keys = delKeys(); + for( const char *i = keys; *i; ++i ) { + long long unused = 0; + ASSERT_EQUALS( _count, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = bigKey( *i ); + unindex( k ); +// dump(); + --_count; + } + +// dump(); + + long long unused = 0; + ASSERT_EQUALS( _count, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + validate(); + if ( !merge() ) { + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + } + else { + ASSERT_EQUALS( 1, nsdetails( ns.c_str() )->stats.nrecords ); + } + } + protected: + virtual int leftAdditional() const { return 2; } + virtual int rightAdditional() const { return 2; } + virtual void initCheck() {} + virtual void validate() {} + virtual int leftSize() const = 0; + virtual int rightSize() const = 0; + virtual const char * delKeys() const { return "klyz"; } + virtual bool merge() const { return true; } + void fillToExactSize( ArtificialTree *t, int targetSize, char startKey ) { + int size = 0; + while( size < targetSize ) { + int space = targetSize - size; + int nextSize = space - sizeof( _KeyNode ); + assert( nextSize > 0 ); + BSONObj newKey = key( startKey++, nextSize ); + t->push( newKey, DiskLoc() ); + size += BtreeBucket::KeyOwned(newKey).dataSize() + sizeof( _KeyNode ); + _count += 1; + } + if( t->packedDataSize( 0 ) != targetSize ) { + ASSERT_EQUALS( t->packedDataSize( 0 ), targetSize ); + } + } + static BSONObj key( char a, int size ) { + if ( size >= bigSize() ) { + return bigKey( a ); + } + return simpleKey( a, size - ( bigSize() - 801 ) ); + } + static BSONObj bigKey( char a ) { + return simpleKey( a, 801 ); + } + static BSONObj biggestKey( char a ) { + int size = BtreeBucket::getKeyMax() - bigSize() + 801; + return simpleKey( a, size ); + } + static int bigSize() { + return BtreeBucket::KeyOwned(bigKey( 'a' )).dataSize(); + } + static int biggestSize() { + return BtreeBucket::KeyOwned(biggestKey( 'a' )).dataSize(); + } + int _count; + }; + + class MergeSizeJustRightRight : public MergeSizeBase { + protected: + virtual int rightSize() const { return BtreeBucket::lowWaterMark() - 1; } + virtual int leftSize() const { return BtreeBucket::bodySize() - biggestSize() - sizeof( _KeyNode ) - ( BtreeBucket::lowWaterMark() - 1 ); } + }; + + class MergeSizeJustRightLeft : public MergeSizeBase { + protected: + virtual int leftSize() const { return BtreeBucket::lowWaterMark() - 1; } + virtual int rightSize() const { return BtreeBucket::bodySize() - biggestSize() - sizeof( _KeyNode ) - ( BtreeBucket::lowWaterMark() - 1 ); } + virtual const char * delKeys() const { return "yzkl"; } + }; + + class MergeSizeRight : public MergeSizeJustRightRight { + virtual int rightSize() const { return MergeSizeJustRightRight::rightSize() - 1; } + virtual int leftSize() const { return MergeSizeJustRightRight::leftSize() + 1; } + }; + + class MergeSizeLeft : public MergeSizeJustRightLeft { + virtual int rightSize() const { return MergeSizeJustRightLeft::rightSize() + 1; } + virtual int leftSize() const { return MergeSizeJustRightLeft::leftSize() - 1; } + }; + + 
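+    // The variants below probe the thresholds one byte at a time: the JustRight
+    // cases above appear to sit at the largest sizes where a merge still fires,
+    // so shifting or adding a single byte (as each subclass does) should land the
+    // buckets just past the lowWaterMark() or combined-size boundary instead.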
class NoMergeBelowMarkRight : public MergeSizeJustRightRight { + virtual int rightSize() const { return MergeSizeJustRightRight::rightSize() + 1; } + virtual int leftSize() const { return MergeSizeJustRightRight::leftSize() - 1; } + virtual bool merge() const { return false; } + }; + + class NoMergeBelowMarkLeft : public MergeSizeJustRightLeft { + virtual int rightSize() const { return MergeSizeJustRightLeft::rightSize() - 1; } + virtual int leftSize() const { return MergeSizeJustRightLeft::leftSize() + 1; } + virtual bool merge() const { return false; } + }; + + class MergeSizeRightTooBig : public MergeSizeJustRightLeft { + virtual int rightSize() const { return MergeSizeJustRightLeft::rightSize() + 1; } + virtual bool merge() const { return false; } + }; + + class MergeSizeLeftTooBig : public MergeSizeJustRightRight { + virtual int leftSize() const { return MergeSizeJustRightRight::leftSize() + 1; } + virtual bool merge() const { return false; } + }; + + class BalanceOneLeftToRight : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{$10:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null},b:{$20:null,$30:null,$40:null,$50:null,a:null},_:{c:null}}", id() ); + ASSERT_EQUALS( 14, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << bigNumString( 0x40 ) ); +// dump(); + ASSERT( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 13, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{$6:{$1:null,$2:null,$3:null,$4:null,$5:null},b:{$10:null,$20:null,$30:null,$50:null,a:null},_:{c:null}}", id() ); + } + }; + + class BalanceOneRightToLeft : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{$10:{$1:null,$2:null,$3:null,$4:null},b:{$20:null,$30:null,$40:null,$50:null,$60:null,$70:null},_:{c:null}}", id() ); + ASSERT_EQUALS( 13, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << bigNumString( 0x3 ) ); +// dump(); + ASSERT( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 12, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{$20:{$1:null,$2:null,$4:null,$10:null},b:{$30:null,$40:null,$50:null,$60:null,$70:null},_:{c:null}}", id() ); + } + }; + + class BalanceThreeLeftToRight : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{$20:{$1:{$0:null},$3:{$2:null},$5:{$4:null},$7:{$6:null},$9:{$8:null},$11:{$10:null},$13:{$12:null},_:{$14:null}},b:{$30:null,$40:{$35:null},$50:{$45:null}},_:{c:null}}", id() ); + ASSERT_EQUALS( 23, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 14, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << bigNumString( 0x30 ) ); + // dump(); + ASSERT( unindex( k ) ); + // dump(); + ASSERT_EQUALS( 22, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 14, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{$9:{$1:{$0:null},$3:{$2:null},$5:{$4:null},$7:{$6:null},_:{$8:null}},b:{$11:{$10:null},$13:{$12:null},$20:{$14:null},$40:{$35:null},$50:{$45:null}},_:{c:null}}", id() ); + } + }; + + class BalanceThreeRightToLeft : public Base { + public: + void run() { + string ns = id().indexNamespace(); + 
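+            // deleting $5 leaves the left bucket underfull; per the expected
+            // structure below, $20 rotates down, $30 and $40 shift left, and $50
+            // is promoted as the new separator -- three keys moved right-to-left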
ArtificialTree::setTree( "{$20:{$1:{$0:null},$3:{$2:null},$5:null,_:{$14:null}},b:{$30:{$25:null},$40:{$35:null},$50:{$45:null},$60:{$55:null},$70:{$65:null},$80:{$75:null},$90:{$85:null},$100:{$95:null}},_:{c:null}}", id() ); + ASSERT_EQUALS( 25, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 15, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << bigNumString( 0x5 ) ); +// dump(); + ASSERT( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 24, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 15, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{$50:{$1:{$0:null},$3:{$2:null},$20:{$14:null},$30:{$25:null},$40:{$35:null},_:{$45:null}},b:{$60:{$55:null},$70:{$65:null},$80:{$75:null},$90:{$85:null},$100:{$95:null}},_:{c:null}}", id() ); + } + }; + + class BalanceSingleParentKey : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{$10:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null},_:{$20:null,$30:null,$40:null,$50:null,a:null}}", id() ); + ASSERT_EQUALS( 12, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << bigNumString( 0x40 ) ); +// dump(); + ASSERT( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 11, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{$6:{$1:null,$2:null,$3:null,$4:null,$5:null},_:{$10:null,$20:null,$30:null,$50:null,a:null}}", id() ); + } + }; + + class PackEmpty : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{a:null}", id() ); + BSONObj k = BSON( "" << "a" ); + ASSERT( unindex( k ) ); + ArtificialTree *t = ArtificialTree::is( dl() ); + t->forcePack(); + Tester::checkEmpty( t, id() ); + } + class Tester : public ArtificialTree { + public: + static void checkEmpty( ArtificialTree *a, const IndexDetails &id ) { + Tester *t = static_cast< Tester * >( a ); + ASSERT_EQUALS( 0, t->n ); + ASSERT( !( t->flags & Packed ) ); + Ordering o = Ordering::make( id.keyPattern() ); + int zero = 0; + t->_packReadyForMod( o, zero ); + ASSERT_EQUALS( 0, t->n ); + ASSERT_EQUALS( 0, t->topSize ); + ASSERT_EQUALS( BtreeBucket::bodySize(), t->emptySize ); + ASSERT( t->flags & Packed ); + } + }; + }; + + class PackedDataSizeEmpty : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{a:null}", id() ); + BSONObj k = BSON( "" << "a" ); + ASSERT( unindex( k ) ); + ArtificialTree *t = ArtificialTree::is( dl() ); + t->forcePack(); + Tester::checkEmpty( t, id() ); + } + class Tester : public ArtificialTree { + public: + static void checkEmpty( ArtificialTree *a, const IndexDetails &id ) { + Tester *t = static_cast< Tester * >( a ); + ASSERT_EQUALS( 0, t->n ); + ASSERT( !( t->flags & Packed ) ); + int zero = 0; + ASSERT_EQUALS( 0, t->packedDataSize( zero ) ); + ASSERT( !( t->flags & Packed ) ); + } + }; + }; + + class BalanceSingleParentKeyPackParent : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{$10:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null},_:{$20:null,$30:null,$40:null,$50:null,a:null}}", id() ); + ASSERT_EQUALS( 12, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + // force parent pack + ArtificialTree::is( dl() )->forcePack(); + BSONObj k = BSON( "" << 
bigNumString( 0x40 ) ); +// dump(); + ASSERT( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 11, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{$6:{$1:null,$2:null,$3:null,$4:null,$5:null},_:{$10:null,$20:null,$30:null,$50:null,a:null}}", id() ); + } + }; + + class BalanceSplitParent : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{$10$10:{$1:null,$2:null,$3:null,$4:null},$100:{$20:null,$30:null,$40:null,$50:null,$60:null,$70:null,$80:null},$200:null,$300:null,$400:null,$500:null,$600:null,$700:null,$800:null,$900:null,_:{c:null}}", id() ); + ASSERT_EQUALS( 22, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << bigNumString( 0x3 ) ); +// dump(); + ASSERT( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 21, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 6, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{$500:{$30:{$1:null,$2:null,$4:null,$10$10:null,$20:null},$100:{$40:null,$50:null,$60:null,$70:null,$80:null},$200:null,$300:null,$400:null},_:{$600:null,$700:null,$800:null,$900:null,_:{c:null}}}", id() ); + } + }; + + class RebalancedSeparatorBase : public Base { + public: + void run() { + ArtificialTree::setTree( treeSpec(), id() ); + modTree(); + Tester::checkSeparator( id(), expectedSeparator() ); + } + virtual string treeSpec() const = 0; + virtual int expectedSeparator() const = 0; + virtual void modTree() {} + struct Tester : public ArtificialTree { + static void checkSeparator( const IndexDetails& id, int expected ) { + ASSERT_EQUALS( expected, static_cast< Tester * >( id.head.btreemod() )->rebalancedSeparatorPos( id.head, 0 ) ); + } + }; + }; + + class EvenRebalanceLeft : public RebalancedSeparatorBase { + virtual string treeSpec() const { return "{$7:{$1:null,$2$31f:null,$3:null,$4$31f:null,$5:null,$6:null},_:{$8:null,$9:null,$10$31e:null}}"; } + virtual int expectedSeparator() const { return 4; } + }; + + class EvenRebalanceLeftCusp : public RebalancedSeparatorBase { + virtual string treeSpec() const { return "{$6:{$1:null,$2$31f:null,$3:null,$4$31f:null,$5:null},_:{$7:null,$8:null,$9$31e:null,$10:null}}"; } + virtual int expectedSeparator() const { return 4; } + }; + + class EvenRebalanceRight : public RebalancedSeparatorBase { + virtual string treeSpec() const { return "{$3:{$1:null,$2$31f:null},_:{$4$31f:null,$5:null,$6:null,$7:null,$8$31e:null,$9:null,$10:null}}"; } + virtual int expectedSeparator() const { return 4; } + }; + + class EvenRebalanceRightCusp : public RebalancedSeparatorBase { + virtual string treeSpec() const { return "{$4$31f:{$1:null,$2$31f:null,$3:null},_:{$5:null,$6:null,$7$31e:null,$8:null,$9:null,$10:null}}"; } + virtual int expectedSeparator() const { return 4; } + }; + + class EvenRebalanceCenter : public RebalancedSeparatorBase { + virtual string treeSpec() const { return "{$5:{$1:null,$2$31f:null,$3:null,$4$31f:null},_:{$6:null,$7$31e:null,$8:null,$9:null,$10:null}}"; } + virtual int expectedSeparator() const { return 4; } + }; + + class OddRebalanceLeft : public RebalancedSeparatorBase { + virtual string treeSpec() const { return "{$6$31f:{$1:null,$2:null,$3:null,$4:null,$5:null},_:{$7:null,$8:null,$9:null,$10:null}}"; } + virtual int expectedSeparator() const { return 4; } + }; + + class OddRebalanceRight : public RebalancedSeparatorBase { + virtual string treeSpec() 
const { return "{$4:{$1:null,$2:null,$3:null},_:{$5:null,$6:null,$7:null,$8$31f:null,$9:null,$10:null}}"; } + virtual int expectedSeparator() const { return 4; } + }; + + class OddRebalanceCenter : public RebalancedSeparatorBase { + virtual string treeSpec() const { return "{$5:{$1:null,$2:null,$3:null,$4:null},_:{$6:null,$7:null,$8:null,$9:null,$10$31f:null}}"; } + virtual int expectedSeparator() const { return 4; } + }; + + class RebalanceEmptyRight : public RebalancedSeparatorBase { + virtual string treeSpec() const { return "{$a:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null,$7:null,$8:null,$9:null},_:{$b:null}}"; } + virtual void modTree() { + BSONObj k = BSON( "" << bigNumString( 0xb ) ); + ASSERT( unindex( k ) ); + } + virtual int expectedSeparator() const { return 4; } + }; + + class RebalanceEmptyLeft : public RebalancedSeparatorBase { + virtual string treeSpec() const { return "{$a:{$1:null},_:{$11:null,$12:null,$13:null,$14:null,$15:null,$16:null,$17:null,$18:null,$19:null}}"; } + virtual void modTree() { + BSONObj k = BSON( "" << bigNumString( 0x1 ) ); + ASSERT( unindex( k ) ); + } + virtual int expectedSeparator() const { return 4; } + }; + + class NoMoveAtLowWaterMarkRight : public MergeSizeJustRightRight { + virtual int rightSize() const { return MergeSizeJustRightRight::rightSize() + 1; } + virtual void initCheck() { _oldTop = bt()->keyNode( 0 ).key.toBson(); } + virtual void validate() { ASSERT_EQUALS( _oldTop, bt()->keyNode( 0 ).key.toBson() ); } + virtual bool merge() const { return false; } + protected: + BSONObj _oldTop; + }; + + class MoveBelowLowWaterMarkRight : public NoMoveAtLowWaterMarkRight { + virtual int rightSize() const { return MergeSizeJustRightRight::rightSize(); } + virtual int leftSize() const { return MergeSizeJustRightRight::leftSize() + 1; } + // different top means we rebalanced + virtual void validate() { ASSERT( !( _oldTop == bt()->keyNode( 0 ).key.toBson() ) ); } + }; + + class NoMoveAtLowWaterMarkLeft : public MergeSizeJustRightLeft { + virtual int leftSize() const { return MergeSizeJustRightLeft::leftSize() + 1; } + virtual void initCheck() { _oldTop = bt()->keyNode( 0 ).key.toBson(); } + virtual void validate() { ASSERT_EQUALS( _oldTop, bt()->keyNode( 0 ).key.toBson() ); } + virtual bool merge() const { return false; } + protected: + BSONObj _oldTop; + }; + + class MoveBelowLowWaterMarkLeft : public NoMoveAtLowWaterMarkLeft { + virtual int leftSize() const { return MergeSizeJustRightLeft::leftSize(); } + virtual int rightSize() const { return MergeSizeJustRightLeft::rightSize() + 1; } + // different top means we rebalanced + virtual void validate() { ASSERT( !( _oldTop == bt()->keyNode( 0 ).key.toBson() ) ); } + }; + + class PreferBalanceLeft : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{$10:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null},$20:{$11:null,$12:null,$13:null,$14:null},_:{$30:null}}", id() ); + ASSERT_EQUALS( 13, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << bigNumString( 0x12 ) ); +// dump(); + ASSERT( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 12, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{$5:{$1:null,$2:null,$3:null,$4:null},$20:{$6:null,$10:null,$11:null,$13:null,$14:null},_:{$30:null}}", id() ); + } + }; + + class PreferBalanceRight : public Base { + public: + void run() 
{ + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{$10:{$1:null},$20:{$11:null,$12:null,$13:null,$14:null},_:{$31:null,$32:null,$33:null,$34:null,$35:null,$36:null}}", id() ); + ASSERT_EQUALS( 13, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << bigNumString( 0x12 ) ); + // dump(); + ASSERT( unindex( k ) ); + // dump(); + ASSERT_EQUALS( 12, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{$10:{$1:null},$31:{$11:null,$13:null,$14:null,$20:null},_:{$32:null,$33:null,$34:null,$35:null,$36:null}}", id() ); + } + }; + + class RecursiveMergeThenBalance : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{$10:{$5:{$1:null,$2:null},$8:{$6:null,$7:null}},_:{$20:null,$30:null,$40:null,$50:null,$60:null,$70:null,$80:null,$90:null}}", id() ); + ASSERT_EQUALS( 15, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 5, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << bigNumString( 0x7 ) ); + // dump(); + ASSERT( unindex( k ) ); + // dump(); + ASSERT_EQUALS( 14, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{$40:{$8:{$1:null,$2:null,$5:null,$6:null},$10:null,$20:null,$30:null},_:{$50:null,$60:null,$70:null,$80:null,$90:null}}", id() ); + } + }; + + class MergeRightEmpty : public MergeSizeBase { + protected: + virtual int rightAdditional() const { return 1; } + virtual int leftAdditional() const { return 1; } + virtual const char * delKeys() const { return "lz"; } + virtual int rightSize() const { return 0; } + virtual int leftSize() const { return BtreeBucket::bodySize() - biggestSize() - sizeof( _KeyNode ); } + }; + + class MergeMinRightEmpty : public MergeSizeBase { + protected: + virtual int rightAdditional() const { return 1; } + virtual int leftAdditional() const { return 0; } + virtual const char * delKeys() const { return "z"; } + virtual int rightSize() const { return 0; } + virtual int leftSize() const { return bigSize() + sizeof( _KeyNode ); } + }; + + class MergeLeftEmpty : public MergeSizeBase { + protected: + virtual int rightAdditional() const { return 1; } + virtual int leftAdditional() const { return 1; } + virtual const char * delKeys() const { return "zl"; } + virtual int leftSize() const { return 0; } + virtual int rightSize() const { return BtreeBucket::bodySize() - biggestSize() - sizeof( _KeyNode ); } + }; + + class MergeMinLeftEmpty : public MergeSizeBase { + protected: + virtual int leftAdditional() const { return 1; } + virtual int rightAdditional() const { return 0; } + virtual const char * delKeys() const { return "l"; } + virtual int leftSize() const { return 0; } + virtual int rightSize() const { return bigSize() + sizeof( _KeyNode ); } + }; + + class BalanceRightEmpty : public MergeRightEmpty { + protected: + virtual int leftSize() const { return BtreeBucket::bodySize() - biggestSize() - sizeof( _KeyNode ) + 1; } + virtual bool merge() const { return false; } + virtual void initCheck() { _oldTop = bt()->keyNode( 0 ).key.toBson(); } + virtual void validate() { ASSERT( !( _oldTop == bt()->keyNode( 0 ).key.toBson() ) ); } + private: + BSONObj _oldTop; + }; + + class BalanceLeftEmpty : public MergeLeftEmpty { + protected: + virtual int rightSize() const { return BtreeBucket::bodySize() - 
biggestSize() - sizeof( _KeyNode ) + 1; } + virtual bool merge() const { return false; } + virtual void initCheck() { _oldTop = bt()->keyNode( 0 ).key.toBson(); } + virtual void validate() { ASSERT( !( _oldTop == bt()->keyNode( 0 ).key.toBson() ) ); } + private: + BSONObj _oldTop; + }; + + class DelEmptyNoNeighbors : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{b:{a:null}}", id() ); + ASSERT_EQUALS( 2, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 2, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << "a" ); + // dump(); + ASSERT( unindex( k ) ); + // dump(); + ASSERT_EQUALS( 1, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 1, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{b:null}", id() ); + } + }; + + class DelEmptyEmptyNeighbors : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{a:null,c:{b:null},d:null}", id() ); + ASSERT_EQUALS( 4, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 2, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << "b" ); + // dump(); + ASSERT( unindex( k ) ); + // dump(); + ASSERT_EQUALS( 3, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 1, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{a:null,c:null,d:null}", id() ); + } + }; + + class DelInternal : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{a:null,c:{b:null},d:null}", id() ); + long long unused = 0; + ASSERT_EQUALS( 4, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 2, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << "c" ); +// dump(); + ASSERT( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 3, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 1, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{a:null,b:null,d:null}", id() ); + } + }; + + class DelInternalReplaceWithUnused : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{a:null,c:{b:null},d:null}", id() ); + getDur().writingInt( const_cast< DiskLoc& >( bt()->keyNode( 1 ).prevChildBucket.btree()->keyNode( 0 ).recordLoc ).GETOFS() ) |= 1; // make unused + long long unused = 0; + ASSERT_EQUALS( 3, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 1, unused ); + ASSERT_EQUALS( 2, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << "c" ); +// dump(); + ASSERT( unindex( k ) ); +// dump(); + unused = 0; + ASSERT_EQUALS( 2, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 1, unused ); + ASSERT_EQUALS( 1, nsdetails( ns.c_str() )->stats.nrecords ); + // doesn't discriminate between used and unused + ArtificialTree::checkStructure( "{a:null,b:null,d:null}", id() ); + } + }; + + class DelInternalReplaceRight : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{a:null,_:{b:null}}", id() ); + long long unused = 0; + ASSERT_EQUALS( 2, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 2, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << "a" ); +// dump(); + ASSERT( unindex( k ) ); +// dump(); + unused = 0; + ASSERT_EQUALS( 1, bt()->fullValidate( dl(), 
order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 1, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{b:null}", id() ); + } + }; + + class DelInternalPromoteKey : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{a:null,y:{d:{c:{b:null}},_:{e:null}},z:null}", id() ); + long long unused = 0; + ASSERT_EQUALS( 7, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 5, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << "y" ); +// dump(); + ASSERT( unindex( k ) ); +// dump(); + unused = 0; + ASSERT_EQUALS( 6, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{a:null,e:{c:{b:null},d:null},z:null}", id() ); + } + }; + + class DelInternalPromoteRightKey : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{a:null,_:{e:{c:null},_:{f:null}}}", id() ); + long long unused = 0; + ASSERT_EQUALS( 4, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << "a" ); +// dump(); + ASSERT( unindex( k ) ); +// dump(); + unused = 0; + ASSERT_EQUALS( 3, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 2, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{c:null,_:{e:null,f:null}}", id() ); + } + }; + + class DelInternalReplacementPrevNonNull : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{a:null,d:{c:{b:null}},e:null}", id() ); + long long unused = 0; + ASSERT_EQUALS( 5, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << "d" ); + // dump(); + ASSERT( unindex( k ) ); + // dump(); + ASSERT_EQUALS( 4, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 1, unused ); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{a:null,d:{c:{b:null}},e:null}", id() ); + ASSERT( bt()->keyNode( 1 ).recordLoc.getOfs() & 1 ); // check 'unused' key + } + }; + + class DelInternalReplacementNextNonNull : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{a:null,_:{c:null,_:{d:null}}}", id() ); + long long unused = 0; + ASSERT_EQUALS( 3, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << "a" ); + // dump(); + ASSERT( unindex( k ) ); + // dump(); + ASSERT_EQUALS( 2, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 1, unused ); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{a:null,_:{c:null,_:{d:null}}}", id() ); + ASSERT( bt()->keyNode( 0 ).recordLoc.getOfs() & 1 ); // check 'unused' key + } + }; + + class DelInternalSplitPromoteLeft : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{$10:null,$20:null,$30$10:{$25:{$23:null},_:{$27:null}},$40:null,$50:null,$60:null,$70:null,$80:null,$90:null,$100:null}", id() ); + long long unused = 0; + 
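+                // per the expected structure below, deleting the oversized $30$10
+                // pulls $27 up from its subtree as the replacement, and the full
+                // root then splits with $60 promoted -- presumably the "split
+                // promote" the test name refers to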
ASSERT_EQUALS( 13, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << bigNumString( 0x30, 0x10 ) ); +// dump(); + ASSERT( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 12, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{$60:{$10:null,$20:null,$27:{$23:null,$25:null},$40:null,$50:null},_:{$70:null,$80:null,$90:null,$100:null}}", id() ); + } + }; + + class DelInternalSplitPromoteRight : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{$10:null,$20:null,$30:null,$40:null,$50:null,$60:null,$70:null,$80:null,$90:null,$100$10:{$95:{$93:null},_:{$97:null}}}", id() ); + long long unused = 0; + ASSERT_EQUALS( 13, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << bigNumString( 0x100, 0x10 ) ); +// dump(); + ASSERT( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 12, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{$80:{$10:null,$20:null,$30:null,$40:null,$50:null,$60:null,$70:null},_:{$90:null,$97:{$93:null,$95:null}}}", id() ); + } + }; + + class All : public Suite { + public: + All() : Suite( testName ) { + } + + void setupTests() { + add< Create >(); + add< SimpleInsertDelete >(); + add< SplitRightHeavyBucket >(); + add< SplitLeftHeavyBucket >(); + add< MissingLocate >(); + add< MissingLocateMultiBucket >(); + add< SERVER983 >(); + add< DontReuseUnused >(); + add< PackUnused >(); + add< DontDropReferenceKey >(); + add< MergeBucketsLeft >(); + add< MergeBucketsRight >(); +// add< MergeBucketsHead >(); + add< MergeBucketsDontReplaceHead >(); + add< MergeBucketsDelInternal >(); + add< MergeBucketsRightNull >(); + add< DontMergeSingleBucket >(); + add< ParentMergeNonRightToLeft >(); + add< ParentMergeNonRightToRight >(); + add< CantMergeRightNoMerge >(); + add< CantMergeLeftNoMerge >(); + add< MergeOption >(); + add< ForceMergeLeft >(); + add< ForceMergeRight >(); + add< RecursiveMerge >(); + add< RecursiveMergeRightBucket >(); + add< RecursiveMergeDoubleRightBucket >(); + add< MergeSizeJustRightRight >(); + add< MergeSizeJustRightLeft >(); + add< MergeSizeRight >(); + add< MergeSizeLeft >(); + add< NoMergeBelowMarkRight >(); + add< NoMergeBelowMarkLeft >(); + add< MergeSizeRightTooBig >(); + add< MergeSizeLeftTooBig >(); + add< BalanceOneLeftToRight >(); + add< BalanceOneRightToLeft >(); + add< BalanceThreeLeftToRight >(); + add< BalanceThreeRightToLeft >(); + add< BalanceSingleParentKey >(); + add< PackEmpty >(); + add< PackedDataSizeEmpty >(); + add< BalanceSingleParentKeyPackParent >(); + add< BalanceSplitParent >(); + add< EvenRebalanceLeft >(); + add< EvenRebalanceLeftCusp >(); + add< EvenRebalanceRight >(); + add< EvenRebalanceRightCusp >(); + add< EvenRebalanceCenter >(); + add< OddRebalanceLeft >(); + add< OddRebalanceRight >(); + add< OddRebalanceCenter >(); + add< RebalanceEmptyRight >(); + add< RebalanceEmptyLeft >(); + add< NoMoveAtLowWaterMarkRight >(); + add< MoveBelowLowWaterMarkRight >(); + add< NoMoveAtLowWaterMarkLeft >(); + add< MoveBelowLowWaterMarkLeft >(); + add< PreferBalanceLeft >(); + add< PreferBalanceRight >(); + add< 
RecursiveMergeThenBalance >(); + add< MergeRightEmpty >(); + add< MergeMinRightEmpty >(); + add< MergeLeftEmpty >(); + add< MergeMinLeftEmpty >(); + add< BalanceRightEmpty >(); + add< BalanceLeftEmpty >(); + add< DelEmptyNoNeighbors >(); + add< DelEmptyEmptyNeighbors >(); + add< DelInternal >(); + add< DelInternalReplaceWithUnused >(); + add< DelInternalReplaceRight >(); + add< DelInternalPromoteKey >(); + add< DelInternalPromoteRightKey >(); + add< DelInternalReplacementPrevNonNull >(); + add< DelInternalReplacementNextNonNull >(); + add< DelInternalSplitPromoteLeft >(); + add< DelInternalSplitPromoteRight >(); + } + } myall; diff --git a/dbtests/cursortests.cpp b/dbtests/cursortests.cpp index 71585d6746d..20e7bf9e8cc 100644 --- a/dbtests/cursortests.cpp +++ b/dbtests/cursortests.cpp @@ -67,7 +67,8 @@ namespace CursorTests { int v[] = { 1, 2, 4, 6 }; boost::shared_ptr< FieldRangeVector > frv( vec( v, 4 ) ); Client::Context ctx( ns ); - BtreeCursor c( nsdetails( ns ), 1, nsdetails( ns )->idx(1), frv, 1 ); + scoped_ptr<BtreeCursor> _c( BtreeCursor::make( nsdetails( ns ), 1, nsdetails( ns )->idx(1), frv, 1 ) ); + BtreeCursor &c = *_c.get(); ASSERT_EQUALS( "BtreeCursor a_1 multi", c.toString() ); double expected[] = { 1, 2, 4, 5, 6 }; for( int i = 0; i < 5; ++i ) { @@ -95,7 +96,8 @@ namespace CursorTests { int v[] = { -50, 2, 40, 60, 109, 200 }; boost::shared_ptr< FieldRangeVector > frv( vec( v, 6 ) ); Client::Context ctx( ns ); - BtreeCursor c( nsdetails( ns ), 1, nsdetails( ns )->idx(1), frv, 1 ); + scoped_ptr<BtreeCursor> _c( BtreeCursor::make(nsdetails( ns ), 1, nsdetails( ns )->idx(1), frv, 1 ) ); + BtreeCursor &c = *_c.get(); ASSERT_EQUALS( "BtreeCursor a_1 multi", c.toString() ); double expected[] = { 0, 1, 2, 109 }; for( int i = 0; i < 4; ++i ) { @@ -121,7 +123,8 @@ namespace CursorTests { int v[] = { 1, 2, 4, 6 }; boost::shared_ptr< FieldRangeVector > frv( vec( v, 4, -1 ) ); Client::Context ctx( ns ); - BtreeCursor c( nsdetails( ns ), 1, nsdetails( ns )->idx(1), frv, -1 ); + scoped_ptr<BtreeCursor> _c( BtreeCursor::make( nsdetails( ns ), 1, nsdetails( ns )->idx(1), frv, -1 ) ); + BtreeCursor& c = *_c.get(); ASSERT_EQUALS( "BtreeCursor a_1 reverse multi", c.toString() ); double expected[] = { 6, 5, 4, 2, 1 }; for( int i = 0; i < 5; ++i ) { @@ -146,18 +149,24 @@ namespace CursorTests { _c.insert( ns(), o ); } void check( const BSONObj &spec ) { - _c.ensureIndex( ns(), idx() ); + { + BSONObj keypat = idx(); + cout << keypat.toString() << endl; + _c.ensureIndex( ns(), idx() ); + } + Client::Context ctx( ns() ); + NamespaceDetails *d = nsdetails( ns() ); FieldRangeSet frs( ns(), spec, true ); // orphan spec for this test. 
IndexSpec *idxSpec = new IndexSpec( idx() ); boost::shared_ptr< FieldRangeVector > frv( new FieldRangeVector( frs, *idxSpec, direction() ) ); - BtreeCursor c( nsdetails( ns() ), 1, nsdetails( ns() )->idx( 1 ), frv, direction() ); + scoped_ptr<BtreeCursor> c( BtreeCursor::make( nsdetails( ns() ), 1, nsdetails( ns() )->idx( 1 ), frv, direction() ) ); Matcher m( spec ); int count = 0; - while( c.ok() ) { - ASSERT( m.matches( c.current() ) ); - c.advance(); + while( c->ok() ) { + ASSERT( m.matches( c->current() ) ); + c->advance(); ++count; } int expectedCount = 0; diff --git a/dbtests/framework.cpp b/dbtests/framework.cpp index 92b40e4fd5f..808acb4caaf 100644 --- a/dbtests/framework.cpp +++ b/dbtests/framework.cpp @@ -84,7 +84,8 @@ namespace mongo { string currentTestName; Result * Suite::run( const string& filter ) { - tlogLevel = -1; + // set tlogLevel to -1 to suppress tlog() output in a test program + // TEMP tlogLevel = -1; log(1) << "\t about to setupTests" << endl; setupTests(); diff --git a/dbtests/jsobjtests.cpp b/dbtests/jsobjtests.cpp index 342ad75af5c..fdc72fa272d 100644 --- a/dbtests/jsobjtests.cpp +++ b/dbtests/jsobjtests.cpp @@ -23,11 +23,52 @@ #include "../db/json.h" #include "../db/repl.h" #include "../db/extsort.h" - #include "dbtests.h" #include "../util/mongoutils/checksum.h" +#include "../db/key.h" +#include "../db/btree.h" namespace JsobjTests { + + void keyTest(const BSONObj& o) { + static KeyV1Owned *kLast; + static BSONObj last; + + KeyV1Owned *key = new KeyV1Owned(o); + KeyV1Owned& k = *key; + BSONObj x = k.toBson(); + int res = o.woCompare(x, BSONObj(), /*considerfieldname*/false); + if( res ) { + cout << o.toString() << endl; + k.toBson(); + cout << x.toString() << endl; + o.woCompare(x, BSONObj(), /*considerfieldname*/false); + ASSERT( res == 0 ); + } + ASSERT( k.woEqual(k) ); + ASSERT( !k.isCompactFormat() || k.dataSize() < o.objsize() ); + + if( kLast ) { + int r1 = o.woCompare(last, BSONObj(), false); + int r2 = k.woCompare(*kLast, Ordering::make(BSONObj())); + bool ok = (r1<0 && r2<0) || (r1>0&&r2>0) || r1==r2; + if( !ok ) { + cout << "r1r2 " << r1 << ' ' << r2 << endl; + cout << "o:" << o.toString() << endl; + cout << "last:" << last.toString() << endl; + cout << "k:" << k.toString() << endl; + cout << "kLast:" << kLast->toString() << endl; + int r3 = k.woCompare(*kLast, Ordering::make(BSONObj())); + cout << r3 << endl; + } + ASSERT(ok); + } + + delete kLast; + kLast = key; + last = o.getOwned(); + } + class BufBuilderBasic { public: void run() { @@ -176,12 +217,12 @@ namespace JsobjTests { b.append( "" , "c" ); b.appendNull( "" ); BSONObj o = b.obj(); + keyTest(o); ASSERT( o.woSortOrder( BSON( "" << "b" << "" << "h" ) , key ) > 0 ); ASSERT( BSON( "" << "b" << "" << "h" ).woSortOrder( o , key ) < 0 ); } - ASSERT( BSON( "" << "a" ).woCompare( BSON( "" << "a" << "" << "c" ) ) < 0 ); { BSONObjBuilder b; @@ -322,6 +363,14 @@ namespace JsobjTests { struct AppendIntOrLL { void run() { const long long billion = 1000*1000*1000; + + { + BSONObjBuilder b; + b.appendIntOrLL("L4", -4*billion); + keyTest(b.obj()); + keyTest( BSON("" << billion) ); + } + BSONObjBuilder b; b.appendIntOrLL("i1", 1); b.appendIntOrLL("i2", -1); @@ -336,6 +385,7 @@ namespace JsobjTests { b.appendIntOrLL("L6", -16*billion); BSONObj o = b.obj(); + keyTest(o); ASSERT(o["i1"].type() == NumberInt); ASSERT(o["i1"].number() == 1); @@ -371,6 +421,7 @@ namespace JsobjTests { b.appendNumber( "e" , 1024LL*1024*1024*1024*1024*1024 ); BSONObj o = b.obj(); + keyTest(o); ASSERT( o["a"].type() == NumberInt ); 
diff --git a/dbtests/jsobjtests.cpp b/dbtests/jsobjtests.cpp
index 342ad75af5c..fdc72fa272d 100644
--- a/dbtests/jsobjtests.cpp
+++ b/dbtests/jsobjtests.cpp
@@ -23,11 +23,52 @@
 #include "../db/json.h"
 #include "../db/repl.h"
 #include "../db/extsort.h"
-
 #include "dbtests.h"
 #include "../util/mongoutils/checksum.h"
+#include "../db/key.h"
+#include "../db/btree.h"

 namespace JsobjTests {
+
+    void keyTest(const BSONObj& o) {
+        static KeyV1Owned *kLast;
+        static BSONObj last;
+
+        KeyV1Owned *key = new KeyV1Owned(o);
+        KeyV1Owned& k = *key;
+        BSONObj x = k.toBson();
+        int res = o.woCompare(x, BSONObj(), /*considerfieldname*/false);
+        if( res ) {
+            cout << o.toString() << endl;
+            k.toBson();
+            cout << x.toString() << endl;
+            o.woCompare(x, BSONObj(), /*considerfieldname*/false);
+            ASSERT( res == 0 );
+        }
+        ASSERT( k.woEqual(k) );
+        ASSERT( !k.isCompactFormat() || k.dataSize() < o.objsize() );
+
+        if( kLast ) {
+            int r1 = o.woCompare(last, BSONObj(), false);
+            int r2 = k.woCompare(*kLast, Ordering::make(BSONObj()));
+            bool ok = (r1<0 && r2<0) || (r1>0&&r2>0) || r1==r2;
+            if( !ok ) {
+                cout << "r1r2 " << r1 << ' ' << r2 << endl;
+                cout << "o:" << o.toString() << endl;
+                cout << "last:" << last.toString() << endl;
+                cout << "k:" << k.toString() << endl;
+                cout << "kLast:" << kLast->toString() << endl;
+                int r3 = k.woCompare(*kLast, Ordering::make(BSONObj()));
+                cout << r3 << endl;
+            }
+            ASSERT(ok);
+        }
+
+        delete kLast;
+        kLast = key;
+        last = o.getOwned();
+    }
+
     class BufBuilderBasic {
     public:
         void run() {
@@ -176,12 +217,12 @@
                 b.append( "" , "c" );
                 b.appendNull( "" );
                 BSONObj o = b.obj();
+                keyTest(o);
                 ASSERT( o.woSortOrder( BSON( "" << "b" << "" << "h" ) , key ) > 0 );
                 ASSERT( BSON( "" << "b" << "" << "h" ).woSortOrder( o , key ) < 0 );
             }

-            ASSERT( BSON( "" << "a" ).woCompare( BSON( "" << "a" << "" << "c" ) ) < 0 );
             {
                 BSONObjBuilder b;
@@ -322,6 +363,14 @@
         struct AppendIntOrLL {
             void run() {
                 const long long billion = 1000*1000*1000;
+
+                {
+                    BSONObjBuilder b;
+                    b.appendIntOrLL("L4", -4*billion);
+                    keyTest(b.obj());
+                    keyTest( BSON("" << billion) );
+                }
+
                 BSONObjBuilder b;
                 b.appendIntOrLL("i1", 1);
                 b.appendIntOrLL("i2", -1);
@@ -336,6 +385,7 @@
                 b.appendIntOrLL("L6", -16*billion);

                 BSONObj o = b.obj();
+                keyTest(o);

                 ASSERT(o["i1"].type() == NumberInt);
                 ASSERT(o["i1"].number() == 1);
@@ -371,6 +421,7 @@
                 b.appendNumber( "e" , 1024LL*1024*1024*1024*1024*1024 );

                 BSONObj o = b.obj();
+                keyTest(o);

                 ASSERT( o["a"].type() == NumberInt );
                 ASSERT( o["b"].type() == NumberDouble );
@@ -386,7 +437,24 @@
             void run() {
                 string spec = "{ a: [ \"a\", \"b\" ] }";
                 ASSERT_EQUALS( spec, fromjson( spec ).toString() );
-            }
+
+                BSONObj x = BSON( "a" << "astring" << "b" << "str" );
+                keyTest(x);
+                keyTest(x);
+                BSONObj y = BSON( "a" << "astring" << "b" << "stra" );
+                keyTest(y);
+                y = BSON( "a" << "" );
+                keyTest(y);
+
+                keyTest( BSON("abc" << true ) );
+                keyTest( BSON("abc" << false ) );
+                keyTest( BSON("abc" << false << "b" << true ) );
+
+                Date_t now = jsTime();
+                keyTest( BSON("" << now << "" << 3 << "" << jstNULL << "" << true) );
+                keyTest( BSON("" << now << "" << 3 << "" << BSONObj() << "" << true) );
+
+            }
         };

         class ToStringNumber {
@@ -405,6 +473,8 @@
                 b.append( "g" , -123.456 );

                 BSONObj x = b.obj();
+                keyTest(x);
+
                 ASSERT_EQUALS( "4", x["a"].toString( false , true ) );
                 ASSERT_EQUALS( "5.0", x["b"].toString( false , true ) );
                 ASSERT_EQUALS( "6", x["c"].toString( false , true ) );
@@ -426,6 +496,7 @@
                 b.append("b", string("a\0b", 3));
                 b.appendAs(b.asTempObj()["a"], "c");
                 BSONObj o = b.obj();
+                keyTest(o);

                 stringstream ss;
                 ss << 'a' << '\0' << 'b';
@@ -479,6 +550,7 @@
                 ASSERT_EQUALS( 2 , o.getFieldDotted( "b.a" ).numberInt() );
                 ASSERT_EQUALS( 3 , o.getFieldDotted( "c.0.a" ).numberInt() );
                 ASSERT_EQUALS( 4 , o.getFieldDotted( "c.1.a" ).numberInt() );
+                keyTest(o);
             }
         };
@@ -796,6 +868,7 @@
                 b.appendOID( "b" , 0 , false );
                 b.appendOID( "c" , 0 , true );
                 BSONObj o = b.obj();
+                keyTest(o);

                 ASSERT( o["a"].__oid().str() == "000000000000000000000000" );
                 ASSERT( o["b"].__oid().str() == "000000000000000000000000" );
@@ -1422,6 +1495,8 @@
                 ASSERT_EQUALS(obj, arr);

                 BSONObj o = BSON( "obj" << obj << "arr" << arr << "arr2" << BSONArray(obj) );
+                keyTest(o);
+
                 ASSERT_EQUALS(o["obj"].type(), Object);
                 ASSERT_EQUALS(o["arr"].type(), Array);
                 ASSERT_EQUALS(o["arr2"].type(), Array);
@@ -1607,6 +1682,7 @@
             void run() {
                 BSONObj x = BSON( "a" << BSON( "b" << 1 ) );
                 BSONObj y = BSON( "a" << BSON( "b" << 1.0 ) );
+                keyTest(x); keyTest(y);
                 ASSERT_EQUALS( x , y );
                 ASSERT_EQUALS( 0 , x.woCompare( y ) );
             }
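The namespacetests.cpp changes below (and the querytests.cpp changes further down) rewrite the capped-collection tests for the new 4096-byte minimum extent size: the old hard-coded record counts no longer hold once extents get rounded up, so expected counts are now derived from how many objects fit per extent. A sketch of the sizing arithmetic the rewritten TruncateCapped test relies on, mirroring its "MinExtentSize / b.objsize() * nExtents() + 5" line (objSize and nExtents are plain parameters here, not the test fixture's accessors):

    // inserts needed to fill every minimum-size extent and wrap at least once
    const int MinExtentSize = 4096;

    static int insertsToForceWrap(int objSize, int nExtents) {
        int perExtent = MinExtentSize / objSize;  // whole records per minimum-size extent
        return perExtent * nExtents + 5;          // +5 slack guarantees a wrap-around
    }

With 1024-byte objects in two extents this gives 13 inserts, which is why the test can then assert that nRecords() stays below N: by the time the loop finishes, the oldest records have been reclaimed.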
diff --git a/dbtests/namespacetests.cpp b/dbtests/namespacetests.cpp
index c2be0b0439e..67c75628992 100644
--- a/dbtests/namespacetests.cpp
+++ b/dbtests/namespacetests.cpp
@@ -27,6 +27,9 @@
 #include "dbtests.h"

 namespace NamespaceTests {
+
+    const int MinExtentSize = 4096;
+
     namespace IndexDetailsTests {
         class Base {
             dblock lk;
@@ -659,13 +662,16 @@
                 create();
                 BSONObj b = bigObj();

-                DiskLoc l[ 6 ];
-                for ( int i = 0; i < 6; ++i ) {
+                const int N = 20;
+                const int Q = 16; // these constants depend on the size of the bson object and on the extent size allocated by the system
+                DiskLoc l[ N ];
+                for ( int i = 0; i < N; ++i ) {
                     l[ i ] = theDataFileMgr.insert( ns(), b.objdata(), b.objsize() );
                     ASSERT( !l[ i ].isNull() );
-                    ASSERT_EQUALS( 1 + i % 2, nRecords() );
-                    if ( i > 1 )
-                        ASSERT( l[ i ] == l[ i - 2 ] );
+                    ASSERT( nRecords() <= Q );
+                    //ASSERT_EQUALS( 1 + i % 2, nRecords() );
+                    if ( i >= 16 )
+                        ASSERT( l[ i ] == l[ i - Q] );
                 }
             }
         };
@@ -682,14 +688,15 @@
                 for ( int i = 0; i < 8; ++i ) {
                     l[ i ] = theDataFileMgr.insert( ns(), b.objdata(), b.objsize() );
                     ASSERT( !l[ i ].isNull() );
-                    ASSERT_EQUALS( i < 2 ? i + 1 : 3 + i % 2, nRecords() );
-                    if ( i > 3 )
-                        ASSERT( l[ i ] == l[ i - 4 ] );
+                    //ASSERT_EQUALS( i < 2 ? i + 1 : 3 + i % 2, nRecords() );
+                    //if ( i > 3 )
+                    //    ASSERT( l[ i ] == l[ i - 4 ] );
                 }
+                ASSERT( nRecords() == 8 );

                 // Too big
                 BSONObjBuilder bob;
-                bob.append( "a", string( 787, 'a' ) );
+                bob.append( "a", string( MinExtentSize + 500, 'a' ) ); // min extent size is now 4096
                 BSONObj bigger = bob.done();
                 ASSERT( theDataFileMgr.insert( ns(), bigger.objdata(), bigger.objsize() ).isNull() );
                 ASSERT_EQUALS( 0, nRecords() );
@@ -712,14 +719,21 @@
                 BSONObj b = bigObj();

-                DiskLoc l[ 8 ];
-                for ( int i = 0; i < 8; ++i ) {
-                    l[ i ] = theDataFileMgr.insert( ns(), b.objdata(), b.objsize() );
-                    ASSERT( !l[ i ].isNull() );
-                    ASSERT_EQUALS( i < 2 ? i + 1 : 3 + i % 2, nRecords() );
+                int N = MinExtentSize / b.objsize() * nExtents() + 5;
+                int T = N - 4;
+
+                DiskLoc truncAt;
+                //DiskLoc l[ 8 ];
+                for ( int i = 0; i < N; ++i ) {
+                    DiskLoc a = theDataFileMgr.insert( ns(), b.objdata(), b.objsize() );
+                    if( T == i )
+                        truncAt = a;
+                    ASSERT( !a.isNull() );
+                    /*ASSERT_EQUALS( i < 2 ? i + 1 : 3 + i % 2, nRecords() );
                     if ( i > 3 )
-                        ASSERT( l[ i ] == l[ i - 4 ] );
+                        ASSERT( l[ i ] == l[ i - 4 ] );*/
                 }
+                ASSERT( nRecords() < N );

                 NamespaceDetails *nsd = nsdetails(ns());
@@ -736,10 +750,9 @@
                     ASSERT( first != last ) ;
                 }

-                DiskLoc d = l[6];
                 long long n = nsd->stats.nrecords;
-                nsd->cappedTruncateAfter(ns(), d, false);
-                ASSERT_EQUALS( nsd->stats.nrecords , n-1 );
+                nsd->cappedTruncateAfter(ns(), truncAt, false);
+                ASSERT_EQUALS( nsd->stats.nrecords , 28 );

                 {
                     ForwardCappedCursor c(nsd);
@@ -753,7 +766,7 @@
                 // Too big
                 BSONObjBuilder bob;
-                bob.append( "a", string( 787, 'a' ) );
+                bob.append( "a", string( MinExtentSize + 300, 'a' ) );
                 BSONObj bigger = bob.done();
                 ASSERT( theDataFileMgr.insert( ns(), bigger.objdata(), bigger.objsize() ).isNull() );
                 ASSERT_EQUALS( 0, nRecords() );
diff --git a/dbtests/perftests.cpp b/dbtests/perftests.cpp
index 087487a6192..b8326b7cc84 100644
--- a/dbtests/perftests.cpp
+++ b/dbtests/perftests.cpp
@@ -409,7 +409,7 @@
             b.appendBinData("bin", 200000, (BinDataType) 129, buf);
             x = b.obj();
         }
-        string name() { return "insert big"; }
+        string name() { return "insert-big"; }
         void timed() {
             client().insert( ns(), x );
         }
diff --git a/dbtests/querytests.cpp b/dbtests/querytests.cpp
index c81bbba778f..073d9242874 100644
--- a/dbtests/querytests.cpp
+++ b/dbtests/querytests.cpp
@@ -849,10 +849,12 @@
             writelock lk("");
             Client::Context ctx( "unittests" );

+            // note that extents are always at least 4KB now - so this will get rounded up a bit.
             ASSERT( userCreateNS( ns() , fromjson( "{ capped : true , size : 2000 }" ) , err , false ) );
-            for ( int i=0; i<100; i++ ) {
+            for ( int i=0; i<200; i++ ) {
                 insertNext();
-                ASSERT( count() < 45 );
+                cout << count() << endl;
+                ASSERT( count() < 90 );
             }

             int a = count();
@@ -869,7 +871,7 @@
                 insertNext();
                 ASSERT( c->more() );

-                for ( int i=0; i<50; i++ ) {
+                for ( int i=0; i<90; i++ ) {
                     insertNext();
                 }
diff --git a/dbtests/test.vcxproj b/dbtests/test.vcxproj
index 9f4c1808a64..9aaf9e047df 100644
--- a/dbtests/test.vcxproj
+++ b/dbtests/test.vcxproj
@@ -269,6 +269,7 @@
<ClCompile Include="..\client\gridfs.cpp" />
<ClCompile Include="..\client\model.cpp" />
<ClCompile Include="..\client\parallel.cpp" />
+ <ClCompile Include="..\db\btreebuilder.cpp" />
<ClCompile Include="..\db\cap.cpp" />
<ClCompile Include="..\db\commands\isself.cpp" />
<ClCompile Include="..\db\compact.cpp" />
@@ -283,6 +284,7 @@
<ClCompile Include="..\db\dur_writetodatafiles.cpp" />
<ClCompile Include="..\db\geo\2d.cpp" />
<ClCompile Include="..\db\geo\haystack.cpp" />
+ <ClCompile Include="..\db\key.cpp" />
<ClCompile Include="..\db\mongommf.cpp" />
<ClCompile Include="..\db\projection.cpp" />
<ClCompile Include="..\db\querypattern.cpp">
diff --git a/dbtests/test.vcxproj.filters b/dbtests/test.vcxproj.filters
index 8bb35103856..386f5978ed1 100755
--- a/dbtests/test.vcxproj.filters
+++ b/dbtests/test.vcxproj.filters
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="utf-8"?>
+<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="misc and third party">
@@ -779,6 +779,12 @@
<ClCompile Include="..\util\ramlog.cpp">
<Filter>util</Filter>
</ClCompile>
+ <ClCompile Include="..\db\key.cpp">
+ <Filter>db\cpp</Filter>
+ </ClCompile>
+ <ClCompile Include="..\db\btreebuilder.cpp">
+ <Filter>btree</Filter>
+ </ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="..\SConstruct">
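In the sharding changes below, as in the dbtests changes above, every direct "new BtreeCursor(...)" becomes a call to a static BtreeCursor::make() factory, with the result held via scoped_ptr, auto_ptr, or shared_ptr. With the on-disk btree now versioned (the V0/V1 bucket layouts from db/btree.cpp), a factory can pick the concrete cursor for an index's format while call sites keep programming against a single interface. A rough sketch of that shape (names and the version test are illustrative, not mongod's actual signatures):

    #include <memory>

    // illustrative versioned-cursor factory in the style of BtreeCursor::make()
    struct Cursor {
        virtual ~Cursor() {}
        virtual bool ok() const = 0;
        virtual void advance() = 0;
    };

    struct BtreeCursorV0 : Cursor {   // legacy key format
        bool ok() const { return false; }
        void advance() {}
    };

    struct BtreeCursorV1 : Cursor {   // new compact KeyV1 format
        bool ok() const { return false; }
        void advance() {}
    };

    // callers never name the concrete type; the index's version picks it
    std::auto_ptr<Cursor> makeCursor(int indexVersion) {
        if (indexVersion >= 1)
            return std::auto_ptr<Cursor>(new BtreeCursorV1());
        return std::auto_ptr<Cursor>(new BtreeCursorV0());
    }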
diff --git a/s/d_migrate.cpp b/s/d_migrate.cpp
index ddc06f50375..c184f2b9c5a 100644
--- a/s/d_migrate.cpp
+++ b/s/d_migrate.cpp
@@ -431,7 +431,7 @@ namespace mongo {
            }

            scoped_ptr<ClientCursor> cc( new ClientCursor( QueryOption_NoCursorTimeout ,
-                                         shared_ptr<Cursor>( new BtreeCursor( d , d->idxNo(*idx) , *idx , min , max , false , 1 ) ) ,
+                                         shared_ptr<Cursor>( BtreeCursor::make( d , d->idxNo(*idx) , *idx , min , max , false , 1 ) ) ,
                                         _ns ) );

            // use the average object size to estimate how many objects a full chunk would carry
diff --git a/s/d_split.cpp b/s/d_split.cpp
index 6eaf4cf502e..d76a81d01fa 100644
--- a/s/d_split.cpp
+++ b/s/d_split.cpp
@@ -77,7 +77,7 @@ namespace mongo {
            // only yielding on first half for now
            // after this it should be in ram, so 2nd should be fast
            {
-                shared_ptr<Cursor> c( new BtreeCursor( d, idxNo, *id, min, max, false, 1 ) );
+                shared_ptr<Cursor> c( BtreeCursor::make( d, idxNo, *id, min, max, false, 1 ) );
                scoped_ptr<ClientCursor> cc( new ClientCursor( QueryOption_NoCursorTimeout , c , ns ) );
                while ( c->ok() ) {
                    num++;
@@ -89,7 +89,8 @@
            num /= 2;

-            BtreeCursor c( d, idxNo, *id, min, max, false, 1 );
+            auto_ptr<BtreeCursor> _c( BtreeCursor::make( d, idxNo, *id, min, max, false, 1 ) );
+            BtreeCursor& c = *_c;
            for( ; num; c.advance(), --num );

            ostringstream os;
@@ -174,7 +175,7 @@
                return false;
            }

-            BtreeCursor * bc = new BtreeCursor( d , d->idxNo(*idx) , *idx , min , max , false , 1 );
+            BtreeCursor * bc = BtreeCursor::make( d , d->idxNo(*idx) , *idx , min , max , false , 1 );
            shared_ptr<Cursor> c( bc );
            scoped_ptr<ClientCursor> cc( new ClientCursor( QueryOption_NoCursorTimeout , c , ns ) );
            if ( ! cc->ok() ) {
@@ -368,7 +369,7 @@
            long long currCount = 0;
            long long numChunks = 0;

-            BtreeCursor * bc = new BtreeCursor( d , d->idxNo(*idx) , *idx , min , max , false , 1 );
+            BtreeCursor * bc = BtreeCursor::make( d , d->idxNo(*idx) , *idx , min , max , false , 1 );
            shared_ptr<Cursor> c( bc );
            scoped_ptr<ClientCursor> cc( new ClientCursor( QueryOption_NoCursorTimeout , c , ns ) );
            if ( ! cc->ok() ) {
@@ -433,7 +434,7 @@
                currCount = 0;
                log() << "splitVector doing another cycle because of force, keyCount now: " << keyCount << endl;

-                bc = new BtreeCursor( d , d->idxNo(*idx) , *idx , min , max , false , 1 );
+                bc = BtreeCursor::make( d , d->idxNo(*idx) , *idx , min , max , false , 1 );
                c.reset( bc );
                cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , c , ns ) );
            }
diff --git a/util/log.h b/util/log.h
index 53fc0de7987..f43f0c22a6f 100644
--- a/util/log.h
+++ b/util/log.h
@@ -137,6 +137,9 @@ namespace mongo {
        virtual Nullstream& operator<<(unsigned) {
            return *this;
        }
+        virtual Nullstream& operator<<(unsigned short) {
+            return *this;
+        }
        virtual Nullstream& operator<<(double) {
            return *this;
        }
@@ -242,6 +245,7 @@
        Logstream& operator<<(long x) { ss << x; return *this; }
        Logstream& operator<<(unsigned long x) { ss << x; return *this; }
        Logstream& operator<<(unsigned x) { ss << x; return *this; }
+        Logstream& operator<<(unsigned short x){ ss << x; return *this; }
        Logstream& operator<<(double x) { ss << x; return *this; }
        Logstream& operator<<(void *x) { ss << x; return *this; }
        Logstream& operator<<(const void *x) { ss << x; return *this; }
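The util/log.h hunk gives both Nullstream and Logstream an exact-match operator<< for unsigned short. The diff does not say why; a plausible reading is that the new key/btree code logs 16-bit values, and an exact-match overload keeps such calls unambiguous regardless of which other numeric overloads are in scope. A self-contained illustration of the ambiguity an exact overload avoids (this toy Stream is not the mongo class):

    #include <iostream>

    // Toy stream in the flavor of Logstream. Without the unsigned short
    // overload, streaming an unsigned short here would be ambiguous: the
    // conversions to unsigned and to double have equal rank, so neither wins.
    struct Stream {
        Stream& operator<<(unsigned x)       { std::cout << x; return *this; }
        Stream& operator<<(double x)         { std::cout << x; return *this; }
        Stream& operator<<(unsigned short x) { std::cout << x; return *this; } // exact match
    };

    int main() {
        unsigned short v = 16;   // e.g. a 16-bit size field
        Stream s;
        s << v;                  // resolves to the exact-match overload
        std::cout << std::endl;
        return 0;
    }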