diff options
author | Dwight <dwight@10gen.com> | 2011-04-22 18:16:43 -0400 |
---|---|---|
committer | Dwight <dwight@10gen.com> | 2011-04-22 18:16:43 -0400 |
commit | 52ea89ab4753084e2d4231c9105a68ef95a78ae7 (patch) | |
tree | 100e4691afe8705c4f2dfa22b0bb581ca95d2e9e | |
parent | 846a7a0bc516f80659fb10db4bb6b2f54ca1d791 (diff) | |
download | mongo-52ea89ab4753084e2d4231c9105a68ef95a78ae7.tar.gz |
towards index v1 format
-rw-r--r-- | db/btree.cpp | 169 | ||||
-rw-r--r-- | db/btree.h | 37 | ||||
-rw-r--r-- | db/btreebuilder.cpp | 35 | ||||
-rw-r--r-- | db/btreecursor.cpp | 67 | ||||
-rw-r--r-- | db/dbcommands.cpp | 8 | ||||
-rw-r--r-- | db/index.cpp | 31 | ||||
-rw-r--r-- | db/index.h | 33 | ||||
-rw-r--r-- | db/pdfile.cpp | 103 | ||||
-rw-r--r-- | db/query.cpp | 6 | ||||
-rw-r--r-- | db/update.cpp | 4 | ||||
-rw-r--r-- | util/log.h | 4 |
11 files changed, 277 insertions, 220 deletions
diff --git a/db/btree.cpp b/db/btree.cpp index c28016982e8..1662283b46b 100644 --- a/db/btree.cpp +++ b/db/btree.cpp @@ -31,7 +31,15 @@ namespace mongo { -#define VERIFYTHISLOC dassert( thisLoc.btree() == this ); + template class BucketBasics<V0>; + template class BucketBasics<V1>; + template class BtreeBucket<V0>; + template class BtreeBucket<V1>; + + BOOST_STATIC_ASSERT( Record::HeaderSize == 16 ); + BOOST_STATIC_ASSERT( Record::HeaderSize + BtreeData_V1::BucketSize == 8192 ); + +#define VERIFYTHISLOC dassert( thisLoc.btree<V>() == this ); /** * give us a writable version of the btree bucket (declares write intent). @@ -41,7 +49,7 @@ namespace mongo { BtreeBucket<V> * DiskLoc::btreemod() const { assert( _a != -1 ); BtreeBucket<V> *b = const_cast< BtreeBucket<V> * >( btree<V>() ); - return static_cast< BtreeBucket<V>* >( getDur().writingPtr( b, BucketSize ) ); + return static_cast< BtreeBucket<V>* >( getDur().writingPtr( b, V::BucketSize ) ); } _KeyNode& _KeyNode::writing() const { @@ -54,9 +62,6 @@ namespace mongo { recordLoc(k.recordLoc), key(bb.data+k.keyDataOfs()) { } - // largest key size we allow. note we very much need to support bigger keys (somehow) in the future. - static const int KeyMax = OldBucketSize / 10; - // BucketBasics::lowWaterMark() // // We define this value as the maximum number of bytes such that, if we have @@ -112,7 +117,6 @@ namespace mongo { template< class V > int BucketBasics<V>::Size() const { - assert( _wasSize == BucketSize ); return BucketSize; } @@ -122,9 +126,9 @@ namespace mongo { ss << "*\n"; for ( int i = 0; i < n; i++ ) if ( !k(i).prevChildBucket.isNull() ) - k(i).prevChildBucket.btree()->_shape(level+1,ss); + k(i).prevChildBucket.btree<V>()->_shape(level+1,ss); if ( !nextChild.isNull() ) - nextChild.btree()->_shape(level+1,ss); + nextChild.btree<V>()->_shape(level+1,ss); } int bt_fv=0; @@ -169,7 +173,7 @@ namespace mongo { } if ( !kn.prevChildBucket.isNull() ) { DiskLoc left = kn.prevChildBucket; - const BtreeBucket *b = left.btree(); + const BtreeBucket *b = left.btree<V>(); if ( strict ) { assert( b->parent == thisLoc ); } @@ -180,7 +184,7 @@ namespace mongo { } } if ( !nextChild.isNull() ) { - const BtreeBucket *b = nextChild.btree(); + const BtreeBucket *b = nextChild.btree<V>(); if ( strict ) { assert( b->parent == thisLoc ); } @@ -222,7 +226,7 @@ namespace mongo { for ( int j = 0; j < n; j++ ) { out() << " " << keyNode(j).key.toString() << endl; } - ((BtreeBucket *) this)->dump(); + ((BtreeBucket<V> *) this)->dump(); } wassert(false); break; @@ -247,7 +251,7 @@ namespace mongo { if ( z > 0 ) { problem() << "btree keys out of order" << '\n'; ONCE { - ((BtreeBucket *) this)->dump(); + ((BtreeBucket<V> *) this)->dump(); } assert(false); } @@ -268,15 +272,13 @@ namespace mongo { template< class V > void BucketBasics<V>::init() { + _init(); parent.Null(); nextChild.Null(); - _wasSize = BucketSize; - _reserved1 = 0; flags = Packed; n = 0; emptySize = totalDataSize(); topSize = 0; - reserved = 0; } /** see _alloc */ @@ -403,7 +405,7 @@ namespace mongo { b->k(j) = b->k(j-1); } - getDur().declareWriteIntent(&b->emptySize, 12); // [b->emptySize..b->n] is 12 bytes and we are going to write those + getDur().declareWriteIntent(&b->emptySize, sizeof(emptySize)+sizeof(topSize)+sizeof(n)); b->emptySize -= sizeof(_KeyNode); b->n++; @@ -458,7 +460,7 @@ namespace mongo { declaration anyway within the group commit interval, in which case we would just be adding code and complexity without benefit. */ - thisLoc.btreemod()->_packReadyForMod(order, refPos); + thisLoc.btreemod<V>()->_packReadyForMod(order, refPos); } /** version when write intent already declared */ @@ -602,7 +604,7 @@ namespace mongo { void BtreeBucket<V>::findLargestKey(const DiskLoc& thisLoc, DiskLoc& largestLoc, int& largestKey) { DiskLoc loc = thisLoc; while ( 1 ) { - const BtreeBucket *b = loc.btree(); + const BtreeBucket *b = loc.btree<V>(); if ( !b->nextChild.isNull() ) { loc = b->nextChild; continue; @@ -678,7 +680,7 @@ namespace mongo { while ( 1 ) { if( b.isNull() ) break; - const BtreeBucket *bucket = b.btree(); + const BtreeBucket *bucket = b.btree<V>(); const _KeyNode& kn = bucket->k(pos); if ( kn.isUsed() ) return bucket->keyAt(pos).woEqual(key); @@ -698,7 +700,7 @@ namespace mongo { while ( !b.isNull() ) { // we skip unused keys - const BtreeBucket *bucket = b.btree(); + const BtreeBucket *bucket = b.btree<V>(); const _KeyNode& kn = bucket->k(pos); if ( kn.isUsed() ) { if( bucket->keyAt(pos).woEqual(key) ) @@ -754,8 +756,8 @@ namespace mongo { // coding effort in here to make this particularly fast if( !dupsChecked ) { dupsChecked = true; - if( idx.head.btree()->exists(idx, idx.head, key, order) ) { - if( idx.head.btree()->wouldCreateDup(idx, idx.head, key, order, recordLoc) ) + if( idx.head.btree<V>()->exists(idx, idx.head, key, order) ) { + if( idx.head.btree<V>()->wouldCreateDup(idx, idx.head, key, order, recordLoc) ) uasserted( ASSERT_ID_DUPKEY , dupKeyError( idx , key ) ); else alreadyInIndex(); @@ -802,7 +804,7 @@ namespace mongo { ClientCursor::informAboutToDeleteBucket(thisLoc); // slow... assert( !isHead() ); - const BtreeBucket *p = parent.btree(); + const BtreeBucket *p = parent.btree<V>(); int parentIdx = indexInParent( thisLoc ); p->childForPos( parentIdx ).writing().Null(); deallocBucket( thisLoc, id ); @@ -892,19 +894,19 @@ namespace mongo { // child in the proper direction and all descendants of thisLoc must be // nonempty because they are not the root. - if ( !advanceLoc.btree()->childForPos( advanceKeyOfs ).isNull() || - !advanceLoc.btree()->childForPos( advanceKeyOfs + 1 ).isNull() ) { + if ( !advanceLoc.btree<V>()->childForPos( advanceKeyOfs ).isNull() || + !advanceLoc.btree<V>()->childForPos( advanceKeyOfs + 1 ).isNull() ) { // only expected with legacy btrees, see note above markUnused( keypos ); return; } - KeyNode kn = advanceLoc.btree()->keyNode( advanceKeyOfs ); + KeyNode kn = advanceLoc.btree<V>()->keyNode( advanceKeyOfs ); // Because advanceLoc is a descendant of thisLoc, updating thisLoc will // not affect packing or keys of advanceLoc and kn will be stable // during the following setInternalKey() setInternalKey( thisLoc, keypos, kn.recordLoc, kn.key, order, childForPos( keypos ), childForPos( keypos + 1 ), id ); - advanceLoc.btreemod()->delKeyAtPos( advanceLoc, id, advanceKeyOfs, order ); + advanceLoc.btreemod<V>()->delKeyAtPos( advanceLoc, id, advanceKeyOfs, order ); } template< class V > @@ -915,9 +917,9 @@ namespace mongo { id.head.writing() = nextChild; } else { - parent.btree()->childForPos( indexInParent( thisLoc ) ).writing() = nextChild; + parent.btree<V>()->childForPos( indexInParent( thisLoc ) ).writing() = nextChild; } - nextChild.btree()->parent.writing() = parent; + nextChild.btree<V>()->parent.writing() = parent; ClientCursor::informAboutToDeleteBucket( thisLoc ); deallocBucket( thisLoc, id ); } @@ -933,8 +935,8 @@ namespace mongo { } int pos = 0; { - const BtreeBucket *l = leftNodeLoc.btree(); - const BtreeBucket *r = rightNodeLoc.btree(); + const BtreeBucket *l = leftNodeLoc.btree<V>(); + const BtreeBucket *r = rightNodeLoc.btree<V>(); if ( ( headerSize() + l->packedDataSize( pos ) + r->packedDataSize( pos ) + keyNode( leftIndex ).key.dataSize() + sizeof(_KeyNode) > unsigned( BucketSize ) ) ) { return false; } @@ -950,8 +952,8 @@ namespace mongo { int BtreeBucket<V>::rebalancedSeparatorPos( const DiskLoc &thisLoc, int leftIndex ) const { int split = -1; int rightSize = 0; - const BtreeBucket *l = childForPos( leftIndex ).btree(); - const BtreeBucket *r = childForPos( leftIndex + 1 ).btree(); + const BtreeBucket *l = childForPos( leftIndex ).btree<V>(); + const BtreeBucket *r = childForPos( leftIndex + 1 ).btree<V>(); int KNS = sizeof( _KeyNode ); int rightSizeLimit = ( l->topSize + l->n * KNS + keyNode( leftIndex ).key.dataSize() + KNS + r->topSize + r->n * KNS ) / 2; @@ -995,8 +997,8 @@ namespace mongo { void BtreeBucket<V>::doMergeChildren( const DiskLoc thisLoc, int leftIndex, IndexDetails &id, const Ordering &order ) { DiskLoc leftNodeLoc = childForPos( leftIndex ); DiskLoc rightNodeLoc = childForPos( leftIndex + 1 ); - BtreeBucket *l = leftNodeLoc.btreemod(); - BtreeBucket *r = rightNodeLoc.btreemod(); + BtreeBucket *l = leftNodeLoc.btreemod<V>(); + BtreeBucket *r = rightNodeLoc.btreemod<V>(); int pos = 0; l->_packReadyForMod( order, pos ); r->_packReadyForMod( order, pos ); // pack r in case there are droppable keys @@ -1033,7 +1035,7 @@ namespace mongo { template< class V > int BtreeBucket<V>::indexInParent( const DiskLoc &thisLoc ) const { assert( !parent.isNull() ); - const BtreeBucket *p = parent.btree(); + const BtreeBucket *p = parent.btree<V>(); if ( p->nextChild == thisLoc ) { return p->n; } @@ -1060,7 +1062,7 @@ namespace mongo { if ( canMergeChildren( thisLoc, leftIndex ) ) { return false; } - thisLoc.btreemod()->doBalanceChildren( thisLoc, leftIndex, id, order ); + thisLoc.btreemod<V>()->doBalanceChildren( thisLoc, leftIndex, id, order ); return true; } @@ -1138,9 +1140,9 @@ namespace mongo { DiskLoc lchild = childForPos( leftIndex ); DiskLoc rchild = childForPos( leftIndex + 1 ); int zeropos = 0; - BtreeBucket *l = lchild.btreemod(); + BtreeBucket *l = lchild.btreemod<V>(); l->_packReadyForMod( order, zeropos ); - BtreeBucket *r = rchild.btreemod(); + BtreeBucket *r = rchild.btreemod<V>(); r->_packReadyForMod( order, zeropos ); int split = rebalancedSeparatorPos( thisLoc, leftIndex ); @@ -1161,11 +1163,11 @@ namespace mongo { return false; } - if ( packedDataSize( 0 ) >= lowWaterMark ) { + if ( packedDataSize( 0 ) >= lowWaterMark() ) { return false; } - const BtreeBucket *p = parent.btree(); + const BtreeBucket *p = parent.btree<V>(); int parentIdx = indexInParent( thisLoc ); // TODO will missing neighbor case be possible long term? Should we try to merge/balance somehow in that case if so? @@ -1184,7 +1186,7 @@ namespace mongo { return true; } - BtreeBucket *pm = parent.btreemod(); + BtreeBucket *pm = parent.btreemod<V>(); if ( mayBalanceRight ) { pm->doMergeChildren( parent, parentIdx, id, order ); return true; @@ -1209,7 +1211,7 @@ namespace mongo { OCCASIONALLY problem() << "unindex: key too large to index but was found for " << id.indexNamespace() << " reIndex suggested" << endl; } - loc.btreemod()->delKeyAtPos(loc, id, pos, Ordering::make(id.keyPattern())); + loc.btreemod<V>()->delKeyAtPos(loc, id, pos, Ordering::make(id.keyPattern())); return true; } @@ -1228,7 +1230,7 @@ namespace mongo { if ( !child.isNull() ) { if ( insert_debug ) out() << " " << child.toString() << ".parent=" << thisLoc.toString() << endl; - child.btree()->parent.writing() = thisLoc; + child.btree<V>()->parent.writing() = thisLoc; } } @@ -1287,7 +1289,7 @@ namespace mongo { if ( !basicInsert(thisLoc, keypos, recordLoc, key, order) ) { // If basicInsert() fails, the bucket will be packed as required by split(). - thisLoc.btreemod()->split(thisLoc, keypos, recordLoc, key, order, lchild, rchild, idx); + thisLoc.btreemod<V>()->split(thisLoc, keypos, recordLoc, key, order, lchild, rchild, idx); return; } @@ -1309,7 +1311,7 @@ namespace mongo { assert( kn->prevChildBucket == lchild ); nextChild.writing() = rchild; if ( !rchild.isNull() ) - rchild.btree()->parent.writing() = thisLoc; + rchild.btree<V>()->parent.writing() = thisLoc; } else { kn->prevChildBucket = lchild; @@ -1326,7 +1328,7 @@ namespace mongo { const DiskLoc *pc = &k(keypos+1).prevChildBucket; *getDur().alreadyDeclared((DiskLoc*) pc) = rchild; // declared in basicInsert() if ( !rchild.isNull() ) - rchild.btree()->parent.writing() = thisLoc; + rchild.btree<V>()->parent.writing() = thisLoc; } return; } @@ -1341,7 +1343,7 @@ namespace mongo { int split = splitPos( keypos ); DiskLoc rLoc = addBucket(idx); - BtreeBucket *r = rLoc.btreemod(); + BtreeBucket *r = rLoc.btreemod<V>(); if ( split_debug ) out() << " split:" << split << ' ' << keyNode(split).key.toString() << " n:" << n << endl; for ( int i = split+1; i < n; i++ ) { @@ -1354,7 +1356,7 @@ namespace mongo { if ( split_debug ) out() << " new rLoc:" << rLoc.toString() << endl; r = 0; - rLoc.btree()->fixParentPtrs(rLoc); + rLoc.btree<V>()->fixParentPtrs(rLoc); { KeyNode splitkey = keyNode(split); @@ -1371,21 +1373,21 @@ namespace mongo { if ( parent.isNull() ) { // make a new parent if we were the root DiskLoc L = addBucket(idx); - BtreeBucket *p = L.btreemod(); + BtreeBucket *p = L.btreemod<V>(); p->pushBack(splitkey.recordLoc, splitkey.key, order, thisLoc); p->nextChild = rLoc; p->assertValid( order ); parent = idx.head.writing() = L; if ( split_debug ) out() << " we were root, making new root:" << hex << parent.getOfs() << dec << endl; - rLoc.btree()->parent.writing() = parent; + rLoc.btree<V>()->parent.writing() = parent; } else { // set this before calling _insert - if it splits it will do fixParent() logic and change the value. - rLoc.btree()->parent.writing() = parent; + rLoc.btree<V>()->parent.writing() = parent; if ( split_debug ) out() << " promoting splitkey key " << splitkey.key.toString() << endl; - parent.btree()->_insert(parent, splitkey.recordLoc, splitkey.key, order, /*dupsallowed*/true, thisLoc, rLoc, idx); + parent.btree<V>()->_insert(parent, splitkey.recordLoc, splitkey.key, order, /*dupsallowed*/true, thisLoc, rLoc, idx); } } @@ -1403,7 +1405,7 @@ namespace mongo { else { int kp = keypos-split-1; assert(kp>=0); - rLoc.btree()->insertHere(rLoc, kp, recordLoc, key, order, lchild, rchild, idx); + rLoc.btree<V>()->insertHere(rLoc, kp, recordLoc, key, order, lchild, rchild, idx); } } @@ -1416,7 +1418,7 @@ namespace mongo { DiskLoc BtreeBucket<V>::addBucket(const IndexDetails& id) { string ns = id.indexNamespace(); DiskLoc loc = theDataFileMgr.insert(ns.c_str(), 0, BucketSize, true); - BtreeBucket *b = loc.btreemod(); + BtreeBucket *b = loc.btreemod<V>(); b->init(); return loc; } @@ -1428,8 +1430,8 @@ namespace mongo { template< class V > const DiskLoc BtreeBucket<V>::getHead(const DiskLoc& thisLoc) const { DiskLoc p = thisLoc; - while ( !p.btree()->isHead() ) - p = p.btree()->parent; + while ( !p.btree<V>()->isHead() ) + p = p.btree<V>()->parent; return p; } @@ -1447,8 +1449,8 @@ namespace mongo { DiskLoc nextDown = childForPos(ko+adj); if ( !nextDown.isNull() ) { while ( 1 ) { - keyOfs = direction>0 ? 0 : nextDown.btree()->n - 1; - DiskLoc loc = nextDown.btree()->childForPos(keyOfs + adj); + keyOfs = direction>0 ? 0 : nextDown.btree<V>()->n - 1; + DiskLoc loc = nextDown.btree<V>()->childForPos(keyOfs + adj); if ( loc.isNull() ) break; nextDown = loc; @@ -1467,7 +1469,7 @@ namespace mongo { while ( 1 ) { if ( ancestor.isNull() ) break; - const BtreeBucket *an = ancestor.btree(); + const BtreeBucket *an = ancestor.btree<V>(); for ( int i = 0; i < an->n; i++ ) { if ( an->childForPos(i+adj) == childLoc ) { keyOfs = i; @@ -1500,7 +1502,7 @@ namespace mongo { DiskLoc child = childForPos(p); if ( !child.isNull() ) { - DiskLoc l = child.btree()->locate(idx, child, key, order, pos, found, recordLoc, direction); + DiskLoc l = child.btree<V>()->locate(idx, child, key, order, pos, found, recordLoc, direction); if ( !l.isNull() ) return l; } @@ -1517,7 +1519,7 @@ namespace mongo { while( 1 ) { if ( l + 1 == h ) { keyOfs = ( direction > 0 ) ? h : l; - DiskLoc next = thisLoc.btree()->k( h ).prevChildBucket; + DiskLoc next = thisLoc.btree<V>()->k( h ).prevChildBucket; if ( !next.isNull() ) { bestParent = make_pair( thisLoc, keyOfs ); thisLoc = next; @@ -1528,7 +1530,7 @@ namespace mongo { } } int m = l + ( h - l ) / 2; - int cmp = customBSONCmp( thisLoc.btree()->keyNode( m ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ); + int cmp = customBSONCmp( thisLoc.btree<V>()->keyNode( m ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ); if ( cmp < 0 ) { l = m; } @@ -1574,15 +1576,15 @@ namespace mongo { } else { // go up parents until rightmost/leftmost node is >=/<= target or at top - while( !thisLoc.btree()->parent.isNull() ) { - thisLoc = thisLoc.btree()->parent; + while( !thisLoc.btree<V>()->parent.isNull() ) { + thisLoc = thisLoc.btree<V>()->parent; if ( direction > 0 ) { - if ( customBSONCmp( thisLoc.btree()->keyNode( thisLoc.btree()->n - 1 ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) >= 0 ) { + if ( customBSONCmp( thisLoc.btree<V>()->keyNode( thisLoc.btree<V>()->n - 1 ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) >= 0 ) { break; } } else { - if ( customBSONCmp( thisLoc.btree()->keyNode( 0 ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) <= 0 ) { + if ( customBSONCmp( thisLoc.btree<V>()->keyNode( 0 ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) <= 0 ) { break; } } @@ -1593,30 +1595,30 @@ namespace mongo { template< class V > void BtreeBucket<V>::customLocate(DiskLoc &thisLoc, int &keyOfs, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction, pair< DiskLoc, int > &bestParent ) const { - if ( thisLoc.btree()->n == 0 ) { + if ( thisLoc.btree<V>()->n == 0 ) { thisLoc = DiskLoc(); return; } // go down until find smallest/biggest >=/<= target while( 1 ) { int l = 0; - int h = thisLoc.btree()->n - 1; + int h = thisLoc.btree<V>()->n - 1; // leftmost/rightmost key may possibly be >=/<= search key bool firstCheck; if ( direction > 0 ) { - firstCheck = ( customBSONCmp( thisLoc.btree()->keyNode( 0 ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) >= 0 ); + firstCheck = ( customBSONCmp( thisLoc.btree<V>()->keyNode( 0 ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) >= 0 ); } else { - firstCheck = ( customBSONCmp( thisLoc.btree()->keyNode( h ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) <= 0 ); + firstCheck = ( customBSONCmp( thisLoc.btree<V>()->keyNode( h ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) <= 0 ); } if ( firstCheck ) { DiskLoc next; if ( direction > 0 ) { - next = thisLoc.btree()->k( 0 ).prevChildBucket; + next = thisLoc.btree<V>()->k( 0 ).prevChildBucket; keyOfs = 0; } else { - next = thisLoc.btree()->nextChild; + next = thisLoc.btree<V>()->nextChild; keyOfs = h; } if ( !next.isNull() ) { @@ -1630,18 +1632,18 @@ namespace mongo { } bool secondCheck; if ( direction > 0 ) { - secondCheck = ( customBSONCmp( thisLoc.btree()->keyNode( h ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) < 0 ); + secondCheck = ( customBSONCmp( thisLoc.btree<V>()->keyNode( h ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) < 0 ); } else { - secondCheck = ( customBSONCmp( thisLoc.btree()->keyNode( 0 ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) > 0 ); + secondCheck = ( customBSONCmp( thisLoc.btree<V>()->keyNode( 0 ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) > 0 ); } if ( secondCheck ) { DiskLoc next; if ( direction > 0 ) { - next = thisLoc.btree()->nextChild; + next = thisLoc.btree<V>()->nextChild; } else { - next = thisLoc.btree()->k( 0 ).prevChildBucket; + next = thisLoc.btree<V>()->k( 0 ).prevChildBucket; } if ( next.isNull() ) { // if bestParent is null, we've hit the end and thisLoc gets set to DiskLoc() @@ -1715,7 +1717,7 @@ namespace mongo { return 0; } - return child.btree()->_insert(child, recordLoc, key, order, dupsAllowed, /*lchild*/DiskLoc(), /*rchild*/DiskLoc(), idx); + return child.btree<V>()->_insert(child, recordLoc, key, order, dupsAllowed, /*lchild*/DiskLoc(), /*rchild*/DiskLoc(), idx); } template< class V > @@ -1760,11 +1762,6 @@ namespace mongo { } template< class V > - int BtreeBucket<V>::getLowWaterMark() { - return lowWaterMark; - } - - template< class V > int BtreeBucket<V>::getKeyMax() { return KeyMax; } @@ -1780,7 +1777,7 @@ namespace mongo { if ( bucket.isNull() ) return bucket; - const BtreeBucket *b = bucket.btree(); + const BtreeBucket<V> *b = bucket.btree<V>(); while ( 1 ) { const _KeyNode& knraw = b->k(pos); if ( knraw.isUsed() ) @@ -1788,7 +1785,7 @@ namespace mongo { bucket = b->advance( bucket , pos , 1 , "findSingle" ); if ( bucket.isNull() ) return bucket; - b = bucket.btree(); + b = bucket.btree<V>(); } KeyNode kn = b->keyNode( pos ); if ( KeyOwned(key).woCompare( kn.key, o ) != 0 ) @@ -1805,7 +1802,7 @@ namespace mongo { template< class V > void BtreeBucket<V>::a_test(IndexDetails& id) { - BtreeBucket *b = id.head.btreemod(); + BtreeBucket *b = id.head.btreemod<V>(); // record locs for testing DiskLoc A(1, 20); diff --git a/db/btree.h b/db/btree.h index bdcb99a7c1a..2d664e1ba6c 100644 --- a/db/btree.h +++ b/db/btree.h @@ -65,11 +65,10 @@ namespace mongo { const int OldBucketSize = 8192; + // largest key size we allow. note we very much need to support bigger keys (somehow) in the future. + const int KeyMax = OldBucketSize / 10;\ + #pragma pack(1) - class BtreeData_V0; - class BtreeData_V1; - typedef BtreeData_V0 V0; - typedef BtreeData_V1 V1; template< class Version > class BucketBasics; /** @@ -156,6 +155,12 @@ namespace mongo { unsigned short _reserved1; int flags; + void _init() { + _reserved1 = 0; + _wasSize = BucketSize; + reserved = 0; + } + /** basicInsert() assumes the next three members are consecutive and in this order: */ /** Size of the empty region. */ @@ -171,6 +176,7 @@ namespace mongo { public: typedef KeyBson Key; + typedef KeyBson KeyOwned; enum { BucketSize = 8192 }; }; @@ -180,30 +186,32 @@ namespace mongo { DiskLoc parent; /** Given that there are n keys, this is the n index child. */ DiskLoc nextChild; - /** can be reused, value is 8192 in current pdfile version Apr2010 */ - unsigned short _wasSize; - /** zero */ - // unsigned short _reserved1; - int flags; + + unsigned short flags; /** basicInsert() assumes the next three members are consecutive and in this order: */ /** Size of the empty region. */ - int emptySize; + unsigned short emptySize; /** Size used for bson storage, including storage of old keys. */ - int topSize; + unsigned short topSize; /* Number of keys in the bucket. */ - int n; + unsigned short n; - int reserved; /* Beginning of the bucket's body */ char data[4]; + void _init() { } + public: typedef KeyV1 Key; + typedef KeyV1Owned KeyOwned; enum { BucketSize = 8192-16 }; // leave room for Record header }; + typedef BtreeData_V0 V0; + typedef BtreeData_V1 V1; + /** * This class adds functionality to BtreeData for managing a single bucket. * The following policies are used in an attempt to encourage simplicity: @@ -234,7 +242,7 @@ namespace mongo { template< class Version > class BucketBasics : public Version { public: - //friend class BtreeBuilder< Version >; + template <class U> friend class BtreeBuilder; typedef typename Version::Key Key; /** @@ -630,7 +638,6 @@ namespace mongo { static void a_test(IndexDetails&); - static int getLowWaterMark(); static int getKeyMax(); protected: diff --git a/db/btreebuilder.cpp b/db/btreebuilder.cpp index 514e21c7990..c941d1da0ab 100644 --- a/db/btreebuilder.cpp +++ b/db/btreebuilder.cpp @@ -33,6 +33,9 @@ namespace mongo { /* --- BtreeBuilder --- */ + template class BtreeBuilder<V0>; + template class BtreeBuilder<V1>; + template<class V> BtreeBuilder<V>::BtreeBuilder(bool _dupsAllowed, IndexDetails& _idx) : dupsAllowed(_dupsAllowed), @@ -41,7 +44,7 @@ namespace mongo { order( idx.keyPattern() ), ordering( Ordering::make(idx.keyPattern()) ) { first = cur = BtreeBucket<V>::addBucket(idx); - b = cur.btreemod(); + b = cur.btreemod<V>(); committed = false; } @@ -50,19 +53,19 @@ namespace mongo { DiskLoc L = BtreeBucket<V>::addBucket(idx); b->tempNext() = L; cur = L; - b = cur.btreemod(); + b = cur.btreemod<V>(); } template<class V> void BtreeBuilder<V>::mayCommitProgressDurably() { if ( getDur().commitIfNeeded() ) { - b = cur.btreemod(); + b = cur.btreemod<V>(); } } template<class V> void BtreeBuilder<V>::addKey(BSONObj& _key, DiskLoc loc) { - KeyOwned key(_key); + V::KeyOwned key(_key); if ( key.dataSize() > KeyMax ) { problem() << "Btree::insert: key too large to index, skipping " << idx.indexNamespace() @@ -76,7 +79,7 @@ namespace mongo { massert( 10288 , "bad key order in BtreeBuilder - server internal error", cmp <= 0 ); if( cmp == 0 ) { //if( !dupsAllowed ) - uasserted( ASSERT_ID_DUPKEY , BtreeBucket::dupKeyError( idx , keyLast ) ); + uasserted( ASSERT_ID_DUPKEY , BtreeBucket<V>::dupKeyError( idx , keyLast ) ); } } keyLast = key; @@ -95,26 +98,26 @@ namespace mongo { void BtreeBuilder<V>::buildNextLevel(DiskLoc loc) { int levels = 1; while( 1 ) { - if( loc.btree()->tempNext().isNull() ) { + if( loc.btree<V>()->tempNext().isNull() ) { // only 1 bucket at this level. we are done. getDur().writingDiskLoc(idx.head) = loc; break; } levels++; - DiskLoc upLoc = BtreeBucket::addBucket(idx); + DiskLoc upLoc = BtreeBucket<V>::addBucket(idx); DiskLoc upStart = upLoc; - BtreeBucket *up = upLoc.btreemod(); + BtreeBucket<V> *up = upLoc.btreemod<V>(); DiskLoc xloc = loc; while( !xloc.isNull() ) { if ( getDur().commitIfNeeded() ) { - b = cur.btreemod(); - up = upLoc.btreemod(); + b = cur.btreemod<V>(); + up = upLoc.btreemod<V>(); } - BtreeBucket *x = xloc.btreemod(); - Key k; + BtreeBucket<V> *x = xloc.btreemod<V>(); + V::Key k; DiskLoc r; x->popBack(r,k); bool keepX = ( x->n != 0 ); @@ -122,10 +125,10 @@ namespace mongo { if ( ! up->_pushBack(r, k, ordering, keepLoc) ) { // current bucket full - DiskLoc n = BtreeBucket::addBucket(idx); + DiskLoc n = BtreeBucket<V>::addBucket(idx); up->tempNext() = n; upLoc = n; - up = upLoc.btreemod(); + up = upLoc.btreemod<V>(); up->pushBack(r, k, ordering, keepLoc); } @@ -135,7 +138,7 @@ namespace mongo { } else { if ( !x->nextChild.isNull() ) - x->nextChild.btreemod()->parent = upLoc; + x->nextChild.btreemod<V>()->parent = upLoc; x->deallocBucket( xloc, idx ); } xloc = nextLoc; @@ -163,7 +166,7 @@ namespace mongo { log(2) << "Rolling back partially built index space" << endl; DiskLoc x = first; while( !x.isNull() ) { - DiskLoc next = x.btree()->tempNext(); + DiskLoc next = x.btree<V>()->tempNext(); string ns = idx.indexNamespace(); theDataFileMgr._deleteRecord(nsdetails(ns.c_str()), ns.c_str(), x.rec(), x); x = next; diff --git a/db/btreecursor.cpp b/db/btreecursor.cpp index a5ae9325c75..f4e0875007d 100644 --- a/db/btreecursor.cpp +++ b/db/btreecursor.cpp @@ -26,18 +26,19 @@ namespace mongo { extern int otherTraceLevel; - class BtreeCursorV0 : public BtreeCursor { + template< class V > + class BtreeCursorImpl : public BtreeCursor { public: - typedef BucketBasics<V0>::KeyNode KeyNode; - typedef V0::Key Key; + typename typedef BucketBasics<V>::KeyNode KeyNode; + typename typedef V::Key Key; - BtreeCursorV0(NamespaceDetails *a, int b, const IndexDetails& c, const BSONObj &d, const BSONObj &e, bool f, int g) : + BtreeCursorImpl(NamespaceDetails *a, int b, const IndexDetails& c, const BSONObj &d, const BSONObj &e, bool f, int g) : BtreeCursor(a,b,c,d,e,f,g) { } - BtreeCursorV0(NamespaceDetails *_d, int _idxNo, const IndexDetails& _id, const shared_ptr< FieldRangeVector > &_bounds, int _direction) : + BtreeCursorImpl(NamespaceDetails *_d, int _idxNo, const IndexDetails& _id, const shared_ptr< FieldRangeVector > &_bounds, int _direction) : BtreeCursor(_d,_idxNo,_id,_bounds,_direction) { pair< DiskLoc, int > noBestParent; - indexDetails.head.btree<V0>()->customLocate( bucket, keyOfs, startKey, 0, false, _boundsIterator->cmp(), _boundsIterator->inc(), _ordering, _direction, noBestParent ); + indexDetails.head.btree<V>()->customLocate( bucket, keyOfs, startKey, 0, false, _boundsIterator->cmp(), _boundsIterator->inc(), _ordering, _direction, noBestParent ); skipAndCheck(); dassert( _dups.size() == 0 ); } @@ -49,39 +50,48 @@ namespace mongo { virtual BSONObj currKey() const { assert( !bucket.isNull() ); - return bucket.btree<V0>()->keyNode(keyOfs).key.toBson(); + return bucket.btree<V>()->keyNode(keyOfs).key.toBson(); } protected: virtual void _advanceTo(DiskLoc &thisLoc, int &keyOfs, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction ) { - thisLoc.btree<V0>()->advanceTo(thisLoc, keyOfs, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction); + thisLoc.btree<V>()->advanceTo(thisLoc, keyOfs, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction); } virtual DiskLoc _advance(const DiskLoc& thisLoc, int& keyOfs, int direction, const char *caller) { - return thisLoc.btree<V0>()->advance(thisLoc, keyOfs, direction, caller); + return thisLoc.btree<V>()->advance(thisLoc, keyOfs, direction, caller); } virtual void _audit() { if ( otherTraceLevel >= 200 ) { out() << "BtreeCursor() qtl>200. validating entire index." << endl; - indexDetails.head.btree<V0>()->fullValidate(indexDetails.head, _order); + indexDetails.head.btree<V>()->fullValidate(indexDetails.head, _order); } else { out() << "BtreeCursor(). dumping head bucket" << endl; - indexDetails.head.btree<V0>()->dump(); + indexDetails.head.btree<V>()->dump(); } } - virtual DiskLoc _locate(const BSONObj& key, const DiskLoc& loc); + virtual DiskLoc _locate(const BSONObj& key, const DiskLoc& loc) { + bool found; + return indexDetails.head.btree<V0>()-> + locate(indexDetails, indexDetails.head, key, _ordering, keyOfs, found, loc, _direction); + } + const _KeyNode& keyNode(int keyOfs) { - return bucket.btree<V0>()->k(keyOfs); + return bucket.btree<V>()->k(keyOfs); } private: const KeyNode currKeyNode() const { assert( !bucket.isNull() ); - const BtreeBucket<V0> *b = bucket.btree<V0>(); + const BtreeBucket<V> *b = bucket.btree<V>(); return b->keyNode(keyOfs); } }; + template class BtreeCursorImpl<V0>; + template class BtreeCursorImpl<V1>; + + /* class BtreeCursorV1 : public BtreeCursor { public: typedef BucketBasics<V1>::KeyNode KeyNode; @@ -136,17 +146,17 @@ namespace mongo { const BtreeBucket<V1> *b = bucket.btree<V1>(); return b->keyNode(keyOfs); } - }; + };*/ BtreeCursor* BtreeCursor::make( NamespaceDetails *_d, int _idxNo, const IndexDetails& _id, const BSONObj &startKey, const BSONObj &endKey, bool endKeyInclusive, int direction) { - double v = _id.version(); + int v = _id.version(); + if( v == 1 ) + return new BtreeCursorImpl<V1>(_d,_idxNo,_id,startKey,endKey,endKeyInclusive,direction); if( v == 0 ) - return new BtreeCursorV0(_d,_idxNo,_id,startKey,endKey,endKeyInclusive,direction); - if( v == 0.5 ) - return new BtreeCursorV1(_d,_idxNo,_id,startKey,endKey,endKeyInclusive,direction); + return new BtreeCursorImpl<V0>(_d,_idxNo,_id,startKey,endKey,endKeyInclusive,direction); uasserted(14800, str::stream() << "unsupported index version " << v); return 0; } @@ -155,11 +165,11 @@ namespace mongo { NamespaceDetails *_d, int _idxNo, const IndexDetails& _id, const shared_ptr< FieldRangeVector > &_bounds, int _direction ) { - double v = _id.version(); + int v = _id.version(); + if( v == 1 ) + return new BtreeCursorImpl<V1>(_d,_idxNo,_id,_bounds,_direction); if( v == 0 ) - return new BtreeCursorV0(_d,_idxNo,_id,_bounds,_direction); - if( v == 0.5 ) - return new BtreeCursorV1(_d,_idxNo,_id,_bounds,_direction); + return new BtreeCursorImpl<V0>(_d,_idxNo,_id,_bounds,_direction); uasserted(14801, str::stream() << "unsupported index version " << v); return 0; } @@ -213,17 +223,6 @@ namespace mongo { } } - DiskLoc BtreeCursorV0::_locate(const BSONObj& key, const DiskLoc& loc) { - bool found; - return indexDetails.head.btree<V0>()-> - locate(indexDetails, indexDetails.head, key, _ordering, keyOfs, found, loc, _direction); - } - DiskLoc BtreeCursorV1::_locate(const BSONObj& key, const DiskLoc& loc) { - bool found; - return indexDetails.head.btree<V1>()-> - locate(indexDetails, indexDetails.head, startKey, _ordering, keyOfs, found, loc, _direction); - } - void BtreeCursor::init() { if ( _spec.getType() ) { startKey = _spec.getType()->fixKey( startKey ); diff --git a/db/dbcommands.cpp b/db/dbcommands.cpp index d07c86090ca..30eb068bcca 100644 --- a/db/dbcommands.cpp +++ b/db/dbcommands.cpp @@ -15,6 +15,11 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +/* SHARDING: + I believe this file is for mongod only. + See s/commnands_public.cpp for mongos. +*/ + #include "pch.h" #include "query.h" #include "pdfile.h" @@ -1180,7 +1185,8 @@ namespace mongo { virtual bool slaveOk() const { return true; } virtual LockType locktype() const { return READ; } virtual void help( stringstream &help ) const { - help << "{ collStats:\"blog.posts\" , scale : 1 } scale divides sizes e.g. for KB use 1024"; + help << "{ collStats:\"blog.posts\" , scale : 1 } scale divides sizes e.g. for KB use 1024\n" + " avgObjSize - in bytes"; } bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { string ns = dbname + "." + jsobj.firstElement().valuestr(); diff --git a/db/index.cpp b/db/index.cpp index a63796f2aef..17ff4c994aa 100644 --- a/db/index.cpp +++ b/db/index.cpp @@ -26,6 +26,8 @@ namespace mongo { + const int DefaultindexVersionNumber = 0; + template< class V > class IndexInterfaceImpl : public IndexInterface { public: @@ -46,17 +48,18 @@ namespace mongo { virtual DiskLoc addBucket(const IndexDetails& id) { return BtreeBucket<V>::addBucket(id); } + virtual void uassertIfDups(IndexDetails& idx, vector<BSONObj*>& addedKeys, DiskLoc head, DiskLoc self, const Ordering& ordering) { + const BtreeBucket<V> *h = head.btree<V>(); + for( vector<BSONObj*>::iterator i = addedKeys.begin(); i != addedKeys.end(); i++ ) { + bool dup = h->wouldCreateDup(idx, head, V::KeyOwned(**i), ordering, self); + uassert( 11001 , "E11001 duplicate key on update", !dup); + } + } }; IndexInterfaceImpl<V0> iii_v0; IndexInterfaceImpl<V1> iii_v1; - - IndexInterface& IndexDetails::idxInterface() { - double v = version(); - if( v == 0 ) - return iii_v0; - return iii_v1; - } + IndexInterface *IndexDetails::iis[] = { &iii_v0, &iii_v1 }; int removeFromSysIndexes(const char *ns, const char *idxName) { string system_indexes = cc().database()->name + ".system.indexes"; @@ -295,15 +298,23 @@ namespace mongo { if ( plugin ) { fixedIndexObject = plugin->adjustIndexSpec( io ); } - else if ( io["v"].eoo() ) { + + if ( io["v"].eoo() ) { // add "v" if it doesn't exist // if it does - leave whatever value was there // this is for testing and replication - BSONObjBuilder b( io.objsize() + 32 ); + BSONObjBuilder b; b.appendElements( io ); - b.append( "v" , 0 ); + b.append( "v" , DefaultindexVersionNumber ); fixedIndexObject = b.obj(); } + else { + int v = io["v"].Int(); + // note (one day) we may be able to fresh build less versions than we can use + // isASupportedIndexVersionNumber() is what we can use + uassert(10000, str::stream() << "this version of mongod cannot build new indexes of version number " << v, + v == 0 || v == 1); + } return true; } diff --git a/db/index.h b/db/index.h index 03fdaab78ae..398d7a63665 100644 --- a/db/index.h +++ b/db/index.h @@ -34,6 +34,7 @@ namespace mongo { const BSONObj& key, const Ordering &order, bool dupsAllowed, IndexDetails& idx, bool toplevel = true) const = 0; virtual DiskLoc addBucket(const IndexDetails&) = 0; + virtual void uassertIfDups(IndexDetails& idx, vector<BSONObj*>& addedKeys, DiskLoc head, DiskLoc self, const Ordering& ordering) = 0; }; /* Details about a particular index. There is one of these effectively for each object in @@ -64,8 +65,6 @@ namespace mongo { */ DiskLoc info; - IndexInterface& idxInterface(); - /* extract key value from the query object e.g., if key() == { x : 1 }, { x : 70, y : 3 } -> { x : 70 } @@ -99,7 +98,6 @@ namespace mongo { /* true if the specified key is in the index */ bool hasKey(const BSONObj& key); - bool wouldCreateDup(const BSONObj& key, DiskLoc self); // returns name of this index's storage area // database.table.$index @@ -139,8 +137,12 @@ namespace mongo { return io.getStringField("ns"); } - double version() const { - return info.obj()["v"].number(); + int version() const { + BSONElement e = info.obj()["v"]; + if( e.type() == NumberInt ) + return e._numberInt(); + uassert(10000, "index v field should be Integer type", e.eoo()); + return 0; } bool unique() const { @@ -165,6 +167,21 @@ namespace mongo { string toString() const { return info.obj().toString(); } + + /** @return true if supported. supported means we can use the index, including adding new keys. + it may not mean we can build the index version in question: we may not maintain building + of indexes in old formats in the future. + */ + static bool isASupportedIndexVersionNumber(int v) { return v == 0 || v == 1; } + + IndexInterface& idxInterface() { + int v = version(); + dassert( isASupportedIndexVersionNumber(v) ); + return *iis[v&1]; + } + + private: + static IndexInterface *iis[]; }; struct IndexChanges { /*on an update*/ @@ -179,10 +196,8 @@ namespace mongo { void dupCheck(IndexDetails& idx, DiskLoc curObjLoc) { if( added.empty() || !idx.unique() ) return; - for( vector<BSONObj*>::iterator i = added.begin(); i != added.end(); i++ ) { - bool dup = idx.wouldCreateDup(**i, curObjLoc); - uassert( 11001 , "E11001 duplicate key on update", !dup); - } + const Ordering ordering = Ordering::make(idx.keyPattern()); + idx.idxInterface().uassertIfDups(idx, added, idx.head, curObjLoc, ordering); // "E11001 duplicate key on update" } }; diff --git a/db/pdfile.cpp b/db/pdfile.cpp index 09db1984060..539107e15b5 100644 --- a/db/pdfile.cpp +++ b/db/pdfile.cpp @@ -1077,7 +1077,10 @@ namespace mongo { IndexDetails& idx = d->idx(idxNo); BSONObjSetDefaultOrder keys; idx.getKeysFromObject(obj, keys); + if( keys.empty() ) + return; BSONObj order = idx.keyPattern(); + IndexInterface& ii = idx.idxInterface(); Ordering ordering = Ordering::make(order); int n = 0; for ( BSONObjSetDefaultOrder::iterator i=keys.begin(); i != keys.end(); i++ ) { @@ -1086,8 +1089,7 @@ namespace mongo { } assert( !recordLoc.isNull() ); try { - idx.head.btree()->bt_insert(idx.head, recordLoc, - *i, ordering, dupsAllowed, idx); + ii.bt_insert(idx.head, recordLoc, *i, ordering, dupsAllowed, idx); } catch (AssertionException& e) { if( e.getCode() == 10287 && idxNo == d->nIndexes ) { @@ -1129,6 +1131,52 @@ namespace mongo { SortPhaseOne *precalced = 0; + template< class V > + void buildBottomUpPhases2And3(bool dupsAllowed, IndexDetails& idx, BSONObjExternalSorter& sorter, + bool dropDups, list<DiskLoc> &dupsToDrop, CurOp * op, SortPhaseOne *phase1, ProgressMeterHolder &pm, + Timer& t + ) + { + BtreeBuilder<V> btBuilder(dupsAllowed, idx); + BSONObj keyLast; + auto_ptr<BSONObjExternalSorter::Iterator> i = sorter.iterator(); + assert( pm == op->setMessage( "index: (2/3) btree bottom up" , phase1->nkeys , 10 ) ); + while( i->more() ) { + RARELY killCurrentOp.checkForInterrupt(); + BSONObjExternalSorter::Data d = i->next(); + + try { + btBuilder.addKey(d.first, d.second); + } + catch( AssertionException& e ) { + if ( dupsAllowed ) { + // unknow exception?? + throw; + } + + if( e.interrupted() ) + throw; + + if ( ! dropDups ) + throw; + + /* we could queue these on disk, but normally there are very few dups, so instead we + keep in ram and have a limit. + */ + dupsToDrop.push_back(d.second); + uassert( 10092 , "too may dups on index build with dropDups=true", dupsToDrop.size() < 1000000 ); + } + pm.hit(); + } + pm.finished(); + op->setMessage( "index: (3/3) btree-middle" ); + log(t.seconds() > 10 ? 0 : 1 ) << "\t done building bottom layer, going to commit" << endl; + btBuilder.commit(); + if ( btBuilder.getn() != phase1->nkeys && ! dropDups ) { + warning() << "not all entries were added to the index, probably some keys were too large" << endl; + } + } + // throws DBException unsigned long long fastBuildIndex(const char *ns, NamespaceDetails *d, IndexDetails& idx, int idxNo) { CurOp * op = cc().curop(); @@ -1183,46 +1231,12 @@ namespace mongo { list<DiskLoc> dupsToDrop; /* build index --- */ - { - BtreeBuilder btBuilder(dupsAllowed, idx); - BSONObj keyLast; - auto_ptr<BSONObjExternalSorter::Iterator> i = sorter.iterator(); - assert( pm == op->setMessage( "index: (2/3) btree bottom up" , phase1->nkeys , 10 ) ); - while( i->more() ) { - RARELY killCurrentOp.checkForInterrupt(); - BSONObjExternalSorter::Data d = i->next(); - - try { - btBuilder.addKey(d.first, d.second); - } - catch( AssertionException& e ) { - if ( dupsAllowed ) { - // unknow exception?? - throw; - } - - if( e.interrupted() ) - throw; - - if ( ! dropDups ) - throw; - - /* we could queue these on disk, but normally there are very few dups, so instead we - keep in ram and have a limit. - */ - dupsToDrop.push_back(d.second); - uassert( 10092 , "too may dups on index build with dropDups=true", dupsToDrop.size() < 1000000 ); - } - pm.hit(); - } - pm.finished(); - op->setMessage( "index: (3/3) btree-middle" ); - log(t.seconds() > 10 ? 0 : 1 ) << "\t done building bottom layer, going to commit" << endl; - btBuilder.commit(); - if ( btBuilder.getn() != phase1->nkeys && ! dropDups ) { - warning() << "not all entries were added to the index, probably some keys were too large" << endl; - } - } + if( idx.version() == 0 ) + buildBottomUpPhases2And3<V0>(dupsAllowed, idx, sorter, dropDups, dupsToDrop, op, phase1, pm, t); + else if( idx.version() == 1 ) + buildBottomUpPhases2And3<V1>(dupsAllowed, idx, sorter, dropDups, dupsToDrop, op, phase1, pm, t); + else + assert(false); log(1) << "\t fastBuildIndex dupsToDrop:" << dupsToDrop.size() << endl; @@ -1598,8 +1612,11 @@ namespace mongo { if ( addIndex ) { assert( obuf ); BSONObj io((const char *) obuf); - if( !prepareToBuildIndex(io, god, tabletoidxns, tableToIndex, fixedIndexObject ) ) + if( !prepareToBuildIndex(io, god, tabletoidxns, tableToIndex, fixedIndexObject ) ) { + // prepare creates _id itself, or this indicates to fail the build silently (such + // as if index already exists) return DiskLoc(); + } if ( ! fixedIndexObject.isEmpty() ) { obuf = fixedIndexObject.objdata(); diff --git a/db/query.cpp b/db/query.cpp index f6b0a4838cd..b3b184b9917 100644 --- a/db/query.cpp +++ b/db/query.cpp @@ -468,8 +468,7 @@ namespace mongo { _nscanned = _c->nscanned(); if ( _bc ) { if ( _firstMatch.isEmpty() ) { - _firstMatch = _bc->currKeyNode().key.toBson().getOwned(); - _firstMatchKey = Key(_firstMatch.objdata()); + _firstMatch = _bc->currKey().getOwned(); // if not match if ( _query.woCompare( _firstMatch, BSONObj(), false ) ) { setComplete(); @@ -478,7 +477,7 @@ namespace mongo { _gotOne(); } else { - if ( ! _firstMatchKey.woEqual( _bc->currKeyNode().key ) ) { + if ( ! _firstMatch.woEqual( _bc->currKey() ) ) { setComplete(); return; } @@ -534,7 +533,6 @@ namespace mongo { BSONObj _query; BtreeCursor * _bc; BSONObj _firstMatch; - Key _firstMatchKey; ClientCursor::CleanupPointer _cc; ClientCursor::YieldData _yieldData; diff --git a/db/update.cpp b/db/update.cpp index 85c3f9c3b7a..f1b7e368e90 100644 --- a/db/update.cpp +++ b/db/update.cpp @@ -966,8 +966,8 @@ namespace mongo { DiskLoc loc; { IndexDetails& i = d->idx(idIdxNo); - BSONObj key = i.getKeyFromQuery( patternOrig ); - loc = i.head.btree()->findSingle(i, i.head, key); + BSONObj key = i.getKeyFromQuery( patternOrig ); + loc = i.idxInterface().findSingle(i, i.head, key); if( loc.isNull() ) { // no upsert support in _updateById yet, so we are done. return UpdateResult(0, 0, 0); diff --git a/util/log.h b/util/log.h index de8969315f4..dbb32e5702d 100644 --- a/util/log.h +++ b/util/log.h @@ -137,6 +137,9 @@ namespace mongo { virtual Nullstream& operator<<(unsigned) { return *this; } + virtual Nullstream& operator<<(unsigned short) { + return *this; + } virtual Nullstream& operator<<(double) { return *this; } @@ -242,6 +245,7 @@ namespace mongo { Logstream& operator<<(long x) { ss << x; return *this; } Logstream& operator<<(unsigned long x) { ss << x; return *this; } Logstream& operator<<(unsigned x) { ss << x; return *this; } + Logstream& operator<<(unsigned short x){ ss << x; return *this; } Logstream& operator<<(double x) { ss << x; return *this; } Logstream& operator<<(void *x) { ss << x; return *this; } Logstream& operator<<(const void *x) { ss << x; return *this; } |