author     Mathias Stearn <mathias@10gen.com>    2014-04-25 16:09:51 -0400
committer  Mathias Stearn <mathias@10gen.com>    2014-04-25 16:23:19 -0400
commit     e05aaf726c17e9deef9f025f0ffda3cee119bca1 (patch)
tree       4697cd551c6e981cb4e3e28a978bfc5c1e019d83
parent     5fba87f7968359e9da16137a97439d9f458ba62a (diff)
download   mongo-e05aaf726c17e9deef9f025f0ffda3cee119bca1.tar.gz
SERVER-13629 Remove old btree code and DiskLoc::btree/btreemod
-rw-r--r--   docs/errors.md                                      16
-rw-r--r--   src/mongo/SConscript                                 1
-rw-r--r--   src/mongo/db/diskloc.h                               7
-rw-r--r--   src/mongo/db/index/btree_based_access_method.cpp     6
-rw-r--r--   src/mongo/db/index/btree_index_cursor.h              2
-rw-r--r--   src/mongo/db/mongod.vcxproj                          2
-rw-r--r--   src/mongo/db/mongod.vcxproj.filters                  6
-rw-r--r--   src/mongo/db/structure/btree/btree.cpp            1993
-rw-r--r--   src/mongo/db/structure/btree/btree.h              1094
-rw-r--r--   src/mongo/dbtests/indexupdatetests.cpp               1
-rw-r--r--   src/mongo/dbtests/test.vcxproj                       2
-rwxr-xr-x   src/mongo/dbtests/test.vcxproj.filters               6
12 files changed, 4 insertions, 3132 deletions
diff --git a/docs/errors.md b/docs/errors.md
index 3f107a218f2..bd75626f5d4 100644
--- a/docs/errors.md
+++ b/docs/errors.md
@@ -189,22 +189,6 @@ src/mongo/client/syncclusterconnection.cpp
* 8020 [code](http://github.com/mongodb/mongo/blob/master/src/mongo/client/syncclusterconnection.cpp#L283) SyncClusterConnection::remove prepare failed:
-src/mongo/db/btree.cpp
-----
-* 10281 [code](http://github.com/mongodb/mongo/blob/master/src/mongo/db/btree.cpp#L143) assert is misdefined
-* 10282 [code](http://github.com/mongodb/mongo/blob/master/src/mongo/db/btree.cpp#L324) n==0 in btree popBack()
-* 10283 [code](http://github.com/mongodb/mongo/blob/master/src/mongo/db/btree.cpp#L331) rchild not null in btree popBack()
-* 10285 [code](http://github.com/mongodb/mongo/blob/master/src/mongo/db/btree.cpp#L1771) _insert: reuse key but lchild is not null
-* 10286 [code](http://github.com/mongodb/mongo/blob/master/src/mongo/db/btree.cpp#L1772) _insert: reuse key but rchild is not null
-* 10287 [code](http://github.com/mongodb/mongo/blob/master/src/mongo/db/btree.cpp#L84) btree: key+recloc already in index
-* 15898 [code](http://github.com/mongodb/mongo/blob/master/src/mongo/db/btree.cpp#L42) error in index possibly corruption consider repairing
-
-
-src/mongo/db/btree.h
-----
-* 13000 [code](http://github.com/mongodb/mongo/blob/master/src/mongo/db/btree.h#L361) invalid keyNode: " + BSON( "i" << i << "n
-
-
src/mongo/db/btreebuilder.cpp
----
* 10288 [code](http://github.com/mongodb/mongo/blob/master/src/mongo/db/btreebuilder.cpp#L76) bad key order in BtreeBuilder - server internal error
diff --git a/src/mongo/SConscript b/src/mongo/SConscript
index 375de4bebde..b61c29a7e61 100644
--- a/src/mongo/SConscript
+++ b/src/mongo/SConscript
@@ -553,7 +553,6 @@ serverOnlyFiles = [ "db/curop.cpp",
"db/storage/mmap_v1/dur_journal.cpp",
"db/storage/mmap_v1/dur_transaction.cpp",
"db/introspect.cpp",
- "db/structure/btree/btree.cpp",
"db/structure/btree/btree_interface.cpp",
"db/structure/btree/btree_logic.cpp",
"db/structure/btree/btree_stats.cpp",
diff --git a/src/mongo/db/diskloc.h b/src/mongo/db/diskloc.h
index cebb2cc78d9..7ff3786a2e0 100644
--- a/src/mongo/db/diskloc.h
+++ b/src/mongo/db/diskloc.h
@@ -169,13 +169,6 @@ namespace mongo {
*/
BSONObj obj() const; // TODO(ERH): remove
- template< class V >
- const BtreeBucket<V> * btree() const; // TODO(ERH): remove
-
- // Explicitly signals we are writing and casts away const
- template< class V >
- BtreeBucket<V> * btreemod() const; // TODO(ERH): remove
-
/// members for Sorter
struct SorterDeserializeSettings {}; // unused
void serializeForSorter(BufBuilder& buf) const { buf.appendStruct(*this); }
diff --git a/src/mongo/db/index/btree_based_access_method.cpp b/src/mongo/db/index/btree_based_access_method.cpp
index ebd933cb0ac..092e5f8d17f 100644
--- a/src/mongo/db/index/btree_based_access_method.cpp
+++ b/src/mongo/db/index/btree_based_access_method.cpp
@@ -43,11 +43,14 @@
#include "mongo/db/pdfile_private.h"
#include "mongo/db/repl/is_master.h"
#include "mongo/db/repl/rs.h"
+#include "mongo/db/server_parameters.h"
#include "mongo/db/structure/btree/btree_interface.h"
#include "mongo/util/progress_meter.h"
namespace mongo {
+ MONGO_EXPORT_SERVER_PARAMETER(failIndexKeyTooLong, bool, true);
+
BtreeBasedAccessMethod::BtreeBasedAccessMethod(IndexCatalogEntry* btreeState)
: _btreeState(btreeState), _descriptor(btreeState->descriptor()) {
@@ -58,9 +61,6 @@ namespace mongo {
_descriptor->version()));
}
- // This is currently in btree.cpp. Once that file is retired it'll move here.
- extern bool failIndexKeyTooLong;
-
// Find the keys for obj, put them in the tree pointing to loc
Status BtreeBasedAccessMethod::insert(const BSONObj& obj,
const DiskLoc& loc,
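The hunk above moves the failIndexKeyTooLong definition into btree_based_access_method.cpp via MONGO_EXPORT_SERVER_PARAMETER, replacing the extern declaration that pointed at the old definition in the now-deleted btree.cpp. As a rough, non-authoritative sketch of how the flag is consumed (it mirrors the keyTooLongAssert() helper that the removed btree.cpp defines further down in this patch; the name keyTooLongCheck is a hypothetical stand-in):

    // Sketch only: primaries enforce the key-size limit unless the operator has
    // turned the failIndexKeyTooLong server parameter off; secondaries never fail.
    void keyTooLongCheck(int code, const std::string& msg) {
        if (!isMaster(NULL))        // on secondaries, keep the old permissive behavior
            return;
        if (!failIndexKeyTooLong)   // operator disabled the check via the parameter
            return;
        uasserted(code, msg);
    }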
diff --git a/src/mongo/db/index/btree_index_cursor.h b/src/mongo/db/index/btree_index_cursor.h
index 2f9ab2ae494..e77116a05e3 100644
--- a/src/mongo/db/index/btree_index_cursor.h
+++ b/src/mongo/db/index/btree_index_cursor.h
@@ -46,7 +46,7 @@ namespace mongo {
bool isEOF() const;
/**
- * Called from btree.cpp when we're about to delete a Btree bucket.
+ * Called from btree_logic.cpp when we're about to delete a Btree bucket.
*/
static void aboutToDeleteBucket(const DiskLoc& bucket);
diff --git a/src/mongo/db/mongod.vcxproj b/src/mongo/db/mongod.vcxproj
index 4a642721e99..516ea3485f3 100644
--- a/src/mongo/db/mongod.vcxproj
+++ b/src/mongo/db/mongod.vcxproj
@@ -4587,7 +4587,6 @@ cscript //Nologo "$(ProjectDir)..\shell\createCPPfromJavaScriptFiles.js" "$(Proj
<ClCompile Include="stats\counters.cpp" />
<ClCompile Include="stats\snapshots.cpp" />
<ClCompile Include="stats\top.cpp" />
- <ClCompile Include="btree.cpp" />
<ClCompile Include="btreecursor.cpp" />
<ClCompile Include="repl\health.cpp" />
<ClCompile Include="repl\rs.cpp" />
@@ -4890,7 +4889,6 @@ cscript //Nologo "$(ProjectDir)..\shell\createCPPfromJavaScriptFiles.js" "$(Proj
<ClInclude Include="..\scripting\engine_v8.h" />
<ClInclude Include="..\scripting\v8_db.h" />
<ClInclude Include="..\scripting\v8_utils.h" />
- <ClInclude Include="btree.h" />
<ClInclude Include="repl\health.h" />
<ClInclude Include="repl\rs.h" />
<ClInclude Include="repl\rs_config.h" />
diff --git a/src/mongo/db/mongod.vcxproj.filters b/src/mongo/db/mongod.vcxproj.filters
index e7d7c998e19..c1445e3635e 100644
--- a/src/mongo/db/mongod.vcxproj.filters
+++ b/src/mongo/db/mongod.vcxproj.filters
@@ -485,9 +485,6 @@
<ClCompile Include="..\util\util.cpp">
<Filter>util\Source Files</Filter>
</ClCompile>
- <ClCompile Include="btree.cpp">
- <Filter>db\Source Files\a to d</Filter>
- </ClCompile>
<ClCompile Include="durop.cpp">
<Filter>db\Source Files\a to d</Filter>
</ClCompile>
@@ -2786,9 +2783,6 @@
<ClInclude Include="durop.h">
<Filter>db\Header Files\a to d</Filter>
</ClInclude>
- <ClInclude Include="btree.h">
- <Filter>db\Header Files\a to d</Filter>
- </ClInclude>
<ClInclude Include="client.h">
<Filter>db\Header Files\a to d</Filter>
</ClInclude>
diff --git a/src/mongo/db/structure/btree/btree.cpp b/src/mongo/db/structure/btree/btree.cpp
deleted file mode 100644
index 4537f73d8d9..00000000000
--- a/src/mongo/db/structure/btree/btree.cpp
+++ /dev/null
@@ -1,1993 +0,0 @@
-// btree.cpp
-
-/**
-* Copyright (C) 2008 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#include "mongo/pch.h"
-
-#include "mongo/db/structure/btree/btree.h"
-
-#include "mongo/db/structure/btree/btree_stats.h"
-#include "mongo/db/client.h"
-#include "mongo/db/clientcursor.h"
-#include "mongo/db/curop-inl.h"
-#include "mongo/db/db.h"
-#include "mongo/db/dbhelpers.h"
-#include "mongo/db/storage/mmap_v1/dur_commitjob.h"
-#include "mongo/db/index/btree_index_cursor.h" // for aboutToDeleteBucket
-#include "mongo/db/json.h"
-#include "mongo/db/kill_current_op.h"
-#include "mongo/db/pdfile.h"
-#include "mongo/db/repl/is_master.h"
-#include "mongo/db/server_parameters.h"
-#include "mongo/db/stats/counters.h"
-#include "mongo/server.h"
-#include "mongo/util/startup_test.h"
-
-namespace mongo {
-
- BOOST_STATIC_ASSERT( Record::HeaderSize == 16 );
- BOOST_STATIC_ASSERT( Record::HeaderSize + BtreeData_V1::BucketSize == 8192 );
-
- MONGO_EXPORT_SERVER_PARAMETER(failIndexKeyTooLong, bool, true);
-
-
- void keyTooLongAssert( int code, const string& msg ) {
-
- if ( !isMaster( NULL ) ) {
- // on secondaries, we do old behavior
- return;
- }
-
- if ( !failIndexKeyTooLong ) {
- // user doesn't want this
- return;
- }
-
- uasserted( code, msg );
- }
-
- NOINLINE_DECL void checkFailed(unsigned line) {
- static time_t last;
- if( time(0) - last >= 10 ) {
- msgasserted(15898, str::stream() << "error in index; possibly corruption. "
- "See http://dochub.mongodb.org/core/data-recovery" << line);
- }
- }
-
- /** data check. like assert, but gives a reasonable error message to the user. */
-#define check(expr) if(!(expr) ) { checkFailed(__LINE__); }
-
-#define VERIFYTHISLOC dassert( thisLoc.btree<V>() == this );
-
- template< class Loc >
- __KeyNode<Loc> & __KeyNode<Loc>::writing() const {
- return *getDur().writing( const_cast< __KeyNode<Loc> * >( this ) );
- }
-
- // BucketBasics::lowWaterMark()
- //
- // We define this value as the maximum number of bytes such that, if we have
- // fewer than this many bytes, we must be able to either merge with or receive
- // keys from any neighboring node. If our utilization goes below this value we
- // know we can bring up the utilization with a simple operation. Ignoring the
- // 90/10 split policy which is sometimes employed and our 'unused' nodes, this
- // is a lower bound on bucket utilization for non root buckets.
- //
- // Note that the exact value here depends on the implementation of
- // rebalancedSeparatorPos(). The conditions for lowWaterMark - 1 are as
- // follows: We know we cannot merge with the neighbor, so the total data size
- // for us, the neighbor, and the separator must be at least
- // BtreeBucket<V>::bodySize() + 1. We must be able to accept one key of any
- // allowed size, so our size plus storage for that additional key must be
- // <= BtreeBucket<V>::bodySize() / 2. This way, with the extra key we'll have a
- // new bucket data size < half the total data size and by the implementation
- // of rebalancedSeparatorPos() the key must be added.
-
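    // Editorial sketch, not part of the patch: the bound described above can be read as
    //   (lowWaterMark - 1) + maxKeyBytes + sizeof(_KeyNode) <= BtreeBucket<V>::bodySize() / 2
    // which, solved for lowWaterMark, gives roughly the formula below. The names
    // bodySize/maxKeyBytes/keyNodeSize are placeholders; the real constant lives in btree.h.
    inline int approxLowWaterMark(int bodySize, int maxKeyBytes, int keyNodeSize) {
        return bodySize / 2 - maxKeyBytes - keyNodeSize + 1;
    }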
- static const int split_debug = 0;
- static const int insert_debug = 0;
-
- /**
- * this error is ok/benign when doing a background indexing -- that logic in pdfile checks explicitly
- * for the 10287 error code.
- */
- static void alreadyInIndex() {
- // we don't use massert() here as that does logging and this is 'benign' - see catches in _indexRecord()
- throw MsgAssertionException(10287, "btree: key+recloc already in index");
- }
-
- /* BucketBasics --------------------------------------------------- */
-
- template< class V >
- void BucketBasics<V>::assertWritable() {
- if (storageGlobalParams.dur)
- dur::assertAlreadyDeclared(this, V::BucketSize);
- }
-
- template< class V >
- string BtreeBucket<V>::bucketSummary() const {
- stringstream ss;
- ss << " Bucket info:" << endl;
- ss << " n: " << this->n << endl;
- ss << " parent: " << this->parent.toString() << endl;
- ss << " nextChild: " << this->nextChild.toString() << endl;
- ss << " flags:" << this->flags << endl;
- ss << " emptySize: " << this->emptySize << " topSize: " << this->topSize << endl;
- return ss.str();
- }
-
- template< class V >
- int BucketBasics<V>::Size() const {
- return V::BucketSize;
- }
-
- template< class V >
- void BucketBasics<V>::_shape(int level, stringstream& ss) const {
- for ( int i = 0; i < level; i++ ) ss << ' ';
- ss << "*[" << this->n << "]\n";
- for ( int i = 0; i < this->n; i++ ) {
- if ( !k(i).prevChildBucket.isNull() ) {
- DiskLoc ll = k(i).prevChildBucket;
- ll.btree<V>()->_shape(level+1,ss);
- }
- }
- if ( !this->nextChild.isNull() ) {
- DiskLoc ll = this->nextChild;
- ll.btree<V>()->_shape(level+1,ss);
- }
- }
-
- int bt_fv=0;
- int bt_dmp=0;
-
- template< class V >
- void BtreeBucket<V>::dumpTree(const DiskLoc &thisLoc, const BSONObj &order) const {
- bt_dmp=1;
- fullValidate(thisLoc, order);
- bt_dmp=0;
- }
-
- template< class V >
- long long BtreeBucket<V>::fullValidate(const DiskLoc& thisLoc, const BSONObj &order, long long *unusedCount, bool strict, unsigned depth) const {
- killCurrentOp.checkForInterrupt();
- this->assertValid(order, true);
-
- if ( bt_dmp ) {
- log() << thisLoc.toString() << ' ';
- ((BtreeBucket *) this)->dump(depth);
- }
-
- // keycount
- long long kc = 0;
-
- for ( int i = 0; i < this->n; i++ ) {
- const _KeyNode& kn = this->k(i);
-
- if ( kn.isUsed() ) {
- kc++;
- }
- else {
- if ( unusedCount ) {
- ++( *unusedCount );
- }
- }
- if ( !kn.prevChildBucket.isNull() ) {
- DiskLoc left = kn.prevChildBucket;
- const BtreeBucket *b = left.btree<V>();
- if ( strict ) {
- verify( b->parent == thisLoc );
- }
- else {
- wassert( b->parent == thisLoc );
- }
- kc += b->fullValidate(kn.prevChildBucket, order, unusedCount, strict, depth+1);
- }
- }
- if ( !this->nextChild.isNull() ) {
- DiskLoc ll = this->nextChild;
- const BtreeBucket *b = ll.btree<V>();
- if ( strict ) {
- verify( b->parent == thisLoc );
- }
- else {
- wassert( b->parent == thisLoc );
- }
- kc += b->fullValidate(this->nextChild, order, unusedCount, strict, depth+1);
- }
-
- return kc;
- }
-
- int nDumped = 0;
-
- template< class V >
- void BucketBasics<V>::assertValid(const Ordering &order, bool force) const {
- if ( !debug && !force )
- return;
- {
- int foo = this->n;
- wassert( foo >= 0 && this->n < Size() );
- foo = this->emptySize;
- wassert( foo >= 0 && this->emptySize < V::BucketSize );
- wassert( this->topSize >= this->n && this->topSize <= V::BucketSize );
- }
-
- // this is very slow so don't do often
- {
- static int _k;
- if( ++_k % 128 )
- return;
- }
-
- DEV {
- // slow:
- for ( int i = 0; i < this->n-1; i++ ) {
- Key k1 = keyNode(i).key;
- Key k2 = keyNode(i+1).key;
- int z = k1.woCompare(k2, order); //OK
- if ( z > 0 ) {
- out() << "ERROR: btree key order corrupt. Keys:" << endl;
- if ( ++nDumped < 5 ) {
- for ( int j = 0; j < this->n; j++ ) {
- out() << " " << keyNode(j).key.toString() << endl;
- }
- ((BtreeBucket<V> *) this)->dump();
- }
- wassert(false);
- break;
- }
- else if ( z == 0 ) {
- if ( !(k(i).recordLoc < k(i+1).recordLoc) ) {
- out() << "ERROR: btree key order corrupt (recordloc's wrong):" << endl;
- out() << " k(" << i << ")" << keyNode(i).key.toString() << " RL:" << k(i).recordLoc.toString() << endl;
- out() << " k(" << i+1 << ")" << keyNode(i+1).key.toString() << " RL:" << k(i+1).recordLoc.toString() << endl;
- wassert( k(i).recordLoc < k(i+1).recordLoc );
- }
- }
- }
- }
- else {
- //faster:
- if ( this->n > 1 ) {
- Key k1 = keyNode(0).key;
- Key k2 = keyNode(this->n-1).key;
- int z = k1.woCompare(k2, order);
- //wassert( z <= 0 );
- if ( z > 0 ) {
- problem() << "btree keys out of order" << '\n';
- ONCE {
- ((BtreeBucket<V> *) this)->dump();
- }
- verify(false);
- }
- }
- }
- }
-
- template< class V >
- inline void BucketBasics<V>::markUnused(int keypos) {
- verify( keypos >= 0 && keypos < this->n );
- k(keypos).setUnused();
- }
-
- template< class V >
- inline int BucketBasics<V>::totalDataSize() const {
- return (int) (Size() - (this->data-(char*)this));
- }
-
- template< class V >
- void BucketBasics<V>::init() {
- this->_init();
- this->parent.Null();
- this->nextChild.Null();
- this->flags = Packed;
- this->n = 0;
- this->emptySize = totalDataSize();
- this->topSize = 0;
- }
-
- /** see _alloc */
- template< class V >
- inline void BucketBasics<V>::_unalloc(int bytes) {
- this->topSize -= bytes;
- this->emptySize += bytes;
- }
-
- /**
- * we allocate space from the end of the buffer for data.
- * the keynodes grow from the front.
- */
- template< class V >
- inline int BucketBasics<V>::_alloc(int bytes) {
- verify( this->emptySize >= bytes );
- this->topSize += bytes;
- this->emptySize -= bytes;
- int ofs = totalDataSize() - this->topSize;
- verify( ofs > 0 );
- return ofs;
- }
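    // Editorial sketch, not part of the patch: a standalone restatement of the scheme
    // above. _KeyNode headers grow up from the front of the data area while key bytes
    // are carved off the end, so the returned offset shrinks as topSize grows and
    // emptySize is the gap left in the middle.
    inline int allocFromEnd(int totalDataSize, int& topSize, int& emptySize, int bytes) {
        topSize   += bytes;
        emptySize -= bytes;
        return totalDataSize - topSize;   // offset of the newly reserved key bytes
    }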
-
- template< class V >
- void BucketBasics<V>::_delKeyAtPos(int keypos, bool mayEmpty) {
- // TODO This should be keypos < n
- verify( keypos >= 0 && keypos <= this->n );
- verify( childForPos(keypos).isNull() );
- // TODO audit cases where nextChild is null
- verify( ( mayEmpty && this->n > 0 ) || this->n > 1 || this->nextChild.isNull() );
- this->emptySize += sizeof(_KeyNode);
- this->n--;
- for ( int j = keypos; j < this->n; j++ )
- k(j) = k(j+1);
- setNotPacked();
- }
-
- /**
- * pull rightmost key from the bucket. this version requires its right child to be null so it
- * does not bother returning that value.
- */
- template< class V >
- void BucketBasics<V>::popBack(DiskLoc& recLoc, Key &key) {
- massert( 10282 , "n==0 in btree popBack()", this->n > 0 );
- verify( k(this->n-1).isUsed() ); // no unused skipping in this function at this point - btreebuilder doesn't require that
- KeyNode kn = keyNode(this->n-1);
- recLoc = kn.recordLoc;
- key.assign(kn.key);
- int keysize = kn.key.dataSize();
-
- massert( 10283 , "rchild not null in btree popBack()", this->nextChild.isNull());
-
- // weirdly, we also put the rightmost down pointer in nextchild, even when bucket isn't full.
- this->nextChild = kn.prevChildBucket;
-
- this->n--;
- // This is risky because the key we are returning points to this unalloc'ed memory,
- // and we are assuming that the last key points to the last allocated
- // bson region.
- this->emptySize += sizeof(_KeyNode);
- _unalloc(keysize);
- }
-
- /** add a key. must be > all existing. be careful to set next ptr right. */
- template< class V >
- bool BucketBasics<V>::_pushBack(const DiskLoc recordLoc, const Key& key, const Ordering &order, const DiskLoc prevChild) {
- int bytesNeeded = key.dataSize() + sizeof(_KeyNode);
- if ( bytesNeeded > this->emptySize )
- return false;
- verify( bytesNeeded <= this->emptySize );
- if( this->n ) {
- const KeyNode klast = keyNode(this->n-1);
- if( klast.key.woCompare(key, order) > 0 ) {
- log() << "btree bucket corrupt? consider reindexing or running validate command" << endl;
- log() << " klast: " << keyNode(this->n-1).key.toString() << endl;
- log() << " key: " << key.toString() << endl;
- DEV klast.key.woCompare(key, order);
- verify(false);
- }
- }
- this->emptySize -= sizeof(_KeyNode);
- _KeyNode& kn = k(this->n++);
- kn.prevChildBucket = prevChild;
- kn.recordLoc = recordLoc;
- kn.setKeyDataOfs( (short) _alloc(key.dataSize()) );
- short ofs = kn.keyDataOfs();
- char *p = dataAt(ofs);
- memcpy(p, key.data(), key.dataSize());
-
- return true;
- }
-
- /* durability note
-       we do separate intent declarations herein. arguably one could just declare
-       the whole bucket given we do group commits. this is something we could investigate
-       later as to what is faster under what situations.
- */
- /** insert a key in a bucket with no complexity -- no splits required
-        @return false if a split is required.
- */
- template< class V >
- bool BucketBasics<V>::basicInsert(const DiskLoc thisLoc, int &keypos, const DiskLoc recordLoc, const Key& key, const Ordering &order) const {
- check( this->n < 1024 );
- check( keypos >= 0 && keypos <= this->n );
- int bytesNeeded = key.dataSize() + sizeof(_KeyNode);
- if ( bytesNeeded > this->emptySize ) {
- _pack(thisLoc, order, keypos);
- if ( bytesNeeded > this->emptySize )
- return false;
- }
-
- BucketBasics *b;
- {
- const char *p = (const char *) &k(keypos);
- const char *q = (const char *) &k(this->n+1);
- // declare that we will write to [k(keypos),k(n)]
- // todo: this writes a medium amount to the journal. we may want to add a verb "shift" to the redo log so
- // we can log a very small amount.
- b = (BucketBasics*) getDur().writingAtOffset((void *) this, p-(char*)this, q-p);
-
- // e.g. n==3, keypos==2
- // 1 4 9
- // ->
- // 1 4 _ 9
- for ( int j = this->n; j > keypos; j-- ) // make room
- b->k(j) = b->k(j-1);
- }
-
- getDur().declareWriteIntent(&b->emptySize, sizeof(this->emptySize)+sizeof(this->topSize)+sizeof(this->n));
- b->emptySize -= sizeof(_KeyNode);
- b->n++;
-
- // This _KeyNode was marked for writing above.
- _KeyNode& kn = b->k(keypos);
- kn.prevChildBucket.Null();
- kn.recordLoc = recordLoc;
- kn.setKeyDataOfs((short) b->_alloc(key.dataSize()) );
- char *p = b->dataAt(kn.keyDataOfs());
- getDur().declareWriteIntent(p, key.dataSize());
- memcpy(p, key.data(), key.dataSize());
- return true;
- }
-
- /**
- * With this implementation, refPos == 0 disregards effect of refPos.
- * index > 0 prevents creation of an empty bucket.
- */
- template< class V >
- bool BucketBasics<V>::mayDropKey( int index, int refPos ) const {
- return index > 0 && ( index != refPos ) && k( index ).isUnused() && k( index ).prevChildBucket.isNull();
- }
-
- template< class V >
- int BucketBasics<V>::packedDataSize( int refPos ) const {
- if ( this->flags & Packed ) {
- return V::BucketSize - this->emptySize - headerSize();
- }
- int size = 0;
- for( int j = 0; j < this->n; ++j ) {
- if ( mayDropKey( j, refPos ) ) {
- continue;
- }
- size += keyNode( j ).key.dataSize() + sizeof( _KeyNode );
- }
- return size;
- }
-
- /**
- * when we delete things we just leave empty space until the node is
- * full and then we repack it.
- */
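    // Editorial note, not part of the patch: deletions only mark space as reclaimable;
    // the bytes are compacted lazily. basicInsert() above shows the trigger: if a new
    // key does not fit, _pack() is called once and the fit is re-checked before the
    // caller falls back to splitting the bucket.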
- template< class V >
- void BucketBasics<V>::_pack(const DiskLoc thisLoc, const Ordering &order, int &refPos) const {
- if ( this->flags & Packed )
- return;
-
- VERIFYTHISLOC
-
- /** TODO perhaps this can be optimized. for example if packing does no write, we can skip intent decl.
-            an empirical approach is probably better than just adding new code: perhaps the bucket would need
-            declaration anyway within the group commit interval, in which case we would just be adding
-            code and complexity without benefit.
- */
- thisLoc.btreemod<V>()->_packReadyForMod(order, refPos);
- }
-
- /** version when write intent already declared */
- template< class V >
- void BucketBasics<V>::_packReadyForMod( const Ordering &order, int &refPos ) {
- assertWritable();
-
- if ( this->flags & Packed )
- return;
-
- int tdz = totalDataSize();
- char temp[V::BucketSize];
- int ofs = tdz;
- this->topSize = 0;
- int i = 0;
- for ( int j = 0; j < this->n; j++ ) {
- if( mayDropKey( j, refPos ) ) {
- continue; // key is unused and has no children - drop it
- }
- if( i != j ) {
- if ( refPos == j ) {
- refPos = i; // i < j so j will never be refPos again
- }
- k( i ) = k( j );
- }
- short ofsold = k(i).keyDataOfs();
- int sz = keyNode(i).key.dataSize();
- ofs -= sz;
- this->topSize += sz;
- memcpy(temp+ofs, dataAt(ofsold), sz);
- k(i).setKeyDataOfsSavingUse( ofs );
- ++i;
- }
- if ( refPos == this->n ) {
- refPos = i;
- }
- this->n = i;
- int dataUsed = tdz - ofs;
- memcpy(this->data + ofs, temp + ofs, dataUsed);
-
- // assertWritable();
- // TEMP TEST getDur().declareWriteIntent(this, sizeof(*this));
-
- this->emptySize = tdz - dataUsed - this->n * sizeof(_KeyNode);
- {
- int foo = this->emptySize;
- verify( foo >= 0 );
- }
-
- setPacked();
-
- assertValid( order );
- }
-
- template< class V >
- inline void BucketBasics<V>::truncateTo(int N, const Ordering &order, int &refPos) {
- verify( Lock::somethingWriteLocked() );
- assertWritable();
- this->n = N;
- setNotPacked();
- _packReadyForMod( order, refPos );
- }
-
- /**
- * In the standard btree algorithm, we would split based on the
- * existing keys _and_ the new key. But that's more work to
- * implement, so we split the existing keys and then add the new key.
- *
- * There are several published heuristic algorithms for doing splits,
- * but basically what you want are (1) even balancing between the two
- * sides and (2) a small split key so the parent can have a larger
- * branching factor.
- *
- * We just have a simple algorithm right now: if a key includes the
- * halfway point (or 10% way point) in terms of bytes, split on that key;
- * otherwise split on the key immediately to the left of the halfway
- * point (or 10% point).
- *
- * This function is expected to be called on a packed bucket.
- */
- template< class V >
- int BucketBasics<V>::splitPos( int keypos ) const {
- verify( this->n > 2 );
- int split = 0;
- int rightSize = 0;
- // when splitting a btree node, if the new key is greater than all the other keys, we should not do an even split, but a 90/10 split.
- // see SERVER-983
- // TODO I think we only want to do the 90% split on the rhs node of the tree.
- int rightSizeLimit = ( this->topSize + sizeof( _KeyNode ) * this->n ) / ( keypos == this->n ? 10 : 2 );
- for( int i = this->n - 1; i > -1; --i ) {
- rightSize += keyNode( i ).key.dataSize() + sizeof( _KeyNode );
- if ( rightSize > rightSizeLimit ) {
- split = i;
- break;
- }
- }
- // safeguards - we must not create an empty bucket
- if ( split < 1 ) {
- split = 1;
- }
- else if ( split > this->n - 2 ) {
- split = this->n - 2;
- }
-
- return split;
- }
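    // Editorial sketch, not part of the patch: the same walk as splitPos() above over
    // plain key sizes, to make the 90/10 rule concrete (assumes <vector> is available).
    // With ten 100-byte keys and a rightmost insert (keypos == n) the limit is ~10% of
    // the data, so the split lands at n - 2; an interior insert uses ~50% and splits
    // near the middle of the bucket.
    inline int approxSplitPos(const std::vector<int>& keySizes, int keypos, int keyNodeSize) {
        const int n = static_cast<int>(keySizes.size());
        int total = 0;
        for (int i = 0; i < n; ++i) total += keySizes[i] + keyNodeSize;
        const int rightSizeLimit = total / (keypos == n ? 10 : 2);
        int split = 0, rightSize = 0;
        for (int i = n - 1; i >= 0; --i) {
            rightSize += keySizes[i] + keyNodeSize;
            if (rightSize > rightSizeLimit) { split = i; break; }
        }
        if (split < 1) split = 1;                 // never leave the left bucket empty
        else if (split > n - 2) split = n - 2;    // never leave the right bucket empty
        return split;
    }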
-
- template< class V >
- void BucketBasics<V>::reserveKeysFront( int nAdd ) {
- verify( this->emptySize >= int( sizeof( _KeyNode ) * nAdd ) );
- this->emptySize -= sizeof( _KeyNode ) * nAdd;
- for( int i = this->n - 1; i > -1; --i ) {
- k( i + nAdd ) = k( i );
- }
- this->n += nAdd;
- }
-
- template< class V >
- void BucketBasics<V>::setKey( int i, const DiskLoc recordLoc, const Key &key, const DiskLoc prevChildBucket ) {
- _KeyNode &kn = k( i );
- kn.recordLoc = recordLoc;
- kn.prevChildBucket = prevChildBucket;
- short ofs = (short) _alloc( key.dataSize() );
- kn.setKeyDataOfs( ofs );
- char *p = dataAt( ofs );
- memcpy( p, key.data(), key.dataSize() );
- }
-
- template< class V >
- void BucketBasics<V>::dropFront( int nDrop, const Ordering &order, int &refpos ) {
- for( int i = nDrop; i < this->n; ++i ) {
- k( i - nDrop ) = k( i );
- }
- this->n -= nDrop;
- setNotPacked();
- _packReadyForMod( order, refpos );
- }
-
- /* - BtreeBucket --------------------------------------------------- */
-
- /** @return largest key in the subtree. */
- template< class V >
- void BtreeBucket<V>::findLargestKey(const DiskLoc& thisLoc, DiskLoc& largestLoc, int& largestKey) {
- DiskLoc loc = thisLoc;
- while ( 1 ) {
- const BtreeBucket *b = loc.btree<V>();
- if ( !b->nextChild.isNull() ) {
- loc = b->nextChild;
- continue;
- }
-
- verify(b->n>0);
- largestLoc = loc;
- largestKey = b->n-1;
-
- break;
- }
- }
-
- /**
- * NOTE Currently the Ordering implementation assumes a compound index will
- * not have more keys than an unsigned variable has bits. The same
- * assumption is used in the implementation below with respect to the 'mask'
- * variable.
- *
- * @param l a regular bsonobj
- * @param rBegin composed partly of an existing bsonobj, and the remaining keys are taken from a vector of elements that frequently changes
- *
- * see
- * jstests/index_check6.js
- * https://jira.mongodb.org/browse/SERVER-371
- */
- /* static */
- template< class V >
- int BtreeBucket<V>::customBSONCmp( const BSONObj &l, const BSONObj &rBegin, int rBeginLen, bool rSup, const vector< const BSONElement * > &rEnd, const vector< bool > &rEndInclusive, const Ordering &o, int direction ) {
- BSONObjIterator ll( l );
- BSONObjIterator rr( rBegin );
- vector< const BSONElement * >::const_iterator rr2 = rEnd.begin();
- vector< bool >::const_iterator inc = rEndInclusive.begin();
- unsigned mask = 1;
- for( int i = 0; i < rBeginLen; ++i, mask <<= 1 ) {
- BSONElement lll = ll.next();
- BSONElement rrr = rr.next();
- ++rr2;
- ++inc;
-
- int x = lll.woCompare( rrr, false );
- if ( o.descending( mask ) )
- x = -x;
- if ( x != 0 )
- return x;
- }
- if ( rSup ) {
- return -direction;
- }
- for( ; ll.more(); mask <<= 1 ) {
- BSONElement lll = ll.next();
- BSONElement rrr = **rr2;
- ++rr2;
- int x = lll.woCompare( rrr, false );
- if ( o.descending( mask ) )
- x = -x;
- if ( x != 0 )
- return x;
- if ( !*inc ) {
- return -direction;
- }
- ++inc;
- }
- return 0;
- }
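    // Editorial note, not part of the patch: 'mask' advances one bit per index field,
    // so o.descending(mask) flips the comparison for fields declared descending. For a
    // compound index such as { a: 1, b: -1 }, bit 0 leaves a's comparison as-is and
    // bit 1 negates b's, which is why the NOTE above assumes a compound index has no
    // more fields than an unsigned has bits.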
-
- template< class V >
- bool BtreeBucket<V>::exists(const IndexCatalogEntry* btreeState,
- const DiskLoc &thisLoc,
- const Key& key ) const {
- int pos;
- bool found;
- DiskLoc b = locate(btreeState, thisLoc, key, pos, found, minDiskLoc);
-
- // skip unused keys
- while ( 1 ) {
- if( b.isNull() )
- break;
- const BtreeBucket *bucket = b.btree<V>();
- const _KeyNode& kn = bucket->k(pos);
- if ( kn.isUsed() )
- return bucket->keyAt(pos).woEqual(key);
- b = bucket->advance(b, pos, 1, "BtreeBucket<V>::exists");
- }
- return false;
- }
-
- template< class V >
- bool BtreeBucket<V>::wouldCreateDup(const IndexCatalogEntry* btreeState,
- const DiskLoc& thisLoc,
- const Key& key,
- const DiskLoc& self) const {
- int pos;
- bool found;
- DiskLoc b = locate(btreeState, thisLoc, key, pos, found, minDiskLoc);
-
- while ( !b.isNull() ) {
- // we skip unused keys
- Record* record = btreeState->recordStore()->recordFor( b );
- const BtreeBucket *bucket = asVersion( record );
- const _KeyNode& kn = bucket->k(pos);
- if ( kn.isUsed() ) {
- if( bucket->keyAt(pos).woEqual(key) )
- return kn.recordLoc != self;
- break;
- }
- b = bucket->advance(b, pos, 1, "BtreeBucket<V>::dupCheck");
- }
-
- return false;
- }
-
- template< class V >
- string BtreeBucket<V>::dupKeyError( const IndexDescriptor* idx , const Key& key ) {
- stringstream ss;
- ss << "E11000 duplicate key error ";
- ss << "index: " << idx->indexNamespace() << " ";
- ss << "dup key: " << key.toString();
- return ss.str();
- }
-
- /**
- * Find a key within this btree bucket.
- *
- * When duplicate keys are allowed, we use the DiskLoc of the record as if it were part of the
- * key. That assures that even when there are many duplicates (e.g., 1 million) for a key,
- * our performance is still good.
- *
- * assertIfDup: if the key exists (ignoring the recordLoc), uassert
- *
- * pos: for existing keys k0...kn-1.
- * returns # it goes BEFORE. so key[pos-1] < key < key[pos]
- * returns n if it goes after the last existing key.
- * note result might be an Unused location!
- */
-
- bool guessIncreasing = false;
- template< class V >
- bool BtreeBucket<V>::find(const IndexCatalogEntry* btreeState,
- const Key& key,
- const DiskLoc &rl,
- int& pos,
- bool assertIfDup) const {
- Loc recordLoc;
- recordLoc = rl;
- globalIndexCounters->btree( reinterpret_cast<const char*>(this) );
-
- // binary search for this key
- bool dupsChecked = false;
- int l=0;
- int h=this->n-1;
- int m = (l+h)/2;
- if( guessIncreasing ) {
- m = h;
- }
- while ( l <= h ) {
- KeyNode M = this->keyNode(m);
- int x = key.woCompare(M.key, btreeState->ordering());
- if ( x == 0 ) {
- if( assertIfDup ) {
- if( k(m).isUnused() ) {
- // ok that key is there if unused. but we need to check that there aren't other
- // entries for the key then. as it is very rare that we get here, we don't put any
- // coding effort in here to make this particularly fast
- if( !dupsChecked ) {
- dupsChecked = true;
- DiskLoc head = btreeState->head();
- if( head.btree<V>()->exists(btreeState, head, key) ) {
- if( head.btree<V>()->wouldCreateDup(btreeState, head, key, recordLoc) )
- uasserted( ASSERT_ID_DUPKEY , dupKeyError( btreeState->descriptor(), key ) );
- else
- alreadyInIndex();
- }
- }
- }
- else {
- if( M.recordLoc == recordLoc )
- alreadyInIndex();
- uasserted( ASSERT_ID_DUPKEY, dupKeyError( btreeState->descriptor(), key ) );
- }
- }
-
- // dup keys allowed. use recordLoc as if it is part of the key
- Loc unusedRL = M.recordLoc;
- unusedRL.GETOFS() &= ~1; // so we can test equality without the used bit messing us up
- x = recordLoc.compare(unusedRL);
- }
- if ( x < 0 ) // key < M.key
- h = m-1;
- else if ( x > 0 )
- l = m+1;
- else {
- // found it.
- pos = m;
- return true;
- }
- m = (l+h)/2;
- }
- // not found
- pos = l;
- if ( pos != this->n ) {
- Key keyatpos = keyNode(pos).key;
- wassert( key.woCompare(keyatpos, btreeState->ordering()) <= 0 );
- if ( pos > 0 ) {
- if( !( keyNode(pos-1).key.woCompare(key, btreeState->ordering()) <= 0 ) ) {
- DEV {
- log() << key.toString() << endl;
- log() << keyNode(pos-1).key.toString() << endl;
- }
- wassert(false);
- }
- }
- }
-
- return false;
- }
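    // Editorial note, not part of the patch: the recordLoc tiebreak above effectively
    // turns the binary search into one over a composite (key, recordLoc) ordering, with
    // the DiskLoc's low 'used' bit masked off before comparing. That keeps locating a
    // specific entry O(log n) within a bucket even when a key has millions of duplicates.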
-
- template< class V >
- void BtreeBucket<V>::delBucket(IndexCatalogEntry* btreeState, const DiskLoc thisLoc ) {
- BtreeIndexCursor::aboutToDeleteBucket(thisLoc);
- verify( !isHead() );
-
- DiskLoc ll = this->parent;
- const BtreeBucket *p = ll.btree<V>();
- int parentIdx = indexInParent( thisLoc );
- p->childForPos( parentIdx ).writing().Null();
- deallocBucket( btreeState, thisLoc );
- }
-
- template< class V >
- void BtreeBucket<V>::deallocBucket(IndexCatalogEntry* btreeState, const DiskLoc thisLoc ) {
- // Mark the bucket as deallocated, see SERVER-4575.
- this->n = this->INVALID_N_SENTINEL;
- // defensive:
- this->parent.Null();
- btreeState->recordStore()->deleteRecord( thisLoc );
- }
-
- /** note: may delete the entire bucket! 'this' is invalid upon return sometimes. */
- template< class V >
- void BtreeBucket<V>::delKeyAtPos(IndexCatalogEntry* btreeState,
- const DiskLoc thisLoc, int p ) {
- verify(this->n>0);
- DiskLoc left = this->childForPos(p);
-
- if ( this->n == 1 ) {
- if ( left.isNull() && this->nextChild.isNull() ) {
- this->_delKeyAtPos(p);
- if ( isHead() ) {
- // we don't delete the top bucket ever
- }
- else {
- if ( !mayBalanceWithNeighbors( btreeState, thisLoc ) ) {
- // An empty bucket is only allowed as a transient state. If
- // there are no neighbors to balance with, we delete ourself.
- // This condition is only expected in legacy btrees.
- delBucket(btreeState, thisLoc);
- }
- }
- return;
- }
- deleteInternalKey( btreeState, thisLoc, p );
- return;
- }
-
- if ( left.isNull() ) {
- this->_delKeyAtPos(p);
- mayBalanceWithNeighbors( btreeState, thisLoc );
- }
- else {
- deleteInternalKey( btreeState, thisLoc, p );
- }
- }
-
- /**
- * This function replaces the specified key (k) by either the prev or next
- * key in the btree (k'). We require that k have either a left or right
- * child. If k has a left child, we set k' to the prev key of k, which must
- * be a leaf present in the left child. If k does not have a left child, we
- * set k' to the next key of k, which must be a leaf present in the right
- * child. When we replace k with k', we copy k' over k (which may cause a
- * split) and then remove k' from its original location. Because k' is
- * stored in a descendent of k, replacing k by k' will not modify the
- * storage location of the original k', and we can easily remove k' from
- * its original location.
- *
- * This function is only needed in cases where k has a left or right child;
- * in other cases a simpler key removal implementation is possible.
- *
- * NOTE on noncompliant BtreeBuilder btrees:
- * It is possible (though likely rare) for btrees created by BtreeBuilder to
- * have k' that is not a leaf, see SERVER-2732. These cases are handled in
- * the same manner as described in the "legacy btree structures" note below.
- *
- * NOTE on legacy btree structures:
- * In legacy btrees, k' can be a nonleaf. In such a case we 'delete' k by
- * marking it as an unused node rather than replacing it with k'. Also, k'
- * may be a leaf but marked as an unused node. In such a case we replace
- * k by k', preserving the key's unused marking. This function is only
- * expected to mark a key as unused when handling a legacy btree.
- */
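    // Editorial sketch, not part of the patch: a concrete instance of the replacement
    // described above. For a node holding keys [10, 20] with children A = [1..9],
    // B = [11..19] and C = [21..29], deleting 20 copies its predecessor 19 (the largest
    // leaf key in B, since 20 has a left child) over 20, then removes 19 from B. Because
    // B is a descendant of the node being rewritten, the copy cannot move 19's original
    // storage, so the second removal is straightforward.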
- template< class V >
- void BtreeBucket<V>::deleteInternalKey(IndexCatalogEntry* btreeState,
- const DiskLoc thisLoc,
- int keypos ) {
- DiskLoc lchild = this->childForPos( keypos );
- DiskLoc rchild = this->childForPos( keypos + 1 );
- verify( !lchild.isNull() || !rchild.isNull() );
- int advanceDirection = lchild.isNull() ? 1 : -1;
- int advanceKeyOfs = keypos;
- DiskLoc advanceLoc = advance( thisLoc, advanceKeyOfs, advanceDirection, __FUNCTION__ );
- // advanceLoc must be a descendant of thisLoc, because thisLoc has a
- // child in the proper direction and all descendants of thisLoc must be
- // nonempty because they are not the root.
-
- if ( !advanceLoc.btree<V>()->childForPos( advanceKeyOfs ).isNull() ||
- !advanceLoc.btree<V>()->childForPos( advanceKeyOfs + 1 ).isNull() ) {
- // only expected with legacy btrees, see note above
- this->markUnused( keypos );
- return;
- }
-
- KeyNode kn = advanceLoc.btree<V>()->keyNode( advanceKeyOfs );
- // Because advanceLoc is a descendant of thisLoc, updating thisLoc will
- // not affect packing or keys of advanceLoc and kn will be stable
- // during the following setInternalKey()
- setInternalKey( btreeState, thisLoc, keypos, kn.recordLoc, kn.key,
- this->childForPos( keypos ), this->childForPos( keypos + 1 ) );
- advanceLoc.btreemod<V>()->delKeyAtPos( btreeState, advanceLoc, advanceKeyOfs );
- }
-
-//#define BTREE(loc) (static_cast<DiskLoc>(loc).btree<V>())
-#define BTREE(loc) (loc.template btree<V>())
-//#define BTREEMOD(loc) (static_cast<DiskLoc>(loc).btreemod<V>())
-#define BTREEMOD(loc) (loc.template btreemod<V>())
-
- template< class V >
- const BtreeBucket<V>* BtreeBucket<V>::asVersion( Record* rec ) {
- return reinterpret_cast< BtreeBucket<V>* >( rec->data() );
- }
-
- template< class V >
- BtreeBucket<V>* BtreeBucket<V>::asVersionMod( Record* rec ) {
- BtreeBucket<V>* bucket = const_cast<BtreeBucket<V>* >( asVersion( rec ) );
- return static_cast< BtreeBucket<V>* >( getDur().writingPtr( bucket, V::BucketSize ) );
- }
-
- template< class V >
- void BtreeBucket<V>::replaceWithNextChild( IndexCatalogEntry* btreeState,
- const DiskLoc thisLoc ) {
- verify( this->n == 0 && !this->nextChild.isNull() );
- if ( this->parent.isNull() ) {
- verify( btreeState->head() == thisLoc );
- btreeState->setHead( this->nextChild );
- }
- else {
- DiskLoc ll = this->parent;
- ll.btree<V>()->childForPos( indexInParent( thisLoc ) ).writing() = this->nextChild;
- }
- BTREE(this->nextChild)->parent.writing() = this->parent;
- BtreeIndexCursor::aboutToDeleteBucket(thisLoc);
- deallocBucket( btreeState, thisLoc );
- }
-
- template< class V >
- bool BtreeBucket<V>::canMergeChildren( const DiskLoc &thisLoc, int leftIndex ) const {
- verify( leftIndex >= 0 && leftIndex < this->n );
- DiskLoc leftNodeLoc = this->childForPos( leftIndex );
- DiskLoc rightNodeLoc = this->childForPos( leftIndex + 1 );
- if ( leftNodeLoc.isNull() || rightNodeLoc.isNull() ) {
- // TODO if this situation is possible in long term implementation, maybe we should compact somehow anyway
- return false;
- }
- int pos = 0;
- {
- const BtreeBucket *l = leftNodeLoc.btree<V>();
- const BtreeBucket *r = rightNodeLoc.btree<V>();
- if ( ( this->headerSize() + l->packedDataSize( pos ) + r->packedDataSize( pos ) + keyNode( leftIndex ).key.dataSize() + sizeof(_KeyNode) > unsigned( V::BucketSize ) ) ) {
- return false;
- }
- }
- return true;
- }
-
- /**
- * This implementation must respect the meaning and value of lowWaterMark.
- * Also see comments in splitPos().
- */
- template< class V >
- int BtreeBucket<V>::rebalancedSeparatorPos( const DiskLoc &thisLoc, int leftIndex ) const {
- int split = -1;
- int rightSize = 0;
- const BtreeBucket *l = BTREE(this->childForPos( leftIndex ));
- const BtreeBucket *r = BTREE(this->childForPos( leftIndex + 1 ));
-
- int KNS = sizeof( _KeyNode );
- int rightSizeLimit = ( l->topSize + l->n * KNS + keyNode( leftIndex ).key.dataSize() + KNS + r->topSize + r->n * KNS ) / 2;
- // This constraint should be ensured by only calling this function
- // if we go below the low water mark.
- verify( rightSizeLimit < BtreeBucket<V>::bodySize() );
- for( int i = r->n - 1; i > -1; --i ) {
- rightSize += r->keyNode( i ).key.dataSize() + KNS;
- if ( rightSize > rightSizeLimit ) {
- split = l->n + 1 + i;
- break;
- }
- }
- if ( split == -1 ) {
- rightSize += keyNode( leftIndex ).key.dataSize() + KNS;
- if ( rightSize > rightSizeLimit ) {
- split = l->n;
- }
- }
- if ( split == -1 ) {
- for( int i = l->n - 1; i > -1; --i ) {
- rightSize += l->keyNode( i ).key.dataSize() + KNS;
- if ( rightSize > rightSizeLimit ) {
- split = i;
- break;
- }
- }
- }
- // safeguards - we must not create an empty bucket
- if ( split < 1 ) {
- split = 1;
- }
- else if ( split > l->n + 1 + r->n - 2 ) {
- split = l->n + 1 + r->n - 2;
- }
-
- return split;
- }
-
- template< class V >
- void BtreeBucket<V>::doMergeChildren( IndexCatalogEntry* btreeState,
- const DiskLoc thisLoc,
- int leftIndex ) {
- DiskLoc leftNodeLoc = this->childForPos( leftIndex );
- DiskLoc rightNodeLoc = this->childForPos( leftIndex + 1 );
- BtreeBucket *l = leftNodeLoc.btreemod<V>();
- BtreeBucket *r = rightNodeLoc.btreemod<V>();
- int pos = 0;
- l->_packReadyForMod( btreeState->ordering(), pos );
- r->_packReadyForMod( btreeState->ordering(), pos ); // pack r in case there are droppable keys
-
- // We know the additional keys below will fit in l because canMergeChildren()
- // must be true.
- int oldLNum = l->n;
- {
- // left child's right child becomes old parent key's left child
- KeyNode kn = keyNode( leftIndex );
- l->pushBack( kn.recordLoc, kn.key, btreeState->ordering(), l->nextChild );
- }
- for( int i = 0; i < r->n; ++i ) {
- KeyNode kn = r->keyNode( i );
- l->pushBack( kn.recordLoc, kn.key, btreeState->ordering(), kn.prevChildBucket );
- }
- l->nextChild = r->nextChild;
- l->fixParentPtrs( leftNodeLoc, oldLNum );
- r->delBucket( btreeState, rightNodeLoc );
- this->childForPos( leftIndex + 1 ) = leftNodeLoc;
- this->childForPos( leftIndex ) = DiskLoc();
- this->_delKeyAtPos( leftIndex, true );
- if ( this->n == 0 ) {
- // will trash this and thisLoc
- // TODO To ensure all leaves are of equal height, we should ensure
- // this is only called on the root.
- replaceWithNextChild( btreeState, thisLoc );
- }
- else {
- // balance recursively - maybe we should do this even when n == 0?
- mayBalanceWithNeighbors( btreeState, thisLoc );
- }
- }
-
- template< class V >
- int BtreeBucket<V>::indexInParent( const DiskLoc &thisLoc ) const {
- verify( !this->parent.isNull() );
- const BtreeBucket *p = BTREE(this->parent);
- if ( p->nextChild == thisLoc ) {
- return p->n;
- }
- else {
- for( int i = 0; i < p->n; ++i ) {
- if ( p->k( i ).prevChildBucket == thisLoc ) {
- return i;
- }
- }
- }
- out() << "ERROR: can't find ref to child bucket.\n";
- out() << "child: " << thisLoc << "\n";
- dump();
- out() << "Parent: " << this->parent << "\n";
- p->dump();
- verify(false);
- return -1; // just to compile
- }
-
- template< class V >
- bool BtreeBucket<V>::tryBalanceChildren( IndexCatalogEntry* btreeState,
- const DiskLoc thisLoc,
- int leftIndex ) const {
- // If we can merge, then we must merge rather than balance to preserve
- // bucket utilization constraints.
- if ( canMergeChildren( thisLoc, leftIndex ) ) {
- return false;
- }
- thisLoc.btreemod<V>()->doBalanceChildren( btreeState, thisLoc, leftIndex );
- return true;
- }
-
- template< class V >
- void BtreeBucket<V>::doBalanceLeftToRight( IndexCatalogEntry* btreeState,
- const DiskLoc thisLoc, int leftIndex, int split,
- BtreeBucket *l, const DiskLoc lchild,
- BtreeBucket *r, const DiskLoc rchild ) {
- // TODO maybe do some audits the same way pushBack() does?
- // As a precondition, rchild + the old separator are <= half a body size,
- // and lchild is at most completely full. Based on the value of split,
- // rchild will get <= half of the total bytes which is at most 75%
- // of a full body. So rchild will have room for the following keys:
- int rAdd = l->n - split;
- r->reserveKeysFront( rAdd );
- for( int i = split + 1, j = 0; i < l->n; ++i, ++j ) {
- KeyNode kn = l->keyNode( i );
- r->setKey( j, kn.recordLoc, kn.key, kn.prevChildBucket );
- }
- {
- KeyNode kn = keyNode( leftIndex );
- r->setKey( rAdd - 1, kn.recordLoc, kn.key, l->nextChild ); // left child's right child becomes old parent key's left child
- }
- r->fixParentPtrs( rchild, 0, rAdd - 1 );
- {
- KeyNode kn = l->keyNode( split );
- l->nextChild = kn.prevChildBucket;
- // Because lchild is a descendant of thisLoc, updating thisLoc will
- // not affect packing or keys of lchild and kn will be stable
- // during the following setInternalKey()
- setInternalKey( btreeState, thisLoc, leftIndex, kn.recordLoc, kn.key, lchild, rchild );
- }
- int zeropos = 0;
- // lchild and rchild cannot be merged, so there must be >0 (actually more)
- // keys to the left of split.
- l->truncateTo( split, btreeState->ordering(), zeropos );
- }
-
- template< class V >
- void BtreeBucket<V>::doBalanceRightToLeft( IndexCatalogEntry* btreeState,
- const DiskLoc thisLoc, int leftIndex, int split,
- BtreeBucket *l, const DiskLoc lchild,
- BtreeBucket *r, const DiskLoc rchild ) {
- // As a precondition, lchild + the old separator are <= half a body size,
- // and rchild is at most completely full. Based on the value of split,
- // lchild will get less than half of the total bytes which is at most 75%
- // of a full body. So lchild will have room for the following keys:
- int lN = l->n;
- {
- // left child's right child becomes old parent key's left child
- KeyNode kn = keyNode( leftIndex );
- l->pushBack( kn.recordLoc, kn.key, btreeState->ordering(), l->nextChild );
- }
- for( int i = 0; i < split - lN - 1; ++i ) {
- KeyNode kn = r->keyNode( i );
- l->pushBack( kn.recordLoc, kn.key, btreeState->ordering(), kn.prevChildBucket );
- }
- {
- KeyNode kn = r->keyNode( split - lN - 1 );
- l->nextChild = kn.prevChildBucket;
- // Child lN was lchild's old nextChild, and don't need to fix that one.
- l->fixParentPtrs( lchild, lN + 1, l->n );
- // Because rchild is a descendant of thisLoc, updating thisLoc will
- // not affect packing or keys of rchild and kn will be stable
- // during the following setInternalKey()
- setInternalKey( btreeState, thisLoc, leftIndex, kn.recordLoc, kn.key, lchild, rchild );
- }
- int zeropos = 0;
- // lchild and rchild cannot be merged, so there must be >0 (actually more)
- // keys to the right of split.
- r->dropFront( split - lN, btreeState->ordering(), zeropos );
- }
-
- template< class V >
- void BtreeBucket<V>::doBalanceChildren( IndexCatalogEntry* btreeState,
- const DiskLoc thisLoc,
- int leftIndex ) {
- DiskLoc lchild = this->childForPos( leftIndex );
- DiskLoc rchild = this->childForPos( leftIndex + 1 );
- int zeropos = 0;
- BtreeBucket *l = lchild.btreemod<V>();
- l->_packReadyForMod( btreeState->ordering(), zeropos );
- BtreeBucket *r = rchild.btreemod<V>();
- r->_packReadyForMod( btreeState->ordering(), zeropos );
- int split = rebalancedSeparatorPos( thisLoc, leftIndex );
-
- // By definition, if we are below the low water mark and cannot merge
- // then we must actively balance.
- verify( split != l->n );
- if ( split < l->n ) {
- doBalanceLeftToRight( btreeState, thisLoc, leftIndex, split, l, lchild, r, rchild );
- }
- else {
- doBalanceRightToLeft( btreeState, thisLoc, leftIndex, split, l, lchild, r, rchild );
- }
- }
-
- template< class V >
- bool BtreeBucket<V>::mayBalanceWithNeighbors(IndexCatalogEntry* btreeState,
- const DiskLoc thisLoc ) {
- if ( this->parent.isNull() ) { // we are root, there are no neighbors
- return false;
- }
-
- if ( this->packedDataSize( 0 ) >= this->lowWaterMark() ) {
- return false;
- }
-
- const BtreeBucket *p = BTREE(this->parent);
- int parentIdx = indexInParent( thisLoc );
-
- // TODO will missing neighbor case be possible long term? Should we try to merge/balance somehow in that case if so?
- bool mayBalanceRight = ( ( parentIdx < p->n ) && !p->childForPos( parentIdx + 1 ).isNull() );
- bool mayBalanceLeft = ( ( parentIdx > 0 ) && !p->childForPos( parentIdx - 1 ).isNull() );
-
- // Balance if possible on one side - we merge only if absolutely necessary
- // to preserve btree bucket utilization constraints since that's a more
- // heavy duty operation (especially if we must re-split later).
- if ( mayBalanceRight &&
- p->tryBalanceChildren( btreeState, this->parent, parentIdx) ) {
- return true;
- }
- if ( mayBalanceLeft &&
- p->tryBalanceChildren( btreeState, this->parent, parentIdx - 1 ) ) {
- return true;
- }
-
- BtreeBucket *pm = BTREEMOD(this->parent);
- if ( mayBalanceRight ) {
- pm->doMergeChildren( btreeState, this->parent, parentIdx );
- return true;
- }
- else if ( mayBalanceLeft ) {
- pm->doMergeChildren( btreeState, this->parent, parentIdx - 1 );
- return true;
- }
-
- return false;
- }
-
- /** remove a key from the index */
- template< class V >
- bool BtreeBucket<V>::unindex(IndexCatalogEntry* btreeState,
- const DiskLoc thisLoc,
- const BSONObj& key,
- const DiskLoc recordLoc) const {
- int pos;
- bool found;
- DiskLoc loc = locate(btreeState, thisLoc, key, pos, found, recordLoc, 1);
- if ( found ) {
- loc.btreemod<V>()->delKeyAtPos(btreeState, loc, pos);
- return true;
- }
- return false;
- }
-
- template< class V >
- inline void BtreeBucket<V>::fix(const DiskLoc thisLoc, const DiskLoc child) {
- if ( !child.isNull() ) {
- if ( insert_debug )
- out() << " fix " << child.toString() << ".parent=" << thisLoc.toString() << endl;
- child.btree<V>()->parent.writing() = thisLoc;
- }
- }
-
- /**
- * This can cause a lot of additional page writes when we assign buckets to
- * different parents. Maybe get rid of parent ptrs?
- */
- template< class V >
- void BtreeBucket<V>::fixParentPtrs(const DiskLoc thisLoc, int firstIndex, int lastIndex) const {
- VERIFYTHISLOC
- if ( lastIndex == -1 ) {
- lastIndex = this->n;
- }
- for ( int i = firstIndex; i <= lastIndex; i++ ) {
- fix(thisLoc, this->childForPos(i));
- }
- }
-
- template< class V >
- void BtreeBucket<V>::setInternalKey( IndexCatalogEntry* btreeState,
- const DiskLoc thisLoc,
- int keypos,
- const DiskLoc recordLoc,
- const Key &key,
- const DiskLoc lchild,
- const DiskLoc rchild ) {
- this->childForPos( keypos ).Null();
-
- // This may leave the bucket empty (n == 0) which is ok only as a
- // transient state. In the instant case, the implementation of
- // insertHere behaves correctly when n == 0 and as a side effect
- // increments n.
- this->_delKeyAtPos( keypos, true );
-
- // Ensure we do not orphan neighbor's old child.
- verify( this->childForPos( keypos ) == rchild );
-
- // Just set temporarily - required to pass validation in insertHere()
- this->childForPos( keypos ) = lchild;
-
- insertHere(btreeState, thisLoc, keypos, recordLoc, key, lchild, rchild );
- }
-
- /**
- * insert a key in this bucket, splitting if necessary.
- * @keypos - where to insert the key in range 0..n. 0=make leftmost, n=make rightmost.
- * NOTE this function may free some data, and as a result the value passed for keypos may
- * be invalid after calling insertHere()
- *
- * Some of the write intent signaling below relies on the implementation of
- * the optimized write intent code in basicInsert().
- */
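    // Editorial note, not part of the patch: keypos is an insertion slot, not the index
    // of an existing key. For a bucket holding [b, d, f], keypos 0 inserts before b and
    // keypos 3 (== n) appends after f; a split can renumber the slot, which is why the
    // comment above warns that keypos may be invalid after insertHere() returns.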
- template< class V >
- void BtreeBucket<V>::insertHere( IndexCatalogEntry* btreeState,
- const DiskLoc thisLoc, int keypos,
- const DiskLoc recordLoc, const Key& key,
- const DiskLoc lchild, const DiskLoc rchild ) const {
- if ( insert_debug )
- out() << " " << thisLoc.toString() << ".insertHere " << key.toString() << '/' << recordLoc.toString() << ' '
- << lchild.toString() << ' ' << rchild.toString() << " keypos:" << keypos << endl;
-
- if ( !this->basicInsert(thisLoc, keypos, recordLoc, key, btreeState->ordering()) ) {
- // If basicInsert() fails, the bucket will be packed as required by split().
- thisLoc.btreemod<V>()->split(btreeState, thisLoc, keypos, recordLoc, key, lchild, rchild );
- return;
- }
-
- {
- const _KeyNode *_kn = &k(keypos);
- _KeyNode *kn = (_KeyNode *) getDur().alreadyDeclared((_KeyNode*) _kn); // already declared intent in basicInsert()
- if ( keypos+1 == this->n ) { // last key
- if ( this->nextChild != lchild ) {
- out() << "ERROR nextChild != lchild" << endl;
- out() << " thisLoc: " << thisLoc.toString() << ' ' << btreeState->descriptor()->indexNamespace() << endl;
- out() << " keyPos: " << keypos << " n:" << this->n << endl;
- out() << " nextChild: " << this->nextChild.toString() << " lchild: " << lchild.toString() << endl;
- out() << " recordLoc: " << recordLoc.toString() << " rchild: " << rchild.toString() << endl;
- out() << " key: " << key.toString() << endl;
- dump();
- verify(false);
- }
- kn->prevChildBucket = this->nextChild;
- verify( kn->prevChildBucket == lchild );
- this->nextChild.writing() = rchild;
- if ( !rchild.isNull() )
- BTREE(rchild)->parent.writing() = thisLoc;
- }
- else {
- kn->prevChildBucket = lchild;
- if ( k(keypos+1).prevChildBucket != lchild ) {
- out() << "ERROR k(keypos+1).prevChildBucket != lchild" << endl;
- out() << " thisLoc: " << thisLoc.toString() << ' ' << btreeState->descriptor()->indexNamespace() << endl;
- out() << " keyPos: " << keypos << " n:" << this->n << endl;
- out() << " k(keypos+1).pcb: " << k(keypos+1).prevChildBucket.toString() << " lchild: " << lchild.toString() << endl;
- out() << " recordLoc: " << recordLoc.toString() << " rchild: " << rchild.toString() << endl;
- out() << " key: " << key.toString() << endl;
- dump();
- verify(false);
- }
- const Loc *pc = &k(keypos+1).prevChildBucket;
- *getDur().alreadyDeclared( const_cast<Loc*>(pc) ) = rchild; // declared in basicInsert()
- if ( !rchild.isNull() )
- rchild.btree<V>()->parent.writing() = thisLoc;
- }
- return;
- }
- }
-
- template< class V >
- void BtreeBucket<V>::split(IndexCatalogEntry* btreeState,
- const DiskLoc thisLoc, int keypos,
- const DiskLoc recordLoc, const Key& key,
- const DiskLoc lchild, const DiskLoc rchild) {
- this->assertWritable();
-
- if ( split_debug )
- out() << " " << thisLoc.toString() << ".split" << endl;
-
- int split = this->splitPos( keypos );
- DiskLoc rLoc = addBucket(btreeState);
- BtreeBucket *r = rLoc.btreemod<V>();
- if ( split_debug )
- out() << " split:" << split << ' ' << keyNode(split).key.toString() << " n:" << this->n << endl;
- for ( int i = split+1; i < this->n; i++ ) {
- KeyNode kn = keyNode(i);
- r->pushBack(kn.recordLoc, kn.key, btreeState->ordering(), kn.prevChildBucket);
- }
- r->nextChild = this->nextChild;
- r->assertValid( btreeState->ordering() );
-
- if ( split_debug )
- out() << " new rLoc:" << rLoc.toString() << endl;
- r = 0;
- rLoc.btree<V>()->fixParentPtrs(rLoc);
-
- {
- KeyNode splitkey = keyNode(split);
- this->nextChild = splitkey.prevChildBucket; // splitkey key gets promoted, its children will be thisLoc (l) and rLoc (r)
- if ( split_debug ) {
- out() << " splitkey key:" << splitkey.key.toString() << endl;
- }
-
- // Because thisLoc is a descendant of parent, updating parent will
- // not affect packing or keys of thisLoc and splitkey will be stable
- // during the following:
-
- // promote splitkey to a parent node
- if ( this->parent.isNull() ) {
- // make a new parent if we were the root
- DiskLoc L = addBucket(btreeState);
- BtreeBucket *p = L.btreemod<V>();
- p->pushBack(splitkey.recordLoc, splitkey.key, btreeState->ordering(), thisLoc);
- p->nextChild = rLoc;
- p->assertValid( btreeState->ordering() );
- this->parent = L;
- btreeState->setHead( L );
- if ( split_debug )
- out() << " we were root, making new root:" << hex << this->parent.getOfs() << dec << endl;
- rLoc.btree<V>()->parent.writing() = this->parent;
- }
- else {
- // set this before calling _insert - if it splits it will do fixParent() logic and change the value.
- rLoc.btree<V>()->parent.writing() = this->parent;
- if ( split_debug )
- out() << " promoting splitkey key " << splitkey.key.toString() << endl;
- BTREE(this->parent)->_insert(btreeState, this->parent, splitkey.recordLoc, splitkey.key, /*dupsallowed*/true, thisLoc, rLoc );
- }
- }
-
- int newpos = keypos;
- // note this may trash splitkey.key. thus we had to promote it before finishing up here.
- this->truncateTo(split, btreeState->ordering(), newpos);
-
- // add our new key, there is room now
- {
- if ( keypos <= split ) {
- if ( split_debug )
- out() << " keypos<split, insertHere() the new key" << endl;
- insertHere(btreeState, thisLoc, newpos, recordLoc, key, lchild, rchild );
- }
- else {
- int kp = keypos-split-1;
- verify(kp>=0);
- BTREE(rLoc)->insertHere(btreeState, rLoc, kp, recordLoc, key, lchild, rchild );
- }
- }
-
- if ( split_debug )
- out() << " split end " << hex << thisLoc.getOfs() << dec << endl;
- }
-
- class DummyDocWriter : public DocWriter {
- public:
- DummyDocWriter( size_t sz ) : _sz( sz ){}
- virtual void writeDocument( char* buf ) const { /* no-op */ }
- virtual size_t documentSize() const { return _sz; }
- private:
- size_t _sz;
- };
-
- /** start a new index off, empty */
- template< class V >
- DiskLoc BtreeBucket<V>::addBucket(IndexCatalogEntry* btreeState) {
- DummyDocWriter docWriter( V::BucketSize );
- StatusWith<DiskLoc> loc = btreeState->recordStore()->insertRecord( &docWriter, 0 );
- uassertStatusOK( loc.getStatus() );
- BtreeBucket *b = BTREEMOD(loc.getValue());
- b->init();
- return loc.getValue();
- }
-
- template< class V >
- const DiskLoc BtreeBucket<V>::getHead(const DiskLoc& thisLoc) const {
- DiskLoc p = thisLoc;
- while ( !BTREE(p)->isHead() )
- p = BTREE(p)->parent;
- return p;
- }
-
- template< class V >
- DiskLoc BtreeBucket<V>::advance(const DiskLoc& thisLoc, int& keyOfs, int direction, const char *caller) const {
- if ( keyOfs < 0 || keyOfs >= this->n ) {
- out() << "ASSERT failure BtreeBucket<V>::advance, caller: " << caller << endl;
- out() << " thisLoc: " << thisLoc.toString() << endl;
- out() << " keyOfs: " << keyOfs << " n:" << this->n << " direction: " << direction << endl;
- out() << bucketSummary() << endl;
- verify(false);
- }
- int adj = direction < 0 ? 1 : 0;
- int ko = keyOfs + direction;
- DiskLoc nextDown = this->childForPos(ko+adj);
- if ( !nextDown.isNull() ) {
- while ( 1 ) {
- keyOfs = direction>0 ? 0 : BTREE(nextDown)->n - 1;
- DiskLoc loc = BTREE(nextDown)->childForPos(keyOfs + adj);
- if ( loc.isNull() )
- break;
- nextDown = loc;
- }
- return nextDown;
- }
-
- if ( ko < this->n && ko >= 0 ) {
- keyOfs = ko;
- return thisLoc;
- }
-
- // end of bucket. traverse back up.
- DiskLoc childLoc = thisLoc;
- DiskLoc ancestor = this->parent;
- while ( 1 ) {
- if ( ancestor.isNull() )
- break;
- const BtreeBucket *an = BTREE(ancestor);
- for ( int i = 0; i < an->n; i++ ) {
- if ( an->childForPos(i+adj) == childLoc ) {
- keyOfs = i;
- return ancestor;
- }
- }
- verify( direction<0 || an->nextChild == childLoc );
- // parent exhausted also, keep going up
- childLoc = ancestor;
- ancestor = an->parent;
- }
-
- return DiskLoc();
- }
-
- template< class V >
- DiskLoc BtreeBucket<V>::locate(const IndexCatalogEntry* btreeState,
- const DiskLoc& thisLoc,
- const BSONObj& key,
- int& pos,
- bool& found,
- const DiskLoc& recordLoc,
- int direction) const {
- KeyOwned k(key);
- return locate(btreeState, thisLoc, k, pos, found, recordLoc, direction);
- }
-
- template< class V >
- DiskLoc BtreeBucket<V>::locate(const IndexCatalogEntry* btreeState,
- const DiskLoc& thisLoc,
- const Key& key,
- int& pos,
- bool& found,
- const DiskLoc& recordLoc,
- int direction) const {
- int p;
- found = find(btreeState, key, recordLoc, p, /*assertIfDup*/ false);
- if ( found ) {
- pos = p;
- return thisLoc;
- }
-
- DiskLoc child = this->childForPos(p);
-
- if ( !child.isNull() ) {
- DiskLoc l = BTREE(child)->locate(btreeState, child, key, pos, found, recordLoc, direction);
- if ( !l.isNull() )
- return l;
- }
-
- pos = p;
- if ( direction < 0 )
- return --pos == -1 ? DiskLoc() /*theend*/ : thisLoc;
- else
- return pos == this->n ? DiskLoc() /*theend*/ : thisLoc;
- }
-
- template< class V >
- bool BtreeBucket<V>::customFind( int l, int h, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction, DiskLoc &thisLoc, int &keyOfs, pair< DiskLoc, int > &bestParent ) {
- const BtreeBucket<V> * bucket = BTREE(thisLoc);
- while( 1 ) {
- if ( l + 1 == h ) {
- keyOfs = ( direction > 0 ) ? h : l;
- DiskLoc next = bucket->k( h ).prevChildBucket;
- if ( !next.isNull() ) {
- bestParent = make_pair( thisLoc, keyOfs );
- thisLoc = next;
- return true;
- }
- else {
- return false;
- }
- }
- int m = l + ( h - l ) / 2;
- int cmp = customBSONCmp( bucket->keyNode( m ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction );
- if ( cmp < 0 ) {
- l = m;
- }
- else if ( cmp > 0 ) {
- h = m;
- }
- else {
- if ( direction < 0 ) {
- l = m;
- }
- else {
- h = m;
- }
- }
- }
- }
-
-    /**
-     * Find the smallest/biggest value greater-or-equal/less-or-equal to the specified target.
-     * The starting thisLoc + keyOfs will be strictly less than/strictly greater than the
-     * target described by keyBegin/keyBeginLen/keyEnd.
-     * All the direction checks below allowed one shared implementation, but separate
-     * forward and reverse implementations would possibly be more efficient.
-     */
- template< class V >
- void BtreeBucket<V>::advanceTo(const IndexCatalogEntry* btreeState,
- DiskLoc &thisLoc,
- int &keyOfs,
- const BSONObj &keyBegin,
- int keyBeginLen,
- bool afterKey,
- const vector<const BSONElement*>& keyEnd,
- const vector<bool>& keyEndInclusive,
- int direction) const {
-
- const Ordering& order = btreeState->ordering();
-
- int l,h;
- bool dontGoUp;
- if ( direction > 0 ) {
- l = keyOfs;
- h = this->n - 1;
- dontGoUp = ( customBSONCmp( keyNode( h ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) >= 0 );
- }
- else {
- l = 0;
- h = keyOfs;
- dontGoUp = ( customBSONCmp( keyNode( l ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) <= 0 );
- }
- pair< DiskLoc, int > bestParent;
- if ( dontGoUp ) {
- // this comparison result assures h > l
- if ( !customFind( l, h, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction, thisLoc, keyOfs, bestParent ) ) {
- return;
- }
- }
- else {
- // go up parents until rightmost/leftmost node is >=/<= target or at top
- while( !BTREE(thisLoc)->parent.isNull() ) {
- thisLoc = BTREE(thisLoc)->parent;
- if ( direction > 0 ) {
- if ( customBSONCmp( BTREE(thisLoc)->keyNode( BTREE(thisLoc)->n - 1 ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) >= 0 ) {
- break;
- }
- }
- else {
- if ( customBSONCmp( BTREE(thisLoc)->keyNode( 0 ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) <= 0 ) {
- break;
- }
- }
- }
- }
- customLocate(btreeState, thisLoc, keyOfs, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, direction, bestParent );
- }
-
-    /** @param locInOut in/out param. It is used by advanceTo, which skips
-        from one key to another key without necessarily checking all the keys
-        between them in the btree (it can skip to different btree buckets).
-        The advanceTo function can get called a lot, and for the different
-        targets we want to advance to, we don't want to create a bson obj in a
-        new buffer each time we call that function. customLocate is the helper
-        advanceTo needs; it does the same thing as the normal locate function
-        but takes essentially the same arguments as advanceTo.
-    */
- template< class V >
- void BtreeBucket<V>::customLocate(const IndexCatalogEntry* btreeState,
- DiskLoc& locInOut,
- int& keyOfs,
- const BSONObj& keyBegin,
- int keyBeginLen, bool afterKey,
- const vector<const BSONElement*>& keyEnd,
- const vector<bool>& keyEndInclusive,
- int direction,
- pair<DiskLoc, int>& bestParent) {
- const Ordering& order = btreeState->ordering();
-
- dassert( direction == 1 || direction == -1 );
- const BtreeBucket<V> *bucket = BTREE(locInOut);
- if ( bucket->n == 0 ) {
- locInOut = DiskLoc();
- return;
- }
- // go down until find smallest/biggest >=/<= target
- while( 1 ) {
- int l = 0;
- int h = bucket->n - 1;
-
- // +direction: 0, -direction: h
- int z = (1-direction)/2*h;
-
- // leftmost/rightmost key may possibly be >=/<= search key
- int res = customBSONCmp( bucket->keyNode( z ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction );
- bool firstCheck = direction*res >= 0;
-
- if ( firstCheck ) {
- DiskLoc next;
- keyOfs = z;
- if ( direction > 0 ) {
- dassert( z == 0 );
- next = bucket->k( 0 ).prevChildBucket;
- }
- else {
- next = bucket->nextChild;
- }
- if ( !next.isNull() ) {
- bestParent = pair< DiskLoc, int >( locInOut, keyOfs );
- locInOut = next;
- bucket = BTREE(locInOut);
- continue;
- }
- else {
- return;
- }
- }
-
- res = customBSONCmp( bucket->keyNode( h-z ).key.toBson(), keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction );
- bool secondCheck = direction*res < 0;
-
- if ( secondCheck ) {
- DiskLoc next;
- if ( direction > 0 ) {
- next = bucket->nextChild;
- }
- else {
- next = bucket->k( 0 ).prevChildBucket;
- }
- if ( next.isNull() ) {
- // if bestParent is null, we've hit the end and locInOut gets set to DiskLoc()
- locInOut = bestParent.first;
- keyOfs = bestParent.second;
- return;
- }
- else {
- locInOut = next;
- bucket = BTREE(locInOut);
- continue;
- }
- }
-
- if ( !customFind( l, h, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction, locInOut, keyOfs, bestParent ) ) {
- return;
- }
- bucket = BTREE(locInOut);
- }
- }
-
- /** @thisLoc disk location of *this */
- template< class V >
- int BtreeBucket<V>::_insert(IndexCatalogEntry* btreeState,
- const DiskLoc thisLoc, const DiskLoc recordLoc,
- const Key& key, bool dupsAllowed,
- const DiskLoc lChild, const DiskLoc rChild ) const {
- if ( key.dataSize() > getKeyMax() ) {
- string msg = str::stream() << "ERROR: key too large len:" << key.dataSize()
- << " max:" << getKeyMax() << ' ' << key.dataSize()
- << ' ' << btreeState->descriptor()->indexNamespace();
-
- problem() << msg << endl;
- keyTooLongAssert( 17281, msg );
- return 2;
- }
- verify( key.dataSize() > 0 );
-
- int pos;
- bool found = find(btreeState, key, recordLoc, pos, !dupsAllowed);
- if ( insert_debug ) {
- out() << " " << thisLoc.toString() << '.' << "_insert " <<
- key.toString() << '/' << recordLoc.toString() <<
- " l:" << lChild.toString() << " r:" << rChild.toString() << endl;
- out() << " found:" << found << " pos:" << pos << " n:" << this->n << endl;
- }
-
- if ( found ) {
- const _KeyNode& kn = k(pos);
- if ( kn.isUnused() ) {
- LOG(4) << "btree _insert: reusing unused key" << endl;
- massert( 10285 , "_insert: reuse key but lchild is not null", lChild.isNull());
- massert( 10286 , "_insert: reuse key but rchild is not null", rChild.isNull());
- kn.writing().setUsed();
- return 0;
- }
-
- DEV {
- log() << "_insert(): key already exists in index (ok for background:true)\n";
- log() << " " << btreeState->descriptor()->indexNamespace() << " thisLoc:" << thisLoc.toString() << '\n';
- log() << " " << key.toString() << '\n';
- log() << " " << "recordLoc:" << recordLoc.toString() << " pos:" << pos << endl;
- log() << " old l r: " << this->childForPos(pos).toString() << ' ' << this->childForPos(pos+1).toString() << endl;
- log() << " new l r: " << lChild.toString() << ' ' << rChild.toString() << endl;
- }
- alreadyInIndex();
- }
-
- DEBUGGING out() << "TEMP: key: " << key.toString() << endl;
- Loc ch = this->childForPos(pos);
- DiskLoc child = ch;
- if ( insert_debug )
- out() << " getChild(" << pos << "): " << child.toString() << endl;
-        // In current usage, rChild.isNull() is true for a new key and false when we
-        // are promoting a split key. These are the only two cases where _insert()
-        // is currently called.
- if ( child.isNull() || !rChild.isNull() ) {
- // A new key will be inserted at the same tree height as an adjacent existing key.
- insertHere(btreeState, thisLoc, pos, recordLoc, key, lChild, rChild);
- return 0;
- }
-
- return child.btree<V>()->_insert(btreeState, child, recordLoc, key, dupsAllowed, /*lchild*/DiskLoc(), /*rchild*/DiskLoc() );
- }
-
- template< class V >
- void BtreeBucket<V>::dump(unsigned depth) const {
- string indent = string(depth, ' ');
- LogstreamBuilder l = log();
- l << "BUCKET n:" << this->n;
- l << " parent:" << hex << this->parent.getOfs() << dec;
- for ( int i = 0; i < this->n; i++ ) {
- l << '\n' << indent;
- KeyNode k = keyNode(i);
- string ks = k.key.toString();
- l << " " << hex << k.prevChildBucket.getOfs() << '\n';
- l << indent << " " << i << ' ' << ks.substr(0, 30) << " Loc:" << k.recordLoc.toString() << dec;
- if ( this->k(i).isUnused() )
- l << " UNUSED";
- }
- l << "\n" << indent << " " << hex << this->nextChild.getOfs() << dec << endl;
- }
-
-    /** TODO: the meaning of the return code is unclear; clean up */
- template< class V >
- int BtreeBucket<V>::bt_insert(IndexCatalogEntry* btreeState,
- const DiskLoc thisLoc,
- const DiskLoc recordLoc,
- const BSONObj& keyBson,
- bool dupsAllowed,
- bool toplevel) const {
- guessIncreasing = keyBson.firstElementType() == jstOID && btreeState->descriptor()->isIdIndex();
- KeyOwned key(keyBson);
-
- dassert(toplevel);
- if ( toplevel ) {
- if ( key.dataSize() > getKeyMax() ) {
- string msg = str::stream() << "Btree::insert: key too large to index, failing "
- << btreeState->descriptor()->indexNamespace() << ' '
- << key.dataSize() << ' ' << key.toString();
- problem() << msg << endl;
- keyTooLongAssert( 17280, msg );
- return 3;
- }
- }
-
- int x;
- try {
- x = _insert(btreeState, thisLoc, recordLoc, key, dupsAllowed, DiskLoc(), DiskLoc() );
- this->assertValid( btreeState->ordering() );
- }
- catch( ... ) {
- guessIncreasing = false;
- throw;
- }
- guessIncreasing = false;
- return x;
- }
-
- template< class V >
- void BtreeBucket<V>::shape(stringstream& ss) const {
- this->_shape(0, ss);
- }
-
- template< class V >
- int BtreeBucket<V>::getKeyMax() {
- return V::KeyMax;
- }
-
- template< class V >
- DiskLoc BtreeBucket<V>::findSingle( const IndexCatalogEntry* btreeState,
- const DiskLoc& thisLoc,
- const BSONObj& key ) const {
- int pos;
- bool found;
- // TODO: is it really ok here that the order is a default?
- // for findById() use, yes. for checkNoIndexConflicts, no?
- DiskLoc bucket = locate( btreeState, btreeState->head(), key, pos, found, minDiskLoc );
- if ( bucket.isNull() )
- return bucket;
-
- const BtreeBucket<V> *b = bucket.btree<V>();
- while ( 1 ) {
- const _KeyNode& knraw = b->k(pos);
- if ( knraw.isUsed() )
- break;
- bucket = b->advance( bucket , pos , 1 , "findSingle" );
- if ( bucket.isNull() )
- return bucket;
- b = bucket.btree<V>();
- }
- KeyNode kn = b->keyNode( pos );
- if ( KeyOwned(key).woCompare( kn.key, btreeState->ordering() ) != 0 )
- return DiskLoc();
- return kn.recordLoc;
- }
-
- template class BucketBasics<V0>;
- template class BucketBasics<V1>;
- template class BtreeBucket<V0>;
- template class BtreeBucket<V1>;
- template struct __KeyNode<DiskLoc>;
- template struct __KeyNode<DiskLoc56Bit>;
-
- struct BTUnitTest : public StartupTest {
- void run() {
- DiskLoc big(0xf12312, 0x70001234);
- DiskLoc56Bit bigl;
- {
- bigl = big;
- verify( big == bigl );
- DiskLoc e = bigl;
- verify( big == e );
- }
- {
- DiskLoc d;
- verify( d.isNull() );
- DiskLoc56Bit l;
- l = d;
- verify( l.isNull() );
- d = l;
- verify( d.isNull() );
- verify( l < bigl );
- }
- }
- } btunittest;
-}
-
diff --git a/src/mongo/db/structure/btree/btree.h b/src/mongo/db/structure/btree/btree.h
deleted file mode 100644
index ad89b763bd5..00000000000
--- a/src/mongo/db/structure/btree/btree.h
+++ /dev/null
@@ -1,1094 +0,0 @@
-// btree.h
-
-/**
-* Copyright (C) 2008 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#pragma once
-
-#include "mongo/pch.h"
-
-#include "mongo/db/diskloc.h"
-#include "mongo/db/storage/mmap_v1/dur.h"
-#include "mongo/db/jsobj.h"
-#include "mongo/db/storage/record.h"
-#include "mongo/db/structure/btree/key.h"
-
-namespace mongo {
-
- class IndexCatalogEntry;
- class IndexDescriptor;
-
- template< class Version > class BtreeBucket;
-
- /**
- * Our btree implementation generally follows the standard btree algorithm,
- * which is described in many places. The nodes of our btree are referred to
- * as buckets below. These buckets are of size BucketSize and their body is
- * an ordered array of <bson key, disk loc> pairs, where disk loc is the disk
- * location of a document and bson key is a projection of this document into
- * the schema of the index for this btree. Ordering is determined on the
- * basis of bson key first and then disk loc in case of a tie. All bson keys
- * for a btree have identical schemas with empty string field names and may
- * not have an objsize() exceeding KeyMax. The btree's buckets are
- * themselves organized into an ordered tree. Although there are exceptions,
- * generally buckets with n keys have n+1 children and the body of a bucket is
- * at least lowWaterMark bytes. A more strictly enforced requirement is that
- * a non root bucket must have at least one key except in certain transient
- * states.
- *
- * Our btrees support the following primary read operations: finding a
- * specified key; iterating from a starting key to the next or previous
- * ordered key; and skipping from a starting key to another specified key
- * without checking every intermediate key. The primary write operations
- * are insertion and deletion of keys. Insertion may trigger a bucket split
- * if necessary to avoid bucket overflow. In such a case, subsequent splits
- * will occur recursively as necessary. Deletion may trigger a bucket
- * rebalance, in which a size deficient bucket is filled with keys from an
- * adjacent bucket. In this case, splitting may potentially occur in the
- * parent. Deletion may alternatively trigger a merge, in which the keys
- * from two buckets and a key from their shared parent are combined into the
- * same bucket. In such a case, rebalancing or merging may proceed
- * recursively from the parent.
- *
- * While the btree data format has been relatively constant over time, btrees
- * initially created by versions of mongo earlier than the current version
- * may embody different properties than freshly created btrees (while
- * following the same data format). These older btrees are referred to
- * below as legacy btrees.
- */
-
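Not part of the original source: a minimal, self-contained sketch of the ordering rule described
above (bson key compared first, disk location as a tiebreaker), using hypothetical placeholder
types instead of the real Key and DiskLoc classes.

    #include <string>

    struct FakeLoc { int file; int ofs; };                 // stands in for DiskLoc
    struct FakeEntry { std::string key; FakeLoc loc; };    // stands in for <bson key, disk loc>

    inline int cmpInt(int a, int b) { return a < b ? -1 : (a > b ? 1 : 0); }

    inline int cmpLoc(const FakeLoc& a, const FakeLoc& b) {
        int c = cmpInt(a.file, b.file);
        return c ? c : cmpInt(a.ofs, b.ofs);
    }

    // The key is compared first; the record location only breaks ties, which gives
    // duplicate keys pointing at different documents a total order within a bucket.
    inline int cmpEntry(const FakeEntry& a, const FakeEntry& b) {
        int c = a.key.compare(b.key);
        return c ? c : cmpLoc(a.loc, b.loc);
    }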
- const int OldBucketSize = 8192;
-
-#pragma pack(1)
- template< class Version > class BucketBasics;
-
- /**
- * This is the fixed width data component for storage of a key within a
- * bucket. It contains an offset pointer to the variable width bson
- * data component. A _KeyNode may be 'unused', please see below.
- */
- template< class Loc >
- struct __KeyNode {
- /** Signals that we are writing this _KeyNode and casts away const */
- __KeyNode<Loc> & writing() const;
- /**
- * The 'left' child bucket of this key. If this is the i-th key, it
-         * points to the child bucket at index i.
- */
- Loc prevChildBucket;
- /** The location of the record associated with this key. */
- Loc recordLoc;
- short keyDataOfs() const { return (short) _kdo; }
-
- /** Offset within current bucket of the variable width bson key for this _KeyNode. */
- unsigned short _kdo;
- void setKeyDataOfs(short s) {
- _kdo = s;
- verify(s>=0);
- }
- /** Seems to be redundant. */
- void setKeyDataOfsSavingUse(short s) {
- _kdo = s;
- verify(s>=0);
- }
- /**
- * Unused keys are not returned by read operations. Keys may be marked
- * as unused in cases where it is difficult to delete them while
- * maintaining the constraints required of a btree.
- *
-         * Setting ofs to odd is the sentinel for unused, as real recordLocs
-         * always have even offsets. Note we need to keep the value essentially
-         * unchanged, because the recordLoc is part of the key in the index
-         * (to handle duplicate keys efficiently).
- *
- * Flagging keys as unused is a feature that is being phased out in favor
- * of deleting the keys outright. The current btree implementation is
- * not expected to mark a key as unused in a non legacy btree.
- */
- void setUnused() {
- recordLoc.GETOFS() |= 1;
- }
- void setUsed() { recordLoc.GETOFS() &= ~1; }
- int isUnused() const {
- return recordLoc.getOfs() & 1;
- }
- int isUsed() const {
- return !isUnused();
- }
- };
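Not from the source: a standalone illustration of the odd-offset sentinel described in the
comment above. Record offsets are always even, so the low bit of the recordLoc offset is free
to flag a key as unused; the FakeKeyNode type below is hypothetical.

    // Sketch only; the real _KeyNode works on Loc::GETOFS() under a write intent.
    struct FakeKeyNode {
        int recordOfs;                                // real record offsets are always even
        void setUnused()      { recordOfs |= 1; }     // low bit set = unused marker
        void setUsed()        { recordOfs &= ~1; }
        bool isUnused() const { return recordOfs & 1; }
    };

    // Usage: FakeKeyNode kn = { 0x2000 };
    //        kn.setUnused();   // recordOfs == 0x2001, ordering barely disturbed
    //        kn.setUsed();     // back to 0x2000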
-
- /**
- * This structure represents header data for a btree bucket. An object of
- * this type is typically allocated inside of a buffer of size BucketSize,
- * resulting in a full bucket with an appropriate header.
- *
- * The body of a btree bucket contains an array of _KeyNode objects starting
- * from its lowest indexed bytes and growing to higher indexed bytes. The
- * body also contains variable width bson keys, which are allocated from the
- * highest indexed bytes toward lower indexed bytes.
- *
- * |hhhh|kkkkkkk--------bbbbbbbbbbbuuubbbuubbb|
- * h = header data
- * k = KeyNode data
- * - = empty space
- * b = bson key data
- * u = unused (old) bson key data, that may be garbage collected
- */
- class BtreeData_V0 {
- protected:
- /** Parent bucket of this bucket, which isNull() for the root bucket. */
- DiskLoc parent;
-        /** Given that there are n keys, this is the child at index n. */
- DiskLoc nextChild;
- /** can be reused, value is 8192 in current pdfile version Apr2010 */
- unsigned short _wasSize;
- /** zero */
- unsigned short _reserved1;
- int flags;
-
- void _init() {
- _reserved1 = 0;
- _wasSize = BucketSize;
- reserved = 0;
- }
-
- /** basicInsert() assumes the next three members are consecutive and in this order: */
-
- /** Size of the empty region. */
- int emptySize;
- /** Size used for bson storage, including storage of old keys. */
- int topSize;
- /* Number of keys in the bucket. */
- int n;
-
- int reserved;
- /* Beginning of the bucket's body */
- char data[4];
-
- public:
- typedef __KeyNode<DiskLoc> _KeyNode;
- typedef DiskLoc Loc;
- typedef KeyBson Key;
- typedef KeyBson KeyOwned;
- enum { BucketSize = 8192 };
-
- // largest key size we allow. note we very much need to support bigger keys (somehow) in the future.
- static const int KeyMax = OldBucketSize / 10;
- // A sentinel value sometimes used to identify a deallocated bucket.
- static const int INVALID_N_SENTINEL = -1;
- };
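A rough, hypothetical sketch (not from the source) of the space accounting implied by the layout
diagram above: fixed-width _KeyNode slots grow from the low end of data[], variable-width bson
keys are allocated from the top, and emptySize is the gap left in between. The 24-byte header
below is an assumed figure, not the real headerSize().

    // Illustration only; the real bucket tracks this with emptySize/topSize/_alloc().
    struct FakeBucketSpace {
        static const int bodySize = 8192 - 24;  // assumed header size of 24 bytes
        int nKeys;     // _KeyNode slots in use at the low end
        int topSize;   // bytes of bson key data allocated from the high end

        int emptySize(int keyNodeSize) const {
            return bodySize - nKeys * keyNodeSize - topSize;
        }
        // A new key of 'bytes' bson bytes fits without repacking only if both a
        // _KeyNode slot and the bson data fit in the remaining gap.
        bool fits(int bytes, int keyNodeSize) const {
            return emptySize(keyNodeSize) >= keyNodeSize + bytes;
        }
    };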
-
- // a a a ofs ofs ofs ofs
- class DiskLoc56Bit {
- int ofs;
- unsigned char _a[3];
- unsigned long long Z() const {
- // endian
- unsigned long long result = ofs;
- char* cursor = reinterpret_cast<char *>(&result);
- *reinterpret_cast<uint16_t*>(cursor + 4) = *reinterpret_cast<const uint16_t*>(&_a[0]);
- *reinterpret_cast<uint8_t*>(cursor + 6) = *reinterpret_cast<const uint8_t*>(&_a[2]);
- *reinterpret_cast<uint8_t*>(cursor + 7) = uint8_t(0);
- return result;
- }
- enum {
-            // The low bit of offsets is used by _KeyNode (as the unused flag), so we don't use -1 here.
- OurNullOfs = -2
- };
- public:
- template< class V >
- const BtreeBucket<V> * btree() const {
- return DiskLoc(*this).btree<V>();
- }
- template< class V >
- BtreeBucket<V> * btreemod() const {
- return DiskLoc(*this).btreemod<V>();
- }
- operator const DiskLoc() const {
- // endian
- if( isNull() ) return DiskLoc();
- unsigned a = *((unsigned *) (_a-1));
- return DiskLoc(a >> 8, ofs);
- }
- int& GETOFS() { return ofs; }
- int getOfs() const { return ofs; }
- bool operator<(const DiskLoc56Bit& rhs) const {
-            // the ordering of dup keys in btrees isn't too critical, but we'd like to put items that are
- // close together on disk close together in the tree, so we do want the file # to be the most significant
- // bytes
- return Z() < rhs.Z();
- }
- int compare(const DiskLoc56Bit& rhs) const {
- unsigned long long a = Z();
- unsigned long long b = rhs.Z();
- if( a < b ) return -1;
- return a == b ? 0 : 1;
- }
- bool operator==(const DiskLoc56Bit& rhs) const { return Z() == rhs.Z(); }
- bool operator!=(const DiskLoc56Bit& rhs) const { return Z() != rhs.Z(); }
- bool operator==(const DiskLoc& rhs) const {
- return DiskLoc(*this) == rhs;
- }
- bool operator!=(const DiskLoc& rhs) const { return !(*this==rhs); }
- bool isNull() const { return ofs < 0; }
- void Null() {
- ofs = OurNullOfs;
- _a[0] = _a[1] = _a[2] = 0;
- }
- string toString() const { return DiskLoc(*this).toString(); }
- void operator=(const DiskLoc& loc) {
- ofs = loc.getOfs();
- int la = loc.a();
- verify( la <= 0xffffff ); // must fit in 3 bytes
- if( la < 0 ) {
- if ( la != -1 ) {
- log() << "btree diskloc isn't negative 1: " << la << endl;
- verify ( la == -1 );
- }
- la = 0;
- ofs = OurNullOfs;
- }
- memcpy(_a, &la, 3); // endian
- }
- DiskLoc56Bit& writing() const {
- return *((DiskLoc56Bit*) getDur().writingPtr((void*)this, 7));
- }
- };
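A standalone sketch (not the class above) of the 56-bit encoding idea: a 3-byte file number and
a 4-byte offset folded into one 64-bit integer with the file number in the most significant
bytes, so locations in the same file sort next to each other. Endianness handling, null
encoding, and write intent are omitted, unlike the real Z().

    #include <cstdint>

    // fileNum must fit in 3 bytes, mirroring the verify( la <= 0xffffff ) above.
    inline uint64_t pack56(uint32_t fileNum, uint32_t ofs) {
        return (uint64_t(fileNum & 0xffffffu) << 32) | ofs;
    }

    // e.g. pack56(1, 0x100) < pack56(1, 0x200) < pack56(2, 0):
    // items close together on disk stay close together in key order.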
-
- class BtreeData_V1 {
- public:
- typedef DiskLoc56Bit Loc;
- //typedef DiskLoc Loc;
- typedef __KeyNode<Loc> _KeyNode;
- typedef KeyV1 Key;
- typedef KeyV1Owned KeyOwned;
- enum { BucketSize = 8192-16 }; // leave room for Record header
- // largest key size we allow. note we very much need to support bigger keys (somehow) in the future.
- static const int KeyMax = 1024;
- // A sentinel value sometimes used to identify a deallocated bucket.
- static const unsigned short INVALID_N_SENTINEL = 0xffff;
- protected:
- /** Parent bucket of this bucket, which isNull() for the root bucket. */
- Loc parent;
-        /** Given that there are n keys, this is the child at index n. */
- Loc nextChild;
-
- unsigned short flags;
-
- /** basicInsert() assumes the next three members are consecutive and in this order: */
-
- /** Size of the empty region. */
- unsigned short emptySize;
- /** Size used for bson storage, including storage of old keys. */
- unsigned short topSize;
- /* Number of keys in the bucket. */
- unsigned short n;
-
- /* Beginning of the bucket's body */
- char data[4];
-
- void _init() { }
- };
-
- typedef BtreeData_V0 V0;
- typedef BtreeData_V1 V1;
-
- /**
- * This class adds functionality to BtreeData for managing a single bucket.
- * The following policies are used in an attempt to encourage simplicity:
- *
- * Const member functions of this class are those which may be called on
- * an object for which writing has not been signaled. Non const member
- * functions may only be called on objects for which writing has been
- * signaled. Note that currently some const functions write to the
- * underlying memory representation of this bucket using optimized methods
- * to signal write operations.
- *
- * DiskLoc parameters that may shadow references within the btree should
- * be passed by value rather than by reference to non const member
- * functions or to const member functions which may perform writes. This way
- * a callee need not worry that write operations will change or invalidate
- * its arguments.
- *
- * The current policy for dealing with bson arguments is the opposite of
- * what is described above for DiskLoc arguments. We do not want to copy
- * bson into memory as an intermediate step for btree changes, and if bson
- * is to be moved it must be copied to the new location before the old
- * location is invalidated. Care should be taken in cases where that invalid
- * memory may be implicitly referenced by function arguments.
- *
- * A number of functions below require a thisLoc argument, which must be the
- * disk location of the bucket mapped to 'this'.
- */
- template< class Version >
- class BucketBasics : public Version {
- public:
- template <class U> friend class BtreeBuilder;
- typedef typename Version::Key Key;
- typedef typename Version::_KeyNode _KeyNode;
- typedef typename Version::Loc Loc;
-
- int getN() const { return this->n; }
-
- /**
- * This is an in memory wrapper for a _KeyNode, and not itself part of btree
- * storage. This object and its BSONObj 'key' will become invalid if the
- * _KeyNode data that generated it is moved within the btree. In general,
- * a KeyNode should not be expected to be valid after a write.
- */
- class KeyNode {
- public:
- KeyNode(const BucketBasics<Version>& bb, const _KeyNode &k);
- const Loc& prevChildBucket;
- const Loc& recordLoc;
- /* Points to the bson key storage for a _KeyNode */
- Key key;
- };
- friend class KeyNode;
-
- /** Assert write intent declared for this bucket already. */
- void assertWritable();
-
- void assertValid(const Ordering &order, bool force = false) const;
- void assertValid(const BSONObj &orderObj, bool force = false) const { return assertValid(Ordering::make(orderObj),force); }
-
- /**
- * @return KeyNode for key at index i. The KeyNode will become invalid
- * if the key is moved or reassigned, or if the node is packed. In general
- * a KeyNode should not be expected to be valid after a write.
- */
- const KeyNode keyNode(int i) const {
- if ( i >= this->n ) {
- massert( 13000 , (string)"invalid keyNode: " + BSON( "i" << i << "n" << this->n ).jsonString() , i < this->n );
- }
- return KeyNode(*this, k(i));
- }
-
- static int headerSize() {
- const BucketBasics *d = 0;
- return (char*)&(d->data) - (char*)&(d->parent);
- }
- static int bodySize() { return Version::BucketSize - headerSize(); }
- static int lowWaterMark() { return bodySize() / 2 - Version::KeyMax - sizeof( _KeyNode ) + 1; } // see comment in btree.cpp
-
- // for testing
- int nKeys() const { return this->n; }
- const DiskLoc getNextChild() const { return this->nextChild; }
-
- // for tree inspection and statistical analysis
- // NOTE: topSize and emptySize have different types in BtreeData_V0 and BtreeData_V1
-
- /** Size used for bson storage, including storage of old keys. */
- unsigned int getTopSize() const { return static_cast<unsigned int>(this->topSize); }
- /** Size of the empty region. */
- unsigned int getEmptySize() const { return static_cast<unsigned int>(this->emptySize); }
-
- protected:
- char * dataAt(short ofs) { return this->data + ofs; }
-
- /** Initialize the header for a new node. */
- void init();
-
- /**
- * Preconditions:
- * - 0 <= keypos <= n
- * - If key is inserted at position keypos, the bucket's keys will still be
- * in order.
- * Postconditions:
- * - If key can fit in the bucket, the bucket may be packed and keypos
- * may be decreased to reflect deletion of earlier indexed keys during
- * packing, the key will be inserted at the updated keypos index with
- * a null prevChildBucket, the subsequent keys shifted to the right,
- * and the function will return true.
- * - If key cannot fit in the bucket, the bucket will be packed and
- * the function will return false.
- * Although this function is marked const, it modifies the underlying
- * btree representation through an optimized write intent mechanism.
- */
- bool basicInsert(const DiskLoc thisLoc, int &keypos, const DiskLoc recordLoc, const Key& key, const Ordering &order) const;
-
- /**
- * Preconditions:
- * - key / recordLoc are > all existing keys
- * - The keys in prevChild and their descendants are between all existing
- * keys and 'key'.
- * Postconditions:
- * - If there is space for key without packing, it is inserted as the
- * last key with specified prevChild and true is returned.
- * Importantly, nextChild is not updated!
- * - Otherwise false is returned and there is no change.
- */
- bool _pushBack(const DiskLoc recordLoc, const Key& key, const Ordering &order, const DiskLoc prevChild);
- void pushBack(const DiskLoc recordLoc, const Key& key, const Ordering &order, const DiskLoc prevChild) {
- bool ok = _pushBack( recordLoc , key , order , prevChild );
- verify(ok);
- }
-
- /**
- * This is a special purpose function used by BtreeBuilder. The
- * interface is quite dangerous if you're not careful. The bson key
- * returned here points to bucket memory that has been invalidated but
- * not yet reclaimed.
- *
- * TODO Maybe this could be replaced with two functions, one which
- * returns the last key without deleting it and another which simply
- * deletes the last key. Then the caller would have enough control to
- * ensure proper memory integrity.
- *
- * Preconditions:
- * - bucket is not empty
- * - last key of bucket is used (not unused)
- * - nextChild isNull()
- * - _unalloc will work correctly as used - see code
- * Postconditions:
- * - The last key of the bucket is removed, and its key and recLoc are
- * returned. As mentioned above, the key points to unallocated memory.
- */
- void popBack(DiskLoc& recLoc, Key &key);
-
- /**
- * Preconditions:
- * - 0 <= keypos < n
- * - there is no child bucket at keypos
- * - n > 1
- * - if mayEmpty == false or nextChild.isNull(), n > 0
- * Postconditions:
- * - The key at keypos is removed, and remaining keys are shifted over.
- * - The bucket becomes unpacked.
- * - if mayEmpty is true and nextChild.isNull(), the bucket may have no keys.
- */
- void _delKeyAtPos(int keypos, bool mayEmpty = false);
-
- /* !Packed means there is deleted fragment space within the bucket.
- We "repack" when we run out of space before considering the node
- to be full.
- */
- enum Flags { Packed=1 };
-
- /** n == 0 is ok */
- const Loc& childForPos(int p) const { return p == this->n ? this->nextChild : k(p).prevChildBucket; }
- Loc& childForPos(int p) { return p == this->n ? this->nextChild : k(p).prevChildBucket; }
-
- /** Same as bodySize(). */
- int totalDataSize() const;
- /**
- * @return true when a key may be dropped by pack()
- * @param index index of the key that may be dropped
- * @param refPos index of a particular key of interest, which must not
- * be dropped; = 0 to safely ignore
- */
- bool mayDropKey( int index, int refPos ) const;
-
- /**
- * Pack the bucket to reclaim space from invalidated memory.
- * @refPos is an index in the bucket which may be updated if we
- * delete keys from the bucket
- * This function may cast away const and perform a write.
- * Preconditions: none
- * Postconditions:
- * - Bucket will be packed
- * - Some unused nodes may be dropped, but not ones at index 0 or refPos
- * - Some used nodes may be moved
- * - If refPos is the index of an existing key, it will be updated to that
- * key's new index if the key is moved.
- */
- void _pack(const DiskLoc thisLoc, const Ordering &order, int &refPos) const;
- /** Pack when already writable */
- void _packReadyForMod(const Ordering &order, int &refPos);
-
- /** @return the size the bucket's body would have if we were to call pack() */
- int packedDataSize( int refPos ) const;
- void setNotPacked() { this->flags &= ~Packed; }
- void setPacked() { this->flags |= Packed; }
- /**
- * Preconditions: 'bytes' is <= emptySize
- * Postconditions: A buffer of size 'bytes' is allocated on the top side,
- * and its offset is returned.
- */
- int _alloc(int bytes);
- /**
- * This function can be used to deallocate the lowest byte index bson
- * buffer in the top region, which in some but not all cases is for the
- * n - 1 index key. This function only works correctly in certain
- * special cases, please be careful.
- * Preconditions: 'bytes' <= topSize
- * Postconditions: The top region is decreased
- */
- void _unalloc(int bytes);
- /**
- * Preconditions: 'N' <= n
- * Postconditions:
-         *   - All keys at index N and beyond are dropped.
-         *   - The bucket is then packed, without dropping refPos if refPos < N.
- */
- void truncateTo(int N, const Ordering &order, int &refPos);
- /**
- * Preconditions:
- * - 'nDrop' < n
- * - for now, refPos should be zero.
- * Postconditions:
- * - All keys before the nDrop index key are dropped.
- * - The bucket is packed.
- */
- void dropFront(int nDrop, const Ordering &order, int &refPos);
- /**
- * Preconditions: 0 <= keypos < n
- * Postconditions: keypos indexed key is marked unused.
- */
- void markUnused(int keypos);
-
- /**
- * BtreeBuilder uses the parent var as a temp place to maintain a linked list chain.
- * we use tempNext() when we do that to be less confusing. (one might have written a union in C)
- */
- DiskLoc tempNext() const { return this->parent; }
- void setTempNext(DiskLoc l) { this->parent = l; }
-
- void _shape(int level, stringstream&) const;
- int Size() const;
-
- /** @return i-indexed _KeyNode, without bounds checking */
- public:
- const _KeyNode& k(int i) const { return ((const _KeyNode*)this->data)[i]; }
- _KeyNode& _k(int i) { return ((_KeyNode*)this->data)[i]; }
- protected:
- _KeyNode& k(int i) { return ((_KeyNode*)this->data)[i]; }
-
- /**
- * Preconditions: 'this' is packed
- * @return the key index to be promoted on split
- * @param keypos The requested index of a key to insert, which may affect
- * the choice of split position.
- */
- int splitPos( int keypos ) const;
-
- /**
- * Preconditions: nAdd * sizeof( _KeyNode ) <= emptySize
- * Postconditions:
- * - Increases indexes of existing _KeyNode objects by nAdd, reserving
- * space for additional _KeyNode objects at front.
- * - Does not initialize ofs values for the bson data of these
- * _KeyNode objects.
- */
- void reserveKeysFront( int nAdd );
-
- /**
- * Preconditions:
- * - 0 <= i < n
- * - The bson 'key' must fit in the bucket without packing.
- * - If 'key' and 'prevChildBucket' are set at index i, the btree
- * ordering properties will be maintained.
- * Postconditions:
- * - The specified key is set at index i, replacing the existing
- * _KeyNode data and without shifting any other _KeyNode objects.
- */
- void setKey( int i, const DiskLoc recordLoc, const Key& key, const DiskLoc prevChildBucket );
- };
-
- /**
- * This class adds functionality for manipulating buckets that are assembled
- * in a tree. The requirements for const and non const functions and
- * arguments are generally the same as in BtreeBucket. Because this class
- * deals with tree structure, some functions that are marked const may
- * trigger modification of another node in the btree or potentially of the
- * current node. In such cases, the function's implementation explicitly
- * casts away const when indicating an intent to write to the durability
- * layer. The DiskLocs provided to such functions should be passed by
- * value if they shadow pointers within the btree.
- *
- * To clarify enforcement of referential integrity in this implementation,
- * we use the following pattern when deleting data we have a persistent
- * pointer to. The pointer is cleared or removed explicitly, then the data
- * it pointed to is cleaned up with a helper function.
- *
- * TODO It might make sense to put some of these functions in a class
- * representing a full btree instead of a single btree bucket. That would
- * allow us to use the const qualifier in a manner more consistent with
- * standard usage. Right now the interface is for both a node and a tree,
- * so assignment of const is sometimes nonideal.
- *
- * TODO There are several cases in which the 'this' pointer is invalidated
- * as a result of deallocation. A separate class representing a btree would
- * alleviate some fragile cases where the implementation must currently
- * behave correctly if the 'this' pointer is suddenly invalidated by a
- * callee.
- */
- template< class V >
- class BtreeBucket : public BucketBasics<V> {
- friend class BtreeCursor;
- public:
- // make compiler happy:
- typedef typename V::Key Key;
- typedef typename V::KeyOwned KeyOwned;
- typedef typename BucketBasics<V>::KeyNode KeyNode;
- typedef typename BucketBasics<V>::_KeyNode _KeyNode;
- typedef typename BucketBasics<V>::Loc Loc;
- const _KeyNode& k(int i) const { return static_cast< const BucketBasics<V> * >(this)->k(i); }
- protected:
- _KeyNode& k(int i) { return static_cast< BucketBasics<V> * >(this)->_k(i); }
- public:
-
- static const BtreeBucket<V>* asVersion( Record* rec );
- static BtreeBucket<V>* asVersionMod( Record* rec );
-
- const KeyNode keyNode(int i) const { return static_cast< const BucketBasics<V> * >(this)->keyNode(i); }
-
- bool isHead() const { return this->parent.isNull(); }
- void dumpTree(const DiskLoc &thisLoc, const BSONObj &order) const;
- long long fullValidate(const DiskLoc& thisLoc, const BSONObj &order, long long *unusedCount = 0, bool strict = false, unsigned depth=0) const; /* traverses everything */
-
- bool isUsed( int i ) const { return this->k(i).isUsed(); }
- string bucketSummary() const;
- void dump(unsigned depth=0) const;
-
- /**
- * @return true if key exists in index
- *
- * @order - indicates order of keys in the index. this is basically the index's key pattern, e.g.:
- * BSONObj order = ((IndexDetails&)idx).keyPattern();
- * likewise below in bt_insert() etc.
- */
- private:
- bool exists(const IndexCatalogEntry* btreeState, const DiskLoc &thisLoc, const Key& key ) const;
- public:
-
- /**
- * @param self - Don't complain about ourself already being in the index case.
- * @return true = There is a duplicate used key.
- */
- bool wouldCreateDup(const IndexCatalogEntry* btreeState,
- const DiskLoc& thisLoc,
- const Key& key,
- const DiskLoc& self) const;
-
- /**
- * Preconditions: none
- * Postconditions: @return a new bucket allocated from pdfile storage
-         * and init()-ed. This bucket is suitable for use as a new root
- * or any other new node in the tree.
- */
- static DiskLoc addBucket(IndexCatalogEntry* btreeState);
-
- /**
- * Preconditions: none
- * Postconditions:
- * - Some header values in this bucket are cleared, and the bucket is
- * deallocated from pdfile storage.
- * - The memory at thisLoc is invalidated, and 'this' is invalidated.
- */
- void deallocBucket(IndexCatalogEntry* btreeState, const DiskLoc thisLoc );
-
- /**
- * Preconditions:
- * - 'key' has a valid schema for this index.
-         *  - All other parameters are valid and consistent with this index if applicable.
- * Postconditions:
- * - If key is bigger than KeyMax, @return 2 or 3 and no change.
- * - If key / recordLoc exist in the btree as an unused key, set them
- * as used and @return 0
- * - If key / recordLoc exist in the btree as a used key, @throw
- * exception 10287 and no change.
- * - If key / recordLoc do not exist in the btree, they are inserted
- * and @return 0. The root of the btree may be changed, so
- * 'this'/thisLoc may no longer be the root upon return.
- */
- int bt_insert(IndexCatalogEntry* btreeState,
- const DiskLoc thisLoc,
- const DiskLoc recordLoc,
- const BSONObj& key,
- bool dupsallowed,
- bool toplevel) const;
- /**
- * Preconditions:
- * - 'key' has a valid schema for this index, and may have objsize() > KeyMax.
- * Postconditions:
- * - If key / recordLoc are in the btree, they are removed (possibly
- * by being marked as an unused key), @return true, and potentially
- * invalidate 'this' / thisLoc and change the head.
- * - If key / recordLoc are not in the btree, @return false and do nothing.
- */
- bool unindex(IndexCatalogEntry* btreeState,
- const DiskLoc thisLoc,
- const BSONObj& key,
- const DiskLoc recordLoc) const;
-
- /**
- * locate may return an "unused" key that is just a marker. so be careful.
- * looks for a key:recordloc pair.
- *
- * @found - returns true if exact match found. note you can get back a position
- * result even if found is false.
- */
- DiskLoc locate(const IndexCatalogEntry* btreeState,
- const DiskLoc& thisLoc,
- const BSONObj& key,
- int& pos,
- bool& found,
- const DiskLoc& recordLoc,
- int direction=1) const;
-
- DiskLoc locate(const IndexCatalogEntry* btreeState,
- const DiskLoc& thisLoc,
- const Key& key,
- int& pos,
- bool& found,
- const DiskLoc& recordLoc,
- int direction=1) const;
-
- /**
- * find the first instance of the key
- * does not handle dups
- * WARNING: findSingle may not be compound index safe. this may need to change. see notes in
- * findSingle code.
- * @return the record location of the first match
- */
- DiskLoc findSingle( const IndexCatalogEntry* btreeState,
- const DiskLoc& thisLoc,
- const BSONObj& key ) const;
-
- /**
- * Advance to next or previous key in the index.
- * @param direction to advance.
- */
- DiskLoc advance(const DiskLoc& thisLoc, int& keyOfs, int direction, const char *caller) const;
-
- /** Advance in specified direction to the specified key */
- void advanceTo(const IndexCatalogEntry* btreeState,
- DiskLoc &thisLoc,
- int &keyOfs,
- const BSONObj &keyBegin,
- int keyBeginLen,
- bool afterKey,
- const vector<const BSONElement*>& keyEnd,
- const vector<bool>& keyEndInclusive,
- int direction) const;
-
- /** Locate a key with fields comprised of a combination of keyBegin fields and keyEnd fields. */
- static void customLocate(const IndexCatalogEntry* btreeState,
- DiskLoc& locInOut,
- int& keyOfs,
- const BSONObj& keyBegin,
- int keyBeginLen, bool afterVersion,
- const vector<const BSONElement*>& keyEnd,
- const vector<bool>& keyEndInclusive,
- int direction,
- pair<DiskLoc, int>& bestParent);
-
- /** @return head of the btree by traversing from current bucket. */
- const DiskLoc getHead(const DiskLoc& thisLoc) const;
-
- /** get tree shape */
- void shape(stringstream&) const;
-
- static int getKeyMax();
-
- protected:
- /**
- * Preconditions:
- * - 0 <= firstIndex <= n
- * - -1 <= lastIndex <= n ( -1 is equivalent to n )
- * Postconditions:
- * - Any children at indexes firstIndex through lastIndex (inclusive)
- * will have their parent pointers set to thisLoc.
- */
- void fixParentPtrs(const DiskLoc thisLoc, int firstIndex = 0, int lastIndex = -1) const;
-
- /**
- * Preconditions:
- * - thisLoc is not the btree head.
- * - n == 0 is ok
- * Postconditions:
- * - All cursors pointing to this bucket will be updated.
- * - This bucket's parent's child pointer is set to null.
- * - This bucket is deallocated from pdfile storage.
- * - 'this' and thisLoc are invalidated.
- */
- void delBucket(IndexCatalogEntry* btreeState, const DiskLoc thisLoc );
-
- /**
- * Preconditions: 0 <= p < n
- * Postconditions:
- * - The key at index p is removed from the btree.
- * - 'this' and thisLoc may be invalidated.
- * - The tree head may change.
- */
- void delKeyAtPos(IndexCatalogEntry* btreeSate,
- const DiskLoc thisLoc,
- int p );
-
- /**
- * Preconditions:
- * - n == 0 is ok
- * Postconditions:
- * - If thisLoc is head, or if its body has at least lowWaterMark bytes,
- * return false and do nothing.
- * - Otherwise, if thisLoc has left or right neighbors, either balance
- * or merge with them and return true. Also, 'this' and thisLoc may
- * be invalidated and the tree head may change.
- */
- bool mayBalanceWithNeighbors(IndexCatalogEntry* btreeState, const DiskLoc thisLoc);
-
- /**
- * Preconditions:
- * - 0 <= leftIndex < n
- * - The child at leftIndex or the child at leftIndex + 1 contains
- * fewer than lowWaterMark bytes.
- * Postconditions:
-         *  - If the child bucket at leftIndex can merge with the child bucket
-         *    at leftIndex + 1, do nothing and return false.
- * - Otherwise, balance keys between the leftIndex child and the
- * leftIndex + 1 child, return true, and possibly change the tree head.
- */
- bool tryBalanceChildren(IndexCatalogEntry* btreeState, const DiskLoc thisLoc, int leftIndex) const;
-
- /**
- * Preconditions:
- * - All preconditions of tryBalanceChildren.
- * - The leftIndex child and leftIndex + 1 child cannot be merged.
- * Postconditions:
- * - Keys are moved between the leftIndex child and the leftIndex + 1
- * child such that neither child has fewer than lowWaterMark bytes.
- * The tree head may change.
- */
- void doBalanceChildren( IndexCatalogEntry* btreeState, const DiskLoc thisLoc, int leftIndex );
-
- /**
- * Preconditions:
- * - All preconditions of doBalanceChildren
- * - The leftIndex and leftIndex + 1 children are packed.
- * - The leftIndex + 1 child has fewer than lowWaterMark bytes.
- * - split returned by rebalancedSeparatorPos()
- * Postconditions:
- * - The key in lchild at index split is set as thisLoc's key at index
- * leftIndex, which may trigger a split and change the tree head.
- * The previous key in thisLoc at index leftIndex and all keys with
- * indexes greater than split in lchild are moved to rchild.
- */
- void doBalanceLeftToRight( IndexCatalogEntry* btreeState,
- const DiskLoc thisLoc, int leftIndex, int split,
- BtreeBucket<V> *l, const DiskLoc lchild,
- BtreeBucket<V> *r, const DiskLoc rchild );
- /**
- * Preconditions:
- * - All preconditions of doBalanceChildren
- * - The leftIndex and leftIndex + 1 children are packed.
- * - The leftIndex child has fewer than lowWaterMark bytes.
- * - split returned by rebalancedSeparatorPos()
- * Postconditions:
- * - The key in rchild at index split - l->n - 1 is set as thisLoc's key
- * at index leftIndex, which may trigger a split and change the tree
- * head. The previous key in thisLoc at index leftIndex and all keys
- * with indexes less than split - l->n - 1 in rchild are moved to
- * lchild.
- */
- void doBalanceRightToLeft( IndexCatalogEntry* btreeState,
- const DiskLoc thisLoc, int leftIndex, int split,
- BtreeBucket<V> *l, const DiskLoc lchild,
- BtreeBucket<V> *r, const DiskLoc rchild );
-
- /**
- * Preconditions:
- * - 0 <= leftIndex < n
- * - this->canMergeChildren( thisLoc, leftIndex ) == true
- * Postconditions:
-         *   - The keys of the leftIndex and leftIndex + 1 children, together with
-         *     the thisLoc key at leftIndex, are all placed in the left child.
- * - The tree may be updated recursively, resulting in 'this' and
- * thisLoc being invalidated and the tree head being changed.
- */
- void doMergeChildren(IndexCatalogEntry* btreeState,const DiskLoc thisLoc, int leftIndex );
-
- /**
- * Preconditions:
- * - n == 0
- * - !nextChild.isNull()
- * Postconditions:
- * - 'this' and thisLoc are deallocated (and invalidated), any cursors
- * to them are updated, and the tree head may change.
- * - nextChild replaces thisLoc in the btree structure.
- */
- void replaceWithNextChild( IndexCatalogEntry* btreeState, const DiskLoc thisLoc );
-
- /**
- * @return true iff the leftIndex and leftIndex + 1 children both exist,
-         * and their packed body sizes plus the thisLoc key at leftIndex
- * would fit in a single bucket body.
- */
- bool canMergeChildren( const DiskLoc &thisLoc, int leftIndex ) const;
-
- /**
- * Preconditions:
- * - leftIndex and leftIndex + 1 children are packed
- * - leftIndex or leftIndex + 1 child is below lowWaterMark
- * @return index of the rebalanced separator; the index value is
- * determined as if we had a bucket with body
- * <left bucket keys array>.push( <old separator> ).concat( <right bucket keys array> )
- * and called splitPos( 0 ) on it.
- */
- int rebalancedSeparatorPos( const DiskLoc &thisLoc, int leftIndex ) const;
-
- /**
- * Preconditions: thisLoc has a parent
- * @return parent's index of thisLoc.
- */
- int indexInParent( const DiskLoc &thisLoc ) const;
-
- public:
- Key keyAt(int i) const {
- if( i >= this->n )
- return Key();
- return Key(this->data + k(i).keyDataOfs());
- }
- protected:
-
- /**
- * Preconditions:
- * - This bucket is packed.
- * - Cannot add a key of size KeyMax to this bucket.
- * - 0 <= keypos <= n is the position of a new key that will be inserted
- * - lchild is equal to the existing child at index keypos.
- * Postconditions:
- * - The thisLoc bucket is split into two packed buckets, possibly
- * invalidating the initial position of keypos, with a split key
- * promoted to the parent. The new key key/recordLoc will be inserted
- * into one of the split buckets, and lchild/rchild set appropriately.
- * Splitting may occur recursively, possibly changing the tree head.
- */
- void split(IndexCatalogEntry* btreeState,
- const DiskLoc thisLoc, int keypos,
- const DiskLoc recordLoc, const Key& key,
- const DiskLoc lchild, const DiskLoc rchild);
-
- /**
- * Preconditions:
- * - 0 <= keypos <= n
- * - If key / recordLoc are inserted at position keypos, with provided
- * lchild and rchild, the btree ordering requirements will be
- * maintained.
- * - lchild is equal to the existing child at index keypos.
- * - n == 0 is ok.
- * Postconditions:
- * - The key / recordLoc are inserted at position keypos, and the
- * bucket is split if necessary, which may change the tree head.
- * - The bucket may be packed or split, invalidating the specified value
- * of keypos.
- * This function will always modify thisLoc, but it's marked const because
-     * it commonly relies on the specialized write intent mechanism of basicInsert().
- */
- void insertHere(IndexCatalogEntry* btreeState,
- const DiskLoc thisLoc, int keypos,
- const DiskLoc recordLoc, const Key& key,
- const DiskLoc lchild, const DiskLoc rchild ) const;
-
- /** bt_insert() is basically just a wrapper around this. */
- int _insert(IndexCatalogEntry* btreeState,
- const DiskLoc thisLoc, const DiskLoc recordLoc,
- const Key& key, bool dupsallowed,
- const DiskLoc lChild, const DiskLoc rChild ) const;
-
- bool find(const IndexCatalogEntry* btreeState,
- const Key& key,
- const DiskLoc &recordLoc,
- int& pos,
- bool assertIfDup) const;
-
- static bool customFind( int l, int h, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction, DiskLoc &thisLoc, int &keyOfs, pair< DiskLoc, int > &bestParent ) ;
- static void findLargestKey(const DiskLoc& thisLoc, DiskLoc& largestLoc, int& largestKey);
- static int customBSONCmp( const BSONObj &l, const BSONObj &rBegin, int rBeginLen, bool rSup, const vector< const BSONElement * > &rEnd, const vector< bool > &rEndInclusive, const Ordering &o, int direction );
-
- /** If child is non null, set its parent to thisLoc */
- static void fix(const DiskLoc thisLoc, const DiskLoc child);
-
- /**
- * Preconditions:
- * - 0 <= keypos < n
- * - If the specified key and recordLoc are placed in keypos of thisLoc,
- * and lchild and rchild are set, the btree ordering properties will
- * be maintained.
- * - rchild == childForPos( keypos + 1 )
- * - childForPos( keypos ) is referenced elsewhere if nonnull.
- * Postconditions:
- * - The key at keypos will be replaced with the specified key and
- * lchild, potentially splitting this bucket and changing the tree
- * head.
- * - childForPos( keypos ) will be orphaned.
- */
- void setInternalKey( IndexCatalogEntry* btreeState,
- const DiskLoc thisLoc,
- int keypos,
- const DiskLoc recordLoc,
- const Key &key,
- const DiskLoc lchild,
- const DiskLoc rchild );
-
- /**
- * Preconditions:
- * - 0 <= keypos < n
- * - The keypos or keypos+1 indexed child is non null.
- * Postconditions:
- * - The specified key is deleted by replacing it with another key if
- * possible. This replacement may cause a split and change the tree
- * head. The replacement key will be deleted from its original
- * location, potentially causing merges and splits that may invalidate
- * 'this' and thisLoc and change the tree head.
- * - If the key cannot be replaced, it will be marked as unused. This
- * is only expected in legacy btrees.
- */
- void deleteInternalKey( IndexCatalogEntry* btreeState, const DiskLoc thisLoc, int keypos );
- public:
- /** simply builds and returns a dup key error message string */
- static string dupKeyError( const IndexDescriptor* idx , const Key& key );
- };
-#pragma pack()
-
- template< class V >
- BucketBasics<V>::KeyNode::KeyNode(const BucketBasics<V>& bb, const _KeyNode &k) :
- prevChildBucket(k.prevChildBucket),
- recordLoc(k.recordLoc), key(bb.data+k.keyDataOfs())
- { }
-
- template< class V >
- const BtreeBucket<V> * DiskLoc::btree() const {
- verify( _a != -1 );
- Record *r = rec();
- return (const BtreeBucket<V> *) r->data();
- }
-
- /**
- * give us a writable version of the btree bucket (declares write intent).
- * note it is likely more efficient to declare write intent on something smaller when you can.
- */
- template< class V >
- BtreeBucket<V> * DiskLoc::btreemod() const {
- verify( _a != -1 );
- BtreeBucket<V> *b = const_cast< BtreeBucket<V> * >( btree<V>() );
- return static_cast< BtreeBucket<V>* >( getDur().writingPtr( b, V::BucketSize ) );
- }
-
-
-
-} // namespace mongo;
diff --git a/src/mongo/dbtests/indexupdatetests.cpp b/src/mongo/dbtests/indexupdatetests.cpp
index d28edbf2ab4..fa5b119922c 100644
--- a/src/mongo/dbtests/indexupdatetests.cpp
+++ b/src/mongo/dbtests/indexupdatetests.cpp
@@ -28,7 +28,6 @@
* then also delete it in the license file.
*/
-#include "mongo/db/structure/btree/btree.h"
#include "mongo/db/catalog/index_catalog.h"
#include "mongo/db/dbhelpers.h"
#include "mongo/db/index/btree_based_bulk_access_method.h"
diff --git a/src/mongo/dbtests/test.vcxproj b/src/mongo/dbtests/test.vcxproj
index ced56ad2b7b..4ae594420e2 100644
--- a/src/mongo/dbtests/test.vcxproj
+++ b/src/mongo/dbtests/test.vcxproj
@@ -987,7 +987,6 @@ cscript //Nologo "$(ProjectDir)..\shell\createCPPfromJavaScriptFiles.js" "$(Proj
<ClInclude Include="..\..\third_party\pcre-8.30\pcre.h" />
<ClInclude Include="..\client\connpool.h" />
<ClInclude Include="..\client\dbclient.h" />
- <ClInclude Include="..\db\btree.h" />
<ClInclude Include="..\db\clientcursor.h" />
<ClInclude Include="..\db\cmdline.h" />
<ClInclude Include="..\db\commands.h" />
@@ -3621,7 +3620,6 @@ cscript //Nologo "$(ProjectDir)..\shell\createCPPfromJavaScriptFiles.js" "$(Proj
<ClCompile Include="..\client\connpool.cpp" />
<ClCompile Include="..\client\dbclient.cpp" />
<ClCompile Include="..\client\syncclusterconnection.cpp" />
- <ClCompile Include="..\db\btree.cpp" />
<ClCompile Include="..\db\btreecursor.cpp" />
<ClCompile Include="..\pch.cpp" />
<ClCompile Include="..\db\client.cpp" />
diff --git a/src/mongo/dbtests/test.vcxproj.filters b/src/mongo/dbtests/test.vcxproj.filters
index 4082afd669a..30e8d34eb63 100755
--- a/src/mongo/dbtests/test.vcxproj.filters
+++ b/src/mongo/dbtests/test.vcxproj.filters
@@ -526,9 +526,6 @@
<ClInclude Include="..\db\mongomutex.h">
<Filter>db\Header Files\e to n</Filter>
</ClInclude>
- <ClInclude Include="..\db\btree.h">
- <Filter>db\Header Files\a to d</Filter>
- </ClInclude>
<ClInclude Include="..\db\durop.h">
<Filter>db\Header Files\a to d</Filter>
</ClInclude>
@@ -3327,9 +3324,6 @@
<ClCompile Include="..\db\repl\rs_rollback.cpp">
<Filter>db\repl</Filter>
</ClCompile>
- <ClCompile Include="..\db\btree.cpp">
- <Filter>db\Source Files\a to d</Filter>
- </ClCompile>
<ClCompile Include="..\db\dur_writetodatafiles.cpp">
<Filter>db\Source Files\a to d</Filter>
</ClCompile>