path: root/src/mongo/db/btree.h
author  Eliot Horowitz <eliot@10gen.com>  2011-12-24 15:33:26 -0500
committer  Eliot Horowitz <eliot@10gen.com>  2011-12-24 15:33:45 -0500
commit  ae1ecd9c786911f9f1f0242f0f7d702b3e5dfeba (patch)
tree  92f8e1649e6f080b251ff5f1763679a72eb59b34 /src/mongo/db/btree.h
parent  dfa4cd7e2cf109b072440155fabc08a93c8045a0 (diff)
download  mongo-ae1ecd9c786911f9f1f0242f0f7d702b3e5dfeba.tar.gz
bulk move of code to src/ SERVER-4551
Diffstat (limited to 'src/mongo/db/btree.h')
-rw-r--r--  src/mongo/db/btree.h  1174
1 files changed, 1174 insertions, 0 deletions
diff --git a/src/mongo/db/btree.h b/src/mongo/db/btree.h
new file mode 100644
index 00000000000..85e5172d163
--- /dev/null
+++ b/src/mongo/db/btree.h
@@ -0,0 +1,1174 @@
+// btree.h
+
+/**
+* Copyright (C) 2008 10gen Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+
+#include "../pch.h"
+#include "jsobj.h"
+#include "diskloc.h"
+#include "pdfile.h"
+#include "key.h"
+
+namespace mongo {
+
+ /**
+ * Our btree implementation generally follows the standard btree algorithm,
+ * which is described in many places. The nodes of our btree are referred to
+ * as buckets below. These buckets are of size BucketSize and their body is
+ * an ordered array of <bson key, disk loc> pairs, where disk loc is the disk
+ * location of a document and bson key is a projection of this document into
+ * the schema of the index for this btree. Ordering is determined on the
+ * basis of bson key first and then disk loc in case of a tie. All bson keys
+ * for a btree have identical schemas with empty string field names and may
+ * not have an objsize() exceeding KeyMax. The btree's buckets are
+ * themselves organized into an ordered tree. Although there are exceptions,
+ * generally buckets with n keys have n+1 children and the body of a bucket is
+ * at least lowWaterMark bytes. A more strictly enforced requirement is that
+ * a non root bucket must have at least one key except in certain transient
+ * states.
+ *
+ * Our btrees support the following primary read operations: finding a
+ * specified key; iterating from a starting key to the next or previous
+ * ordered key; and skipping from a starting key to another specified key
+ * without checking every intermediate key. The primary write operations
+ * are insertion and deletion of keys. Insertion may trigger a bucket split
+ * if necessary to avoid bucket overflow. In such a case, subsequent splits
+ * will occur recursively as necessary. Deletion may trigger a bucket
+ * rebalance, in which a size deficient bucket is filled with keys from an
+ * adjacent bucket. In this case, splitting may potentially occur in the
+ * parent. Deletion may alternatively trigger a merge, in which the keys
+ * from two buckets and a key from their shared parent are combined into the
+ * same bucket. In such a case, rebalancing or merging may proceed
+ * recursively from the parent.
+ *
+ * While the btree data format has been relatively constant over time, btrees
+ * initially created by versions of mongo earlier than the current version
+ * may embody different properties than freshly created btrees (while
+ * following the same data format). These older btrees are referred to
+ * below as legacy btrees.
+ */
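+
+    /* Editorial illustration (not part of the original interface): the ordering
+     * described above compares the bson key first and uses the disk loc only to
+     * break ties, i.e. the usual lexicographic pair ordering, e.g.
+     *
+     *     std::pair<std::string, long> a( "apple", 0x1000 ), b( "apple", 0x2000 );
+     *     assert( a < b );   // equal keys, so the (illustrative) loc breaks the tie
+     */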
+
+ const int OldBucketSize = 8192;
+
+#pragma pack(1)
+ template< class Version > class BucketBasics;
+
+ /**
+ * This is the fixed width data component for storage of a key within a
+ * bucket. It contains an offset pointer to the variable width bson
+     * data component. A _KeyNode may be 'unused'; see below.
+ */
+ template< class Loc >
+ struct __KeyNode {
+ /** Signals that we are writing this _KeyNode and casts away const */
+ __KeyNode<Loc> & writing() const;
+ /**
+ * The 'left' child bucket of this key. If this is the i-th key, it
+ * points to the i index child bucket.
+ */
+ Loc prevChildBucket;
+ /** The location of the record associated with this key. */
+ Loc recordLoc;
+ short keyDataOfs() const { return (short) _kdo; }
+
+ /** Offset within current bucket of the variable width bson key for this _KeyNode. */
+ unsigned short _kdo;
+ void setKeyDataOfs(short s) {
+ _kdo = s;
+ assert(s>=0);
+ }
+ /** Seems to be redundant. */
+ void setKeyDataOfsSavingUse(short s) {
+ _kdo = s;
+ assert(s>=0);
+ }
+ /**
+ * Unused keys are not returned by read operations. Keys may be marked
+ * as unused in cases where it is difficult to delete them while
+ * maintaining the constraints required of a btree.
+ *
+ * Setting ofs to odd is the sentinel for unused, as real recordLoc's
+ * are always even numbers. Note we need to keep its value basically
+ * the same as we use the recordLoc as part of the key in the index
+ * (to handle duplicate keys efficiently).
+ *
+ * Flagging keys as unused is a feature that is being phased out in favor
+ * of deleting the keys outright. The current btree implementation is
+ * not expected to mark a key as unused in a non legacy btree.
+ */
+ void setUnused() {
+ recordLoc.GETOFS() |= 1;
+ }
+ void setUsed() { recordLoc.GETOFS() &= ~1; }
+ int isUnused() const {
+ return recordLoc.getOfs() & 1;
+ }
+ int isUsed() const {
+ return !isUnused();
+ }
+ };
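+
+    /* Editorial sketch (not from the original source): the unused flag above is
+     * just the low bit of the record offset, and real record offsets are always
+     * even, so setting and clearing the bit is lossless:
+     *
+     *     int ofs = 0x1000;          // a real (even) record offset
+     *     ofs |= 1;                  // setUnused(): mark the key unused
+     *     bool unused = ofs & 1;     // isUnused(): true
+     *     ofs &= ~1;                 // setUsed(): original offset restored
+     */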
+
+ /**
+ * This structure represents header data for a btree bucket. An object of
+ * this type is typically allocated inside of a buffer of size BucketSize,
+ * resulting in a full bucket with an appropriate header.
+ *
+ * The body of a btree bucket contains an array of _KeyNode objects starting
+ * from its lowest indexed bytes and growing to higher indexed bytes. The
+ * body also contains variable width bson keys, which are allocated from the
+ * highest indexed bytes toward lower indexed bytes.
+ *
+ * |hhhh|kkkkkkk--------bbbbbbbbbbbuuubbbuubbb|
+ * h = header data
+ * k = KeyNode data
+ * - = empty space
+ * b = bson key data
+ * u = unused (old) bson key data, that may be garbage collected
+ */
+ class BtreeData_V0 {
+ protected:
+ /** Parent bucket of this bucket, which isNull() for the root bucket. */
+ DiskLoc parent;
+ /** Given that there are n keys, this is the n index child. */
+ DiskLoc nextChild;
+ /** can be reused, value is 8192 in current pdfile version Apr2010 */
+ unsigned short _wasSize;
+ /** zero */
+ unsigned short _reserved1;
+ int flags;
+
+ void _init() {
+ _reserved1 = 0;
+ _wasSize = BucketSize;
+ reserved = 0;
+ }
+
+ /** basicInsert() assumes the next three members are consecutive and in this order: */
+
+ /** Size of the empty region. */
+ int emptySize;
+ /** Size used for bson storage, including storage of old keys. */
+ int topSize;
+ /* Number of keys in the bucket. */
+ int n;
+
+ int reserved;
+ /* Beginning of the bucket's body */
+ char data[4];
+
+ public:
+ typedef __KeyNode<DiskLoc> _KeyNode;
+ typedef DiskLoc Loc;
+ typedef KeyBson Key;
+ typedef KeyBson KeyOwned;
+ enum { BucketSize = 8192 };
+
+ // largest key size we allow. note we very much need to support bigger keys (somehow) in the future.
+ static const int KeyMax = OldBucketSize / 10;
+ };
+
+ // a a a ofs ofs ofs ofs
+ class DiskLoc56Bit {
+ int ofs;
+ unsigned char _a[3];
+ unsigned long long Z() const {
+ // endian
+ return *((unsigned long long*)this) & 0x00ffffffffffffffULL;
+ }
+ enum {
+            // the low bit of offsets is used by _KeyNode as the 'unused' flag, so we avoid -1 and use -2 for null.
+ OurNullOfs = -2
+ };
+ public:
+ template< class V >
+ const BtreeBucket<V> * btree() const {
+ return DiskLoc(*this).btree<V>();
+ }
+ template< class V >
+ BtreeBucket<V> * btreemod() const {
+ return DiskLoc(*this).btreemod<V>();
+ }
+ operator const DiskLoc() const {
+ // endian
+ if( isNull() ) return DiskLoc();
+ unsigned a = *((unsigned *) (_a-1));
+ return DiskLoc(a >> 8, ofs);
+ }
+ int& GETOFS() { return ofs; }
+ int getOfs() const { return ofs; }
+ bool operator<(const DiskLoc56Bit& rhs) const {
+            // the ordering of dup keys in btrees isn't too critical, but we'd like to put items that are
+ // close together on disk close together in the tree, so we do want the file # to be the most significant
+ // bytes
+ return Z() < rhs.Z();
+ }
+ int compare(const DiskLoc56Bit& rhs) const {
+ unsigned long long a = Z();
+ unsigned long long b = rhs.Z();
+ if( a < b ) return -1;
+ return a == b ? 0 : 1;
+ }
+ bool operator==(const DiskLoc56Bit& rhs) const { return Z() == rhs.Z(); }
+ bool operator!=(const DiskLoc56Bit& rhs) const { return Z() != rhs.Z(); }
+ bool operator==(const DiskLoc& rhs) const {
+ return DiskLoc(*this) == rhs;
+ }
+ bool operator!=(const DiskLoc& rhs) const { return !(*this==rhs); }
+ bool isNull() const { return ofs < 0; }
+ void Null() {
+ ofs = OurNullOfs;
+ _a[0] = _a[1] = _a[2] = 0;
+ }
+ string toString() const { return DiskLoc(*this).toString(); }
+ void operator=(const DiskLoc& loc) {
+ ofs = loc.getOfs();
+ int la = loc.a();
+ assert( la <= 0xffffff ); // must fit in 3 bytes
+ if( la < 0 ) {
+ assert( la == -1 );
+ la = 0;
+ ofs = OurNullOfs;
+ }
+ memcpy(_a, &la, 3); // endian
+ dassert( ofs != 0 );
+ }
+ DiskLoc56Bit& writing() const {
+ return *((DiskLoc56Bit*) getDur().writingPtr((void*)this, 7));
+ }
+ };
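+
+    /* Editorial sketch (assumes a little-endian layout, as the class above does;
+     * names are illustrative only): the 7 bytes hold a 4 byte offset followed by
+     * a 3 byte file number, and Z() reads them back as the low 56 bits of a
+     * 64 bit integer, putting the file number in the most significant bytes:
+     *
+     *     struct Loc56 { int32_t ofs; unsigned char a[3]; };
+     *     uint64_t z( const Loc56 &l ) {
+     *         uint64_t v = 0;
+     *         memcpy( &v, &l, 7 );   // copy only the 7 meaningful bytes
+     *         return v;              // file # in bits 32..55, ofs in bits 0..31
+     *     }
+     *
+     * Sorting by z() therefore groups entries by file before offset, which is the
+     * disk locality operator< above is after.
+     */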
+
+ class BtreeData_V1 {
+ public:
+ typedef DiskLoc56Bit Loc;
+ //typedef DiskLoc Loc;
+ typedef __KeyNode<Loc> _KeyNode;
+ typedef KeyV1 Key;
+ typedef KeyV1Owned KeyOwned;
+ enum { BucketSize = 8192-16 }; // leave room for Record header
+ // largest key size we allow. note we very much need to support bigger keys (somehow) in the future.
+ static const int KeyMax = 1024;
+ protected:
+ /** Parent bucket of this bucket, which isNull() for the root bucket. */
+ Loc parent;
+ /** Given that there are n keys, this is the n index child. */
+ Loc nextChild;
+
+ unsigned short flags;
+
+ /** basicInsert() assumes the next three members are consecutive and in this order: */
+
+ /** Size of the empty region. */
+ unsigned short emptySize;
+ /** Size used for bson storage, including storage of old keys. */
+ unsigned short topSize;
+ /* Number of keys in the bucket. */
+ unsigned short n;
+
+ /* Beginning of the bucket's body */
+ char data[4];
+
+ void _init() { }
+ };
+
+ typedef BtreeData_V0 V0;
+ typedef BtreeData_V1 V1;
+
+ /**
+ * This class adds functionality to BtreeData for managing a single bucket.
+ * The following policies are used in an attempt to encourage simplicity:
+ *
+ * Const member functions of this class are those which may be called on
+ * an object for which writing has not been signaled. Non const member
+ * functions may only be called on objects for which writing has been
+ * signaled. Note that currently some const functions write to the
+ * underlying memory representation of this bucket using optimized methods
+ * to signal write operations.
+ *
+ * DiskLoc parameters that may shadow references within the btree should
+ * be passed by value rather than by reference to non const member
+ * functions or to const member functions which may perform writes. This way
+ * a callee need not worry that write operations will change or invalidate
+ * its arguments.
+ *
+ * The current policy for dealing with bson arguments is the opposite of
+ * what is described above for DiskLoc arguments. We do not want to copy
+ * bson into memory as an intermediate step for btree changes, and if bson
+ * is to be moved it must be copied to the new location before the old
+ * location is invalidated. Care should be taken in cases where that invalid
+ * memory may be implicitly referenced by function arguments.
+ *
+ * A number of functions below require a thisLoc argument, which must be the
+ * disk location of the bucket mapped to 'this'.
+ */
+ template< class Version >
+ class BucketBasics : public Version {
+ public:
+ template <class U> friend class BtreeBuilder;
+ typedef typename Version::Key Key;
+ typedef typename Version::_KeyNode _KeyNode;
+ typedef typename Version::Loc Loc;
+
+ int getN() const { return this->n; }
+
+ /**
+ * This is an in memory wrapper for a _KeyNode, and not itself part of btree
+ * storage. This object and its BSONObj 'key' will become invalid if the
+ * _KeyNode data that generated it is moved within the btree. In general,
+ * a KeyNode should not be expected to be valid after a write.
+ */
+ class KeyNode {
+ public:
+ KeyNode(const BucketBasics<Version>& bb, const _KeyNode &k);
+ const Loc& prevChildBucket;
+ const Loc& recordLoc;
+ /* Points to the bson key storage for a _KeyNode */
+ Key key;
+ };
+ friend class KeyNode;
+
+ /** Assert write intent declared for this bucket already. */
+ void assertWritable();
+
+ void assertValid(const Ordering &order, bool force = false) const;
+ void assertValid(const BSONObj &orderObj, bool force = false) const { return assertValid(Ordering::make(orderObj),force); }
+
+ /**
+ * @return KeyNode for key at index i. The KeyNode will become invalid
+ * if the key is moved or reassigned, or if the node is packed. In general
+ * a KeyNode should not be expected to be valid after a write.
+ */
+ const KeyNode keyNode(int i) const {
+ if ( i >= this->n ) {
+ massert( 13000 , (string)"invalid keyNode: " + BSON( "i" << i << "n" << this->n ).jsonString() , i < this->n );
+ }
+ return KeyNode(*this, k(i));
+ }
+
+ static int headerSize() {
+ const BucketBasics *d = 0;
+ return (char*)&(d->data) - (char*)&(d->parent);
+ }
+ static int bodySize() { return Version::BucketSize - headerSize(); }
+ static int lowWaterMark() { return bodySize() / 2 - Version::KeyMax - sizeof( _KeyNode ) + 1; } // see comment in btree.cpp
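+
+        /* Editorial note (numbers derived from the V1 layout above under
+         * #pragma pack(1); treat them as an unverified sketch): parent (7) +
+         * nextChild (7) + flags/emptySize/topSize/n (4 x 2) give headerSize() == 22,
+         * so bodySize() == 8176 - 22 == 8154 and, with KeyMax == 1024 and a 16 byte
+         * _KeyNode, lowWaterMark() == 8154 / 2 - 1024 - 16 + 1 == 3038. */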
+
+ // for testing
+ int nKeys() const { return this->n; }
+ const DiskLoc getNextChild() const { return this->nextChild; }
+
+ protected:
+ char * dataAt(short ofs) { return this->data + ofs; }
+
+ /** Initialize the header for a new node. */
+ void init();
+
+ /**
+ * Preconditions:
+ * - 0 <= keypos <= n
+ * - If key is inserted at position keypos, the bucket's keys will still be
+ * in order.
+ * Postconditions:
+ * - If key can fit in the bucket, the bucket may be packed and keypos
+ * may be decreased to reflect deletion of earlier indexed keys during
+ * packing, the key will be inserted at the updated keypos index with
+ * a null prevChildBucket, the subsequent keys shifted to the right,
+ * and the function will return true.
+ * - If key cannot fit in the bucket, the bucket will be packed and
+ * the function will return false.
+ * Although this function is marked const, it modifies the underlying
+ * btree representation through an optimized write intent mechanism.
+ */
+ bool basicInsert(const DiskLoc thisLoc, int &keypos, const DiskLoc recordLoc, const Key& key, const Ordering &order) const;
+
+ /**
+ * Preconditions:
+ * - key / recordLoc are > all existing keys
+         * - The keys in prevChild and their descendants are between all existing
+ * keys and 'key'.
+ * Postconditions:
+ * - If there is space for key without packing, it is inserted as the
+ * last key with specified prevChild and true is returned.
+ * Importantly, nextChild is not updated!
+ * - Otherwise false is returned and there is no change.
+ */
+ bool _pushBack(const DiskLoc recordLoc, const Key& key, const Ordering &order, const DiskLoc prevChild);
+ void pushBack(const DiskLoc recordLoc, const Key& key, const Ordering &order, const DiskLoc prevChild) {
+ bool ok = _pushBack( recordLoc , key , order , prevChild );
+ assert(ok);
+ }
+
+ /**
+ * This is a special purpose function used by BtreeBuilder. The
+ * interface is quite dangerous if you're not careful. The bson key
+ * returned here points to bucket memory that has been invalidated but
+ * not yet reclaimed.
+ *
+ * TODO Maybe this could be replaced with two functions, one which
+ * returns the last key without deleting it and another which simply
+ * deletes the last key. Then the caller would have enough control to
+ * ensure proper memory integrity.
+ *
+ * Preconditions:
+ * - bucket is not empty
+ * - last key of bucket is used (not unused)
+ * - nextChild isNull()
+ * - _unalloc will work correctly as used - see code
+ * Postconditions:
+ * - The last key of the bucket is removed, and its key and recLoc are
+ * returned. As mentioned above, the key points to unallocated memory.
+ */
+ void popBack(DiskLoc& recLoc, Key &key);
+
+ /**
+ * Preconditions:
+ * - 0 <= keypos < n
+ * - there is no child bucket at keypos
+ * - n > 1
+ * - if mayEmpty == false or nextChild.isNull(), n > 0
+ * Postconditions:
+ * - The key at keypos is removed, and remaining keys are shifted over.
+ * - The bucket becomes unpacked.
+ * - if mayEmpty is true and nextChild.isNull(), the bucket may have no keys.
+ */
+ void _delKeyAtPos(int keypos, bool mayEmpty = false);
+
+ /* !Packed means there is deleted fragment space within the bucket.
+ We "repack" when we run out of space before considering the node
+ to be full.
+ */
+ enum Flags { Packed=1 };
+
+ /** n == 0 is ok */
+ const Loc& childForPos(int p) const { return p == this->n ? this->nextChild : k(p).prevChildBucket; }
+ Loc& childForPos(int p) { return p == this->n ? this->nextChild : k(p).prevChildBucket; }
+
+ /** Same as bodySize(). */
+ int totalDataSize() const;
+ /**
+ * @return true when a key may be dropped by pack()
+ * @param index index of the key that may be dropped
+ * @param refPos index of a particular key of interest, which must not
+ * be dropped; = 0 to safely ignore
+ */
+ bool mayDropKey( int index, int refPos ) const;
+
+ /**
+ * Pack the bucket to reclaim space from invalidated memory.
+ * @refPos is an index in the bucket which may be updated if we
+ * delete keys from the bucket
+ * This function may cast away const and perform a write.
+ * Preconditions: none
+ * Postconditions:
+ * - Bucket will be packed
+ * - Some unused nodes may be dropped, but not ones at index 0 or refPos
+ * - Some used nodes may be moved
+ * - If refPos is the index of an existing key, it will be updated to that
+ * key's new index if the key is moved.
+ */
+ void _pack(const DiskLoc thisLoc, const Ordering &order, int &refPos) const;
+ /** Pack when already writable */
+ void _packReadyForMod(const Ordering &order, int &refPos);
+
+ /** @return the size the bucket's body would have if we were to call pack() */
+ int packedDataSize( int refPos ) const;
+ void setNotPacked() { this->flags &= ~Packed; }
+ void setPacked() { this->flags |= Packed; }
+ /**
+ * Preconditions: 'bytes' is <= emptySize
+ * Postconditions: A buffer of size 'bytes' is allocated on the top side,
+ * and its offset is returned.
+ */
+ int _alloc(int bytes);
+ /**
+ * This function can be used to deallocate the lowest byte index bson
+ * buffer in the top region, which in some but not all cases is for the
+ * n - 1 index key. This function only works correctly in certain
+ * special cases, please be careful.
+ * Preconditions: 'bytes' <= topSize
+ * Postconditions: The top region is decreased
+ */
+ void _unalloc(int bytes);
+ /**
+ * Preconditions: 'N' <= n
+ * Postconditions:
+         * - All keys at index N and beyond are dropped.
+         * - Then the bucket is packed, without dropping the key at refPos if refPos < N.
+ */
+ void truncateTo(int N, const Ordering &order, int &refPos);
+ /**
+ * Preconditions:
+ * - 'nDrop' < n
+ * - for now, refPos should be zero.
+ * Postconditions:
+ * - All keys before the nDrop index key are dropped.
+ * - The bucket is packed.
+ */
+ void dropFront(int nDrop, const Ordering &order, int &refPos);
+ /**
+ * Preconditions: 0 <= keypos < n
+ * Postconditions: keypos indexed key is marked unused.
+ */
+ void markUnused(int keypos);
+
+ /**
+ * BtreeBuilder uses the parent var as a temp place to maintain a linked list chain.
+ * we use tempNext() when we do that to be less confusing. (one might have written a union in C)
+ */
+ DiskLoc tempNext() const { return this->parent; }
+ void setTempNext(DiskLoc l) { this->parent = l; }
+
+ void _shape(int level, stringstream&) const;
+ int Size() const;
+
+ /** @return i-indexed _KeyNode, without bounds checking */
+ public:
+ const _KeyNode& k(int i) const { return ((const _KeyNode*)this->data)[i]; }
+ _KeyNode& _k(int i) { return ((_KeyNode*)this->data)[i]; }
+ protected:
+ _KeyNode& k(int i) { return ((_KeyNode*)this->data)[i]; }
+
+ /**
+ * Preconditions: 'this' is packed
+ * @return the key index to be promoted on split
+ * @param keypos The requested index of a key to insert, which may affect
+ * the choice of split position.
+ */
+ int splitPos( int keypos ) const;
+
+ /**
+ * Preconditions: nAdd * sizeof( _KeyNode ) <= emptySize
+ * Postconditions:
+ * - Increases indexes of existing _KeyNode objects by nAdd, reserving
+ * space for additional _KeyNode objects at front.
+ * - Does not initialize ofs values for the bson data of these
+ * _KeyNode objects.
+ */
+ void reserveKeysFront( int nAdd );
+
+ /**
+ * Preconditions:
+ * - 0 <= i < n
+ * - The bson 'key' must fit in the bucket without packing.
+ * - If 'key' and 'prevChildBucket' are set at index i, the btree
+ * ordering properties will be maintained.
+ * Postconditions:
+ * - The specified key is set at index i, replacing the existing
+ * _KeyNode data and without shifting any other _KeyNode objects.
+ */
+ void setKey( int i, const DiskLoc recordLoc, const Key& key, const DiskLoc prevChildBucket );
+ };
+
+ template< class V>
+ struct Continuation;
+
+ /**
+ * This class adds functionality for manipulating buckets that are assembled
+ * in a tree. The requirements for const and non const functions and
+ * arguments are generally the same as in BtreeBucket. Because this class
+ * deals with tree structure, some functions that are marked const may
+ * trigger modification of another node in the btree or potentially of the
+ * current node. In such cases, the function's implementation explicitly
+ * casts away const when indicating an intent to write to the durability
+ * layer. The DiskLocs provided to such functions should be passed by
+ * value if they shadow pointers within the btree.
+ *
+ * To clarify enforcement of referential integrity in this implementation,
+ * we use the following pattern when deleting data we have a persistent
+ * pointer to. The pointer is cleared or removed explicitly, then the data
+ * it pointed to is cleaned up with a helper function.
+ *
+ * TODO It might make sense to put some of these functions in a class
+ * representing a full btree instead of a single btree bucket. That would
+ * allow us to use the const qualifier in a manner more consistent with
+ * standard usage. Right now the interface is for both a node and a tree,
+ * so assignment of const is sometimes nonideal.
+ *
+ * TODO There are several cases in which the 'this' pointer is invalidated
+     * as a result of deallocation. A separate class representing a btree would
+ * alleviate some fragile cases where the implementation must currently
+ * behave correctly if the 'this' pointer is suddenly invalidated by a
+ * callee.
+ */
+ template< class V >
+ class BtreeBucket : public BucketBasics<V> {
+ friend class BtreeCursor;
+ friend struct Continuation<V>;
+ public:
+ // make compiler happy:
+ typedef typename V::Key Key;
+ typedef typename V::KeyOwned KeyOwned;
+ typedef typename BucketBasics<V>::KeyNode KeyNode;
+ typedef typename BucketBasics<V>::_KeyNode _KeyNode;
+ typedef typename BucketBasics<V>::Loc Loc;
+ const _KeyNode& k(int i) const { return static_cast< const BucketBasics<V> * >(this)->k(i); }
+ protected:
+ _KeyNode& k(int i) { return static_cast< BucketBasics<V> * >(this)->_k(i); }
+ public:
+ const KeyNode keyNode(int i) const { return static_cast< const BucketBasics<V> * >(this)->keyNode(i); }
+
+ bool isHead() const { return this->parent.isNull(); }
+ void dumpTree(const DiskLoc &thisLoc, const BSONObj &order) const;
+ long long fullValidate(const DiskLoc& thisLoc, const BSONObj &order, long long *unusedCount = 0, bool strict = false, unsigned depth=0) const; /* traverses everything */
+
+ bool isUsed( int i ) const { return this->k(i).isUsed(); }
+ string bucketSummary() const;
+ void dump(unsigned depth=0) const;
+
+ /**
+ * @return true if key exists in index
+ *
+ * @order - indicates order of keys in the index. this is basically the index's key pattern, e.g.:
+ * BSONObj order = ((IndexDetails&)idx).keyPattern();
+ * likewise below in bt_insert() etc.
+ */
+ private:
+ bool exists(const IndexDetails& idx, const DiskLoc &thisLoc, const Key& key, const Ordering& order) const;
+ public:
+
+ /**
+         * @param self - Don't complain if the matching key's record is 'self', i.e. we are already in the index.
+ * @return true = There is a duplicate used key.
+ */
+ bool wouldCreateDup(
+ const IndexDetails& idx, const DiskLoc &thisLoc,
+ const Key& key, const Ordering& order,
+ const DiskLoc &self) const;
+
+ /**
+ * Preconditions: none
+ * Postconditions: @return a new bucket allocated from pdfile storage
+         * and init()-ed. This bucket is suitable for use as a new root
+ * or any other new node in the tree.
+ */
+ static DiskLoc addBucket(const IndexDetails&);
+
+ /**
+ * Preconditions: none
+ * Postconditions:
+ * - Some header values in this bucket are cleared, and the bucket is
+ * deallocated from pdfile storage.
+ * - The memory at thisLoc is invalidated, and 'this' is invalidated.
+ */
+ void deallocBucket(const DiskLoc thisLoc, const IndexDetails &id);
+
+ /**
+ * Preconditions:
+ * - 'key' has a valid schema for this index.
+         * - All other parameters are valid and consistent with this index if applicable.
+ * Postconditions:
+ * - If key is bigger than KeyMax, @return 2 or 3 and no change.
+ * - If key / recordLoc exist in the btree as an unused key, set them
+ * as used and @return 0
+ * - If key / recordLoc exist in the btree as a used key, @throw
+ * exception 10287 and no change.
+ * - If key / recordLoc do not exist in the btree, they are inserted
+ * and @return 0. The root of the btree may be changed, so
+ * 'this'/thisLoc may no longer be the root upon return.
+ */
+ int bt_insert(const DiskLoc thisLoc, const DiskLoc recordLoc,
+ const BSONObj& key, const Ordering &order, bool dupsAllowed,
+ IndexDetails& idx, bool toplevel = true) const;
+
+        /** does the insert in two steps - an upgradable lock can then be used for step 1, which
+            is the part that may page fault and also does most of the computational work.
+ */
+ void twoStepInsert(DiskLoc thisLoc, Continuation<V> &c, bool dupsAllowed) const;
+
+ /**
+ * Preconditions:
+ * - 'key' has a valid schema for this index, and may have objsize() > KeyMax.
+ * Postconditions:
+ * - If key / recordLoc are in the btree, they are removed (possibly
+ * by being marked as an unused key), @return true, and potentially
+ * invalidate 'this' / thisLoc and change the head.
+ * - If key / recordLoc are not in the btree, @return false and do nothing.
+ */
+ bool unindex(const DiskLoc thisLoc, IndexDetails& id, const BSONObj& key, const DiskLoc recordLoc) const;
+
+ /**
+ * locate may return an "unused" key that is just a marker. so be careful.
+ * looks for a key:recordloc pair.
+ *
+ * @found - returns true if exact match found. note you can get back a position
+ * result even if found is false.
+ */
+ DiskLoc locate(const IndexDetails &idx , const DiskLoc& thisLoc, const BSONObj& key, const Ordering &order,
+ int& pos, bool& found, const DiskLoc &recordLoc, int direction=1) const;
+ DiskLoc locate(const IndexDetails &idx , const DiskLoc& thisLoc, const Key& key, const Ordering &order,
+ int& pos, bool& found, const DiskLoc &recordLoc, int direction=1) const;
+
+ /**
+ * find the first instance of the key
+ * does not handle dups
+ * WARNING: findSingle may not be compound index safe. this may need to change. see notes in
+ * findSingle code.
+ * @return the record location of the first match
+ */
+ DiskLoc findSingle( const IndexDetails &indexdetails , const DiskLoc& thisLoc, const BSONObj& key ) const;
+
+ /**
+ * Advance to next or previous key in the index.
+ * @param direction to advance.
+ */
+ DiskLoc advance(const DiskLoc& thisLoc, int& keyOfs, int direction, const char *caller) const;
+
+ /** Advance in specified direction to the specified key */
+ void advanceTo(DiskLoc &thisLoc, int &keyOfs, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction ) const;
+
+ /** Locate a key with fields comprised of a combination of keyBegin fields and keyEnd fields. */
+ static void customLocate(DiskLoc &locInOut, int &keyOfs, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction, pair< DiskLoc, int > &bestParent ) ;
+
+ /** @return head of the btree by traversing from current bucket. */
+ const DiskLoc getHead(const DiskLoc& thisLoc) const;
+
+ /** get tree shape */
+ void shape(stringstream&) const;
+
+ static void a_test(IndexDetails&);
+
+ static int getKeyMax();
+
+ protected:
+ /**
+ * Preconditions:
+ * - 0 <= firstIndex <= n
+ * - -1 <= lastIndex <= n ( -1 is equivalent to n )
+ * Postconditions:
+ * - Any children at indexes firstIndex through lastIndex (inclusive)
+ * will have their parent pointers set to thisLoc.
+ */
+ void fixParentPtrs(const DiskLoc thisLoc, int firstIndex = 0, int lastIndex = -1) const;
+
+ /**
+ * Preconditions:
+ * - thisLoc is not the btree head.
+ * - n == 0 is ok
+ * Postconditions:
+ * - All cursors pointing to this bucket will be updated.
+ * - This bucket's parent's child pointer is set to null.
+ * - This bucket is deallocated from pdfile storage.
+ * - 'this' and thisLoc are invalidated.
+ */
+ void delBucket(const DiskLoc thisLoc, const IndexDetails&);
+
+ /**
+ * Preconditions: 0 <= p < n
+ * Postconditions:
+ * - The key at index p is removed from the btree.
+ * - 'this' and thisLoc may be invalidated.
+ * - The tree head may change.
+ */
+ void delKeyAtPos(const DiskLoc thisLoc, IndexDetails& id, int p, const Ordering &order);
+
+ /**
+ * Preconditions:
+ * - n == 0 is ok
+ * Postconditions:
+ * - If thisLoc is head, or if its body has at least lowWaterMark bytes,
+ * return false and do nothing.
+ * - Otherwise, if thisLoc has left or right neighbors, either balance
+ * or merge with them and return true. Also, 'this' and thisLoc may
+ * be invalidated and the tree head may change.
+ */
+ bool mayBalanceWithNeighbors(const DiskLoc thisLoc, IndexDetails &id, const Ordering &order) const;
+
+ /**
+ * Preconditions:
+ * - 0 <= leftIndex < n
+ * - The child at leftIndex or the child at leftIndex + 1 contains
+ * fewer than lowWaterMark bytes.
+ * Postconditions:
+ * - If the child bucket at leftIndex can merge with the child index
+ * at leftIndex + 1, do nothing and return false.
+ * - Otherwise, balance keys between the leftIndex child and the
+ * leftIndex + 1 child, return true, and possibly change the tree head.
+ */
+ bool tryBalanceChildren( const DiskLoc thisLoc, int leftIndex, IndexDetails &id, const Ordering &order ) const;
+
+ /**
+ * Preconditions:
+ * - All preconditions of tryBalanceChildren.
+ * - The leftIndex child and leftIndex + 1 child cannot be merged.
+ * Postconditions:
+ * - Keys are moved between the leftIndex child and the leftIndex + 1
+ * child such that neither child has fewer than lowWaterMark bytes.
+ * The tree head may change.
+ */
+ void doBalanceChildren( const DiskLoc thisLoc, int leftIndex, IndexDetails &id, const Ordering &order );
+
+ /**
+ * Preconditions:
+ * - All preconditions of doBalanceChildren
+ * - The leftIndex and leftIndex + 1 children are packed.
+ * - The leftIndex + 1 child has fewer than lowWaterMark bytes.
+ * - split returned by rebalancedSeparatorPos()
+ * Postconditions:
+ * - The key in lchild at index split is set as thisLoc's key at index
+ * leftIndex, which may trigger a split and change the tree head.
+ * The previous key in thisLoc at index leftIndex and all keys with
+ * indexes greater than split in lchild are moved to rchild.
+ */
+ void doBalanceLeftToRight( const DiskLoc thisLoc, int leftIndex, int split,
+ BtreeBucket<V> *l, const DiskLoc lchild,
+ BtreeBucket<V> *r, const DiskLoc rchild,
+ IndexDetails &id, const Ordering &order );
+ /**
+ * Preconditions:
+ * - All preconditions of doBalanceChildren
+ * - The leftIndex and leftIndex + 1 children are packed.
+ * - The leftIndex child has fewer than lowWaterMark bytes.
+ * - split returned by rebalancedSeparatorPos()
+ * Postconditions:
+ * - The key in rchild at index split - l->n - 1 is set as thisLoc's key
+ * at index leftIndex, which may trigger a split and change the tree
+ * head. The previous key in thisLoc at index leftIndex and all keys
+ * with indexes less than split - l->n - 1 in rchild are moved to
+ * lchild.
+ */
+ void doBalanceRightToLeft( const DiskLoc thisLoc, int leftIndex, int split,
+ BtreeBucket<V> *l, const DiskLoc lchild,
+ BtreeBucket<V> *r, const DiskLoc rchild,
+ IndexDetails &id, const Ordering &order );
+
+ /**
+ * Preconditions:
+ * - 0 <= leftIndex < n
+ * - this->canMergeChildren( thisLoc, leftIndex ) == true
+ * Postconditions:
+ * - All of the above mentioned keys will be placed in the left child.
+ * - The tree may be updated recursively, resulting in 'this' and
+ * thisLoc being invalidated and the tree head being changed.
+ */
+ void doMergeChildren( const DiskLoc thisLoc, int leftIndex, IndexDetails &id, const Ordering &order);
+
+ /**
+ * Preconditions:
+ * - n == 0
+ * - !nextChild.isNull()
+ * Postconditions:
+ * - 'this' and thisLoc are deallocated (and invalidated), any cursors
+ * to them are updated, and the tree head may change.
+ * - nextChild replaces thisLoc in the btree structure.
+ */
+ void replaceWithNextChild( const DiskLoc thisLoc, IndexDetails &id );
+
+ /**
+ * @return true iff the leftIndex and leftIndex + 1 children both exist,
+ * and if their body sizes when packed and the thisLoc key at leftIndex
+ * would fit in a single bucket body.
+ */
+ bool canMergeChildren( const DiskLoc &thisLoc, int leftIndex ) const;
+
+ /**
+ * Preconditions:
+ * - leftIndex and leftIndex + 1 children are packed
+ * - leftIndex or leftIndex + 1 child is below lowWaterMark
+ * @return index of the rebalanced separator; the index value is
+ * determined as if we had a bucket with body
+ * <left bucket keys array>.push( <old separator> ).concat( <right bucket keys array> )
+ * and called splitPos( 0 ) on it.
+ */
+ int rebalancedSeparatorPos( const DiskLoc &thisLoc, int leftIndex ) const;
+
+ /**
+ * Preconditions: thisLoc has a parent
+ * @return parent's index of thisLoc.
+ */
+ int indexInParent( const DiskLoc &thisLoc ) const;
+
+ public:
+ Key keyAt(int i) const {
+ if( i >= this->n )
+ return Key();
+ return Key(this->data + k(i).keyDataOfs());
+ }
+ protected:
+
+ /**
+ * Preconditions:
+ * - This bucket is packed.
+ * - Cannot add a key of size KeyMax to this bucket.
+ * - 0 <= keypos <= n is the position of a new key that will be inserted
+ * - lchild is equal to the existing child at index keypos.
+ * Postconditions:
+ * - The thisLoc bucket is split into two packed buckets, possibly
+ * invalidating the initial position of keypos, with a split key
+ * promoted to the parent. The new key key/recordLoc will be inserted
+ * into one of the split buckets, and lchild/rchild set appropriately.
+ * Splitting may occur recursively, possibly changing the tree head.
+ */
+ void split(const DiskLoc thisLoc, int keypos,
+ const DiskLoc recordLoc, const Key& key,
+ const Ordering& order, const DiskLoc lchild, const DiskLoc rchild, IndexDetails& idx);
+
+ /**
+ * Preconditions:
+ * - 0 <= keypos <= n
+ * - If key / recordLoc are inserted at position keypos, with provided
+ * lchild and rchild, the btree ordering requirements will be
+ * maintained.
+ * - lchild is equal to the existing child at index keypos.
+ * - n == 0 is ok.
+ * Postconditions:
+ * - The key / recordLoc are inserted at position keypos, and the
+ * bucket is split if necessary, which may change the tree head.
+ * - The bucket may be packed or split, invalidating the specified value
+ * of keypos.
+ * This function will always modify thisLoc, but it's marked const because
+         * it commonly relies on the specialized write intent mechanism of basicInsert().
+ */
+ void insertHere(const DiskLoc thisLoc, int keypos,
+ const DiskLoc recordLoc, const Key& key, const Ordering &order,
+ const DiskLoc lchild, const DiskLoc rchild, IndexDetails &idx) const;
+
+ /** bt_insert() is basically just a wrapper around this. */
+ int _insert(const DiskLoc thisLoc, const DiskLoc recordLoc,
+ const Key& key, const Ordering &order, bool dupsAllowed,
+ const DiskLoc lChild, const DiskLoc rChild, IndexDetails &idx) const;
+
+ void insertStepOne(DiskLoc thisLoc, Continuation<V>& c, bool dupsAllowed) const;
+
+ bool find(const IndexDetails& idx, const Key& key, const DiskLoc &recordLoc, const Ordering &order, int& pos, bool assertIfDup) const;
+ static bool customFind( int l, int h, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction, DiskLoc &thisLoc, int &keyOfs, pair< DiskLoc, int > &bestParent ) ;
+ static void findLargestKey(const DiskLoc& thisLoc, DiskLoc& largestLoc, int& largestKey);
+ static int customBSONCmp( const BSONObj &l, const BSONObj &rBegin, int rBeginLen, bool rSup, const vector< const BSONElement * > &rEnd, const vector< bool > &rEndInclusive, const Ordering &o, int direction );
+
+ /** If child is non null, set its parent to thisLoc */
+ static void fix(const DiskLoc thisLoc, const DiskLoc child);
+
+ /**
+ * Preconditions:
+ * - 0 <= keypos < n
+ * - If the specified key and recordLoc are placed in keypos of thisLoc,
+ * and lchild and rchild are set, the btree ordering properties will
+ * be maintained.
+ * - rchild == childForPos( keypos + 1 )
+ * - childForPos( keypos ) is referenced elsewhere if nonnull.
+ * Postconditions:
+ * - The key at keypos will be replaced with the specified key and
+ * lchild, potentially splitting this bucket and changing the tree
+ * head.
+ * - childForPos( keypos ) will be orphaned.
+ */
+ void setInternalKey( const DiskLoc thisLoc, int keypos,
+ const DiskLoc recordLoc, const Key &key, const Ordering &order,
+ const DiskLoc lchild, const DiskLoc rchild, IndexDetails &idx);
+
+ /**
+ * Preconditions:
+ * - 0 <= keypos < n
+ * - The keypos or keypos+1 indexed child is non null.
+ * Postconditions:
+ * - The specified key is deleted by replacing it with another key if
+ * possible. This replacement may cause a split and change the tree
+ * head. The replacement key will be deleted from its original
+ * location, potentially causing merges and splits that may invalidate
+ * 'this' and thisLoc and change the tree head.
+ * - If the key cannot be replaced, it will be marked as unused. This
+ * is only expected in legacy btrees.
+ */
+ void deleteInternalKey( const DiskLoc thisLoc, int keypos, IndexDetails &id, const Ordering &order );
+ public:
+ /** simply builds and returns a dup key error message string */
+ static string dupKeyError( const IndexDetails& idx , const Key& key );
+ };
+#pragma pack()
+
+ class FieldRangeVector;
+ class FieldRangeVectorIterator;
+
+ class BtreeCursor : public Cursor {
+ protected:
+ BtreeCursor( NamespaceDetails *_d, int _idxNo, const IndexDetails&, const BSONObj &startKey, const BSONObj &endKey, bool endKeyInclusive, int direction );
+ BtreeCursor( NamespaceDetails *_d, int _idxNo, const IndexDetails& _id, const shared_ptr< FieldRangeVector > &_bounds, int _direction );
+ public:
+ virtual ~BtreeCursor();
+ /** makes an appropriate subclass depending on the index version */
+ static BtreeCursor* make( NamespaceDetails *_d, const IndexDetails&, const BSONObj &startKey, const BSONObj &endKey, bool endKeyInclusive, int direction );
+ static BtreeCursor* make( NamespaceDetails *_d, const IndexDetails& _id, const shared_ptr< FieldRangeVector > &_bounds, int _direction );
+ static BtreeCursor* make( NamespaceDetails *_d, int _idxNo, const IndexDetails&, const BSONObj &startKey, const BSONObj &endKey, bool endKeyInclusive, int direction );
+ static BtreeCursor* make( NamespaceDetails *_d, int _idxNo, const IndexDetails& _id, const shared_ptr< FieldRangeVector > &_bounds, int _direction );
+
+ virtual bool ok() { return !bucket.isNull(); }
+ virtual bool advance();
+ virtual void noteLocation(); // updates keyAtKeyOfs...
+ virtual void checkLocation() = 0;
+ virtual bool supportGetMore() { return true; }
+ virtual bool supportYields() { return true; }
+
+ /**
+ * used for multikey index traversal to avoid sending back dups. see Matcher::matches().
+ * if a multikey index traversal:
+ * if loc has already been sent, returns true.
+ * otherwise, marks loc as sent.
+ * @return false if the loc has not been seen
+ */
+ virtual bool getsetdup(DiskLoc loc) {
+ if( _multikey ) {
+ pair<set<DiskLoc>::iterator, bool> p = _dups.insert(loc);
+ return !p.second;
+ }
+ return false;
+ }
+
+ virtual bool modifiedKeys() const { return _multikey; }
+ virtual bool isMultiKey() const { return _multikey; }
+
+ /*const _KeyNode& _currKeyNode() const {
+ assert( !bucket.isNull() );
+ const _KeyNode& kn = keyNode(keyOfs);
+ assert( kn.isUsed() );
+ return kn;
+ }*/
+
+ /** returns BSONObj() if ofs is out of range */
+ virtual BSONObj keyAt(int ofs) const = 0;
+
+ virtual BSONObj currKey() const = 0;
+ virtual BSONObj indexKeyPattern() { return indexDetails.keyPattern(); }
+
+ virtual void aboutToDeleteBucket(const DiskLoc& b) {
+ if ( bucket == b )
+ keyOfs = -1;
+ }
+
+ virtual DiskLoc currLoc() = 0; // { return !bucket.isNull() ? _currKeyNode().recordLoc : DiskLoc(); }
+ virtual DiskLoc refLoc() { return currLoc(); }
+ virtual Record* _current() { return currLoc().rec(); }
+ virtual BSONObj current() { return BSONObj(_current()); }
+ virtual string toString();
+
+ BSONObj prettyKey( const BSONObj &key ) const {
+ return key.replaceFieldNames( indexDetails.keyPattern() ).clientReadable();
+ }
+
+ virtual BSONObj prettyIndexBounds() const;
+
+ virtual CoveredIndexMatcher *matcher() const { return _matcher.get(); }
+ virtual shared_ptr< CoveredIndexMatcher > matcherPtr() const { return _matcher; }
+
+ virtual void setMatcher( shared_ptr< CoveredIndexMatcher > matcher ) { _matcher = matcher; }
+
+ virtual long long nscanned() { return _nscanned; }
+
+ /** for debugging only */
+ const DiskLoc getBucket() const { return bucket; }
+ int getKeyOfs() const { return keyOfs; }
+
+ // just for unit tests
+ virtual bool curKeyHasChild() = 0;
+
+ protected:
+ /**
+ * Our btrees may (rarely) have "unused" keys when items are deleted.
+ * Skip past them.
+ */
+ virtual bool skipUnusedKeys() = 0;
+
+ bool skipOutOfRangeKeysAndCheckEnd();
+ void skipAndCheck();
+ void checkEnd();
+
+ /** selective audits on construction */
+ void audit();
+
+ virtual void _audit() = 0;
+ virtual DiskLoc _locate(const BSONObj& key, const DiskLoc& loc) = 0;
+ virtual DiskLoc _advance(const DiskLoc& thisLoc, int& keyOfs, int direction, const char *caller) = 0;
+ virtual void _advanceTo(DiskLoc &thisLoc, int &keyOfs, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction ) = 0;
+
+ /** set initial bucket */
+ void initWithoutIndependentFieldRanges();
+
+ /** if afterKey is true, we want the first key with values of the keyBegin fields greater than keyBegin */
+ void advanceTo( const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive );
+
+ set<DiskLoc> _dups;
+ NamespaceDetails * const d;
+ const int idxNo;
+ BSONObj startKey;
+ BSONObj endKey;
+ bool _endKeyInclusive;
+ bool _multikey; // this must be updated every getmore batch in case someone added a multikey
+ const IndexDetails& indexDetails;
+ const BSONObj _order;
+ const Ordering _ordering;
+ DiskLoc bucket;
+ int keyOfs;
+ const int _direction; // 1=fwd,-1=reverse
+ BSONObj keyAtKeyOfs; // so we can tell if things moved around on us between the query and the getMore call
+ DiskLoc locAtKeyOfs;
+ const shared_ptr< FieldRangeVector > _bounds;
+ auto_ptr< FieldRangeVectorIterator > _boundsIterator;
+ shared_ptr< CoveredIndexMatcher > _matcher;
+ bool _independentFieldRanges;
+ long long _nscanned;
+ };
+
+ template< class V >
+ struct Continuation {
+ //Continuation(const typename V::Key & k);
+ Continuation(DiskLoc thisLoc, DiskLoc _recordLoc, const BSONObj &_key,
+ Ordering _order, IndexDetails& _idx) :
+ bLoc(thisLoc), recordLoc(_recordLoc), key(_key), order(_order), idx(_idx) {
+ op = Nothing;
+ }
+
+ DiskLoc bLoc;
+ DiskLoc recordLoc;
+ typename V::KeyOwned key;
+ const Ordering order;
+ IndexDetails& idx;
+ enum Op { Nothing, SetUsed, InsertHere } op;
+
+ int pos;
+ const BtreeBucket<V> *b;
+
+ void stepTwo() {
+ if( op == Nothing )
+ return;
+ else if( op == SetUsed ) {
+ const typename V::_KeyNode& kn = b->k(pos);
+ kn.writing().setUsed();
+ }
+ else {
+ b->insertHere(bLoc, pos, recordLoc, key, order, DiskLoc(), DiskLoc(), idx);
+ }
+ }
+ };
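+
+    /* Editorial note: stepTwo() only applies whatever step one recorded in 'op',
+     * 'pos' and 'b' -- nothing for Nothing, a writing().setUsed() for SetUsed, an
+     * insertHere() for InsertHere -- so the page-faulting search work can be done
+     * before the final write is applied. */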
+
+ /** Renames the index namespace for this btree's index. */
+ void renameIndexNamespace(const char *oldNs, const char *newNs);
+
+ /**
+ * give us a writable version of the btree bucket (declares write intent).
+ * note it is likely more efficient to declare write intent on something smaller when you can.
+ */
+ template< class V >
+ BtreeBucket<V> * DiskLoc::btreemod() const {
+ assert( _a != -1 );
+ BtreeBucket<V> *b = const_cast< BtreeBucket<V> * >( btree<V>() );
+ return static_cast< BtreeBucket<V>* >( getDur().writingPtr( b, V::BucketSize ) );
+ }
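+
+    /* Editorial sketch (hypothetical call, for exposition only): given a DiskLoc
+     * loc that points at a V1 bucket,
+     *
+     *     BtreeBucket<V1> *b = loc.btreemod<V1>();   // write intent on the whole bucket
+     *
+     * after which b may be mutated; as noted above, declaring intent on something
+     * smaller via getDur().writingPtr() is usually cheaper when possible.
+     */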
+
+ template< class V >
+ BucketBasics<V>::KeyNode::KeyNode(const BucketBasics<V>& bb, const _KeyNode &k) :
+ prevChildBucket(k.prevChildBucket),
+ recordLoc(k.recordLoc), key(bb.data+k.keyDataOfs())
+ { }
+
+} // namespace mongo;