summaryrefslogtreecommitdiff
path: root/src/mongo/db/storage/mmap_v1/btree/btree_ondisk.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/db/storage/mmap_v1/btree/btree_ondisk.h')
-rw-r--r--src/mongo/db/storage/mmap_v1/btree/btree_ondisk.h519
1 files changed, 262 insertions, 257 deletions
diff --git a/src/mongo/db/storage/mmap_v1/btree/btree_ondisk.h b/src/mongo/db/storage/mmap_v1/btree/btree_ondisk.h
index a5ddec6bccd..3238ec64179 100644
--- a/src/mongo/db/storage/mmap_v1/btree/btree_ondisk.h
+++ b/src/mongo/db/storage/mmap_v1/btree/btree_ondisk.h
@@ -34,337 +34,342 @@
namespace mongo {
- const int OldBucketSize = 8192;
+const int OldBucketSize = 8192;
+//
+// On-disk index format
+//
+
+#pragma pack(1)
+/**
+ * This is the fixed width data component for storage of a key within a bucket. It contains an
+ * offset pointer to the variable width bson data component. This may be 'unused', please see
+ * below.
+ *
+ * Why is this templated on Loc? Because V0 and V1 have different size DiskLoc(s) but otherwise
+ * the same layout.
+ */
+template <class LocType>
+struct FixedWidthKey {
//
- // On-disk index format
+ // Data
//
-#pragma pack(1)
/**
- * This is the fixed width data component for storage of a key within a bucket. It contains an
- * offset pointer to the variable width bson data component. This may be 'unused', please see
- * below.
- *
- * Why is this templated on Loc? Because V0 and V1 have different size DiskLoc(s) but otherwise
- * the same layout.
+ * The 'left' child bucket of this key. If this is the i-th key, it points to the i index
+ * child bucket.
*/
- template <class LocType>
- struct FixedWidthKey {
- //
- // Data
- //
-
- /**
- * The 'left' child bucket of this key. If this is the i-th key, it points to the i index
- * child bucket.
- */
- LocType prevChildBucket;
-
- /**
- * The location of the record associated with this key.
- */
- LocType recordLoc;
-
- /**
- * Offset within current bucket of the variable width bson key for this _KeyNode.
- */
- unsigned short _kdo;
-
- //
- // Accessors / mutators
- //
-
- short keyDataOfs() const {
- return static_cast<short>(_kdo);
- }
+ LocType prevChildBucket;
- void setKeyDataOfs(short s) {
- _kdo = s;
- invariant(s>=0);
- }
+ /**
+ * The location of the record associated with this key.
+ */
+ LocType recordLoc;
- void setKeyDataOfsSavingUse(short s) {
- // XXX kill this func
- setKeyDataOfs(s);
- }
+ /**
+ * Offset within current bucket of the variable width bson key for this _KeyNode.
+ */
+ unsigned short _kdo;
- /**
- * Unused keys are not returned by read operations. Keys may be marked
- * as unused in cases where it is difficult to delete them while
- * maintaining the constraints required of a btree.
- *
- * Setting ofs to odd is the sentinel for unused, as real recordLoc's
- * are always even numbers. Note we need to keep its value basically
- * the same as we use the recordLoc as part of the key in the index
- * (to handle duplicate keys efficiently).
- *
- * Flagging keys as unused is a feature that is being phased out in favor
- * of deleting the keys outright. The current btree implementation is
- * not expected to mark a key as unused in a non legacy btree.
- */
- void setUnused() {
- recordLoc.GETOFS() |= 1;
- }
+ //
+ // Accessors / mutators
+ //
- void setUsed() { recordLoc.GETOFS() &= ~1; }
+ short keyDataOfs() const {
+ return static_cast<short>(_kdo);
+ }
- int isUnused() const {
- return recordLoc.getOfs() & 1;
- }
+ void setKeyDataOfs(short s) {
+ _kdo = s;
+ invariant(s >= 0);
+ }
- int isUsed() const {
- return !isUnused();
- }
- };
+ void setKeyDataOfsSavingUse(short s) {
+ // XXX kill this func
+ setKeyDataOfs(s);
+ }
/**
- * This structure represents header data for a btree bucket. An object of
- * this type is typically allocated inside of a buffer of size BucketSize,
- * resulting in a full bucket with an appropriate header.
+ * Unused keys are not returned by read operations. Keys may be marked
+ * as unused in cases where it is difficult to delete them while
+ * maintaining the constraints required of a btree.
*
- * The body of a btree bucket contains an array of _KeyNode objects starting
- * from its lowest indexed bytes and growing to higher indexed bytes. The
- * body also contains variable width bson keys, which are allocated from the
- * highest indexed bytes toward lower indexed bytes.
+ * Setting ofs to odd is the sentinel for unused, as real recordLoc's
+ * are always even numbers. Note we need to keep its value basically
+ * the same as we use the recordLoc as part of the key in the index
+ * (to handle duplicate keys efficiently).
*
- * |hhhh|kkkkkkk--------bbbbbbbbbbbuuubbbuubbb|
- * h = header data
- * k = KeyNode data
- * - = empty space
- * b = bson key data
- * u = unused (old) bson key data, that may be garbage collected
+ * Flagging keys as unused is a feature that is being phased out in favor
+ * of deleting the keys outright. The current btree implementation is
+ * not expected to mark a key as unused in a non legacy btree.
*/
- struct BtreeBucketV0 {
- /**
- * Parent bucket of this bucket, which isNull() for the root bucket.
- */
- DiskLoc parent;
+ void setUnused() {
+ recordLoc.GETOFS() |= 1;
+ }
- /**
- * Given that there are n keys, this is the n index child.
- */
- DiskLoc nextChild;
+ void setUsed() {
+ recordLoc.GETOFS() &= ~1;
+ }
- /**
- * Can be reused, value is 8192 in current pdfile version Apr2010
- */
- unsigned short _wasSize;
+ int isUnused() const {
+ return recordLoc.getOfs() & 1;
+ }
- /**
- * zero
- */
- unsigned short _reserved1;
+ int isUsed() const {
+ return !isUnused();
+ }
+};
- int flags;
+/**
+ * This structure represents header data for a btree bucket. An object of
+ * this type is typically allocated inside of a buffer of size BucketSize,
+ * resulting in a full bucket with an appropriate header.
+ *
+ * The body of a btree bucket contains an array of _KeyNode objects starting
+ * from its lowest indexed bytes and growing to higher indexed bytes. The
+ * body also contains variable width bson keys, which are allocated from the
+ * highest indexed bytes toward lower indexed bytes.
+ *
+ * |hhhh|kkkkkkk--------bbbbbbbbbbbuuubbbuubbb|
+ * h = header data
+ * k = KeyNode data
+ * - = empty space
+ * b = bson key data
+ * u = unused (old) bson key data, that may be garbage collected
+ */
+struct BtreeBucketV0 {
+ /**
+ * Parent bucket of this bucket, which isNull() for the root bucket.
+ */
+ DiskLoc parent;
- /** basicInsert() assumes the next three members are consecutive and in this order: */
+ /**
+ * Given that there are n keys, this is the n index child.
+ */
+ DiskLoc nextChild;
- /** Size of the empty region. */
- int emptySize;
+ /**
+ * Can be reused, value is 8192 in current pdfile version Apr2010
+ */
+ unsigned short _wasSize;
- /** Size used for bson storage, including storage of old keys. */
- int topSize;
+ /**
+ * zero
+ */
+ unsigned short _reserved1;
- /* Number of keys in the bucket. */
- int n;
+ int flags;
- int reserved;
+ /** basicInsert() assumes the next three members are consecutive and in this order: */
- /* Beginning of the bucket's body */
- char data[4];
+ /** Size of the empty region. */
+ int emptySize;
- // Precalculated size constants
- enum { HeaderSize = 40 };
- };
+ /** Size used for bson storage, including storage of old keys. */
+ int topSize;
- // BtreeBucketV0 is part of the on-disk format, so it should never be changed
- BOOST_STATIC_ASSERT(
- sizeof(BtreeBucketV0) - sizeof(static_cast<BtreeBucketV0*>(NULL)->data)
- == BtreeBucketV0::HeaderSize);
+ /* Number of keys in the bucket. */
+ int n;
- /**
- * A variant of DiskLoc Used by the V1 bucket type.
- */
- struct DiskLoc56Bit {
- //
- // Data
- //
+ int reserved;
- int ofs;
+ /* Beginning of the bucket's body */
+ char data[4];
- unsigned char _a[3];
+ // Precalculated size constants
+ enum { HeaderSize = 40 };
+};
- //
- // Accessors XXX rename these, this is terrible
- //
+// BtreeBucketV0 is part of the on-disk format, so it should never be changed
+BOOST_STATIC_ASSERT(sizeof(BtreeBucketV0) - sizeof(static_cast<BtreeBucketV0*>(NULL)->data) ==
+ BtreeBucketV0::HeaderSize);
- int& GETOFS() { return ofs; }
+/**
+ * A variant of DiskLoc Used by the V1 bucket type.
+ */
+struct DiskLoc56Bit {
+ //
+ // Data
+ //
- int getOfs() const { return ofs; }
+ int ofs;
- //
- // Comparison
- //
+ unsigned char _a[3];
- bool isNull() const { return ofs < 0; }
+ //
+ // Accessors XXX rename these, this is terrible
+ //
- unsigned long long toLongLong() const {
- // endian
- unsigned long long result = ofs;
- char* cursor = reinterpret_cast<char *>(&result);
- *reinterpret_cast<uint16_t*>(cursor + 4) = *reinterpret_cast<const uint16_t*>(&_a[0]);
- *reinterpret_cast<uint8_t*>(cursor + 6) = *reinterpret_cast<const uint8_t*>(&_a[2]);
- *reinterpret_cast<uint8_t*>(cursor + 7) = uint8_t(0);
- return result;
- }
+ int& GETOFS() {
+ return ofs;
+ }
- bool operator<(const DiskLoc56Bit& rhs) const {
- // the orderering of dup keys in btrees isn't too critical, but we'd like to put items
- // that are close together on disk close together in the tree, so we do want the file #
- // to be the most significant bytes
- return toLongLong() < rhs.toLongLong();
- }
+ int getOfs() const {
+ return ofs;
+ }
- int compare(const DiskLoc56Bit& rhs) const {
- unsigned long long a = toLongLong();
- unsigned long long b = rhs.toLongLong();
- if ( a < b ) {
- return -1;
- }
- else {
- return a == b ? 0 : 1;
- }
- }
+ //
+ // Comparison
+ //
- bool operator==(const DiskLoc56Bit& rhs) const {
- return toLongLong() == rhs.toLongLong();
+ bool isNull() const {
+ return ofs < 0;
+ }
+
+ unsigned long long toLongLong() const {
+ // endian
+ unsigned long long result = ofs;
+ char* cursor = reinterpret_cast<char*>(&result);
+ *reinterpret_cast<uint16_t*>(cursor + 4) = *reinterpret_cast<const uint16_t*>(&_a[0]);
+ *reinterpret_cast<uint8_t*>(cursor + 6) = *reinterpret_cast<const uint8_t*>(&_a[2]);
+ *reinterpret_cast<uint8_t*>(cursor + 7) = uint8_t(0);
+ return result;
+ }
+
+ bool operator<(const DiskLoc56Bit& rhs) const {
+ // the orderering of dup keys in btrees isn't too critical, but we'd like to put items
+ // that are close together on disk close together in the tree, so we do want the file #
+ // to be the most significant bytes
+ return toLongLong() < rhs.toLongLong();
+ }
+
+ int compare(const DiskLoc56Bit& rhs) const {
+ unsigned long long a = toLongLong();
+ unsigned long long b = rhs.toLongLong();
+ if (a < b) {
+ return -1;
+ } else {
+ return a == b ? 0 : 1;
}
+ }
- bool operator!=(const DiskLoc56Bit& rhs) const {
- return toLongLong() != rhs.toLongLong();
- }
+ bool operator==(const DiskLoc56Bit& rhs) const {
+ return toLongLong() == rhs.toLongLong();
+ }
- bool operator==(const DiskLoc& rhs) const {
- return DiskLoc(*this) == rhs;
- }
+ bool operator!=(const DiskLoc56Bit& rhs) const {
+ return toLongLong() != rhs.toLongLong();
+ }
- bool operator!=(const DiskLoc& rhs) const {
- return !(*this==rhs);
- }
+ bool operator==(const DiskLoc& rhs) const {
+ return DiskLoc(*this) == rhs;
+ }
- //
- // Mutation
- //
+ bool operator!=(const DiskLoc& rhs) const {
+ return !(*this == rhs);
+ }
- enum {
- OurNullOfs = -2, // first bit of offsets used in _KeyNode we don't use -1 here
- OurMaxA = 0xffffff, // highest 3-byte value
- };
+ //
+ // Mutation
+ //
- void Null() {
- ofs = OurNullOfs;
- _a[0] = _a[1] = _a[2] = 0;
- }
+ enum {
+ OurNullOfs = -2, // first bit of offsets used in _KeyNode we don't use -1 here
+ OurMaxA = 0xffffff, // highest 3-byte value
+ };
- void operator=(const DiskLoc& loc);
+ void Null() {
+ ofs = OurNullOfs;
+ _a[0] = _a[1] = _a[2] = 0;
+ }
- //
- // Type Conversion
- //
+ void operator=(const DiskLoc& loc);
- RecordId toRecordId() const {
- return DiskLoc(*this).toRecordId();
- }
+ //
+ // Type Conversion
+ //
- operator DiskLoc() const {
- // endian
- if( isNull() ) return DiskLoc();
- unsigned a = *((unsigned *) (_a-1));
- return DiskLoc(a >> 8, ofs);
- }
+ RecordId toRecordId() const {
+ return DiskLoc(*this).toRecordId();
+ }
- std::string toString() const { return DiskLoc(*this).toString(); }
- };
+ operator DiskLoc() const {
+ // endian
+ if (isNull())
+ return DiskLoc();
+ unsigned a = *((unsigned*)(_a - 1));
+ return DiskLoc(a >> 8, ofs);
+ }
- struct BtreeBucketV1 {
- /** Parent bucket of this bucket, which isNull() for the root bucket. */
- DiskLoc56Bit parent;
+ std::string toString() const {
+ return DiskLoc(*this).toString();
+ }
+};
- /** Given that there are n keys, this is the n index child. */
- DiskLoc56Bit nextChild;
+struct BtreeBucketV1 {
+ /** Parent bucket of this bucket, which isNull() for the root bucket. */
+ DiskLoc56Bit parent;
- unsigned short flags;
+ /** Given that there are n keys, this is the n index child. */
+ DiskLoc56Bit nextChild;
- /** Size of the empty region. */
- unsigned short emptySize;
+ unsigned short flags;
- /** Size used for bson storage, including storage of old keys. */
- unsigned short topSize;
+ /** Size of the empty region. */
+ unsigned short emptySize;
- /* Number of keys in the bucket. */
- unsigned short n;
+ /** Size used for bson storage, including storage of old keys. */
+ unsigned short topSize;
- /* Beginning of the bucket's body */
- char data[4];
+ /* Number of keys in the bucket. */
+ unsigned short n;
- // Precalculated size constants
- enum { HeaderSize = 22 };
- };
+ /* Beginning of the bucket's body */
+ char data[4];
- // BtreeBucketV1 is part of the on-disk format, so it should never be changed
- BOOST_STATIC_ASSERT(
- sizeof(BtreeBucketV1) - sizeof(static_cast<BtreeBucketV1*>(NULL)->data)
- == BtreeBucketV1::HeaderSize);
+ // Precalculated size constants
+ enum { HeaderSize = 22 };
+};
- enum Flags {
- Packed = 1
- };
+// BtreeBucketV1 is part of the on-disk format, so it should never be changed
+BOOST_STATIC_ASSERT(sizeof(BtreeBucketV1) - sizeof(static_cast<BtreeBucketV1*>(NULL)->data) ==
+ BtreeBucketV1::HeaderSize);
- struct BtreeLayoutV0 {
- typedef FixedWidthKey<DiskLoc> FixedWidthKeyType;
- typedef DiskLoc LocType;
- typedef KeyBson KeyType;
- typedef KeyBson KeyOwnedType;
- typedef BtreeBucketV0 BucketType;
+enum Flags { Packed = 1 };
- enum { BucketSize = 8192,
- BucketBodySize = BucketSize - BucketType::HeaderSize
- };
+struct BtreeLayoutV0 {
+ typedef FixedWidthKey<DiskLoc> FixedWidthKeyType;
+ typedef DiskLoc LocType;
+ typedef KeyBson KeyType;
+ typedef KeyBson KeyOwnedType;
+ typedef BtreeBucketV0 BucketType;
- // largest key size we allow. note we very much need to support bigger keys (somehow) in
- // the future.
+ enum { BucketSize = 8192, BucketBodySize = BucketSize - BucketType::HeaderSize };
- static const int KeyMax = OldBucketSize / 10;
+ // largest key size we allow. note we very much need to support bigger keys (somehow) in
+ // the future.
- // A sentinel value sometimes used to identify a deallocated bucket.
- static const int INVALID_N_SENTINEL = -1;
+ static const int KeyMax = OldBucketSize / 10;
- static void initBucket(BucketType* bucket) {
- bucket->_reserved1 = 0;
- bucket->_wasSize = BucketSize;
- bucket->reserved = 0;
- }
- };
+ // A sentinel value sometimes used to identify a deallocated bucket.
+ static const int INVALID_N_SENTINEL = -1;
+
+ static void initBucket(BucketType* bucket) {
+ bucket->_reserved1 = 0;
+ bucket->_wasSize = BucketSize;
+ bucket->reserved = 0;
+ }
+};
- struct BtreeLayoutV1 {
- typedef FixedWidthKey<DiskLoc56Bit> FixedWidthKeyType;
- typedef KeyV1 KeyType;
- typedef KeyV1Owned KeyOwnedType;
- typedef DiskLoc56Bit LocType;
- typedef BtreeBucketV1 BucketType;
+struct BtreeLayoutV1 {
+ typedef FixedWidthKey<DiskLoc56Bit> FixedWidthKeyType;
+ typedef KeyV1 KeyType;
+ typedef KeyV1Owned KeyOwnedType;
+ typedef DiskLoc56Bit LocType;
+ typedef BtreeBucketV1 BucketType;
- enum { BucketSize = 8192 - 16, // The -16 is to leave room for the MmapV1RecordHeader header
- BucketBodySize = BucketSize - BucketType::HeaderSize
- };
+ enum {
+ BucketSize = 8192 - 16, // The -16 is to leave room for the MmapV1RecordHeader header
+ BucketBodySize = BucketSize - BucketType::HeaderSize
+ };
- static const int KeyMax = 1024;
+ static const int KeyMax = 1024;
- // A sentinel value sometimes used to identify a deallocated bucket.
- static const unsigned short INVALID_N_SENTINEL = 0xffff;
+ // A sentinel value sometimes used to identify a deallocated bucket.
+ static const unsigned short INVALID_N_SENTINEL = 0xffff;
- static void initBucket(BucketType* bucket) { }
- };
+ static void initBucket(BucketType* bucket) {}
+};
#pragma pack()