diff options
Diffstat (limited to 'src/mongo/db/storage/mmap_v1/btree/btree_ondisk.h')
-rw-r--r-- | src/mongo/db/storage/mmap_v1/btree/btree_ondisk.h | 519 |
1 files changed, 262 insertions, 257 deletions
diff --git a/src/mongo/db/storage/mmap_v1/btree/btree_ondisk.h b/src/mongo/db/storage/mmap_v1/btree/btree_ondisk.h index a5ddec6bccd..3238ec64179 100644 --- a/src/mongo/db/storage/mmap_v1/btree/btree_ondisk.h +++ b/src/mongo/db/storage/mmap_v1/btree/btree_ondisk.h @@ -34,337 +34,342 @@ namespace mongo { - const int OldBucketSize = 8192; +const int OldBucketSize = 8192; +// +// On-disk index format +// + +#pragma pack(1) +/** + * This is the fixed width data component for storage of a key within a bucket. It contains an + * offset pointer to the variable width bson data component. This may be 'unused', please see + * below. + * + * Why is this templated on Loc? Because V0 and V1 have different size DiskLoc(s) but otherwise + * the same layout. + */ +template <class LocType> +struct FixedWidthKey { // - // On-disk index format + // Data // -#pragma pack(1) /** - * This is the fixed width data component for storage of a key within a bucket. It contains an - * offset pointer to the variable width bson data component. This may be 'unused', please see - * below. - * - * Why is this templated on Loc? Because V0 and V1 have different size DiskLoc(s) but otherwise - * the same layout. + * The 'left' child bucket of this key. If this is the i-th key, it points to the i index + * child bucket. */ - template <class LocType> - struct FixedWidthKey { - // - // Data - // - - /** - * The 'left' child bucket of this key. If this is the i-th key, it points to the i index - * child bucket. - */ - LocType prevChildBucket; - - /** - * The location of the record associated with this key. - */ - LocType recordLoc; - - /** - * Offset within current bucket of the variable width bson key for this _KeyNode. - */ - unsigned short _kdo; - - // - // Accessors / mutators - // - - short keyDataOfs() const { - return static_cast<short>(_kdo); - } + LocType prevChildBucket; - void setKeyDataOfs(short s) { - _kdo = s; - invariant(s>=0); - } + /** + * The location of the record associated with this key. + */ + LocType recordLoc; - void setKeyDataOfsSavingUse(short s) { - // XXX kill this func - setKeyDataOfs(s); - } + /** + * Offset within current bucket of the variable width bson key for this _KeyNode. + */ + unsigned short _kdo; - /** - * Unused keys are not returned by read operations. Keys may be marked - * as unused in cases where it is difficult to delete them while - * maintaining the constraints required of a btree. - * - * Setting ofs to odd is the sentinel for unused, as real recordLoc's - * are always even numbers. Note we need to keep its value basically - * the same as we use the recordLoc as part of the key in the index - * (to handle duplicate keys efficiently). - * - * Flagging keys as unused is a feature that is being phased out in favor - * of deleting the keys outright. The current btree implementation is - * not expected to mark a key as unused in a non legacy btree. - */ - void setUnused() { - recordLoc.GETOFS() |= 1; - } + // + // Accessors / mutators + // - void setUsed() { recordLoc.GETOFS() &= ~1; } + short keyDataOfs() const { + return static_cast<short>(_kdo); + } - int isUnused() const { - return recordLoc.getOfs() & 1; - } + void setKeyDataOfs(short s) { + _kdo = s; + invariant(s >= 0); + } - int isUsed() const { - return !isUnused(); - } - }; + void setKeyDataOfsSavingUse(short s) { + // XXX kill this func + setKeyDataOfs(s); + } /** - * This structure represents header data for a btree bucket. An object of - * this type is typically allocated inside of a buffer of size BucketSize, - * resulting in a full bucket with an appropriate header. + * Unused keys are not returned by read operations. Keys may be marked + * as unused in cases where it is difficult to delete them while + * maintaining the constraints required of a btree. * - * The body of a btree bucket contains an array of _KeyNode objects starting - * from its lowest indexed bytes and growing to higher indexed bytes. The - * body also contains variable width bson keys, which are allocated from the - * highest indexed bytes toward lower indexed bytes. + * Setting ofs to odd is the sentinel for unused, as real recordLoc's + * are always even numbers. Note we need to keep its value basically + * the same as we use the recordLoc as part of the key in the index + * (to handle duplicate keys efficiently). * - * |hhhh|kkkkkkk--------bbbbbbbbbbbuuubbbuubbb| - * h = header data - * k = KeyNode data - * - = empty space - * b = bson key data - * u = unused (old) bson key data, that may be garbage collected + * Flagging keys as unused is a feature that is being phased out in favor + * of deleting the keys outright. The current btree implementation is + * not expected to mark a key as unused in a non legacy btree. */ - struct BtreeBucketV0 { - /** - * Parent bucket of this bucket, which isNull() for the root bucket. - */ - DiskLoc parent; + void setUnused() { + recordLoc.GETOFS() |= 1; + } - /** - * Given that there are n keys, this is the n index child. - */ - DiskLoc nextChild; + void setUsed() { + recordLoc.GETOFS() &= ~1; + } - /** - * Can be reused, value is 8192 in current pdfile version Apr2010 - */ - unsigned short _wasSize; + int isUnused() const { + return recordLoc.getOfs() & 1; + } - /** - * zero - */ - unsigned short _reserved1; + int isUsed() const { + return !isUnused(); + } +}; - int flags; +/** + * This structure represents header data for a btree bucket. An object of + * this type is typically allocated inside of a buffer of size BucketSize, + * resulting in a full bucket with an appropriate header. + * + * The body of a btree bucket contains an array of _KeyNode objects starting + * from its lowest indexed bytes and growing to higher indexed bytes. The + * body also contains variable width bson keys, which are allocated from the + * highest indexed bytes toward lower indexed bytes. + * + * |hhhh|kkkkkkk--------bbbbbbbbbbbuuubbbuubbb| + * h = header data + * k = KeyNode data + * - = empty space + * b = bson key data + * u = unused (old) bson key data, that may be garbage collected + */ +struct BtreeBucketV0 { + /** + * Parent bucket of this bucket, which isNull() for the root bucket. + */ + DiskLoc parent; - /** basicInsert() assumes the next three members are consecutive and in this order: */ + /** + * Given that there are n keys, this is the n index child. + */ + DiskLoc nextChild; - /** Size of the empty region. */ - int emptySize; + /** + * Can be reused, value is 8192 in current pdfile version Apr2010 + */ + unsigned short _wasSize; - /** Size used for bson storage, including storage of old keys. */ - int topSize; + /** + * zero + */ + unsigned short _reserved1; - /* Number of keys in the bucket. */ - int n; + int flags; - int reserved; + /** basicInsert() assumes the next three members are consecutive and in this order: */ - /* Beginning of the bucket's body */ - char data[4]; + /** Size of the empty region. */ + int emptySize; - // Precalculated size constants - enum { HeaderSize = 40 }; - }; + /** Size used for bson storage, including storage of old keys. */ + int topSize; - // BtreeBucketV0 is part of the on-disk format, so it should never be changed - BOOST_STATIC_ASSERT( - sizeof(BtreeBucketV0) - sizeof(static_cast<BtreeBucketV0*>(NULL)->data) - == BtreeBucketV0::HeaderSize); + /* Number of keys in the bucket. */ + int n; - /** - * A variant of DiskLoc Used by the V1 bucket type. - */ - struct DiskLoc56Bit { - // - // Data - // + int reserved; - int ofs; + /* Beginning of the bucket's body */ + char data[4]; - unsigned char _a[3]; + // Precalculated size constants + enum { HeaderSize = 40 }; +}; - // - // Accessors XXX rename these, this is terrible - // +// BtreeBucketV0 is part of the on-disk format, so it should never be changed +BOOST_STATIC_ASSERT(sizeof(BtreeBucketV0) - sizeof(static_cast<BtreeBucketV0*>(NULL)->data) == + BtreeBucketV0::HeaderSize); - int& GETOFS() { return ofs; } +/** + * A variant of DiskLoc Used by the V1 bucket type. + */ +struct DiskLoc56Bit { + // + // Data + // - int getOfs() const { return ofs; } + int ofs; - // - // Comparison - // + unsigned char _a[3]; - bool isNull() const { return ofs < 0; } + // + // Accessors XXX rename these, this is terrible + // - unsigned long long toLongLong() const { - // endian - unsigned long long result = ofs; - char* cursor = reinterpret_cast<char *>(&result); - *reinterpret_cast<uint16_t*>(cursor + 4) = *reinterpret_cast<const uint16_t*>(&_a[0]); - *reinterpret_cast<uint8_t*>(cursor + 6) = *reinterpret_cast<const uint8_t*>(&_a[2]); - *reinterpret_cast<uint8_t*>(cursor + 7) = uint8_t(0); - return result; - } + int& GETOFS() { + return ofs; + } - bool operator<(const DiskLoc56Bit& rhs) const { - // the orderering of dup keys in btrees isn't too critical, but we'd like to put items - // that are close together on disk close together in the tree, so we do want the file # - // to be the most significant bytes - return toLongLong() < rhs.toLongLong(); - } + int getOfs() const { + return ofs; + } - int compare(const DiskLoc56Bit& rhs) const { - unsigned long long a = toLongLong(); - unsigned long long b = rhs.toLongLong(); - if ( a < b ) { - return -1; - } - else { - return a == b ? 0 : 1; - } - } + // + // Comparison + // - bool operator==(const DiskLoc56Bit& rhs) const { - return toLongLong() == rhs.toLongLong(); + bool isNull() const { + return ofs < 0; + } + + unsigned long long toLongLong() const { + // endian + unsigned long long result = ofs; + char* cursor = reinterpret_cast<char*>(&result); + *reinterpret_cast<uint16_t*>(cursor + 4) = *reinterpret_cast<const uint16_t*>(&_a[0]); + *reinterpret_cast<uint8_t*>(cursor + 6) = *reinterpret_cast<const uint8_t*>(&_a[2]); + *reinterpret_cast<uint8_t*>(cursor + 7) = uint8_t(0); + return result; + } + + bool operator<(const DiskLoc56Bit& rhs) const { + // the orderering of dup keys in btrees isn't too critical, but we'd like to put items + // that are close together on disk close together in the tree, so we do want the file # + // to be the most significant bytes + return toLongLong() < rhs.toLongLong(); + } + + int compare(const DiskLoc56Bit& rhs) const { + unsigned long long a = toLongLong(); + unsigned long long b = rhs.toLongLong(); + if (a < b) { + return -1; + } else { + return a == b ? 0 : 1; } + } - bool operator!=(const DiskLoc56Bit& rhs) const { - return toLongLong() != rhs.toLongLong(); - } + bool operator==(const DiskLoc56Bit& rhs) const { + return toLongLong() == rhs.toLongLong(); + } - bool operator==(const DiskLoc& rhs) const { - return DiskLoc(*this) == rhs; - } + bool operator!=(const DiskLoc56Bit& rhs) const { + return toLongLong() != rhs.toLongLong(); + } - bool operator!=(const DiskLoc& rhs) const { - return !(*this==rhs); - } + bool operator==(const DiskLoc& rhs) const { + return DiskLoc(*this) == rhs; + } - // - // Mutation - // + bool operator!=(const DiskLoc& rhs) const { + return !(*this == rhs); + } - enum { - OurNullOfs = -2, // first bit of offsets used in _KeyNode we don't use -1 here - OurMaxA = 0xffffff, // highest 3-byte value - }; + // + // Mutation + // - void Null() { - ofs = OurNullOfs; - _a[0] = _a[1] = _a[2] = 0; - } + enum { + OurNullOfs = -2, // first bit of offsets used in _KeyNode we don't use -1 here + OurMaxA = 0xffffff, // highest 3-byte value + }; - void operator=(const DiskLoc& loc); + void Null() { + ofs = OurNullOfs; + _a[0] = _a[1] = _a[2] = 0; + } - // - // Type Conversion - // + void operator=(const DiskLoc& loc); - RecordId toRecordId() const { - return DiskLoc(*this).toRecordId(); - } + // + // Type Conversion + // - operator DiskLoc() const { - // endian - if( isNull() ) return DiskLoc(); - unsigned a = *((unsigned *) (_a-1)); - return DiskLoc(a >> 8, ofs); - } + RecordId toRecordId() const { + return DiskLoc(*this).toRecordId(); + } - std::string toString() const { return DiskLoc(*this).toString(); } - }; + operator DiskLoc() const { + // endian + if (isNull()) + return DiskLoc(); + unsigned a = *((unsigned*)(_a - 1)); + return DiskLoc(a >> 8, ofs); + } - struct BtreeBucketV1 { - /** Parent bucket of this bucket, which isNull() for the root bucket. */ - DiskLoc56Bit parent; + std::string toString() const { + return DiskLoc(*this).toString(); + } +}; - /** Given that there are n keys, this is the n index child. */ - DiskLoc56Bit nextChild; +struct BtreeBucketV1 { + /** Parent bucket of this bucket, which isNull() for the root bucket. */ + DiskLoc56Bit parent; - unsigned short flags; + /** Given that there are n keys, this is the n index child. */ + DiskLoc56Bit nextChild; - /** Size of the empty region. */ - unsigned short emptySize; + unsigned short flags; - /** Size used for bson storage, including storage of old keys. */ - unsigned short topSize; + /** Size of the empty region. */ + unsigned short emptySize; - /* Number of keys in the bucket. */ - unsigned short n; + /** Size used for bson storage, including storage of old keys. */ + unsigned short topSize; - /* Beginning of the bucket's body */ - char data[4]; + /* Number of keys in the bucket. */ + unsigned short n; - // Precalculated size constants - enum { HeaderSize = 22 }; - }; + /* Beginning of the bucket's body */ + char data[4]; - // BtreeBucketV1 is part of the on-disk format, so it should never be changed - BOOST_STATIC_ASSERT( - sizeof(BtreeBucketV1) - sizeof(static_cast<BtreeBucketV1*>(NULL)->data) - == BtreeBucketV1::HeaderSize); + // Precalculated size constants + enum { HeaderSize = 22 }; +}; - enum Flags { - Packed = 1 - }; +// BtreeBucketV1 is part of the on-disk format, so it should never be changed +BOOST_STATIC_ASSERT(sizeof(BtreeBucketV1) - sizeof(static_cast<BtreeBucketV1*>(NULL)->data) == + BtreeBucketV1::HeaderSize); - struct BtreeLayoutV0 { - typedef FixedWidthKey<DiskLoc> FixedWidthKeyType; - typedef DiskLoc LocType; - typedef KeyBson KeyType; - typedef KeyBson KeyOwnedType; - typedef BtreeBucketV0 BucketType; +enum Flags { Packed = 1 }; - enum { BucketSize = 8192, - BucketBodySize = BucketSize - BucketType::HeaderSize - }; +struct BtreeLayoutV0 { + typedef FixedWidthKey<DiskLoc> FixedWidthKeyType; + typedef DiskLoc LocType; + typedef KeyBson KeyType; + typedef KeyBson KeyOwnedType; + typedef BtreeBucketV0 BucketType; - // largest key size we allow. note we very much need to support bigger keys (somehow) in - // the future. + enum { BucketSize = 8192, BucketBodySize = BucketSize - BucketType::HeaderSize }; - static const int KeyMax = OldBucketSize / 10; + // largest key size we allow. note we very much need to support bigger keys (somehow) in + // the future. - // A sentinel value sometimes used to identify a deallocated bucket. - static const int INVALID_N_SENTINEL = -1; + static const int KeyMax = OldBucketSize / 10; - static void initBucket(BucketType* bucket) { - bucket->_reserved1 = 0; - bucket->_wasSize = BucketSize; - bucket->reserved = 0; - } - }; + // A sentinel value sometimes used to identify a deallocated bucket. + static const int INVALID_N_SENTINEL = -1; + + static void initBucket(BucketType* bucket) { + bucket->_reserved1 = 0; + bucket->_wasSize = BucketSize; + bucket->reserved = 0; + } +}; - struct BtreeLayoutV1 { - typedef FixedWidthKey<DiskLoc56Bit> FixedWidthKeyType; - typedef KeyV1 KeyType; - typedef KeyV1Owned KeyOwnedType; - typedef DiskLoc56Bit LocType; - typedef BtreeBucketV1 BucketType; +struct BtreeLayoutV1 { + typedef FixedWidthKey<DiskLoc56Bit> FixedWidthKeyType; + typedef KeyV1 KeyType; + typedef KeyV1Owned KeyOwnedType; + typedef DiskLoc56Bit LocType; + typedef BtreeBucketV1 BucketType; - enum { BucketSize = 8192 - 16, // The -16 is to leave room for the MmapV1RecordHeader header - BucketBodySize = BucketSize - BucketType::HeaderSize - }; + enum { + BucketSize = 8192 - 16, // The -16 is to leave room for the MmapV1RecordHeader header + BucketBodySize = BucketSize - BucketType::HeaderSize + }; - static const int KeyMax = 1024; + static const int KeyMax = 1024; - // A sentinel value sometimes used to identify a deallocated bucket. - static const unsigned short INVALID_N_SENTINEL = 0xffff; + // A sentinel value sometimes used to identify a deallocated bucket. + static const unsigned short INVALID_N_SENTINEL = 0xffff; - static void initBucket(BucketType* bucket) { } - }; + static void initBucket(BucketType* bucket) {} +}; #pragma pack() |