diff options
Diffstat (limited to 'src/mongo/db/storage/key_string.h')
-rw-r--r-- | src/mongo/db/storage/key_string.h | 500 |
1 files changed, 264 insertions, 236 deletions
diff --git a/src/mongo/db/storage/key_string.h b/src/mongo/db/storage/key_string.h index 04b341521bf..7ef8ee0e723 100644 --- a/src/mongo/db/storage/key_string.h +++ b/src/mongo/db/storage/key_string.h @@ -39,287 +39,315 @@ namespace mongo { - class KeyString { +class KeyString { +public: + /** + * Encodes info needed to restore the original BSONTypes from a KeyString. They cannot be + * stored in place since we don't want them to affect the ordering (1 and 1.0 compare as + * equal). + */ + class TypeBits { public: + // Sufficient bytes to encode extra type information for any BSON key that fits in 1KB. + // The encoding format will need to change if we raise this limit. + static const uint8_t kMaxBytesNeeded = 127; + + TypeBits() { + reset(); + } /** - * Encodes info needed to restore the original BSONTypes from a KeyString. They cannot be - * stored in place since we don't want them to affect the ordering (1 and 1.0 compare as - * equal). + * If there are no bytes remaining, assumes AllZeros. Otherwise, reads bytes out of the + * BufReader in the format described on the getBuffer() method. */ - class TypeBits { - public: - // Sufficient bytes to encode extra type information for any BSON key that fits in 1KB. - // The encoding format will need to change if we raise this limit. - static const uint8_t kMaxBytesNeeded = 127; - - TypeBits() { reset(); } - - /** - * If there are no bytes remaining, assumes AllZeros. Otherwise, reads bytes out of the - * BufReader in the format described on the getBuffer() method. - */ - void resetFromBuffer(BufReader* reader); - static TypeBits fromBuffer(BufReader* reader) { - TypeBits out; - out.resetFromBuffer(reader); - return out; - } - - /** - * If true, no bits have been set to one. This is true if no bits have been set at all. - */ - bool isAllZeros() const { return _isAllZeros; } - - /** - * These methods return a buffer and size which encodes all of the type bits in this - * instance. - * - * Encoded format: - * Case 1 (first byte has high bit set to 1): - * Remaining bits of first byte encode number of follow-up bytes that are data - * bytes. Note that _buf is always maintained in this format but these methods may - * return one of the other formats, if possible, by skipping over the first byte. - * - * Case 2 (first byte is 0x0): - * This encodes the "AllZeros" state which represents an infinite stream of bits set - * to 0. Callers may optionally encode this case as an empty buffer if they have - * another way to mark the end of the buffer. There are no follow-up bytes. - * - * Case 3 (first byte isn't 0x0 but has high bit set to 0): - * The first byte is the only data byte. This can represent any 7-bit sequence or an - * 8-bit sequence if the 8th bit is 0, since the 8th bit is the same as the bit that - * is 1 if the first byte is the size byte. There are no follow-up bytes. - * - * Within data bytes (ie everything excluding the size byte if there is one), bits are - * packed in from low to high. - */ - const uint8_t* getBuffer() const { return getSize() == 1 ? _buf + 1 : _buf; } - size_t getSize() const { - if (_isAllZeros) { // Case 2 - dassert(_buf[1] == 0); - return 1; - } - - uint8_t rawSize = getSizeByte(); - dassert(rawSize >= 1); // 0 should be handled as isAllZeros. - if (rawSize == 1 && !(_buf[1] & 0x80)) { // Case 3 - return 1; - } - - return rawSize + 1; // Case 1 - } - - // - // Everything below is only for use by KeyString. - // - - // Note: No space is used if all bits are 0 so the most common cases should be 0x0. - static const uint8_t kString = 0x0; - static const uint8_t kSymbol = 0x1; - - static const uint8_t kInt = 0x0; - static const uint8_t kDouble = 0x1; - static const uint8_t kLong = 0x2; - static const uint8_t kNegativeZero = 0x3; // decodes as a double - - void reset() { - _curBit = 0; - _isAllZeros = true; - setSizeByte(0); - _buf[1] = 0; - } + void resetFromBuffer(BufReader* reader); + static TypeBits fromBuffer(BufReader* reader) { + TypeBits out; + out.resetFromBuffer(reader); + return out; + } - void appendString() { appendBit(kString); } - void appendSymbol() { appendBit(kSymbol); } + /** + * If true, no bits have been set to one. This is true if no bits have been set at all. + */ + bool isAllZeros() const { + return _isAllZeros; + } - void appendNumberDouble() { appendBit(kDouble & 1); appendBit(kDouble >> 1); } - void appendNumberInt() { appendBit(kInt & 1); appendBit(kInt >> 1); } - void appendNumberLong() { appendBit(kLong & 1); appendBit(kLong >> 1); } - void appendNegativeZero() { - appendBit(kNegativeZero & 1); - appendBit(kNegativeZero >> 1); + /** + * These methods return a buffer and size which encodes all of the type bits in this + * instance. + * + * Encoded format: + * Case 1 (first byte has high bit set to 1): + * Remaining bits of first byte encode number of follow-up bytes that are data + * bytes. Note that _buf is always maintained in this format but these methods may + * return one of the other formats, if possible, by skipping over the first byte. + * + * Case 2 (first byte is 0x0): + * This encodes the "AllZeros" state which represents an infinite stream of bits set + * to 0. Callers may optionally encode this case as an empty buffer if they have + * another way to mark the end of the buffer. There are no follow-up bytes. + * + * Case 3 (first byte isn't 0x0 but has high bit set to 0): + * The first byte is the only data byte. This can represent any 7-bit sequence or an + * 8-bit sequence if the 8th bit is 0, since the 8th bit is the same as the bit that + * is 1 if the first byte is the size byte. There are no follow-up bytes. + * + * Within data bytes (ie everything excluding the size byte if there is one), bits are + * packed in from low to high. + */ + const uint8_t* getBuffer() const { + return getSize() == 1 ? _buf + 1 : _buf; + } + size_t getSize() const { + if (_isAllZeros) { // Case 2 + dassert(_buf[1] == 0); + return 1; } - class Reader { - public: - /** - * Passed in TypeBits must outlive this Reader instance. - */ - explicit Reader(const TypeBits& typeBits) : _curBit(0), _typeBits(typeBits) {} - - uint8_t readStringLike() { return readBit(); } - uint8_t readNumeric() { - uint8_t lowBit = readBit(); - return lowBit | (readBit() << 1); - } - - private: - uint8_t readBit(); - - size_t _curBit; - const TypeBits& _typeBits; - }; - - private: - /** - * size only includes data bytes, not the size byte itself. - */ - uint8_t getSizeByte() const { return _buf[0] & 0x3f; } - void setSizeByte(uint8_t size) { - dassert(size < kMaxBytesNeeded); - _buf[0] = 0x80 | size; + uint8_t rawSize = getSizeByte(); + dassert(rawSize >= 1); // 0 should be handled as isAllZeros. + if (rawSize == 1 && !(_buf[1] & 0x80)) { // Case 3 + return 1; } - void appendBit(uint8_t oneOrZero); - - size_t _curBit; - bool _isAllZeros; + return rawSize + 1; // Case 1 + } - // See getBuffer()/getSize() documentation for a description of how data is encoded. - // Currently whole buffer is copied in default copy methods. If they ever show up as hot - // in profiling, we should add copy operations that only copy the parts of _buf that are - // in use. - uint8_t _buf[1/*size*/ + kMaxBytesNeeded]; - }; + // + // Everything below is only for use by KeyString. + // - enum Discriminator { - kInclusive, // Anything to be stored in an index must use this. - kExclusiveBefore, - kExclusiveAfter, - }; + // Note: No space is used if all bits are 0 so the most common cases should be 0x0. + static const uint8_t kString = 0x0; + static const uint8_t kSymbol = 0x1; - KeyString() {} + static const uint8_t kInt = 0x0; + static const uint8_t kDouble = 0x1; + static const uint8_t kLong = 0x2; + static const uint8_t kNegativeZero = 0x3; // decodes as a double - KeyString(const BSONObj& obj, Ordering ord, RecordId recordId) { - resetToKey(obj, ord, recordId); + void reset() { + _curBit = 0; + _isAllZeros = true; + setSizeByte(0); + _buf[1] = 0; } - KeyString(const BSONObj& obj, Ordering ord, Discriminator discriminator = kInclusive) { - resetToKey(obj, ord, discriminator); + void appendString() { + appendBit(kString); + } + void appendSymbol() { + appendBit(kSymbol); } - explicit KeyString(RecordId rid) { - appendRecordId(rid); + void appendNumberDouble() { + appendBit(kDouble & 1); + appendBit(kDouble >> 1); + } + void appendNumberInt() { + appendBit(kInt & 1); + appendBit(kInt >> 1); + } + void appendNumberLong() { + appendBit(kLong & 1); + appendBit(kLong >> 1); + } + void appendNegativeZero() { + appendBit(kNegativeZero & 1); + appendBit(kNegativeZero >> 1); } - static BSONObj toBson(StringData data, Ordering ord, const TypeBits& types); - static BSONObj toBson(const char* buffer, size_t len, Ordering ord, - const TypeBits& types); + class Reader { + public: + /** + * Passed in TypeBits must outlive this Reader instance. + */ + explicit Reader(const TypeBits& typeBits) : _curBit(0), _typeBits(typeBits) {} - /** - * Decodes a RecordId from the end of a buffer. - */ - static RecordId decodeRecordIdAtEnd(const void* buf, size_t size); + uint8_t readStringLike() { + return readBit(); + } + uint8_t readNumeric() { + uint8_t lowBit = readBit(); + return lowBit | (readBit() << 1); + } - /** - * Decodes a RecordId, consuming all bytes needed from reader. - */ - static RecordId decodeRecordId(BufReader* reader); + private: + uint8_t readBit(); - void appendRecordId(RecordId loc); - void appendTypeBits(const TypeBits& bits); + size_t _curBit; + const TypeBits& _typeBits; + }; + private: /** - * Resets to an empty state. - * Equivalent to but faster than *this = KeyString() + * size only includes data bytes, not the size byte itself. */ - void resetToEmpty() { - _buffer.reset(); - _typeBits.reset(); + uint8_t getSizeByte() const { + return _buf[0] & 0x3f; } - - void resetToKey(const BSONObj& obj, Ordering ord, RecordId recordId); - void resetToKey(const BSONObj& obj, Ordering ord, Discriminator discriminator = kInclusive); - void resetFromBuffer(const void* buffer, size_t size) { - _buffer.reset(); - memcpy(_buffer.skip(size), buffer, size); + void setSizeByte(uint8_t size) { + dassert(size < kMaxBytesNeeded); + _buf[0] = 0x80 | size; } - const char* getBuffer() const { return _buffer.buf(); } - size_t getSize() const { return _buffer.len(); } - bool isEmpty() const { return _buffer.len() == 0; } - - const TypeBits& getTypeBits() const { return _typeBits; } + void appendBit(uint8_t oneOrZero); - int compare(const KeyString& other) const; + size_t _curBit; + bool _isAllZeros; - /** - * @return a hex encoding of this key - */ - std::string toString() const; - - private: - - void _appendAllElementsForIndexing(const BSONObj& obj, Ordering ord, - Discriminator discriminator); - - void _appendBool(bool val, bool invert); - void _appendDate(Date_t val, bool invert); - void _appendTimestamp(Timestamp val, bool invert); - void _appendOID(OID val, bool invert); - void _appendString(StringData val, bool invert); - void _appendSymbol(StringData val, bool invert); - void _appendCode(StringData val, bool invert); - void _appendCodeWString(const BSONCodeWScope& val, bool invert); - void _appendBinData(const BSONBinData& val, bool invert); - void _appendRegex(const BSONRegEx& val, bool invert); - void _appendDBRef(const BSONDBRef& val, bool invert); - void _appendArray(const BSONArray& val, bool invert); - void _appendObject(const BSONObj& val, bool invert); - void _appendNumberDouble(const double num, bool invert); - void _appendNumberLong(const long long num, bool invert); - void _appendNumberInt(const int num, bool invert); + // See getBuffer()/getSize() documentation for a description of how data is encoded. + // Currently whole buffer is copied in default copy methods. If they ever show up as hot + // in profiling, we should add copy operations that only copy the parts of _buf that are + // in use. + uint8_t _buf[1 /*size*/ + kMaxBytesNeeded]; + }; - /** - * @param name - optional, can be NULL - * if NULL, not included in encoding - * if not NULL, put in after type, before value - */ - void _appendBsonValue(const BSONElement& elem, - bool invert, - const StringData* name); - - void _appendStringLike(StringData str, bool invert); - void _appendBson(const BSONObj& obj, bool invert); - void _appendSmallDouble(double value, bool invert); - void _appendLargeDouble(double value, bool invert); - void _appendInteger(const long long num, bool invert); - void _appendPreshiftedIntegerPortion(uint64_t value, bool isNegative, bool invert); - - template <typename T> void _append(const T& thing, bool invert); - void _appendBytes(const void* source, size_t bytes, bool invert); - - TypeBits _typeBits; - StackBufBuilder _buffer; + enum Discriminator { + kInclusive, // Anything to be stored in an index must use this. + kExclusiveBefore, + kExclusiveAfter, }; - inline bool operator<(const KeyString& lhs, const KeyString& rhs) { - return lhs.compare(rhs) < 0; + KeyString() {} + + KeyString(const BSONObj& obj, Ordering ord, RecordId recordId) { + resetToKey(obj, ord, recordId); } - inline bool operator<=(const KeyString& lhs, const KeyString& rhs) { - return lhs.compare(rhs) <= 0; + KeyString(const BSONObj& obj, Ordering ord, Discriminator discriminator = kInclusive) { + resetToKey(obj, ord, discriminator); } - inline bool operator==(const KeyString& lhs, const KeyString& rhs) { - return lhs.compare(rhs) == 0; + explicit KeyString(RecordId rid) { + appendRecordId(rid); } - inline bool operator>(const KeyString& lhs, const KeyString& rhs) { - return lhs.compare(rhs) > 0; + static BSONObj toBson(StringData data, Ordering ord, const TypeBits& types); + static BSONObj toBson(const char* buffer, size_t len, Ordering ord, const TypeBits& types); + + /** + * Decodes a RecordId from the end of a buffer. + */ + static RecordId decodeRecordIdAtEnd(const void* buf, size_t size); + + /** + * Decodes a RecordId, consuming all bytes needed from reader. + */ + static RecordId decodeRecordId(BufReader* reader); + + void appendRecordId(RecordId loc); + void appendTypeBits(const TypeBits& bits); + + /** + * Resets to an empty state. + * Equivalent to but faster than *this = KeyString() + */ + void resetToEmpty() { + _buffer.reset(); + _typeBits.reset(); } - inline bool operator>=(const KeyString& lhs, const KeyString& rhs) { - return lhs.compare(rhs) >= 0; + void resetToKey(const BSONObj& obj, Ordering ord, RecordId recordId); + void resetToKey(const BSONObj& obj, Ordering ord, Discriminator discriminator = kInclusive); + void resetFromBuffer(const void* buffer, size_t size) { + _buffer.reset(); + memcpy(_buffer.skip(size), buffer, size); } - inline bool operator!=(const KeyString& lhs, const KeyString& rhs) { - return !(lhs == rhs); + const char* getBuffer() const { + return _buffer.buf(); + } + size_t getSize() const { + return _buffer.len(); + } + bool isEmpty() const { + return _buffer.len() == 0; } - inline std::ostream& operator<<(std::ostream& stream, const KeyString& value) { - return stream << value.toString(); + const TypeBits& getTypeBits() const { + return _typeBits; } -} // namespace mongo + int compare(const KeyString& other) const; + + /** + * @return a hex encoding of this key + */ + std::string toString() const; + +private: + void _appendAllElementsForIndexing(const BSONObj& obj, + Ordering ord, + Discriminator discriminator); + + void _appendBool(bool val, bool invert); + void _appendDate(Date_t val, bool invert); + void _appendTimestamp(Timestamp val, bool invert); + void _appendOID(OID val, bool invert); + void _appendString(StringData val, bool invert); + void _appendSymbol(StringData val, bool invert); + void _appendCode(StringData val, bool invert); + void _appendCodeWString(const BSONCodeWScope& val, bool invert); + void _appendBinData(const BSONBinData& val, bool invert); + void _appendRegex(const BSONRegEx& val, bool invert); + void _appendDBRef(const BSONDBRef& val, bool invert); + void _appendArray(const BSONArray& val, bool invert); + void _appendObject(const BSONObj& val, bool invert); + void _appendNumberDouble(const double num, bool invert); + void _appendNumberLong(const long long num, bool invert); + void _appendNumberInt(const int num, bool invert); + + /** + * @param name - optional, can be NULL + * if NULL, not included in encoding + * if not NULL, put in after type, before value + */ + void _appendBsonValue(const BSONElement& elem, bool invert, const StringData* name); + + void _appendStringLike(StringData str, bool invert); + void _appendBson(const BSONObj& obj, bool invert); + void _appendSmallDouble(double value, bool invert); + void _appendLargeDouble(double value, bool invert); + void _appendInteger(const long long num, bool invert); + void _appendPreshiftedIntegerPortion(uint64_t value, bool isNegative, bool invert); + + template <typename T> + void _append(const T& thing, bool invert); + void _appendBytes(const void* source, size_t bytes, bool invert); + + TypeBits _typeBits; + StackBufBuilder _buffer; +}; + +inline bool operator<(const KeyString& lhs, const KeyString& rhs) { + return lhs.compare(rhs) < 0; +} + +inline bool operator<=(const KeyString& lhs, const KeyString& rhs) { + return lhs.compare(rhs) <= 0; +} + +inline bool operator==(const KeyString& lhs, const KeyString& rhs) { + return lhs.compare(rhs) == 0; +} + +inline bool operator>(const KeyString& lhs, const KeyString& rhs) { + return lhs.compare(rhs) > 0; +} + +inline bool operator>=(const KeyString& lhs, const KeyString& rhs) { + return lhs.compare(rhs) >= 0; +} + +inline bool operator!=(const KeyString& lhs, const KeyString& rhs) { + return !(lhs == rhs); +} + +inline std::ostream& operator<<(std::ostream& stream, const KeyString& value) { + return stream << value.toString(); +} + +} // namespace mongo |