summaryrefslogtreecommitdiff
path: root/src/mongo/db/storage/key_string.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/db/storage/key_string.h')
-rw-r--r--src/mongo/db/storage/key_string.h500
1 files changed, 264 insertions, 236 deletions
diff --git a/src/mongo/db/storage/key_string.h b/src/mongo/db/storage/key_string.h
index 04b341521bf..7ef8ee0e723 100644
--- a/src/mongo/db/storage/key_string.h
+++ b/src/mongo/db/storage/key_string.h
@@ -39,287 +39,315 @@
namespace mongo {
- class KeyString {
+class KeyString {
+public:
+ /**
+ * Encodes info needed to restore the original BSONTypes from a KeyString. They cannot be
+ * stored in place since we don't want them to affect the ordering (1 and 1.0 compare as
+ * equal).
+ */
+ class TypeBits {
public:
+ // Sufficient bytes to encode extra type information for any BSON key that fits in 1KB.
+ // The encoding format will need to change if we raise this limit.
+ static const uint8_t kMaxBytesNeeded = 127;
+
+ TypeBits() {
+ reset();
+ }
/**
- * Encodes info needed to restore the original BSONTypes from a KeyString. They cannot be
- * stored in place since we don't want them to affect the ordering (1 and 1.0 compare as
- * equal).
+ * If there are no bytes remaining, assumes AllZeros. Otherwise, reads bytes out of the
+ * BufReader in the format described on the getBuffer() method.
*/
- class TypeBits {
- public:
- // Sufficient bytes to encode extra type information for any BSON key that fits in 1KB.
- // The encoding format will need to change if we raise this limit.
- static const uint8_t kMaxBytesNeeded = 127;
-
- TypeBits() { reset(); }
-
- /**
- * If there are no bytes remaining, assumes AllZeros. Otherwise, reads bytes out of the
- * BufReader in the format described on the getBuffer() method.
- */
- void resetFromBuffer(BufReader* reader);
- static TypeBits fromBuffer(BufReader* reader) {
- TypeBits out;
- out.resetFromBuffer(reader);
- return out;
- }
-
- /**
- * If true, no bits have been set to one. This is true if no bits have been set at all.
- */
- bool isAllZeros() const { return _isAllZeros; }
-
- /**
- * These methods return a buffer and size which encodes all of the type bits in this
- * instance.
- *
- * Encoded format:
- * Case 1 (first byte has high bit set to 1):
- * Remaining bits of first byte encode number of follow-up bytes that are data
- * bytes. Note that _buf is always maintained in this format but these methods may
- * return one of the other formats, if possible, by skipping over the first byte.
- *
- * Case 2 (first byte is 0x0):
- * This encodes the "AllZeros" state which represents an infinite stream of bits set
- * to 0. Callers may optionally encode this case as an empty buffer if they have
- * another way to mark the end of the buffer. There are no follow-up bytes.
- *
- * Case 3 (first byte isn't 0x0 but has high bit set to 0):
- * The first byte is the only data byte. This can represent any 7-bit sequence or an
- * 8-bit sequence if the 8th bit is 0, since the 8th bit is the same as the bit that
- * is 1 if the first byte is the size byte. There are no follow-up bytes.
- *
- * Within data bytes (ie everything excluding the size byte if there is one), bits are
- * packed in from low to high.
- */
- const uint8_t* getBuffer() const { return getSize() == 1 ? _buf + 1 : _buf; }
- size_t getSize() const {
- if (_isAllZeros) { // Case 2
- dassert(_buf[1] == 0);
- return 1;
- }
-
- uint8_t rawSize = getSizeByte();
- dassert(rawSize >= 1); // 0 should be handled as isAllZeros.
- if (rawSize == 1 && !(_buf[1] & 0x80)) { // Case 3
- return 1;
- }
-
- return rawSize + 1; // Case 1
- }
-
- //
- // Everything below is only for use by KeyString.
- //
-
- // Note: No space is used if all bits are 0 so the most common cases should be 0x0.
- static const uint8_t kString = 0x0;
- static const uint8_t kSymbol = 0x1;
-
- static const uint8_t kInt = 0x0;
- static const uint8_t kDouble = 0x1;
- static const uint8_t kLong = 0x2;
- static const uint8_t kNegativeZero = 0x3; // decodes as a double
-
- void reset() {
- _curBit = 0;
- _isAllZeros = true;
- setSizeByte(0);
- _buf[1] = 0;
- }
+ void resetFromBuffer(BufReader* reader);
+ static TypeBits fromBuffer(BufReader* reader) {
+ TypeBits out;
+ out.resetFromBuffer(reader);
+ return out;
+ }
- void appendString() { appendBit(kString); }
- void appendSymbol() { appendBit(kSymbol); }
+ /**
+ * If true, no bits have been set to one. This is true if no bits have been set at all.
+ */
+ bool isAllZeros() const {
+ return _isAllZeros;
+ }
- void appendNumberDouble() { appendBit(kDouble & 1); appendBit(kDouble >> 1); }
- void appendNumberInt() { appendBit(kInt & 1); appendBit(kInt >> 1); }
- void appendNumberLong() { appendBit(kLong & 1); appendBit(kLong >> 1); }
- void appendNegativeZero() {
- appendBit(kNegativeZero & 1);
- appendBit(kNegativeZero >> 1);
+ /**
+ * These methods return a buffer and size which encodes all of the type bits in this
+ * instance.
+ *
+ * Encoded format:
+ * Case 1 (first byte has high bit set to 1):
+ * Remaining bits of first byte encode number of follow-up bytes that are data
+ * bytes. Note that _buf is always maintained in this format but these methods may
+ * return one of the other formats, if possible, by skipping over the first byte.
+ *
+ * Case 2 (first byte is 0x0):
+ * This encodes the "AllZeros" state which represents an infinite stream of bits set
+ * to 0. Callers may optionally encode this case as an empty buffer if they have
+ * another way to mark the end of the buffer. There are no follow-up bytes.
+ *
+ * Case 3 (first byte isn't 0x0 but has high bit set to 0):
+ * The first byte is the only data byte. This can represent any 7-bit sequence or an
+ * 8-bit sequence if the 8th bit is 0, since the 8th bit is the same as the bit that
+ * is 1 if the first byte is the size byte. There are no follow-up bytes.
+ *
+ * Within data bytes (ie everything excluding the size byte if there is one), bits are
+ * packed in from low to high.
+ */
+ const uint8_t* getBuffer() const {
+ return getSize() == 1 ? _buf + 1 : _buf;
+ }
+ size_t getSize() const {
+ if (_isAllZeros) { // Case 2
+ dassert(_buf[1] == 0);
+ return 1;
}
- class Reader {
- public:
- /**
- * Passed in TypeBits must outlive this Reader instance.
- */
- explicit Reader(const TypeBits& typeBits) : _curBit(0), _typeBits(typeBits) {}
-
- uint8_t readStringLike() { return readBit(); }
- uint8_t readNumeric() {
- uint8_t lowBit = readBit();
- return lowBit | (readBit() << 1);
- }
-
- private:
- uint8_t readBit();
-
- size_t _curBit;
- const TypeBits& _typeBits;
- };
-
- private:
- /**
- * size only includes data bytes, not the size byte itself.
- */
- uint8_t getSizeByte() const { return _buf[0] & 0x3f; }
- void setSizeByte(uint8_t size) {
- dassert(size < kMaxBytesNeeded);
- _buf[0] = 0x80 | size;
+ uint8_t rawSize = getSizeByte();
+ dassert(rawSize >= 1); // 0 should be handled as isAllZeros.
+ if (rawSize == 1 && !(_buf[1] & 0x80)) { // Case 3
+ return 1;
}
- void appendBit(uint8_t oneOrZero);
-
- size_t _curBit;
- bool _isAllZeros;
+ return rawSize + 1; // Case 1
+ }
- // See getBuffer()/getSize() documentation for a description of how data is encoded.
- // Currently whole buffer is copied in default copy methods. If they ever show up as hot
- // in profiling, we should add copy operations that only copy the parts of _buf that are
- // in use.
- uint8_t _buf[1/*size*/ + kMaxBytesNeeded];
- };
+ //
+ // Everything below is only for use by KeyString.
+ //
- enum Discriminator {
- kInclusive, // Anything to be stored in an index must use this.
- kExclusiveBefore,
- kExclusiveAfter,
- };
+ // Note: No space is used if all bits are 0 so the most common cases should be 0x0.
+ static const uint8_t kString = 0x0;
+ static const uint8_t kSymbol = 0x1;
- KeyString() {}
+ static const uint8_t kInt = 0x0;
+ static const uint8_t kDouble = 0x1;
+ static const uint8_t kLong = 0x2;
+ static const uint8_t kNegativeZero = 0x3; // decodes as a double
- KeyString(const BSONObj& obj, Ordering ord, RecordId recordId) {
- resetToKey(obj, ord, recordId);
+ void reset() {
+ _curBit = 0;
+ _isAllZeros = true;
+ setSizeByte(0);
+ _buf[1] = 0;
}
- KeyString(const BSONObj& obj, Ordering ord, Discriminator discriminator = kInclusive) {
- resetToKey(obj, ord, discriminator);
+ void appendString() {
+ appendBit(kString);
+ }
+ void appendSymbol() {
+ appendBit(kSymbol);
}
- explicit KeyString(RecordId rid) {
- appendRecordId(rid);
+ void appendNumberDouble() {
+ appendBit(kDouble & 1);
+ appendBit(kDouble >> 1);
+ }
+ void appendNumberInt() {
+ appendBit(kInt & 1);
+ appendBit(kInt >> 1);
+ }
+ void appendNumberLong() {
+ appendBit(kLong & 1);
+ appendBit(kLong >> 1);
+ }
+ void appendNegativeZero() {
+ appendBit(kNegativeZero & 1);
+ appendBit(kNegativeZero >> 1);
}
- static BSONObj toBson(StringData data, Ordering ord, const TypeBits& types);
- static BSONObj toBson(const char* buffer, size_t len, Ordering ord,
- const TypeBits& types);
+ class Reader {
+ public:
+ /**
+ * Passed in TypeBits must outlive this Reader instance.
+ */
+ explicit Reader(const TypeBits& typeBits) : _curBit(0), _typeBits(typeBits) {}
- /**
- * Decodes a RecordId from the end of a buffer.
- */
- static RecordId decodeRecordIdAtEnd(const void* buf, size_t size);
+ uint8_t readStringLike() {
+ return readBit();
+ }
+ uint8_t readNumeric() {
+ uint8_t lowBit = readBit();
+ return lowBit | (readBit() << 1);
+ }
- /**
- * Decodes a RecordId, consuming all bytes needed from reader.
- */
- static RecordId decodeRecordId(BufReader* reader);
+ private:
+ uint8_t readBit();
- void appendRecordId(RecordId loc);
- void appendTypeBits(const TypeBits& bits);
+ size_t _curBit;
+ const TypeBits& _typeBits;
+ };
+ private:
/**
- * Resets to an empty state.
- * Equivalent to but faster than *this = KeyString()
+ * size only includes data bytes, not the size byte itself.
*/
- void resetToEmpty() {
- _buffer.reset();
- _typeBits.reset();
+ uint8_t getSizeByte() const {
+ return _buf[0] & 0x3f;
}
-
- void resetToKey(const BSONObj& obj, Ordering ord, RecordId recordId);
- void resetToKey(const BSONObj& obj, Ordering ord, Discriminator discriminator = kInclusive);
- void resetFromBuffer(const void* buffer, size_t size) {
- _buffer.reset();
- memcpy(_buffer.skip(size), buffer, size);
+ void setSizeByte(uint8_t size) {
+ dassert(size < kMaxBytesNeeded);
+ _buf[0] = 0x80 | size;
}
- const char* getBuffer() const { return _buffer.buf(); }
- size_t getSize() const { return _buffer.len(); }
- bool isEmpty() const { return _buffer.len() == 0; }
-
- const TypeBits& getTypeBits() const { return _typeBits; }
+ void appendBit(uint8_t oneOrZero);
- int compare(const KeyString& other) const;
+ size_t _curBit;
+ bool _isAllZeros;
- /**
- * @return a hex encoding of this key
- */
- std::string toString() const;
-
- private:
-
- void _appendAllElementsForIndexing(const BSONObj& obj, Ordering ord,
- Discriminator discriminator);
-
- void _appendBool(bool val, bool invert);
- void _appendDate(Date_t val, bool invert);
- void _appendTimestamp(Timestamp val, bool invert);
- void _appendOID(OID val, bool invert);
- void _appendString(StringData val, bool invert);
- void _appendSymbol(StringData val, bool invert);
- void _appendCode(StringData val, bool invert);
- void _appendCodeWString(const BSONCodeWScope& val, bool invert);
- void _appendBinData(const BSONBinData& val, bool invert);
- void _appendRegex(const BSONRegEx& val, bool invert);
- void _appendDBRef(const BSONDBRef& val, bool invert);
- void _appendArray(const BSONArray& val, bool invert);
- void _appendObject(const BSONObj& val, bool invert);
- void _appendNumberDouble(const double num, bool invert);
- void _appendNumberLong(const long long num, bool invert);
- void _appendNumberInt(const int num, bool invert);
+ // See getBuffer()/getSize() documentation for a description of how data is encoded.
+ // Currently whole buffer is copied in default copy methods. If they ever show up as hot
+ // in profiling, we should add copy operations that only copy the parts of _buf that are
+ // in use.
+ uint8_t _buf[1 /*size*/ + kMaxBytesNeeded];
+ };
- /**
- * @param name - optional, can be NULL
- * if NULL, not included in encoding
- * if not NULL, put in after type, before value
- */
- void _appendBsonValue(const BSONElement& elem,
- bool invert,
- const StringData* name);
-
- void _appendStringLike(StringData str, bool invert);
- void _appendBson(const BSONObj& obj, bool invert);
- void _appendSmallDouble(double value, bool invert);
- void _appendLargeDouble(double value, bool invert);
- void _appendInteger(const long long num, bool invert);
- void _appendPreshiftedIntegerPortion(uint64_t value, bool isNegative, bool invert);
-
- template <typename T> void _append(const T& thing, bool invert);
- void _appendBytes(const void* source, size_t bytes, bool invert);
-
- TypeBits _typeBits;
- StackBufBuilder _buffer;
+ enum Discriminator {
+ kInclusive, // Anything to be stored in an index must use this.
+ kExclusiveBefore,
+ kExclusiveAfter,
};
- inline bool operator<(const KeyString& lhs, const KeyString& rhs) {
- return lhs.compare(rhs) < 0;
+ KeyString() {}
+
+ KeyString(const BSONObj& obj, Ordering ord, RecordId recordId) {
+ resetToKey(obj, ord, recordId);
}
- inline bool operator<=(const KeyString& lhs, const KeyString& rhs) {
- return lhs.compare(rhs) <= 0;
+ KeyString(const BSONObj& obj, Ordering ord, Discriminator discriminator = kInclusive) {
+ resetToKey(obj, ord, discriminator);
}
- inline bool operator==(const KeyString& lhs, const KeyString& rhs) {
- return lhs.compare(rhs) == 0;
+ explicit KeyString(RecordId rid) {
+ appendRecordId(rid);
}
- inline bool operator>(const KeyString& lhs, const KeyString& rhs) {
- return lhs.compare(rhs) > 0;
+ static BSONObj toBson(StringData data, Ordering ord, const TypeBits& types);
+ static BSONObj toBson(const char* buffer, size_t len, Ordering ord, const TypeBits& types);
+
+ /**
+ * Decodes a RecordId from the end of a buffer.
+ */
+ static RecordId decodeRecordIdAtEnd(const void* buf, size_t size);
+
+ /**
+ * Decodes a RecordId, consuming all bytes needed from reader.
+ */
+ static RecordId decodeRecordId(BufReader* reader);
+
+ void appendRecordId(RecordId loc);
+ void appendTypeBits(const TypeBits& bits);
+
+ /**
+ * Resets to an empty state.
+ * Equivalent to but faster than *this = KeyString()
+ */
+ void resetToEmpty() {
+ _buffer.reset();
+ _typeBits.reset();
}
- inline bool operator>=(const KeyString& lhs, const KeyString& rhs) {
- return lhs.compare(rhs) >= 0;
+ void resetToKey(const BSONObj& obj, Ordering ord, RecordId recordId);
+ void resetToKey(const BSONObj& obj, Ordering ord, Discriminator discriminator = kInclusive);
+ void resetFromBuffer(const void* buffer, size_t size) {
+ _buffer.reset();
+ memcpy(_buffer.skip(size), buffer, size);
}
- inline bool operator!=(const KeyString& lhs, const KeyString& rhs) {
- return !(lhs == rhs);
+ const char* getBuffer() const {
+ return _buffer.buf();
+ }
+ size_t getSize() const {
+ return _buffer.len();
+ }
+ bool isEmpty() const {
+ return _buffer.len() == 0;
}
- inline std::ostream& operator<<(std::ostream& stream, const KeyString& value) {
- return stream << value.toString();
+ const TypeBits& getTypeBits() const {
+ return _typeBits;
}
-} // namespace mongo
+ int compare(const KeyString& other) const;
+
+ /**
+ * @return a hex encoding of this key
+ */
+ std::string toString() const;
+
+private:
+ void _appendAllElementsForIndexing(const BSONObj& obj,
+ Ordering ord,
+ Discriminator discriminator);
+
+ void _appendBool(bool val, bool invert);
+ void _appendDate(Date_t val, bool invert);
+ void _appendTimestamp(Timestamp val, bool invert);
+ void _appendOID(OID val, bool invert);
+ void _appendString(StringData val, bool invert);
+ void _appendSymbol(StringData val, bool invert);
+ void _appendCode(StringData val, bool invert);
+ void _appendCodeWString(const BSONCodeWScope& val, bool invert);
+ void _appendBinData(const BSONBinData& val, bool invert);
+ void _appendRegex(const BSONRegEx& val, bool invert);
+ void _appendDBRef(const BSONDBRef& val, bool invert);
+ void _appendArray(const BSONArray& val, bool invert);
+ void _appendObject(const BSONObj& val, bool invert);
+ void _appendNumberDouble(const double num, bool invert);
+ void _appendNumberLong(const long long num, bool invert);
+ void _appendNumberInt(const int num, bool invert);
+
+ /**
+ * @param name - optional, can be NULL
+ * if NULL, not included in encoding
+ * if not NULL, put in after type, before value
+ */
+ void _appendBsonValue(const BSONElement& elem, bool invert, const StringData* name);
+
+ void _appendStringLike(StringData str, bool invert);
+ void _appendBson(const BSONObj& obj, bool invert);
+ void _appendSmallDouble(double value, bool invert);
+ void _appendLargeDouble(double value, bool invert);
+ void _appendInteger(const long long num, bool invert);
+ void _appendPreshiftedIntegerPortion(uint64_t value, bool isNegative, bool invert);
+
+ template <typename T>
+ void _append(const T& thing, bool invert);
+ void _appendBytes(const void* source, size_t bytes, bool invert);
+
+ TypeBits _typeBits;
+ StackBufBuilder _buffer;
+};
+
+inline bool operator<(const KeyString& lhs, const KeyString& rhs) {
+ return lhs.compare(rhs) < 0;
+}
+
+inline bool operator<=(const KeyString& lhs, const KeyString& rhs) {
+ return lhs.compare(rhs) <= 0;
+}
+
+inline bool operator==(const KeyString& lhs, const KeyString& rhs) {
+ return lhs.compare(rhs) == 0;
+}
+
+inline bool operator>(const KeyString& lhs, const KeyString& rhs) {
+ return lhs.compare(rhs) > 0;
+}
+
+inline bool operator>=(const KeyString& lhs, const KeyString& rhs) {
+ return lhs.compare(rhs) >= 0;
+}
+
+inline bool operator!=(const KeyString& lhs, const KeyString& rhs) {
+ return !(lhs == rhs);
+}
+
+inline std::ostream& operator<<(std::ostream& stream, const KeyString& value) {
+ return stream << value.toString();
+}
+
+} // namespace mongo