summary refs log tree commit diff
path: root/src/mongo/db/storage/key_string.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/db/storage/key_string.h')
-rw-r--r-- src/mongo/db/storage/key_string.h 110
1 files changed, 67 insertions, 43 deletions
diff --git a/src/mongo/db/storage/key_string.h b/src/mongo/db/storage/key_string.h
index 31be4a833c6..7c30dcffb28 100644
--- a/src/mongo/db/storage/key_string.h
+++ b/src/mongo/db/storage/key_string.h
@@ -66,17 +66,11 @@ public:
*/
class TypeBits {
public:
- // Sufficient bytes to encode extra type information for any BSON key that fits in 1KB.
- // The encoding format will need to change if we raise this limit.
- static const uint8_t kMaxBytesNeeded = 127;
+ // See comments in getBuffer() about short/long encoding schemes.
+ static const uint8_t kMaxBytesForShortEncoding = 127;
+ static const uint8_t kPrefixBytes = 5;
+ // TODO SERVER-36385: Remove this 1KB limit.
static const uint32_t kMaxKeyBytes = 1024;
- static const uint32_t kMaxTypeBitsPerDecimal = 17;
- static const uint32_t kBytesForTypeAndEmptyKey = 2;
- static const uint32_t kMaxDecimalsPerKey =
- kMaxKeyBytes / (sizeof(Decimal128::Value) + kBytesForTypeAndEmptyKey);
- MONGO_STATIC_ASSERT_MSG(
- kMaxTypeBitsPerDecimal* kMaxDecimalsPerKey < kMaxBytesNeeded * 8UL,
- "encoding needs change to contain all type bits for worst case key");
static const uint8_t kStoredDecimalExponentBits = 6;
static const uint32_t kStoredDecimalExponentMask = (1U << kStoredDecimalExponentBits) - 1;
@@ -84,6 +78,15 @@ public:
reset();
}
+ TypeBits(const TypeBits& tb)
+ : version(tb.version), _curBit(tb._curBit), _isAllZeros(tb._isAllZeros) {
+ _buf.reset();
+ _buf.appendBuf(tb._buf.buf(), tb._buf.len());
+ }
+ TypeBits& operator=(const TypeBits& tb) = delete;
+ TypeBits(TypeBits&&) = default;
+ TypeBits& operator=(TypeBits&&) = delete;
+
/**
* If there are no bytes remaining, assumes AllZeros. Otherwise, reads bytes out of the
* BufReader in the format described on the getBuffer() method.
@@ -107,40 +110,59 @@ public:
* instance.
*
* Encoded format:
- * Case 1 (first byte has high bit set to 1):
- * Remaining bits of first byte encode number of follow-up bytes that are data
- * bytes. Note that _buf is always maintained in this format but these methods may
- * return one of the other formats, if possible, by skipping over the first byte.
- *
- * Case 2 (first byte is 0x0):
+ * Case 1 (first byte is 0x0):
* This encodes the "AllZeros" state which represents an infinite stream of bits set
* to 0. Callers may optionally encode this case as an empty buffer if they have
* another way to mark the end of the buffer. There are no follow-up bytes.
*
- * Case 3 (first byte isn't 0x0 but has high bit set to 0):
+ * Case 2 (first byte isn't 0x0 but has high bit set to 0):
* The first byte is the only data byte. This can represent any 7-bit sequence or an
* 8-bit sequence if the 8th bit is 0, since the 8th bit is the same as the bit that
* is 1 if the first byte is the size byte. There are no follow-up bytes.
*
+ * Case 3 (first byte has high bit set to 1 but it's not 0x80):
+ * Remaining bits of first byte encode number of follow-up bytes that are data
+ * bytes.
+ *
+ * Case 4 (first byte is 0x80):
+ * The first byte is a signal byte indicating that this TypeBits uses the long
+ * encoding scheme: the next four bytes (in little-endian order) hold the number of
+ * data bytes.
+ *
* Within data bytes (ie everything excluding the size byte if there is one), bits are
* packed in from low to high.
*/
- const uint8_t* getBuffer() const {
- return getSize() == 1 ? _buf + 1 : _buf;
+ const char* getBuffer() const {
+ if (_isAllZeros)
+ return ""; // Case 1: pointer to a zero byte.
+
+ if (getSize() == 1)
+ return getDataBuffer(); // Case 2: all bits in one byte; no size byte.
+
+ // Case 3 & 4: size byte(s) + data bytes.
+ return isLongEncoding() ? _buf.buf() : (getDataBuffer() - 1);
}
size_t getSize() const {
- if (_isAllZeros) { // Case 2
- dassert(_buf[1] == 0);
+ if (_isAllZeros) { // Case 1
+ dassert(getDataBufferLen() == 0 || getDataBuffer()[0] == 0);
return 1;
}
- uint8_t rawSize = getSizeByte();
- dassert(rawSize >= 1); // 0 should be handled as isAllZeros.
- if (rawSize == 1 && !(_buf[1] & 0x80)) { // Case 3
+ uint32_t rawSize = getDataBufferLen();
+ dassert(rawSize >= 1); // 0 should be handled as isAllZeros.
+ if (rawSize > kMaxBytesForShortEncoding) { // Case 4
+ return rawSize + kPrefixBytes;
+ }
+ if (rawSize == 1 && !(getDataBuffer()[0] & 0x80)) { // Case 2
return 1;
}
- return rawSize + 1; // Case 1
+ return rawSize + 1; // Case 3
+ }
+
+ bool isLongEncoding() const {
+ // TypeBits with all zeros is in short encoding regardless of the data buffer length.
+ return !_isAllZeros && getDataBufferLen() > kMaxBytesForShortEncoding;
}
//
@@ -176,8 +198,7 @@ public:
void reset() {
_curBit = 0;
_isAllZeros = true;
- setSizeByte(0);
- _buf[1] = 0;
+ _buf.setlen(kPrefixBytes);
}
void appendString() {
@@ -240,29 +261,32 @@ public:
const Version version;
private:
- /**
- * size only includes data bytes, not the size byte itself.
- */
- uint8_t getSizeByte() const {
- return _buf[0] & 0x7f;
+ static uint32_t readSizeFromBuffer(BufReader* reader);
+
+ void setRawSize(uint32_t size);
+
+ const char* getDataBuffer() const {
+ return _buf.buf() + kPrefixBytes;
+ }
+ char* getDataBuffer() {
+ return _buf.buf() + kPrefixBytes;
}
- void setSizeByte(uint8_t size) {
- // This error can only occur in cases where the key is not only too long, but also
- // has too many fields requiring type bits.
- uassert(ErrorCodes::KeyTooLong, "The key is too long", size < kMaxBytesNeeded);
- _buf[0] = 0x80 | size;
+ uint32_t getDataBufferLen() const {
+ return _buf.len() - kPrefixBytes;
}
void appendBit(uint8_t oneOrZero);
size_t _curBit;
bool _isAllZeros;
-
- // See getBuffer()/getSize() documentation for a description of how data is encoded.
- // Currently whole buffer is copied in default copy methods. If they ever show up as hot
- // in profiling, we should add copy operations that only copy the parts of _buf that are
- // in use.
- uint8_t _buf[1 /*size*/ + kMaxBytesNeeded];
+ /**
+ * See getBuffer()/getSize() documentation for a description of how data is encoded. When
+ * the TypeBits size is in the short encoding range (<= 127), the bytes starting from the
+ * fifth byte are the complete TypeBits in the short encoding scheme (1 size byte + data
+ * bytes). When the TypeBits size is in the long encoding range (> 127), all the bytes are
+ * used for the long encoding format (first byte + 4 size bytes + data bytes).
+ */
+ StackBufBuilder _buf;
};
enum Discriminator {