diff options
Diffstat (limited to 'src/mongo/db/storage/key_string.cpp')
-rw-r--r-- | src/mongo/db/storage/key_string.cpp | 124 |
1 files changed, 35 insertions, 89 deletions
diff --git a/src/mongo/db/storage/key_string.cpp b/src/mongo/db/storage/key_string.cpp index eb69641f957..17c5a90c14d 100644 --- a/src/mongo/db/storage/key_string.cpp +++ b/src/mongo/db/storage/key_string.cpp @@ -238,6 +238,8 @@ const uint8_t kEnd = 0x4; const uint8_t kLess = 1; const uint8_t kGreater = 254; +// The maximum length of a RecordId binary string that may be appended to a KeyString. +const int8_t kMaxRecordIdStrLen = 127; } // namespace // some utility functions @@ -515,45 +517,23 @@ void BuilderBase<BufferT>::_appendRecordIdLong(int64_t val) { template <class BufferT> void BuilderBase<BufferT>::_appendRecordIdStr(const char* str, int size) { - // Append the RecordId binary string as-is, then append the encoded binary string size. - // The binary string size is encoded in 7-bit increments over one or more size bytes. - // The 8th bit of a size byte is a continuation bit that is set on all size bytes except - // the leftmost (i.e. the last) one. This allows decoding the size right-to-left until there are - // no more size bytes remaining with continuation bits. See decodeRecordIdStrAtEnd for the - // decoding algorithm. This 7-bit size encoding ensures backward compatibility with 5.0, which - // supports RecordId binary strings up to 127 bytes, or what fits in 7 bits. - + // This encoding for RecordId binary strings stores the size at the end. This means that a + // RecordId may only be appended at the end of a KeyString. That is, it cannot be appended in + // the middle of a KeyString and also be binary-comparable. + + // The current maximum string length is 127. The high bit is reserved for future usage. + keyStringAssert(5994901, + fmt::format("cannot generate key for RecordId longer than maximum of {} bytes", + kMaxRecordIdStrLen), + size <= kMaxRecordIdStrLen); invariant(size > 0); - invariant(size <= RecordId::kBigStrMaxSize); const bool invert = false; - // Encode size - uint8_t encodedSize[kRecordIdStrEncodedSizeMaxBytes] = {0}; - int highestSizeByte = 0; - bool highestSizeByteSet = false; - - for (int sizeBytes = kRecordIdStrEncodedSizeMaxBytes - 1; sizeBytes >= 0; sizeBytes--) { - encodedSize[sizeBytes] = (size >> (sizeBytes * 7)) & 0x7F; - if (encodedSize[sizeBytes] && highestSizeByteSet == false) { - highestSizeByteSet = true; - highestSizeByte = sizeBytes; - } - } - for (int i = highestSizeByte; i > 0; i--) { - encodedSize[i] |= 0x80; - } - - const int encodedSizeLen = highestSizeByte + 1; - - // Preallocate room for the RecordId binary string and its encoded size - // to reduce the number of potential reallocs - _buffer().reserveBytes(size + encodedSizeLen); - _buffer().claimReservedBytes(size + encodedSizeLen); - - // Append RecordId and its encoded size + // String is encoded with a single byte for the size at the end. _appendBytes(str, size, invert); - _appendBytes(encodedSize, encodedSizeLen, invert); + auto encodedSize = static_cast<uint8_t>(size); + _append(encodedSize, invert); } template <class BufferT> @@ -2526,34 +2506,19 @@ size_t sizeWithoutRecordIdLongAtEnd(const void* bufferRaw, size_t bufSize) { } size_t sizeWithoutRecordIdStrAtEnd(const void* bufferRaw, size_t bufSize) { - // See decodeRecordIdStrAtEnd for the size decoding algorithm invariant(bufSize > 0); const uint8_t* buffer = static_cast<const uint8_t*>(bufferRaw); - // Decode RecordId binary string size - size_t ridSize = 0; - uint8_t sizes[kRecordIdStrEncodedSizeMaxBytes] = {0}; - - // Continuation bytes - size_t sizeByteId = 0; - for (; buffer[bufSize - 1 - sizeByteId] & 0x80; sizeByteId++) { - invariant(bufSize >= sizeByteId + 1 /* non-cont bytes */); - invariant(sizeByteId < kRecordIdStrEncodedSizeMaxBytes); - sizes[sizeByteId] = buffer[bufSize - 1 - sizeByteId] & 0x7F; - } - // Last (non-continuation) byte - invariant(sizeByteId < kRecordIdStrEncodedSizeMaxBytes); - sizes[sizeByteId] = buffer[bufSize - 1 - sizeByteId]; - - const size_t numSegments = sizeByteId + 1; - - for (; sizeByteId > 0; sizeByteId--) { - ridSize += sizes[sizeByteId] << ((numSegments - sizeByteId - 1) * 7); - } - ridSize += sizes[sizeByteId] << ((numSegments - sizeByteId - 1) * 7); - - invariant(bufSize >= ridSize + numSegments); - return bufSize - ridSize - numSegments; + // The current encoding for strings supports strings up to 128 bytes. The high bit is reserved + // for future usage. + uint8_t len = buffer[bufSize - 1]; + keyStringAssert(5566400, + fmt::format("Cannot decode record id string longer than {} bytes; size is {}", + kMaxRecordIdStrLen, + len), + len <= kMaxRecordIdStrLen); + invariant(bufSize > static_cast<size_t>(len + 1)); + return bufSize - len - 1; } RecordId decodeRecordIdLong(BufReader* reader) { @@ -2571,39 +2536,20 @@ RecordId decodeRecordIdLong(BufReader* reader) { } RecordId decodeRecordIdStrAtEnd(const void* bufferRaw, size_t bufSize) { - // See _appendRecordIdStr for the encoding scheme. - // The RecordId binary string size is decoded right-to-left, up to the size byte - // without continuation bit. - invariant(bufSize > 0); const uint8_t* buffer = static_cast<const uint8_t*>(bufferRaw); - // Decode RecordId binary string size - size_t ridSize = 0; - uint8_t sizes[kRecordIdStrEncodedSizeMaxBytes] = {0}; - - // Continuation bytes - size_t sizeByteId = 0; - for (; buffer[bufSize - 1 - sizeByteId] & 0x80; sizeByteId++) { - invariant(bufSize >= sizeByteId + 1 /* non-cont byte */); - invariant(sizeByteId < kRecordIdStrEncodedSizeMaxBytes); - sizes[sizeByteId] = buffer[bufSize - 1 - sizeByteId] & 0x7F; - } - // Last (non-continuation) byte - invariant(sizeByteId < kRecordIdStrEncodedSizeMaxBytes); - sizes[sizeByteId] = buffer[bufSize - 1 - sizeByteId]; - - const size_t numSegments = sizeByteId + 1; - - for (; sizeByteId > 0; sizeByteId--) { - ridSize += sizes[sizeByteId] << ((numSegments - sizeByteId - 1) * 7); - } - ridSize += sizes[sizeByteId] << ((numSegments - sizeByteId - 1) * 7); - - invariant(bufSize >= ridSize + numSegments); - - return RecordId(reinterpret_cast<const char*>(buffer) + (bufSize - ridSize - numSegments), - ridSize); + // The current encoding for strings supports strings up to 128 bytes. The high bit is reserved + // for future usage. + uint8_t len = buffer[bufSize - 1]; + keyStringAssert(5577900, + fmt::format("Cannot decode record id string longer than {} bytes; size is {}", + kMaxRecordIdStrLen, + len), + len <= kMaxRecordIdStrLen); + invariant(bufSize > len); + const uint8_t* firstBytePtr = (buffer + bufSize - len - 1); + return RecordId(reinterpret_cast<const char*>(firstBytePtr), len); } int compare(const char* leftBuf, const char* rightBuf, size_t leftSize, size_t rightSize) { |