summary refs log tree commit diff
path: root/src/mongo/db/storage/key_string.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/db/storage/key_string.cpp')
-rw-r--r-- src/mongo/db/storage/key_string.cpp 124
1 files changed, 35 insertions, 89 deletions
diff --git a/src/mongo/db/storage/key_string.cpp b/src/mongo/db/storage/key_string.cpp
index eb69641f957..17c5a90c14d 100644
--- a/src/mongo/db/storage/key_string.cpp
+++ b/src/mongo/db/storage/key_string.cpp
@@ -238,6 +238,8 @@ const uint8_t kEnd = 0x4;
const uint8_t kLess = 1;
const uint8_t kGreater = 254;
+// The maximum length of a RecordId binary string that may be appended to a KeyString.
+const int8_t kMaxRecordIdStrLen = 127;
} // namespace
// some utility functions
@@ -515,45 +517,23 @@ void BuilderBase<BufferT>::_appendRecordIdLong(int64_t val) {
template <class BufferT>
void BuilderBase<BufferT>::_appendRecordIdStr(const char* str, int size) {
- // Append the RecordId binary string as-is, then append the encoded binary string size.
- // The binary string size is encoded in 7-bit increments over one or more size bytes.
- // The 8th bit of a size byte is a continuation bit that is set on all size bytes except
- // the leftmost (i.e. the last) one. This allows decoding the size right-to-left until there are
- // no more size bytes remaining with continuation bits. See decodeRecordIdStrAtEnd for the
- // decoding algorithm. This 7-bit size encoding ensures backward compatibility with 5.0, which
- // supports RecordId binary strings up to 127 bytes, or what fits in 7 bits.
-
+ // This encoding for RecordId binary strings stores the size at the end. This means that a
+ // RecordId may only be appended at the end of a KeyString. That is, it cannot be appended in
+ // the middle of a KeyString and also be binary-comparable.
+
+ // The current maximum string length is 127. The high bit is reserved for future usage.
+ keyStringAssert(5994901,
+ fmt::format("cannot generate key for RecordId longer than maximum of {} bytes",
+ kMaxRecordIdStrLen),
+ size <= kMaxRecordIdStrLen);
invariant(size > 0);
- invariant(size <= RecordId::kBigStrMaxSize);
const bool invert = false;
- // Encode size
- uint8_t encodedSize[kRecordIdStrEncodedSizeMaxBytes] = {0};
- int highestSizeByte = 0;
- bool highestSizeByteSet = false;
-
- for (int sizeBytes = kRecordIdStrEncodedSizeMaxBytes - 1; sizeBytes >= 0; sizeBytes--) {
- encodedSize[sizeBytes] = (size >> (sizeBytes * 7)) & 0x7F;
- if (encodedSize[sizeBytes] && highestSizeByteSet == false) {
- highestSizeByteSet = true;
- highestSizeByte = sizeBytes;
- }
- }
- for (int i = highestSizeByte; i > 0; i--) {
- encodedSize[i] |= 0x80;
- }
-
- const int encodedSizeLen = highestSizeByte + 1;
-
- // Preallocate room for the RecordId binary string and its encoded size
- // to reduce the number of potential reallocs
- _buffer().reserveBytes(size + encodedSizeLen);
- _buffer().claimReservedBytes(size + encodedSizeLen);
-
- // Append RecordId and its encoded size
+ // String is encoded with a single byte for the size at the end.
_appendBytes(str, size, invert);
- _appendBytes(encodedSize, encodedSizeLen, invert);
+ auto encodedSize = static_cast<uint8_t>(size);
+ _append(encodedSize, invert);
}
template <class BufferT>
@@ -2526,34 +2506,19 @@ size_t sizeWithoutRecordIdLongAtEnd(const void* bufferRaw, size_t bufSize) {
}
size_t sizeWithoutRecordIdStrAtEnd(const void* bufferRaw, size_t bufSize) {
- // See decodeRecordIdStrAtEnd for the size decoding algorithm
invariant(bufSize > 0);
const uint8_t* buffer = static_cast<const uint8_t*>(bufferRaw);
- // Decode RecordId binary string size
- size_t ridSize = 0;
- uint8_t sizes[kRecordIdStrEncodedSizeMaxBytes] = {0};
-
- // Continuation bytes
- size_t sizeByteId = 0;
- for (; buffer[bufSize - 1 - sizeByteId] & 0x80; sizeByteId++) {
- invariant(bufSize >= sizeByteId + 1 /* non-cont bytes */);
- invariant(sizeByteId < kRecordIdStrEncodedSizeMaxBytes);
- sizes[sizeByteId] = buffer[bufSize - 1 - sizeByteId] & 0x7F;
- }
- // Last (non-continuation) byte
- invariant(sizeByteId < kRecordIdStrEncodedSizeMaxBytes);
- sizes[sizeByteId] = buffer[bufSize - 1 - sizeByteId];
-
- const size_t numSegments = sizeByteId + 1;
-
- for (; sizeByteId > 0; sizeByteId--) {
- ridSize += sizes[sizeByteId] << ((numSegments - sizeByteId - 1) * 7);
- }
- ridSize += sizes[sizeByteId] << ((numSegments - sizeByteId - 1) * 7);
-
- invariant(bufSize >= ridSize + numSegments);
- return bufSize - ridSize - numSegments;
+ // The current encoding for strings supports strings up to 127 bytes. The high bit is reserved
+ // for future usage.
+ uint8_t len = buffer[bufSize - 1];
+ keyStringAssert(5566400,
+ fmt::format("Cannot decode record id string longer than {} bytes; size is {}",
+ kMaxRecordIdStrLen,
+ len),
+ len <= kMaxRecordIdStrLen);
+ invariant(bufSize > static_cast<size_t>(len + 1));
+ return bufSize - len - 1;
}
RecordId decodeRecordIdLong(BufReader* reader) {
@@ -2571,39 +2536,20 @@ RecordId decodeRecordIdLong(BufReader* reader) {
}
RecordId decodeRecordIdStrAtEnd(const void* bufferRaw, size_t bufSize) {
- // See _appendRecordIdStr for the encoding scheme.
- // The RecordId binary string size is decoded right-to-left, up to the size byte
- // without continuation bit.
-
invariant(bufSize > 0);
const uint8_t* buffer = static_cast<const uint8_t*>(bufferRaw);
- // Decode RecordId binary string size
- size_t ridSize = 0;
- uint8_t sizes[kRecordIdStrEncodedSizeMaxBytes] = {0};
-
- // Continuation bytes
- size_t sizeByteId = 0;
- for (; buffer[bufSize - 1 - sizeByteId] & 0x80; sizeByteId++) {
- invariant(bufSize >= sizeByteId + 1 /* non-cont byte */);
- invariant(sizeByteId < kRecordIdStrEncodedSizeMaxBytes);
- sizes[sizeByteId] = buffer[bufSize - 1 - sizeByteId] & 0x7F;
- }
- // Last (non-continuation) byte
- invariant(sizeByteId < kRecordIdStrEncodedSizeMaxBytes);
- sizes[sizeByteId] = buffer[bufSize - 1 - sizeByteId];
-
- const size_t numSegments = sizeByteId + 1;
-
- for (; sizeByteId > 0; sizeByteId--) {
- ridSize += sizes[sizeByteId] << ((numSegments - sizeByteId - 1) * 7);
- }
- ridSize += sizes[sizeByteId] << ((numSegments - sizeByteId - 1) * 7);
-
- invariant(bufSize >= ridSize + numSegments);
-
- return RecordId(reinterpret_cast<const char*>(buffer) + (bufSize - ridSize - numSegments),
- ridSize);
+ // The current encoding for strings supports strings up to 127 bytes. The high bit is reserved
+ // for future usage.
+ uint8_t len = buffer[bufSize - 1];
+ keyStringAssert(5577900,
+ fmt::format("Cannot decode record id string longer than {} bytes; size is {}",
+ kMaxRecordIdStrLen,
+ len),
+ len <= kMaxRecordIdStrLen);
+ invariant(bufSize > len);
+ const uint8_t* firstBytePtr = (buffer + bufSize - len - 1);
+ return RecordId(reinterpret_cast<const char*>(firstBytePtr), len);
}
int compare(const char* leftBuf, const char* rightBuf, size_t leftSize, size_t rightSize) {