diff options
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/bson/util/builder.h | 2 | ||||
-rw-r--r-- | src/mongo/db/storage/key_string.cpp | 124 | ||||
-rw-r--r-- | src/mongo/db/storage/key_string.h | 6 | ||||
-rw-r--r-- | src/mongo/db/storage/key_string_bm.cpp | 29 | ||||
-rw-r--r-- | src/mongo/db/storage/key_string_test.cpp | 145 |
5 files changed, 270 insertions, 36 deletions
diff --git a/src/mongo/bson/util/builder.h b/src/mongo/bson/util/builder.h index e2a7fc8d74c..58a09f3cbc9 100644 --- a/src/mongo/bson/util/builder.h +++ b/src/mongo/bson/util/builder.h @@ -410,7 +410,7 @@ public: } /** - * Reserve room for some number of bytes to be claimed at a later time. + * Reserve room for some number of bytes to be claimed at a later time via claimReservedBytes. */ void reserveBytes(size_t bytes) { size_t minSize = l + reservedBytes + bytes; diff --git a/src/mongo/db/storage/key_string.cpp b/src/mongo/db/storage/key_string.cpp index 17c5a90c14d..eb69641f957 100644 --- a/src/mongo/db/storage/key_string.cpp +++ b/src/mongo/db/storage/key_string.cpp @@ -238,8 +238,6 @@ const uint8_t kEnd = 0x4; const uint8_t kLess = 1; const uint8_t kGreater = 254; -// The maximum length of a RecordId binary string that may be appended to a KeyString. -const int8_t kMaxRecordIdStrLen = 127; } // namespace // some utility functions @@ -517,23 +515,45 @@ void BuilderBase<BufferT>::_appendRecordIdLong(int64_t val) { template <class BufferT> void BuilderBase<BufferT>::_appendRecordIdStr(const char* str, int size) { - // This encoding for RecordId binary strings stores the size at the end. This means that a - // RecordId may only be appended at the end of a KeyString. That is, it cannot be appended in - // the middle of a KeyString and also be binary-comparable. - - // The current maximum string length is 127. The high bit is reserved for future usage. - keyStringAssert(5994901, - fmt::format("cannot generate key for RecordId longer than maximum of {} bytes", - kMaxRecordIdStrLen), - size <= kMaxRecordIdStrLen); + // Append the RecordId binary string as-is, then append the encoded binary string size. + // The binary string size is encoded in 7-bit increments over one or more size bytes. + // The 8th bit of a size byte is a continuation bit that is set on all size bytes except + // the leftmost (i.e. the last) one. This allows decoding the size right-to-left until there are + // no more size bytes remaining with continuation bits. See decodeRecordIdStrAtEnd for the + // decoding algorithm. This 7-bit size encoding ensures backward compatibility with 5.0, which + // supports RecordId binary strings up to 127 bytes, or what fits in 7 bits. + invariant(size > 0); + invariant(size <= RecordId::kBigStrMaxSize); const bool invert = false; - // String is encoded with a single byte for the size at the end. + // Encode size + uint8_t encodedSize[kRecordIdStrEncodedSizeMaxBytes] = {0}; + int highestSizeByte = 0; + bool highestSizeByteSet = false; + + for (int sizeBytes = kRecordIdStrEncodedSizeMaxBytes - 1; sizeBytes >= 0; sizeBytes--) { + encodedSize[sizeBytes] = (size >> (sizeBytes * 7)) & 0x7F; + if (encodedSize[sizeBytes] && highestSizeByteSet == false) { + highestSizeByteSet = true; + highestSizeByte = sizeBytes; + } + } + for (int i = highestSizeByte; i > 0; i--) { + encodedSize[i] |= 0x80; + } + + const int encodedSizeLen = highestSizeByte + 1; + + // Preallocate room for the RecordId binary string and its encoded size + // to reduce the number of potential reallocs + _buffer().reserveBytes(size + encodedSizeLen); + _buffer().claimReservedBytes(size + encodedSizeLen); + + // Append RecordId and its encoded size _appendBytes(str, size, invert); - auto encodedSize = static_cast<uint8_t>(size); - _append(encodedSize, invert); + _appendBytes(encodedSize, encodedSizeLen, invert); } template <class BufferT> @@ -2506,19 +2526,34 @@ size_t sizeWithoutRecordIdLongAtEnd(const void* bufferRaw, size_t bufSize) { } size_t sizeWithoutRecordIdStrAtEnd(const void* bufferRaw, size_t bufSize) { + // See decodeRecordIdStrAtEnd for the size decoding algorithm invariant(bufSize > 0); const uint8_t* buffer = static_cast<const uint8_t*>(bufferRaw); - // The current encoding for strings supports strings up to 128 bytes. The high bit is reserved - // for future usage. - uint8_t len = buffer[bufSize - 1]; - keyStringAssert(5566400, - fmt::format("Cannot decode record id string longer than {} bytes; size is {}", - kMaxRecordIdStrLen, - len), - len <= kMaxRecordIdStrLen); - invariant(bufSize > static_cast<size_t>(len + 1)); - return bufSize - len - 1; + // Decode RecordId binary string size + size_t ridSize = 0; + uint8_t sizes[kRecordIdStrEncodedSizeMaxBytes] = {0}; + + // Continuation bytes + size_t sizeByteId = 0; + for (; buffer[bufSize - 1 - sizeByteId] & 0x80; sizeByteId++) { + invariant(bufSize >= sizeByteId + 1 /* non-cont bytes */); + invariant(sizeByteId < kRecordIdStrEncodedSizeMaxBytes); + sizes[sizeByteId] = buffer[bufSize - 1 - sizeByteId] & 0x7F; + } + // Last (non-continuation) byte + invariant(sizeByteId < kRecordIdStrEncodedSizeMaxBytes); + sizes[sizeByteId] = buffer[bufSize - 1 - sizeByteId]; + + const size_t numSegments = sizeByteId + 1; + + for (; sizeByteId > 0; sizeByteId--) { + ridSize += sizes[sizeByteId] << ((numSegments - sizeByteId - 1) * 7); + } + ridSize += sizes[sizeByteId] << ((numSegments - sizeByteId - 1) * 7); + + invariant(bufSize >= ridSize + numSegments); + return bufSize - ridSize - numSegments; } RecordId decodeRecordIdLong(BufReader* reader) { @@ -2536,20 +2571,39 @@ RecordId decodeRecordIdLong(BufReader* reader) { } RecordId decodeRecordIdStrAtEnd(const void* bufferRaw, size_t bufSize) { + // See _appendRecordIdStr for the encoding scheme. + // The RecordId binary string size is decoded right-to-left, up to the size byte + // without continuation bit. + invariant(bufSize > 0); const uint8_t* buffer = static_cast<const uint8_t*>(bufferRaw); - // The current encoding for strings supports strings up to 128 bytes. The high bit is reserved - // for future usage. - uint8_t len = buffer[bufSize - 1]; - keyStringAssert(5577900, - fmt::format("Cannot decode record id string longer than {} bytes; size is {}", - kMaxRecordIdStrLen, - len), - len <= kMaxRecordIdStrLen); - invariant(bufSize > len); - const uint8_t* firstBytePtr = (buffer + bufSize - len - 1); - return RecordId(reinterpret_cast<const char*>(firstBytePtr), len); + // Decode RecordId binary string size + size_t ridSize = 0; + uint8_t sizes[kRecordIdStrEncodedSizeMaxBytes] = {0}; + + // Continuation bytes + size_t sizeByteId = 0; + for (; buffer[bufSize - 1 - sizeByteId] & 0x80; sizeByteId++) { + invariant(bufSize >= sizeByteId + 1 /* non-cont byte */); + invariant(sizeByteId < kRecordIdStrEncodedSizeMaxBytes); + sizes[sizeByteId] = buffer[bufSize - 1 - sizeByteId] & 0x7F; + } + // Last (non-continuation) byte + invariant(sizeByteId < kRecordIdStrEncodedSizeMaxBytes); + sizes[sizeByteId] = buffer[bufSize - 1 - sizeByteId]; + + const size_t numSegments = sizeByteId + 1; + + for (; sizeByteId > 0; sizeByteId--) { + ridSize += sizes[sizeByteId] << ((numSegments - sizeByteId - 1) * 7); + } + ridSize += sizes[sizeByteId] << ((numSegments - sizeByteId - 1) * 7); + + invariant(bufSize >= ridSize + numSegments); + + return RecordId(reinterpret_cast<const char*>(buffer) + (bufSize - ridSize - numSegments), + ridSize); } int compare(const char* leftBuf, const char* rightBuf, size_t leftSize, size_t rightSize) { diff --git a/src/mongo/db/storage/key_string.h b/src/mongo/db/storage/key_string.h index 2484ac83b1d..87a6b74c742 100644 --- a/src/mongo/db/storage/key_string.h +++ b/src/mongo/db/storage/key_string.h @@ -62,6 +62,12 @@ static StringData keyStringVersionToString(Version version) { static const Ordering ALL_ASCENDING = Ordering::make(BSONObj()); +// Encode the size of a RecordId binary string using up to 4 bytes, 7 bits per byte. +// This supports encoding sizes that fit into 28 bits, which largely covers the +// maximum BSON size. +static const int kRecordIdStrEncodedSizeMaxBytes = 4; +MONGO_STATIC_ASSERT(RecordId::kBigStrMaxSize < 1 << (7 * kRecordIdStrEncodedSizeMaxBytes)); + /** * Encodes info needed to restore the original BSONTypes from a KeyString. They cannot be * stored in place since we don't want them to affect the ordering (1 and 1.0 compare as diff --git a/src/mongo/db/storage/key_string_bm.cpp b/src/mongo/db/storage/key_string_bm.cpp index e03e48222e0..263a6030ec5 100644 --- a/src/mongo/db/storage/key_string_bm.cpp +++ b/src/mongo/db/storage/key_string_bm.cpp @@ -207,6 +207,26 @@ void BM_KeyStringStackBuilderCopy(benchmark::State& state, BsonValueType bsonTyp state.SetItemsProcessed(state.iterations() * kSampleSize); } +void BM_KeyStringRecordIdStrAppend(benchmark::State& state, const size_t size) { + const auto buf = std::string(size, 'a'); + auto rid = RecordId(buf.c_str(), size); + for (auto _ : state) { + benchmark::ClobberMemory(); + benchmark::DoNotOptimize(KeyString::Builder(KeyString::Version::V1, rid)); + } +} + +void BM_KeyStringRecordIdStrDecode(benchmark::State& state, const size_t size) { + const auto buf = std::string(size, 'a'); + KeyString::Builder ks(KeyString::Version::V1, RecordId(buf.c_str(), size)); + auto ksBuf = ks.getBuffer(); + auto ksSize = ks.getSize(); + for (auto _ : state) { + benchmark::ClobberMemory(); + benchmark::DoNotOptimize(KeyString::decodeRecordIdStrAtEnd(ksBuf, ksSize)); + } +} + BENCHMARK_CAPTURE(BM_KeyStringValueAssign, Int, INT); BENCHMARK_CAPTURE(BM_KeyStringValueAssign, Double, DOUBLE); BENCHMARK_CAPTURE(BM_KeyStringValueAssign, Decimal, DECIMAL); @@ -245,5 +265,14 @@ BENCHMARK_CAPTURE(BM_KeyStringToBSON, V1_String, KeyString::Version::V1, STRING) BENCHMARK_CAPTURE(BM_KeyStringToBSON, V0_Array, KeyString::Version::V0, ARRAY); BENCHMARK_CAPTURE(BM_KeyStringToBSON, V1_Array, KeyString::Version::V1, ARRAY); +BENCHMARK_CAPTURE(BM_KeyStringRecordIdStrAppend, 16B, 16); +BENCHMARK_CAPTURE(BM_KeyStringRecordIdStrAppend, 512B, 512); +BENCHMARK_CAPTURE(BM_KeyStringRecordIdStrAppend, 1kB, 1024); +BENCHMARK_CAPTURE(BM_KeyStringRecordIdStrAppend, 1MB, 1024 * 1024); +BENCHMARK_CAPTURE(BM_KeyStringRecordIdStrDecode, 16B, 16); +BENCHMARK_CAPTURE(BM_KeyStringRecordIdStrDecode, 512B, 512); +BENCHMARK_CAPTURE(BM_KeyStringRecordIdStrDecode, 1kB, 1024); +BENCHMARK_CAPTURE(BM_KeyStringRecordIdStrDecode, 1MB, 1024 * 1024); + } // namespace } // namespace mongo diff --git a/src/mongo/db/storage/key_string_test.cpp b/src/mongo/db/storage/key_string_test.cpp index 9567fa9e128..fc58aadbaf3 100644 --- a/src/mongo/db/storage/key_string_test.cpp +++ b/src/mongo/db/storage/key_string_test.cpp @@ -1341,6 +1341,151 @@ TEST_F(KeyStringBuilderTest, RecordIdStr) { } } +namespace { + +RecordId ridFromStr(const char* str, size_t len) { + KeyString::Builder builder(KeyString::Version::kLatestVersion); + builder.appendString(mongo::StringData(str, len)); + return RecordId(builder.getBuffer(), builder.getSize()); +} +} // namespace + + +TEST_F(KeyStringBuilderTest, RecordIdStrBig1SizeSegment) { + const int pad = 3; // kStringLike CType + StringData terminator + RecordId len + { + const int size = 90; + const auto ridStr = std::string(size, 'a'); + auto rid = ridFromStr(ridStr.c_str(), size); + const KeyString::Builder ks(version, rid); + ASSERT_EQ(ks.getSize(), size + pad); + ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid); + ASSERT_EQ(0, KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize())); + } + { + // Max 1-byte encoded string size is 127B: 1B CType + ridStr + string terminator + const int size = 125; + const auto ridStr = std::string(size, 'a'); + auto rid = ridFromStr(ridStr.c_str(), size); + const KeyString::Builder ks(version, rid); + ASSERT_EQ(ks.getSize(), size + pad); + ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid); + ASSERT_EQ(0, KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize())); + } +} + +TEST_F(KeyStringBuilderTest, RecordIdStrBig2SizeSegments) { + const int pad = 3; // kStringLike CType + StringData terminator + RecordId len + { + // Min 2-byte encoded string size is 128B: 1B CType + ridStr + string terminator + const int size = 126; + const auto ridStr = std::string(size, 'a'); + auto rid = ridFromStr(ridStr.c_str(), size); + const KeyString::Builder ks(version, rid); + ASSERT_EQ(ks.getSize(), size + pad + 1); // 1 byte with continuation bit + ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid); + ASSERT_EQ(0, KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize())); + } + { + const int size = 128; + const auto ridStr = std::string(size, 'a'); + auto rid = ridFromStr(ridStr.c_str(), size); + const KeyString::Builder ks(version, rid); + ASSERT_EQ(ks.getSize(), size + pad + 1); // 1 byte with continuation bit + ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid); + ASSERT_EQ(0, KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize())); + } + { + // Max 2-byte encoded string size is 16383B: 1B CType + ridStr + string terminator + const int size = 16381; + const auto ridStr = std::string(size, 'a'); + auto rid = ridFromStr(ridStr.c_str(), size); + const KeyString::Builder ks(version, rid); + ASSERT_EQ(ks.getSize(), size + pad + 1); // 1 byte with continuation bit + ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid); + ASSERT_EQ(0, KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize())); + } +} + +TEST_F(KeyStringBuilderTest, RecordIdStrBig3SizeSegments) { + const int pad = 3; // kStringLike CType + StringData terminator + RecordId len + { + // Min 3-byte encoded string size is 16384B: 1B CType + ridStr + string terminator + const int size = 16382; + const auto ridStr = std::string(size, 'a'); + auto rid = ridFromStr(ridStr.c_str(), size); + const KeyString::Builder ks(version, rid); + ASSERT_EQ(ks.getSize(), size + pad + 2); // 2 bytes with continuation bit + ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid); + ASSERT_EQ(0, KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize())); + } + { + // Max 3-byte encoded string size is 2097151B: 1B CType + ridStr + string terminator + const int size = 2097149; + const auto ridStr = std::string(size, 'a'); + auto rid = ridFromStr(ridStr.c_str(), size); + const KeyString::Builder ks(version, rid); + ASSERT_EQ(ks.getSize(), size + pad + 2); // 2 bytes with continuation bit + ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid); + ASSERT_EQ(0, KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize())); + } +} + +TEST_F(KeyStringBuilderTest, RecordIdStrBig4SizeSegments) { + const int pad = 3; // kStringLike CType + StringData terminator + RecordId len + { + // Min 4-byte encoded string size is 2097152B: 1B CType + ridStr + string terminator + const int size = 2097150; + const auto ridStr = std::string(size, 'a'); + auto rid = ridFromStr(ridStr.c_str(), size); + const KeyString::Builder ks(version, rid); + ASSERT_EQ(ks.getSize(), size + pad + 3); // 3 bytes with continuation bit + ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid); + ASSERT_EQ(0, KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize())); + } + { + // Support up to RecordId::kBigStrMaxSize + const int size = RecordId::kBigStrMaxSize - 2 /* CType + string terminator */; + const auto ridStr = std::string(size, 'a'); + auto rid = ridFromStr(ridStr.c_str(), size); + const KeyString::Builder ks(version, rid); + ASSERT_EQ(ks.getSize(), size + pad + 3); // 3 bytes with continuation bit + ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid); + ASSERT_EQ(0, KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize())); + } +} + +TEST_F(KeyStringBuilderTest, RecordIdStrBigSizeWithoutRecordIdStr) { + const int pad = 3; // kStringLike CType + StringData terminator + RecordId len + const char str[] = "keyval"; + const int padStr = 3; // kStringLike CType + string terminator + discriminator + { + const int ridStrlen = 90; + const auto ridStr = std::string(ridStrlen, 'a'); + auto rid = ridFromStr(ridStr.c_str(), ridStrlen); + KeyString::Builder ks(version); + ks.appendString(mongo::StringData(str, strlen(str))); + ks.appendRecordId(rid); + ASSERT_EQ(ks.getSize(), strlen(str) + padStr + ridStrlen + pad); + ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid); + ASSERT_EQ(strlen(str) + padStr, + KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize())); + } + { + const int ridStrlen = 260; + const auto ridStr = std::string(ridStrlen, 'a'); + auto rid = ridFromStr(ridStr.c_str(), ridStrlen); + KeyString::Builder ks(version); + ks.appendString(mongo::StringData(str, strlen(str))); + ks.appendRecordId(rid); + ASSERT_EQ(ks.getSize(), + strlen(str) + padStr + ridStrlen + pad + 1); // 1 0x80 cont byte + ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid); + ASSERT_EQ(strlen(str) + padStr, + KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize())); + } +} + TEST_F(KeyStringBuilderTest, AllPermCompare) { std::vector<BSONObj> elements = getInterestingElements(version); |