author     Josef Ahmad <josef.ahmad@mongodb.com>             2021-09-21 14:32:13 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2021-09-21 15:35:06 +0000
commit     5f5f4ede9151d2da43a92145e3b3b6f735a09d43 (patch)
tree       bd123cdb39884820aa68a8a643e2a62286f911b1
parent     ed6372937bb6d10c440f016333ed41ccb67e9139 (diff)
download   mongo-5f5f4ede9151d2da43a92145e3b3b6f735a09d43.tar.gz
SERVER-59199 KeyString: support large RecordId binary strings
-rw-r--r--  jstests/core/timeseries/clustered_index_types.js    19
-rw-r--r--  jstests/multiVersion/clustered_collection.js         61
-rw-r--r--  src/mongo/bson/util/builder.h                         2
-rw-r--r--  src/mongo/db/storage/key_string.cpp                 124
-rw-r--r--  src/mongo/db/storage/key_string.h                     6
-rw-r--r--  src/mongo/db/storage/key_string_bm.cpp               29
-rw-r--r--  src/mongo/db/storage/key_string_test.cpp            145
7 files changed, 348 insertions, 38 deletions
diff --git a/jstests/core/timeseries/clustered_index_types.js b/jstests/core/timeseries/clustered_index_types.js
index 2186c228fc9..158f516b387 100644
--- a/jstests/core/timeseries/clustered_index_types.js
+++ b/jstests/core/timeseries/clustered_index_types.js
@@ -50,15 +50,30 @@ assert.eq(1, coll.find({a: 8}).itcount());
assert.commandWorked(coll.insert({_id: null, a: 9}));
assert.eq(1, coll.find({_id: null}).itcount());
assert.commandWorked(coll.insert({_id: 'x'.repeat(100), a: 10}));
+assert.commandWorked(coll.insert({}));
assert.commandWorked(coll.createIndex({a: 1}));
assert.commandWorked(coll.dropIndex({a: 1}));
// This key is too large.
assert.commandFailedWithCode(coll.insert({_id: 'x'.repeat(8 * 1024 * 1024), a: 11}), 5894900);
-// This large key should prevent the index from being created.
+// Large key but within the upper bound
assert.commandWorked(coll.insert({_id: 'x'.repeat(3 * 1024 * 1024), a: 12}));
-assert.commandFailedWithCode(coll.createIndex({a: 1}), 5994901);
+// Can build a secondary index with a 3MB RecordId doc.
+assert.commandWorked(coll.createIndex({a: 1}));
+
+// Look up using the secondary index on {a: 1}
+assert.eq(1, coll.find({a: null}).itcount());
+assert.eq(0, coll.find({a: 0}).itcount());
+assert.eq(2, coll.find({a: 1}).itcount());
+assert.eq(1, coll.find({a: 2}).itcount());
+assert.eq(1, coll.find({a: 8}).itcount());
+assert.eq(1, coll.find({a: 9}).itcount());
+assert.eq(null, coll.findOne({a: 9})['_id']);
+assert.eq(1, coll.find({a: 10}).itcount());
+assert.eq(100, coll.findOne({a: 10})['_id'].length);
+assert.eq(1, coll.find({a: 12}).itcount());
+assert.eq(3 * 1024 * 1024, coll.findOne({a: 12})['_id'].length);
// No support for numeric type differentiation.
assert.commandWorked(coll.insert({_id: 42.0}));
diff --git a/jstests/multiVersion/clustered_collection.js b/jstests/multiVersion/clustered_collection.js
new file mode 100644
index 00000000000..300cf68cc62
--- /dev/null
+++ b/jstests/multiVersion/clustered_collection.js
@@ -0,0 +1,61 @@
+/**
+ * Regression test validating that on 5.0, accessing documents in a clustered collection whose
+ * RecordId exceeds 127 bytes returns an error code.
+ */
+
+(function() {
+'use strict';
+
+const dbpath = MongoRunner.dataPath + 'clustered_collection';
+resetDbpath(dbpath);
+
+const defaultOptions = {
+ dbpath: dbpath,
+ noCleanData: true
+};
+
+const kCollName = 'system.buckets.clusteredColl';
+
+let mongodOptions5dot0 = Object.extend({binVersion: '5.0'}, defaultOptions);
+let mongodOptions5dot1 = Object.extend({binVersion: '5.1'}, defaultOptions);
+
+// Create a clustered collection in 5.1 and insert a document whose RecordId exceeds 127 bytes.
+// Then downgrade to 5.0 and validate that the RecordId cannot be decoded because it is too large.
+
+jsTestLog("Starting version: 5.1");
+let conn = MongoRunner.runMongod(mongodOptions5dot1);
+assert.neq(
+    null, conn, 'mongod was unable to start with version ' + tojson(mongodOptions5dot1));
+
+let db = conn.getDB('test');
+assert.commandWorked(db.createCollection(kCollName, {clusteredIndex: true}));
+assert.commandWorked(db[kCollName].createIndex({a: 1}));
+// 126 characters + kStringLike CType + NULL terminator == 128 bytes
+assert.commandWorked(db[kCollName].insertOne({_id: 'x'.repeat(126), a: 1}));
+assert.eq(1, db[kCollName].find({a: 1}).itcount());
+
+assert.commandWorked(db.adminCommand({setFeatureCompatibilityVersion: "5.0"}));
+MongoRunner.stopMongod(conn);
+
+jsTestLog("Starting version: 5.0");
+conn = MongoRunner.runMongod(mongodOptions5dot0);
+assert.neq(
+    null, conn, 'mongod was unable to start with version ' + tojson(mongodOptions5dot0));
+
+db = conn.getDB('test');
+assert.commandFailedWithCode(db.runCommand({find: kCollName, filter: {a: 1}}), 5577900);
+assert.commandWorked(db[kCollName].insertOne({_id: 'x'.repeat(12), a: 2}));
+
+let stopOptions = {skipValidation: true};
+MongoRunner.stopMongod(conn, 15, stopOptions);
+
+jsTestLog("Starting version: 5.1");
+conn = MongoRunner.runMongod(mongodOptions5dot1);
+assert.neq(
+    null, conn, 'mongod was unable to start with version ' + tojson(mongodOptions5dot1));
+
+db = conn.getDB('test');
+assert.eq(1, db[kCollName].find({a: 1}).itcount());
+assert.eq(1, db[kCollName].find({a: 2}).itcount());
+MongoRunner.stopMongod(conn);
+})();
diff --git a/src/mongo/bson/util/builder.h b/src/mongo/bson/util/builder.h
index e2a7fc8d74c..58a09f3cbc9 100644
--- a/src/mongo/bson/util/builder.h
+++ b/src/mongo/bson/util/builder.h
@@ -410,7 +410,7 @@ public:
}
/**
- * Reserve room for some number of bytes to be claimed at a later time.
+ * Reserve room for some number of bytes to be claimed at a later time via claimReservedBytes.
*/
void reserveBytes(size_t bytes) {
size_t minSize = l + reservedBytes + bytes;
diff --git a/src/mongo/db/storage/key_string.cpp b/src/mongo/db/storage/key_string.cpp
index 17c5a90c14d..eb69641f957 100644
--- a/src/mongo/db/storage/key_string.cpp
+++ b/src/mongo/db/storage/key_string.cpp
@@ -238,8 +238,6 @@ const uint8_t kEnd = 0x4;
const uint8_t kLess = 1;
const uint8_t kGreater = 254;
-// The maximum length of a RecordId binary string that may be appended to a KeyString.
-const int8_t kMaxRecordIdStrLen = 127;
} // namespace
// some utility functions
@@ -517,23 +515,45 @@ void BuilderBase<BufferT>::_appendRecordIdLong(int64_t val) {
template <class BufferT>
void BuilderBase<BufferT>::_appendRecordIdStr(const char* str, int size) {
- // This encoding for RecordId binary strings stores the size at the end. This means that a
- // RecordId may only be appended at the end of a KeyString. That is, it cannot be appended in
- // the middle of a KeyString and also be binary-comparable.
-
- // The current maximum string length is 127. The high bit is reserved for future usage.
- keyStringAssert(5994901,
- fmt::format("cannot generate key for RecordId longer than maximum of {} bytes",
- kMaxRecordIdStrLen),
- size <= kMaxRecordIdStrLen);
+ // Append the RecordId binary string as-is, then append the encoded binary string size.
+ // The binary string size is encoded in 7-bit increments over one or more size bytes.
+    // The 8th bit of a size byte is a continuation bit that is set on all size bytes except the
+    // leftmost one, i.e. the last size byte reached when decoding right-to-left. This allows
+    // decoding the size right-to-left until a size byte without the continuation bit is found.
+    // See decodeRecordIdStrAtEnd for the decoding algorithm. This 7-bit encoding keeps backward
+    // compatibility with 5.0, which supports RecordId binary strings up to 127 bytes (7 bits).
+
invariant(size > 0);
+ invariant(size <= RecordId::kBigStrMaxSize);
const bool invert = false;
- // String is encoded with a single byte for the size at the end.
+ // Encode size
+ uint8_t encodedSize[kRecordIdStrEncodedSizeMaxBytes] = {0};
+ int highestSizeByte = 0;
+ bool highestSizeByteSet = false;
+
+ for (int sizeBytes = kRecordIdStrEncodedSizeMaxBytes - 1; sizeBytes >= 0; sizeBytes--) {
+ encodedSize[sizeBytes] = (size >> (sizeBytes * 7)) & 0x7F;
+ if (encodedSize[sizeBytes] && highestSizeByteSet == false) {
+ highestSizeByteSet = true;
+ highestSizeByte = sizeBytes;
+ }
+ }
+ for (int i = highestSizeByte; i > 0; i--) {
+ encodedSize[i] |= 0x80;
+ }
+
+ const int encodedSizeLen = highestSizeByte + 1;
+
+ // Preallocate room for the RecordId binary string and its encoded size
+ // to reduce the number of potential reallocs
+ _buffer().reserveBytes(size + encodedSizeLen);
+ _buffer().claimReservedBytes(size + encodedSizeLen);
+
+ // Append RecordId and its encoded size
_appendBytes(str, size, invert);
- auto encodedSize = static_cast<uint8_t>(size);
- _append(encodedSize, invert);
+ _appendBytes(encodedSize, encodedSizeLen, invert);
}
template <class BufferT>
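For illustration, here is a minimal standalone sketch of the size encoding described in the comment above (a hypothetical helper, not part of this patch): the size is split into 7-bit groups, written least-significant group first, and every byte except the first one written carries the 0x80 continuation bit, so the rightmost size byte in the buffer holds the most-significant group.

#include <cassert>
#include <cstddef>
#include <cstdint>

// Hypothetical helper mirroring the scheme used by _appendRecordIdStr.
// Writes the 7-bit groups of `size` into `out`, least-significant group first,
// and returns the number of size bytes produced (at most 4).
inline int encodeRecordIdStrSize(size_t size, uint8_t out[4]) {
    assert(size > 0 && size < (size_t{1} << 28));
    int len = 0;
    do {
        out[len] = static_cast<uint8_t>(size & 0x7F);
        if (len > 0)
            out[len] |= 0x80;  // continuation bit on every byte except the leftmost
        size >>= 7;
        ++len;
    } while (size > 0);
    return len;
}

For example, a size of 127 produces the single byte 0x7F (the 5.0-compatible format), while 128 produces the two bytes 0x00 0x81; the size bytes are appended to the KeyString after the RecordId string itself.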
@@ -2506,19 +2526,34 @@ size_t sizeWithoutRecordIdLongAtEnd(const void* bufferRaw, size_t bufSize) {
}
size_t sizeWithoutRecordIdStrAtEnd(const void* bufferRaw, size_t bufSize) {
+ // See decodeRecordIdStrAtEnd for the size decoding algorithm
invariant(bufSize > 0);
const uint8_t* buffer = static_cast<const uint8_t*>(bufferRaw);
- // The current encoding for strings supports strings up to 128 bytes. The high bit is reserved
- // for future usage.
- uint8_t len = buffer[bufSize - 1];
- keyStringAssert(5566400,
- fmt::format("Cannot decode record id string longer than {} bytes; size is {}",
- kMaxRecordIdStrLen,
- len),
- len <= kMaxRecordIdStrLen);
- invariant(bufSize > static_cast<size_t>(len + 1));
- return bufSize - len - 1;
+ // Decode RecordId binary string size
+ size_t ridSize = 0;
+ uint8_t sizes[kRecordIdStrEncodedSizeMaxBytes] = {0};
+
+ // Continuation bytes
+ size_t sizeByteId = 0;
+ for (; buffer[bufSize - 1 - sizeByteId] & 0x80; sizeByteId++) {
+ invariant(bufSize >= sizeByteId + 1 /* non-cont bytes */);
+ invariant(sizeByteId < kRecordIdStrEncodedSizeMaxBytes);
+ sizes[sizeByteId] = buffer[bufSize - 1 - sizeByteId] & 0x7F;
+ }
+ // Last (non-continuation) byte
+ invariant(sizeByteId < kRecordIdStrEncodedSizeMaxBytes);
+ sizes[sizeByteId] = buffer[bufSize - 1 - sizeByteId];
+
+ const size_t numSegments = sizeByteId + 1;
+
+ for (; sizeByteId > 0; sizeByteId--) {
+ ridSize += sizes[sizeByteId] << ((numSegments - sizeByteId - 1) * 7);
+ }
+ ridSize += sizes[sizeByteId] << ((numSegments - sizeByteId - 1) * 7);
+
+ invariant(bufSize >= ridSize + numSegments);
+ return bufSize - ridSize - numSegments;
}
RecordId decodeRecordIdLong(BufReader* reader) {
@@ -2536,20 +2571,39 @@ RecordId decodeRecordIdLong(BufReader* reader) {
}
RecordId decodeRecordIdStrAtEnd(const void* bufferRaw, size_t bufSize) {
+ // See _appendRecordIdStr for the encoding scheme.
+ // The RecordId binary string size is decoded right-to-left, up to the size byte
+ // without continuation bit.
+
invariant(bufSize > 0);
const uint8_t* buffer = static_cast<const uint8_t*>(bufferRaw);
- // The current encoding for strings supports strings up to 128 bytes. The high bit is reserved
- // for future usage.
- uint8_t len = buffer[bufSize - 1];
- keyStringAssert(5577900,
- fmt::format("Cannot decode record id string longer than {} bytes; size is {}",
- kMaxRecordIdStrLen,
- len),
- len <= kMaxRecordIdStrLen);
- invariant(bufSize > len);
- const uint8_t* firstBytePtr = (buffer + bufSize - len - 1);
- return RecordId(reinterpret_cast<const char*>(firstBytePtr), len);
+ // Decode RecordId binary string size
+ size_t ridSize = 0;
+ uint8_t sizes[kRecordIdStrEncodedSizeMaxBytes] = {0};
+
+ // Continuation bytes
+ size_t sizeByteId = 0;
+ for (; buffer[bufSize - 1 - sizeByteId] & 0x80; sizeByteId++) {
+ invariant(bufSize >= sizeByteId + 1 /* non-cont byte */);
+ invariant(sizeByteId < kRecordIdStrEncodedSizeMaxBytes);
+ sizes[sizeByteId] = buffer[bufSize - 1 - sizeByteId] & 0x7F;
+ }
+ // Last (non-continuation) byte
+ invariant(sizeByteId < kRecordIdStrEncodedSizeMaxBytes);
+ sizes[sizeByteId] = buffer[bufSize - 1 - sizeByteId];
+
+ const size_t numSegments = sizeByteId + 1;
+
+ for (; sizeByteId > 0; sizeByteId--) {
+ ridSize += sizes[sizeByteId] << ((numSegments - sizeByteId - 1) * 7);
+ }
+ ridSize += sizes[sizeByteId] << ((numSegments - sizeByteId - 1) * 7);
+
+ invariant(bufSize >= ridSize + numSegments);
+
+ return RecordId(reinterpret_cast<const char*>(buffer) + (bufSize - ridSize - numSegments),
+ ridSize);
}
int compare(const char* leftBuf, const char* rightBuf, size_t leftSize, size_t rightSize) {
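A matching sketch of the right-to-left scan performed by sizeWithoutRecordIdStrAtEnd and decodeRecordIdStrAtEnd (again a hypothetical helper, shown only to illustrate the scheme): starting from the last byte of the buffer, each size byte contributes 7 bits, and the byte without the continuation bit marks the end of the scan.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <utility>

// Hypothetical helper: decodes the RecordId string size from the end of a
// KeyString buffer. Returns {ridSize, numSizeBytes}; the RecordId string
// occupies the ridSize bytes immediately preceding the size bytes.
inline std::pair<size_t, size_t> decodeRecordIdStrSizeAtEnd(const uint8_t* buf, size_t bufSize) {
    size_t ridSize = 0;
    size_t numSizeBytes = 0;
    while (true) {
        assert(numSizeBytes < bufSize);
        const uint8_t b = buf[bufSize - 1 - numSizeBytes];
        // Scanning right-to-left visits the most-significant 7-bit group first.
        ridSize = (ridSize << 7) | (b & 0x7F);
        ++numSizeBytes;
        if (!(b & 0x80))
            break;  // the leftmost size byte has no continuation bit
    }
    assert(bufSize >= ridSize + numSizeBytes);
    return {ridSize, numSizeBytes};
}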
diff --git a/src/mongo/db/storage/key_string.h b/src/mongo/db/storage/key_string.h
index 2484ac83b1d..87a6b74c742 100644
--- a/src/mongo/db/storage/key_string.h
+++ b/src/mongo/db/storage/key_string.h
@@ -62,6 +62,12 @@ static StringData keyStringVersionToString(Version version) {
static const Ordering ALL_ASCENDING = Ordering::make(BSONObj());
+// Encode the size of a RecordId binary string using up to 4 bytes, 7 bits per byte.
+// This supports encoding sizes that fit into 28 bits, which comfortably covers the
+// maximum BSON size.
+static const int kRecordIdStrEncodedSizeMaxBytes = 4;
+MONGO_STATIC_ASSERT(RecordId::kBigStrMaxSize < 1 << (7 * kRecordIdStrEncodedSizeMaxBytes));
+
/**
* Encodes info needed to restore the original BSONTypes from a KeyString. They cannot be
* stored in place since we don't want them to affect the ordering (1 and 1.0 compare as
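As a quick sanity check on that bound (illustrative arithmetic only, not part of the patch): four size bytes of 7 bits each can represent sizes up to 2^28 - 1 = 268,435,455 bytes, well above the 16 MB maximum BSON document size.

#include <cstddef>

// Illustrative only: the largest size representable with four 7-bit size bytes
// comfortably exceeds the 16 MB BSON document limit.
constexpr size_t kMaxEncodableRecordIdStrSize = (size_t{1} << (7 * 4)) - 1;  // 268435455
constexpr size_t kMaxBSONUserSize = 16 * 1024 * 1024;                        // 16 MB
static_assert(kMaxEncodableRecordIdStrSize > kMaxBSONUserSize,
              "4 size bytes cover the maximum BSON size");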
diff --git a/src/mongo/db/storage/key_string_bm.cpp b/src/mongo/db/storage/key_string_bm.cpp
index e03e48222e0..263a6030ec5 100644
--- a/src/mongo/db/storage/key_string_bm.cpp
+++ b/src/mongo/db/storage/key_string_bm.cpp
@@ -207,6 +207,26 @@ void BM_KeyStringStackBuilderCopy(benchmark::State& state, BsonValueType bsonTyp
state.SetItemsProcessed(state.iterations() * kSampleSize);
}
+void BM_KeyStringRecordIdStrAppend(benchmark::State& state, const size_t size) {
+ const auto buf = std::string(size, 'a');
+ auto rid = RecordId(buf.c_str(), size);
+ for (auto _ : state) {
+ benchmark::ClobberMemory();
+ benchmark::DoNotOptimize(KeyString::Builder(KeyString::Version::V1, rid));
+ }
+}
+
+void BM_KeyStringRecordIdStrDecode(benchmark::State& state, const size_t size) {
+ const auto buf = std::string(size, 'a');
+ KeyString::Builder ks(KeyString::Version::V1, RecordId(buf.c_str(), size));
+ auto ksBuf = ks.getBuffer();
+ auto ksSize = ks.getSize();
+ for (auto _ : state) {
+ benchmark::ClobberMemory();
+ benchmark::DoNotOptimize(KeyString::decodeRecordIdStrAtEnd(ksBuf, ksSize));
+ }
+}
+
BENCHMARK_CAPTURE(BM_KeyStringValueAssign, Int, INT);
BENCHMARK_CAPTURE(BM_KeyStringValueAssign, Double, DOUBLE);
BENCHMARK_CAPTURE(BM_KeyStringValueAssign, Decimal, DECIMAL);
@@ -245,5 +265,14 @@ BENCHMARK_CAPTURE(BM_KeyStringToBSON, V1_String, KeyString::Version::V1, STRING)
BENCHMARK_CAPTURE(BM_KeyStringToBSON, V0_Array, KeyString::Version::V0, ARRAY);
BENCHMARK_CAPTURE(BM_KeyStringToBSON, V1_Array, KeyString::Version::V1, ARRAY);
+BENCHMARK_CAPTURE(BM_KeyStringRecordIdStrAppend, 16B, 16);
+BENCHMARK_CAPTURE(BM_KeyStringRecordIdStrAppend, 512B, 512);
+BENCHMARK_CAPTURE(BM_KeyStringRecordIdStrAppend, 1kB, 1024);
+BENCHMARK_CAPTURE(BM_KeyStringRecordIdStrAppend, 1MB, 1024 * 1024);
+BENCHMARK_CAPTURE(BM_KeyStringRecordIdStrDecode, 16B, 16);
+BENCHMARK_CAPTURE(BM_KeyStringRecordIdStrDecode, 512B, 512);
+BENCHMARK_CAPTURE(BM_KeyStringRecordIdStrDecode, 1kB, 1024);
+BENCHMARK_CAPTURE(BM_KeyStringRecordIdStrDecode, 1MB, 1024 * 1024);
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/storage/key_string_test.cpp b/src/mongo/db/storage/key_string_test.cpp
index 9567fa9e128..fc58aadbaf3 100644
--- a/src/mongo/db/storage/key_string_test.cpp
+++ b/src/mongo/db/storage/key_string_test.cpp
@@ -1341,6 +1341,151 @@ TEST_F(KeyStringBuilderTest, RecordIdStr) {
}
}
+namespace {
+
+RecordId ridFromStr(const char* str, size_t len) {
+ KeyString::Builder builder(KeyString::Version::kLatestVersion);
+ builder.appendString(mongo::StringData(str, len));
+ return RecordId(builder.getBuffer(), builder.getSize());
+}
+} // namespace
+
+
+TEST_F(KeyStringBuilderTest, RecordIdStrBig1SizeSegment) {
+ const int pad = 3; // kStringLike CType + StringData terminator + RecordId len
+ {
+ const int size = 90;
+ const auto ridStr = std::string(size, 'a');
+ auto rid = ridFromStr(ridStr.c_str(), size);
+ const KeyString::Builder ks(version, rid);
+ ASSERT_EQ(ks.getSize(), size + pad);
+ ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid);
+ ASSERT_EQ(0, KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()));
+ }
+ {
+ // Max 1-byte encoded string size is 127B: 1B CType + ridStr + string terminator
+ const int size = 125;
+ const auto ridStr = std::string(size, 'a');
+ auto rid = ridFromStr(ridStr.c_str(), size);
+ const KeyString::Builder ks(version, rid);
+ ASSERT_EQ(ks.getSize(), size + pad);
+ ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid);
+ ASSERT_EQ(0, KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()));
+ }
+}
+
+TEST_F(KeyStringBuilderTest, RecordIdStrBig2SizeSegments) {
+ const int pad = 3; // kStringLike CType + StringData terminator + RecordId len
+ {
+ // Min 2-byte encoded string size is 128B: 1B CType + ridStr + string terminator
+ const int size = 126;
+ const auto ridStr = std::string(size, 'a');
+ auto rid = ridFromStr(ridStr.c_str(), size);
+ const KeyString::Builder ks(version, rid);
+ ASSERT_EQ(ks.getSize(), size + pad + 1); // 1 byte with continuation bit
+ ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid);
+ ASSERT_EQ(0, KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()));
+ }
+ {
+ const int size = 128;
+ const auto ridStr = std::string(size, 'a');
+ auto rid = ridFromStr(ridStr.c_str(), size);
+ const KeyString::Builder ks(version, rid);
+ ASSERT_EQ(ks.getSize(), size + pad + 1); // 1 byte with continuation bit
+ ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid);
+ ASSERT_EQ(0, KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()));
+ }
+ {
+ // Max 2-byte encoded string size is 16383B: 1B CType + ridStr + string terminator
+ const int size = 16381;
+ const auto ridStr = std::string(size, 'a');
+ auto rid = ridFromStr(ridStr.c_str(), size);
+ const KeyString::Builder ks(version, rid);
+ ASSERT_EQ(ks.getSize(), size + pad + 1); // 1 byte with continuation bit
+ ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid);
+ ASSERT_EQ(0, KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()));
+ }
+}
+
+TEST_F(KeyStringBuilderTest, RecordIdStrBig3SizeSegments) {
+ const int pad = 3; // kStringLike CType + StringData terminator + RecordId len
+ {
+ // Min 3-byte encoded string size is 16384B: 1B CType + ridStr + string terminator
+ const int size = 16382;
+ const auto ridStr = std::string(size, 'a');
+ auto rid = ridFromStr(ridStr.c_str(), size);
+ const KeyString::Builder ks(version, rid);
+ ASSERT_EQ(ks.getSize(), size + pad + 2); // 2 bytes with continuation bit
+ ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid);
+ ASSERT_EQ(0, KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()));
+ }
+ {
+ // Max 3-byte encoded string size is 2097151B: 1B CType + ridStr + string terminator
+ const int size = 2097149;
+ const auto ridStr = std::string(size, 'a');
+ auto rid = ridFromStr(ridStr.c_str(), size);
+ const KeyString::Builder ks(version, rid);
+ ASSERT_EQ(ks.getSize(), size + pad + 2); // 2 bytes with continuation bit
+ ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid);
+ ASSERT_EQ(0, KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()));
+ }
+}
+
+TEST_F(KeyStringBuilderTest, RecordIdStrBig4SizeSegments) {
+ const int pad = 3; // kStringLike CType + StringData terminator + RecordId len
+ {
+ // Min 4-byte encoded string size is 2097152B: 1B CType + ridStr + string terminator
+ const int size = 2097150;
+ const auto ridStr = std::string(size, 'a');
+ auto rid = ridFromStr(ridStr.c_str(), size);
+ const KeyString::Builder ks(version, rid);
+ ASSERT_EQ(ks.getSize(), size + pad + 3); // 3 bytes with continuation bit
+ ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid);
+ ASSERT_EQ(0, KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()));
+ }
+ {
+ // Support up to RecordId::kBigStrMaxSize
+ const int size = RecordId::kBigStrMaxSize - 2 /* CType + string terminator */;
+ const auto ridStr = std::string(size, 'a');
+ auto rid = ridFromStr(ridStr.c_str(), size);
+ const KeyString::Builder ks(version, rid);
+ ASSERT_EQ(ks.getSize(), size + pad + 3); // 3 bytes with continuation bit
+ ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid);
+ ASSERT_EQ(0, KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()));
+ }
+}
+
+TEST_F(KeyStringBuilderTest, RecordIdStrBigSizeWithoutRecordIdStr) {
+ const int pad = 3; // kStringLike CType + StringData terminator + RecordId len
+ const char str[] = "keyval";
+ const int padStr = 3; // kStringLike CType + string terminator + discriminator
+ {
+ const int ridStrlen = 90;
+ const auto ridStr = std::string(ridStrlen, 'a');
+ auto rid = ridFromStr(ridStr.c_str(), ridStrlen);
+ KeyString::Builder ks(version);
+ ks.appendString(mongo::StringData(str, strlen(str)));
+ ks.appendRecordId(rid);
+ ASSERT_EQ(ks.getSize(), strlen(str) + padStr + ridStrlen + pad);
+ ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid);
+ ASSERT_EQ(strlen(str) + padStr,
+ KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()));
+ }
+ {
+ const int ridStrlen = 260;
+ const auto ridStr = std::string(ridStrlen, 'a');
+ auto rid = ridFromStr(ridStr.c_str(), ridStrlen);
+ KeyString::Builder ks(version);
+ ks.appendString(mongo::StringData(str, strlen(str)));
+ ks.appendRecordId(rid);
+ ASSERT_EQ(ks.getSize(),
+                  strlen(str) + padStr + ridStrlen + pad + 1); // 1 byte with continuation bit
+ ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid);
+ ASSERT_EQ(strlen(str) + padStr,
+ KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()));
+ }
+}
+
TEST_F(KeyStringBuilderTest, AllPermCompare) {
std::vector<BSONObj> elements = getInterestingElements(version);