author     Josef Ahmad <josef.ahmad@mongodb.com>             2021-09-21 14:32:13 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2021-09-21 15:35:06 +0000
commit     5f5f4ede9151d2da43a92145e3b3b6f735a09d43 (patch)
tree       bd123cdb39884820aa68a8a643e2a62286f911b1
parent     ed6372937bb6d10c440f016333ed41ccb67e9139 (diff)
download   mongo-5f5f4ede9151d2da43a92145e3b3b6f735a09d43.tar.gz
SERVER-59199 KeyString: support large RecordId binary strings
-rw-r--r--  jstests/core/timeseries/clustered_index_types.js    19
-rw-r--r--  jstests/multiVersion/clustered_collection.js         61
-rw-r--r--  src/mongo/bson/util/builder.h                         2
-rw-r--r--  src/mongo/db/storage/key_string.cpp                 124
-rw-r--r--  src/mongo/db/storage/key_string.h                     6
-rw-r--r--  src/mongo/db/storage/key_string_bm.cpp               29
-rw-r--r--  src/mongo/db/storage/key_string_test.cpp            145
7 files changed, 348 insertions, 38 deletions
diff --git a/jstests/core/timeseries/clustered_index_types.js b/jstests/core/timeseries/clustered_index_types.js
index 2186c228fc9..158f516b387 100644
--- a/jstests/core/timeseries/clustered_index_types.js
+++ b/jstests/core/timeseries/clustered_index_types.js
@@ -50,15 +50,30 @@ assert.eq(1, coll.find({a: 8}).itcount());
assert.commandWorked(coll.insert({_id: null, a: 9}));
assert.eq(1, coll.find({_id: null}).itcount());
assert.commandWorked(coll.insert({_id: 'x'.repeat(100), a: 10}));
+assert.commandWorked(coll.insert({}));
assert.commandWorked(coll.createIndex({a: 1}));
assert.commandWorked(coll.dropIndex({a: 1}));
// This key is too large.
assert.commandFailedWithCode(coll.insert({_id: 'x'.repeat(8 * 1024 * 1024), a: 11}), 5894900);
-// This large key should prevent the index from being created.
+// Large key but within the upper bound
assert.commandWorked(coll.insert({_id: 'x'.repeat(3 * 1024 * 1024), a: 12}));
-assert.commandFailedWithCode(coll.createIndex({a: 1}), 5994901);
+// Can build a secondary index with a 3MB RecordId doc.
+assert.commandWorked(coll.createIndex({a: 1}));
+
+// Look up using the secondary index on {a: 1}
+assert.eq(1, coll.find({a: null}).itcount());
+assert.eq(0, coll.find({a: 0}).itcount());
+assert.eq(2, coll.find({a: 1}).itcount());
+assert.eq(1, coll.find({a: 2}).itcount());
+assert.eq(1, coll.find({a: 8}).itcount());
+assert.eq(1, coll.find({a: 9}).itcount());
+assert.eq(null, coll.findOne({a: 9})['_id']);
+assert.eq(1, coll.find({a: 10}).itcount());
+assert.eq(100, coll.findOne({a: 10})['_id'].length);
+assert.eq(1, coll.find({a: 12}).itcount());
+assert.eq(3 * 1024 * 1024, coll.findOne({a: 12})['_id'].length);
// No support for numeric type differentiation.
assert.commandWorked(coll.insert({_id: 42.0}));
diff --git a/jstests/multiVersion/clustered_collection.js b/jstests/multiVersion/clustered_collection.js
new file mode 100644
index 00000000000..300cf68cc62
--- /dev/null
+++ b/jstests/multiVersion/clustered_collection.js
@@ -0,0 +1,61 @@
+/**
+ * Regression test validating that on 5.0, accessing documents in a clustered collection whose
+ * RecordId exceeds 127 bytes returns an error code.
+ */
+
+(function() {
+'use strict';
+
+const dbpath = MongoRunner.dataPath + 'clustered_collection';
+resetDbpath(dbpath);
+
+const defaultOptions = {
+ dbpath: dbpath,
+ noCleanData: true
+};
+
+const kCollName = 'system.buckets.clusteredColl';
+
+let mongodOptions5dot0 = Object.extend({binVersion: '5.0'}, defaultOptions);
+let mongodOptions5dot1 = Object.extend({binVersion: '5.1'}, defaultOptions);
+
+// Create a clustered collection in 5.1 and insert a document whose RecordId exceeds 127 bytes.
+// Then downgrade to 5.0 and validate that the RecordId cannot be decoded because it is too large.
+
+jsTestLog("Starting version: 5.1");
+let conn = MongoRunner.runMongod(mongodOptions5dot1);
+assert.neq(
+    null, conn, 'mongod was unable to start with version ' + tojson(mongodOptions5dot1));
+
+let db = conn.getDB('test');
+assert.commandWorked(db.createCollection(kCollName, {clusteredIndex: true}));
+assert.commandWorked(db[kCollName].createIndex({a: 1}));
+// 126 characters + kStringLike CType + NULL terminator == 128 bytes
+assert.commandWorked(db[kCollName].insertOne({_id: 'x'.repeat(126), a: 1}));
+assert.eq(1, db[kCollName].find({a: 1}).itcount());
+
+assert.commandWorked(db.adminCommand({setFeatureCompatibilityVersion: "5.0"}));
+MongoRunner.stopMongod(conn);
+
+jsTestLog("Starting version: 5.0");
+conn = MongoRunner.runMongod(mongodOptions5dot0);
+assert.neq(
+    null, conn, 'mongod was unable to start with version ' + tojson(mongodOptions5dot0));
+
+db = conn.getDB('test');
+assert.commandFailedWithCode(db.runCommand({find: kCollName, filter: {a: 1}}), 5577900);
+assert.commandWorked(db[kCollName].insertOne({_id: 'x'.repeat(12), a: 2}));
+
+let stopOptions = {skipValidation: true};
+MongoRunner.stopMongod(conn, 15, stopOptions);
+
+jsTestLog("Starting version: 5.1");
+conn = MongoRunner.runMongod(mongodOptions5dot1);
+assert.neq(
+    null, conn, 'mongod was unable to start with version ' + tojson(mongodOptions5dot1));
+
+db = conn.getDB('test');
+assert.eq(1, db[kCollName].find({a: 1}).itcount());
+assert.eq(1, db[kCollName].find({a: 2}).itcount());
+MongoRunner.stopMongod(conn);
+})();
diff --git a/src/mongo/bson/util/builder.h b/src/mongo/bson/util/builder.h
index e2a7fc8d74c..58a09f3cbc9 100644
--- a/src/mongo/bson/util/builder.h
+++ b/src/mongo/bson/util/builder.h
@@ -410,7 +410,7 @@ public:
}
/**
- * Reserve room for some number of bytes to be claimed at a later time.
+ * Reserve room for some number of bytes to be claimed at a later time via claimReservedBytes.
*/
void reserveBytes(size_t bytes) {
size_t minSize = l + reservedBytes + bytes;
diff --git a/src/mongo/db/storage/key_string.cpp b/src/mongo/db/storage/key_string.cpp
index 17c5a90c14d..eb69641f957 100644
--- a/src/mongo/db/storage/key_string.cpp
+++ b/src/mongo/db/storage/key_string.cpp
@@ -238,8 +238,6 @@ const uint8_t kEnd = 0x4;
const uint8_t kLess = 1;
const uint8_t kGreater = 254;
-// The maximum length of a RecordId binary string that may be appended to a KeyString.
-const int8_t kMaxRecordIdStrLen = 127;
} // namespace
// some utility functions
@@ -517,23 +515,45 @@ void BuilderBase<BufferT>::_appendRecordIdLong(int64_t val) {
template <class BufferT>
void BuilderBase<BufferT>::_appendRecordIdStr(const char* str, int size) {
- // This encoding for RecordId binary strings stores the size at the end. This means that a
- // RecordId may only be appended at the end of a KeyString. That is, it cannot be appended in
- // the middle of a KeyString and also be binary-comparable.
-
- // The current maximum string length is 127. The high bit is reserved for future usage.
- keyStringAssert(5994901,
- fmt::format("cannot generate key for RecordId longer than maximum of {} bytes",
- kMaxRecordIdStrLen),
- size <= kMaxRecordIdStrLen);
+ // Append the RecordId binary string as-is, then append the encoded binary string size.
+ // The binary string size is encoded in 7-bit increments over one or more size bytes.
+    // The 8th bit of a size byte is a continuation bit that is set on all size bytes except the
+    // leftmost one, i.e. the last size byte reached when decoding right-to-left. This allows
+    // decoding the size right-to-left until a size byte without the continuation bit is found.
+    // See decodeRecordIdStrAtEnd for the decoding algorithm. This 7-bit encoding keeps backward
+    // compatibility with 5.0, which supports RecordId binary strings up to 127 bytes (7 bits).
+
invariant(size > 0);
+ invariant(size <= RecordId::kBigStrMaxSize);
const bool invert = false;
- // String is encoded with a single byte for the size at the end.
+ // Encode size
+ uint8_t encodedSize[kRecordIdStrEncodedSizeMaxBytes] = {0};
+ int highestSizeByte = 0;
+ bool highestSizeByteSet = false;
+
+ for (int sizeBytes = kRecordIdStrEncodedSizeMaxBytes - 1; sizeBytes >= 0; sizeBytes--) {
+ encodedSize[sizeBytes] = (size >> (sizeBytes * 7)) & 0x7F;
+ if (encodedSize[sizeBytes] && highestSizeByteSet == false) {
+ highestSizeByteSet = true;
+ highestSizeByte = sizeBytes;
+ }
+ }
+ for (int i = highestSizeByte; i > 0; i--) {
+ encodedSize[i] |= 0x80;
+ }
+
+ const int encodedSizeLen = highestSizeByte + 1;
+
+ // Preallocate room for the RecordId binary string and its encoded size
+ // to reduce the number of potential reallocs
+ _buffer().reserveBytes(size + encodedSizeLen);
+ _buffer().claimReservedBytes(size + encodedSizeLen);
+
+ // Append RecordId and its encoded size
_appendBytes(str, size, invert);
- auto encodedSize = static_cast<uint8_t>(size);
- _append(encodedSize, invert);
+ _appendBytes(encodedSize, encodedSizeLen, invert);
}
template <class BufferT>
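For illustration, here is a minimal standalone sketch of the size encoding described in the comment above (a hypothetical helper, not part of this patch): the size is split into 7-bit groups, written least-significant group first, and every byte except the first one written carries the 0x80 continuation bit, so the rightmost size byte in the buffer holds the most-significant group.

#include <cassert>
#include <cstddef>
#include <cstdint>

// Hypothetical helper mirroring the scheme used by _appendRecordIdStr.
// Writes the 7-bit groups of `size` into `out`, least-significant group first,
// and returns the number of size bytes produced (at most 4).
inline int encodeRecordIdStrSize(size_t size, uint8_t out[4]) {
    assert(size > 0 && size < (size_t{1} << 28));
    int len = 0;
    do {
        out[len] = static_cast<uint8_t>(size & 0x7F);
        if (len > 0)
            out[len] |= 0x80;  // continuation bit on every byte except the leftmost
        size >>= 7;
        ++len;
    } while (size > 0);
    return len;
}

For example, a size of 127 produces the single byte 0x7F (the 5.0-compatible format), while 128 produces the two bytes 0x00 0x81; the size bytes are appended to the KeyString after the RecordId string itself.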
@@ -2506,19 +2526,34 @@ size_t sizeWithoutRecordIdLongAtEnd(const void* bufferRaw, size_t bufSize) {
}
size_t sizeWithoutRecordIdStrAtEnd(const void* bufferRaw, size_t bufSize) {
+ // See decodeRecordIdStrAtEnd for the size decoding algorithm
invariant(bufSize > 0);
const uint8_t* buffer = static_cast<const uint8_t*>(bufferRaw);
- // The current encoding for strings supports strings up to 128 bytes. The high bit is reserved
- // for future usage.
- uint8_t len = buffer[bufSize - 1];
- keyStringAssert(5566400,
- fmt::format("Cannot decode record id string longer than {} bytes; size is {}",
- kMaxRecordIdStrLen,
- len),
- len <= kMaxRecordIdStrLen);
- invariant(bufSize > static_cast<size_t>(len + 1));
- return bufSize - len - 1;
+ // Decode RecordId binary string size
+ size_t ridSize = 0;
+ uint8_t sizes[kRecordIdStrEncodedSizeMaxBytes] = {0};
+
+ // Continuation bytes
+ size_t sizeByteId = 0;
+ for (; buffer[bufSize - 1 - sizeByteId] & 0x80; sizeByteId++) {
+ invariant(bufSize >= sizeByteId + 1 /* non-cont bytes */);
+ invariant(sizeByteId < kRecordIdStrEncodedSizeMaxBytes);
+ sizes[sizeByteId] = buffer[bufSize - 1 - sizeByteId] & 0x7F;
+ }
+ // Last (non-continuation) byte
+ invariant(sizeByteId < kRecordIdStrEncodedSizeMaxBytes);
+ sizes[sizeByteId] = buffer[bufSize - 1 - sizeByteId];
+
+ const size_t numSegments = sizeByteId + 1;
+
+ for (; sizeByteId > 0; sizeByteId--) {
+ ridSize += sizes[sizeByteId] << ((numSegments - sizeByteId - 1) * 7);
+ }
+ ridSize += sizes[sizeByteId] << ((numSegments - sizeByteId - 1) * 7);
+
+ invariant(bufSize >= ridSize + numSegments);
+ return bufSize - ridSize - numSegments;
}
RecordId decodeRecordIdLong(BufReader* reader) {
@@ -2536,20 +2571,39 @@ RecordId decodeRecordIdLong(BufReader* reader) {
}
RecordId decodeRecordIdStrAtEnd(const void* bufferRaw, size_t bufSize) {
+ // See _appendRecordIdStr for the encoding scheme.
+ // The RecordId binary string size is decoded right-to-left, up to the size byte
+ // without continuation bit.
+
invariant(bufSize > 0);
const uint8_t* buffer = static_cast<const uint8_t*>(bufferRaw);
- // The current encoding for strings supports strings up to 128 bytes. The high bit is reserved
- // for future usage.
- uint8_t len = buffer[bufSize - 1];
- keyStringAssert(5577900,
- fmt::format("Cannot decode record id string longer than {} bytes; size is {}",
- kMaxRecordIdStrLen,
- len),
- len <= kMaxRecordIdStrLen);
- invariant(bufSize > len);
- const uint8_t* firstBytePtr = (buffer + bufSize - len - 1);
- return RecordId(reinterpret_cast<const char*>(firstBytePtr), len);
+ // Decode RecordId binary string size
+ size_t ridSize = 0;
+ uint8_t sizes[kRecordIdStrEncodedSizeMaxBytes] = {0};
+
+ // Continuation bytes
+ size_t sizeByteId = 0;
+ for (; buffer[bufSize - 1 - sizeByteId] & 0x80; sizeByteId++) {
+ invariant(bufSize >= sizeByteId + 1 /* non-cont byte */);
+ invariant(sizeByteId < kRecordIdStrEncodedSizeMaxBytes);
+ sizes[sizeByteId] = buffer[bufSize - 1 - sizeByteId] & 0x7F;
+ }
+ // Last (non-continuation) byte
+ invariant(sizeByteId < kRecordIdStrEncodedSizeMaxBytes);
+ sizes[sizeByteId] = buffer[bufSize - 1 - sizeByteId];
+
+ const size_t numSegments = sizeByteId + 1;
+
+ for (; sizeByteId > 0; sizeByteId--) {
+ ridSize += sizes[sizeByteId] << ((numSegments - sizeByteId - 1) * 7);
+ }
+ ridSize += sizes[sizeByteId] << ((numSegments - sizeByteId - 1) * 7);
+
+ invariant(bufSize >= ridSize + numSegments);
+
+ return RecordId(reinterpret_cast<const char*>(buffer) + (bufSize - ridSize - numSegments),
+ ridSize);
}
int compare(const char* leftBuf, const char* rightBuf, size_t leftSize, size_t rightSize) {
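A matching sketch of the right-to-left scan performed by sizeWithoutRecordIdStrAtEnd and decodeRecordIdStrAtEnd (again a hypothetical helper, shown only to illustrate the scheme): starting from the last byte of the buffer, each size byte contributes 7 bits, and the byte without the continuation bit marks the end of the scan.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <utility>

// Hypothetical helper: decodes the RecordId string size from the end of a
// KeyString buffer. Returns {ridSize, numSizeBytes}; the RecordId string
// occupies the ridSize bytes immediately preceding the size bytes.
inline std::pair<size_t, size_t> decodeRecordIdStrSizeAtEnd(const uint8_t* buf, size_t bufSize) {
    size_t ridSize = 0;
    size_t numSizeBytes = 0;
    while (true) {
        assert(numSizeBytes < bufSize);
        const uint8_t b = buf[bufSize - 1 - numSizeBytes];
        // Scanning right-to-left visits the most-significant 7-bit group first.
        ridSize = (ridSize << 7) | (b & 0x7F);
        ++numSizeBytes;
        if (!(b & 0x80))
            break;  // the leftmost size byte has no continuation bit
    }
    assert(bufSize >= ridSize + numSizeBytes);
    return {ridSize, numSizeBytes};
}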
diff --git a/src/mongo/db/storage/key_string.h b/src/mongo/db/storage/key_string.h
index 2484ac83b1d..87a6b74c742 100644
--- a/src/mongo/db/storage/key_string.h
+++ b/src/mongo/db/storage/key_string.h
@@ -62,6 +62,12 @@ static StringData keyStringVersionToString(Version version) {
static const Ordering ALL_ASCENDING = Ordering::make(BSONObj());
+// Encode the size of a RecordId binary string using up to 4 bytes, 7 bits per byte.
+// This supports encoding sizes that fit into 28 bits, which comfortably covers the
+// maximum BSON size.
+static const int kRecordIdStrEncodedSizeMaxBytes = 4;
+MONGO_STATIC_ASSERT(RecordId::kBigStrMaxSize < 1 << (7 * kRecordIdStrEncodedSizeMaxBytes));
+
/**
* Encodes info needed to restore the original BSONTypes from a KeyString. They cannot be
* stored in place since we don't want them to affect the ordering (1 and 1.0 compare as
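As a quick sanity check on that bound (illustrative arithmetic only, not part of the patch): four size bytes of 7 bits each can represent sizes up to 2^28 - 1 = 268,435,455 bytes, well above the 16 MB maximum BSON document size.

#include <cstddef>

// Illustrative only: the largest size representable with four 7-bit size bytes
// comfortably exceeds the 16 MB BSON document limit.
constexpr size_t kMaxEncodableRecordIdStrSize = (size_t{1} << (7 * 4)) - 1;  // 268435455
constexpr size_t kMaxBSONUserSize = 16 * 1024 * 1024;                        // 16 MB
static_assert(kMaxEncodableRecordIdStrSize > kMaxBSONUserSize,
              "4 size bytes cover the maximum BSON size");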
diff --git a/src/mongo/db/storage/key_string_bm.cpp b/src/mongo/db/storage/key_string_bm.cpp
index e03e48222e0..263a6030ec5 100644
--- a/src/mongo/db/storage/key_string_bm.cpp
+++ b/src/mongo/db/storage/key_string_bm.cpp
@@ -207,6 +207,26 @@ void BM_KeyStringStackBuilderCopy(benchmark::State& state, BsonValueType bsonTyp
state.SetItemsProcessed(state.iterations() * kSampleSize);
}
+void BM_KeyStringRecordIdStrAppend(benchmark::State& state, const size_t size) {
+ const auto buf = std::string(size, 'a');
+ auto rid = RecordId(buf.c_str(), size);
+ for (auto _ : state) {
+ benchmark::ClobberMemory();
+ benchmark::DoNotOptimize(KeyString::Builder(KeyString::Version::V1, rid));
+ }
+}
+
+void BM_KeyStringRecordIdStrDecode(benchmark::State& state, const size_t size) {
+ const auto buf = std::string(size, 'a');
+ KeyString::Builder ks(KeyString::Version::V1, RecordId(buf.c_str(), size));
+ auto ksBuf = ks.getBuffer();
+ auto ksSize = ks.getSize();
+ for (auto _ : state) {
+ benchmark::ClobberMemory();
+ benchmark::DoNotOptimize(KeyString::decodeRecordIdStrAtEnd(ksBuf, ksSize));
+ }
+}
+
BENCHMARK_CAPTURE(BM_KeyStringValueAssign, Int, INT);
BENCHMARK_CAPTURE(BM_KeyStringValueAssign, Double, DOUBLE);
BENCHMARK_CAPTURE(BM_KeyStringValueAssign, Decimal, DECIMAL);
@@ -245,5 +265,14 @@ BENCHMARK_CAPTURE(BM_KeyStringToBSON, V1_String, KeyString::Version::V1, STRING)
BENCHMARK_CAPTURE(BM_KeyStringToBSON, V0_Array, KeyString::Version::V0, ARRAY);
BENCHMARK_CAPTURE(BM_KeyStringToBSON, V1_Array, KeyString::Version::V1, ARRAY);
+BENCHMARK_CAPTURE(BM_KeyStringRecordIdStrAppend, 16B, 16);
+BENCHMARK_CAPTURE(BM_KeyStringRecordIdStrAppend, 512B, 512);
+BENCHMARK_CAPTURE(BM_KeyStringRecordIdStrAppend, 1kB, 1024);
+BENCHMARK_CAPTURE(BM_KeyStringRecordIdStrAppend, 1MB, 1024 * 1024);
+BENCHMARK_CAPTURE(BM_KeyStringRecordIdStrDecode, 16B, 16);
+BENCHMARK_CAPTURE(BM_KeyStringRecordIdStrDecode, 512B, 512);
+BENCHMARK_CAPTURE(BM_KeyStringRecordIdStrDecode, 1kB, 1024);
+BENCHMARK_CAPTURE(BM_KeyStringRecordIdStrDecode, 1MB, 1024 * 1024);
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/storage/key_string_test.cpp b/src/mongo/db/storage/key_string_test.cpp
index 9567fa9e128..fc58aadbaf3 100644
--- a/src/mongo/db/storage/key_string_test.cpp
+++ b/src/mongo/db/storage/key_string_test.cpp
@@ -1341,6 +1341,151 @@ TEST_F(KeyStringBuilderTest, RecordIdStr) {
}
}
+namespace {
+
+RecordId ridFromStr(const char* str, size_t len) {
+ KeyString::Builder builder(KeyString::Version::kLatestVersion);
+ builder.appendString(mongo::StringData(str, len));
+ return RecordId(builder.getBuffer(), builder.getSize());
+}
+} // namespace
+
+
+TEST_F(KeyStringBuilderTest, RecordIdStrBig1SizeSegment) {
+ const int pad = 3; // kStringLike CType + StringData terminator + RecordId len
+ {
+ const int size = 90;
+ const auto ridStr = std::string(size, 'a');
+ auto rid = ridFromStr(ridStr.c_str(), size);
+ const KeyString::Builder ks(version, rid);
+ ASSERT_EQ(ks.getSize(), size + pad);
+ ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid);
+ ASSERT_EQ(0, KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()));
+ }
+ {
+ // Max 1-byte encoded string size is 127B: 1B CType + ridStr + string terminator
+ const int size = 125;
+ const auto ridStr = std::string(size, 'a');
+ auto rid = ridFromStr(ridStr.c_str(), size);
+ const KeyString::Builder ks(version, rid);
+ ASSERT_EQ(ks.getSize(), size + pad);
+ ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid);
+ ASSERT_EQ(0, KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()));
+ }
+}
+
+TEST_F(KeyStringBuilderTest, RecordIdStrBig2SizeSegments) {
+ const int pad = 3; // kStringLike CType + StringData terminator + RecordId len
+ {
+ // Min 2-byte encoded string size is 128B: 1B CType + ridStr + string terminator
+ const int size = 126;
+ const auto ridStr = std::string(size, 'a');
+ auto rid = ridFromStr(ridStr.c_str(), size);
+ const KeyString::Builder ks(version, rid);
+ ASSERT_EQ(ks.getSize(), size + pad + 1); // 1 byte with continuation bit
+ ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid);
+ ASSERT_EQ(0, KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()));
+ }
+ {
+ const int size = 128;
+ const auto ridStr = std::string(size, 'a');
+ auto rid = ridFromStr(ridStr.c_str(), size);
+ const KeyString::Builder ks(version, rid);
+ ASSERT_EQ(ks.getSize(), size + pad + 1); // 1 byte with continuation bit
+ ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid);
+ ASSERT_EQ(0, KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()));
+ }
+ {
+ // Max 2-byte encoded string size is 16383B: 1B CType + ridStr + string terminator
+ const int size = 16381;
+ const auto ridStr = std::string(size, 'a');
+ auto rid = ridFromStr(ridStr.c_str(), size);
+ const KeyString::Builder ks(version, rid);
+ ASSERT_EQ(ks.getSize(), size + pad + 1); // 1 byte with continuation bit
+ ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid);
+ ASSERT_EQ(0, KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()));
+ }
+}
+
+TEST_F(KeyStringBuilderTest, RecordIdStrBig3SizeSegments) {
+ const int pad = 3; // kStringLike CType + StringData terminator + RecordId len
+ {
+ // Min 3-byte encoded string size is 16384B: 1B CType + ridStr + string terminator
+ const int size = 16382;
+ const auto ridStr = std::string(size, 'a');
+ auto rid = ridFromStr(ridStr.c_str(), size);
+ const KeyString::Builder ks(version, rid);
+ ASSERT_EQ(ks.getSize(), size + pad + 2); // 2 bytes with continuation bit
+ ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid);
+ ASSERT_EQ(0, KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()));
+ }
+ {
+ // Max 3-byte encoded string size is 2097151B: 1B CType + ridStr + string terminator
+ const int size = 2097149;
+ const auto ridStr = std::string(size, 'a');
+ auto rid = ridFromStr(ridStr.c_str(), size);
+ const KeyString::Builder ks(version, rid);
+ ASSERT_EQ(ks.getSize(), size + pad + 2); // 2 bytes with continuation bit
+ ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid);
+ ASSERT_EQ(0, KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()));
+ }
+}
+
+TEST_F(KeyStringBuilderTest, RecordIdStrBig4SizeSegments) {
+ const int pad = 3; // kStringLike CType + StringData terminator + RecordId len
+ {
+ // Min 4-byte encoded string size is 2097152B: 1B CType + ridStr + string terminator
+ const int size = 2097150;
+ const auto ridStr = std::string(size, 'a');
+ auto rid = ridFromStr(ridStr.c_str(), size);
+ const KeyString::Builder ks(version, rid);
+ ASSERT_EQ(ks.getSize(), size + pad + 3); // 3 bytes with continuation bit
+ ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid);
+ ASSERT_EQ(0, KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()));
+ }
+ {
+ // Support up to RecordId::kBigStrMaxSize
+ const int size = RecordId::kBigStrMaxSize - 2 /* CType + string terminator */;
+ const auto ridStr = std::string(size, 'a');
+ auto rid = ridFromStr(ridStr.c_str(), size);
+ const KeyString::Builder ks(version, rid);
+ ASSERT_EQ(ks.getSize(), size + pad + 3); // 3 bytes with continuation bit
+ ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid);
+ ASSERT_EQ(0, KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()));
+ }
+}
+
+TEST_F(KeyStringBuilderTest, RecordIdStrBigSizeWithoutRecordIdStr) {
+ const int pad = 3; // kStringLike CType + StringData terminator + RecordId len
+ const char str[] = "keyval";
+ const int padStr = 3; // kStringLike CType + string terminator + discriminator
+ {
+ const int ridStrlen = 90;
+ const auto ridStr = std::string(ridStrlen, 'a');
+ auto rid = ridFromStr(ridStr.c_str(), ridStrlen);
+ KeyString::Builder ks(version);
+ ks.appendString(mongo::StringData(str, strlen(str)));
+ ks.appendRecordId(rid);
+ ASSERT_EQ(ks.getSize(), strlen(str) + padStr + ridStrlen + pad);
+ ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid);
+ ASSERT_EQ(strlen(str) + padStr,
+ KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()));
+ }
+ {
+ const int ridStrlen = 260;
+ const auto ridStr = std::string(ridStrlen, 'a');
+ auto rid = ridFromStr(ridStr.c_str(), ridStrlen);
+ KeyString::Builder ks(version);
+ ks.appendString(mongo::StringData(str, strlen(str)));
+ ks.appendRecordId(rid);
+ ASSERT_EQ(ks.getSize(),
+                  strlen(str) + padStr + ridStrlen + pad + 1); // 1 byte with continuation bit
+ ASSERT_EQ(KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()), rid);
+ ASSERT_EQ(strlen(str) + padStr,
+ KeyString::sizeWithoutRecordIdStrAtEnd(ks.getBuffer(), ks.getSize()));
+ }
+}
+
TEST_F(KeyStringBuilderTest, AllPermCompare) {
std::vector<BSONObj> elements = getInterestingElements(version);