diff options
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/db/storage/key_string.cpp | 84 | ||||
-rw-r--r-- | src/mongo/db/storage/key_string.h | 110 | ||||
-rw-r--r-- | src/mongo/db/storage/key_string_test.cpp | 204 |
3 files changed, 312 insertions, 86 deletions
diff --git a/src/mongo/db/storage/key_string.cpp b/src/mongo/db/storage/key_string.cpp index 085d439e5e5..dacf96a2564 100644 --- a/src/mongo/db/storage/key_string.cpp +++ b/src/mongo/db/storage/key_string.cpp @@ -37,6 +37,7 @@ #include <cmath> #include <type_traits> +#include "mongo/base/data_cursor.h" #include "mongo/base/data_view.h" #include "mongo/platform/bits.h" #include "mongo/platform/strnlen.h" @@ -2135,35 +2136,62 @@ int KeyString::compare(const KeyString& other) const { return a < b ? -1 : 1; } -void KeyString::TypeBits::resetFromBuffer(BufReader* reader) { - if (!reader->remaining()) { - // This means AllZeros state was encoded as an empty buffer. - reset(); - return; +uint32_t KeyString::TypeBits::readSizeFromBuffer(BufReader* reader) { + const uint8_t firstByte = reader->peek<uint8_t>(); + + // Case 2: all bits in one byte; no size byte. + if (firstByte > 0 && firstByte < 0x80) { + return 1; } - const uint8_t firstByte = readType<uint8_t>(reader, false); - if (firstByte & 0x80) { - // firstByte is the size byte. - _isAllZeros = false; // it wouldn't be encoded like this if it was. + // Skip the indicator byte. + reader->skip(1); - _buf[0] = firstByte; - const uint8_t remainingBytes = getSizeByte(); - memcpy(_buf + 1, reader->skip(remainingBytes), remainingBytes); - return; + // Case 3: <= 127 bytes; use one size byte. + if (firstByte > 0x80) { + return firstByte & 0x7f; } - // In remaining cases, firstByte is the only byte. + // Case 4: > 127 bytes; needs 4 size bytes. + if (firstByte == 0x80) { + // The next 4 bytes represent the size in little endian order. + uint32_t s = reader->read<LittleEndian<uint32_t>>(); + uassert(50910, "Invalid overlong encoding.", s > kMaxBytesForShortEncoding); + return s; + } - if (firstByte == 0) { - // This means AllZeros state was encoded as a single 0 byte. - reset(); - return; + // Case 1: all zeros. + dassert(firstByte == 0); + return 0; +} + +void KeyString::TypeBits::setRawSize(uint32_t size) { + // Grow the data buffer if needed. + if (size > getDataBufferLen()) { + _buf.grow(size - getDataBufferLen()); } - _isAllZeros = false; - setSizeByte(1); - _buf[1] = firstByte; + if (size > kMaxBytesForShortEncoding) { + DataCursor(_buf.buf()) + .writeAndAdvance<uint8_t>(0x80) + .writeAndAdvance<LittleEndian<uint32_t>>(size); + } else { + DataView(getDataBuffer() - 1).write<uint8_t>(0x80 | size); + } +} + +void KeyString::TypeBits::resetFromBuffer(BufReader* reader) { + reset(); + + if (!reader->remaining()) + // This means AllZeros state was encoded as an empty buffer. + return; + + uint32_t size = readSizeFromBuffer(reader); + if (size > 0) + _isAllZeros = false; + setRawSize(size); + memcpy(getDataBuffer(), reader->skip(size), size); } void KeyString::TypeBits::appendBit(uint8_t oneOrZero) { @@ -2172,13 +2200,13 @@ void KeyString::TypeBits::appendBit(uint8_t oneOrZero) { if (oneOrZero == 1) _isAllZeros = false; - const uint8_t byte = (_curBit / 8) + 1; + const uint32_t byte = _curBit / 8; const uint8_t offsetInByte = _curBit % 8; if (offsetInByte == 0) { - setSizeByte(byte); - _buf[byte] = oneOrZero; // zeros bits 1-7 + setRawSize(byte + 1); + getDataBuffer()[byte] = oneOrZero; // zeros bits 1-7 } else { - _buf[byte] |= (oneOrZero << offsetInByte); + getDataBuffer()[byte] |= (oneOrZero << offsetInByte); } _curBit++; @@ -2234,13 +2262,13 @@ uint8_t KeyString::TypeBits::Reader::readBit() { if (_typeBits._isAllZeros) return 0; - const uint8_t byte = (_curBit / 8) + 1; + const uint32_t byte = _curBit / 8; const uint8_t offsetInByte = _curBit % 8; _curBit++; - uassert(50615, "Invalid size byte.", byte <= _typeBits.getSizeByte()); + uassert(50615, "Invalid size byte(s).", byte < _typeBits.getDataBufferLen()); - return (_typeBits._buf[byte] & (1 << offsetInByte)) ? 1 : 0; + return (_typeBits.getDataBuffer()[byte] & (1 << offsetInByte)) ? 1 : 0; } uint8_t KeyString::TypeBits::Reader::readZero() { diff --git a/src/mongo/db/storage/key_string.h b/src/mongo/db/storage/key_string.h index 31be4a833c6..7c30dcffb28 100644 --- a/src/mongo/db/storage/key_string.h +++ b/src/mongo/db/storage/key_string.h @@ -66,17 +66,11 @@ public: */ class TypeBits { public: - // Sufficient bytes to encode extra type information for any BSON key that fits in 1KB. - // The encoding format will need to change if we raise this limit. - static const uint8_t kMaxBytesNeeded = 127; + // See comments in getBuffer() about short/long encoding schemes. + static const uint8_t kMaxBytesForShortEncoding = 127; + static const uint8_t kPrefixBytes = 5; + // TODO SERVER-36385: Remove this 1KB limit. static const uint32_t kMaxKeyBytes = 1024; - static const uint32_t kMaxTypeBitsPerDecimal = 17; - static const uint32_t kBytesForTypeAndEmptyKey = 2; - static const uint32_t kMaxDecimalsPerKey = - kMaxKeyBytes / (sizeof(Decimal128::Value) + kBytesForTypeAndEmptyKey); - MONGO_STATIC_ASSERT_MSG( - kMaxTypeBitsPerDecimal* kMaxDecimalsPerKey < kMaxBytesNeeded * 8UL, - "encoding needs change to contain all type bits for worst case key"); static const uint8_t kStoredDecimalExponentBits = 6; static const uint32_t kStoredDecimalExponentMask = (1U << kStoredDecimalExponentBits) - 1; @@ -84,6 +78,15 @@ public: reset(); } + TypeBits(const TypeBits& tb) + : version(tb.version), _curBit(tb._curBit), _isAllZeros(tb._isAllZeros) { + _buf.reset(); + _buf.appendBuf(tb._buf.buf(), tb._buf.len()); + } + TypeBits& operator=(const TypeBits& tb) = delete; + TypeBits(TypeBits&&) = default; + TypeBits& operator=(TypeBits&&) = delete; + /** * If there are no bytes remaining, assumes AllZeros. Otherwise, reads bytes out of the * BufReader in the format described on the getBuffer() method. @@ -107,40 +110,59 @@ public: * instance. * * Encoded format: - * Case 1 (first byte has high bit set to 1): - * Remaining bits of first byte encode number of follow-up bytes that are data - * bytes. Note that _buf is always maintained in this format but these methods may - * return one of the other formats, if possible, by skipping over the first byte. - * - * Case 2 (first byte is 0x0): + * Case 1 (first byte is 0x0): * This encodes the "AllZeros" state which represents an infinite stream of bits set * to 0. Callers may optionally encode this case as an empty buffer if they have * another way to mark the end of the buffer. There are no follow-up bytes. * - * Case 3 (first byte isn't 0x0 but has high bit set to 0): + * Case 2 (first byte isn't 0x0 but has high bit set to 0): * The first byte is the only data byte. This can represent any 7-bit sequence or an * 8-bit sequence if the 8th bit is 0, since the 8th bit is the same as the bit that * is 1 if the first byte is the size byte. There are no follow-up bytes. * + * Case 3 (first byte has high bit set to 1 but it's not 0x80): + * Remaining bits of first byte encode number of follow-up bytes that are data + * bytes. + * + * Case 4 (first byte is 0x80) + * The first byte is the signal byte indicating that this TypeBits is encoded with long + * encoding scheme: the next four bytes (in little endian order) represent the number of + * data bytes. + * * Within data bytes (ie everything excluding the size byte if there is one), bits are * packed in from low to high. */ - const uint8_t* getBuffer() const { - return getSize() == 1 ? _buf + 1 : _buf; + const char* getBuffer() const { + if (_isAllZeros) + return ""; // Case 1: pointer to a zero byte. + + if (getSize() == 1) + return getDataBuffer(); // Case 2: all bits in one byte; no size byte. + + // Case 3 & 4: size byte(s) + data bytes. + return isLongEncoding() ? _buf.buf() : (getDataBuffer() - 1); } size_t getSize() const { - if (_isAllZeros) { // Case 2 - dassert(_buf[1] == 0); + if (_isAllZeros) { // Case 1 + dassert(getDataBufferLen() == 0 || getDataBuffer()[0] == 0); return 1; } - uint8_t rawSize = getSizeByte(); - dassert(rawSize >= 1); // 0 should be handled as isAllZeros. - if (rawSize == 1 && !(_buf[1] & 0x80)) { // Case 3 + uint32_t rawSize = getDataBufferLen(); + dassert(rawSize >= 1); // 0 should be handled as isAllZeros. + if (rawSize > kMaxBytesForShortEncoding) { // Case 4 + return rawSize + kPrefixBytes; + } + if (rawSize == 1 && !(getDataBuffer()[0] & 0x80)) { // Case 2 return 1; } - return rawSize + 1; // Case 1 + return rawSize + 1; // Case 3 + } + + bool isLongEncoding() const { + // TypeBits with all zeros is in short encoding regardless of the data buffer length. + return !_isAllZeros && getDataBufferLen() > kMaxBytesForShortEncoding; } // @@ -176,8 +198,7 @@ public: void reset() { _curBit = 0; _isAllZeros = true; - setSizeByte(0); - _buf[1] = 0; + _buf.setlen(kPrefixBytes); } void appendString() { @@ -240,29 +261,32 @@ public: const Version version; private: - /** - * size only includes data bytes, not the size byte itself. - */ - uint8_t getSizeByte() const { - return _buf[0] & 0x7f; + static uint32_t readSizeFromBuffer(BufReader* reader); + + void setRawSize(uint32_t size); + + const char* getDataBuffer() const { + return _buf.buf() + kPrefixBytes; + } + char* getDataBuffer() { + return _buf.buf() + kPrefixBytes; } - void setSizeByte(uint8_t size) { - // This error can only occur in cases where the key is not only too long, but also - // has too many fields requiring type bits. - uassert(ErrorCodes::KeyTooLong, "The key is too long", size < kMaxBytesNeeded); - _buf[0] = 0x80 | size; + uint32_t getDataBufferLen() const { + return _buf.len() - kPrefixBytes; } void appendBit(uint8_t oneOrZero); size_t _curBit; bool _isAllZeros; - - // See getBuffer()/getSize() documentation for a description of how data is encoded. - // Currently whole buffer is copied in default copy methods. If they ever show up as hot - // in profiling, we should add copy operations that only copy the parts of _buf that are - // in use. - uint8_t _buf[1 /*size*/ + kMaxBytesNeeded]; + /** + * See getBuffer()/getSize() documentation for a description of how data is encoded. When + * the TypeBits size is in short encoding range(<=127), the bytes starting from the fifth + * byte are the complete TypeBits in short encoding scheme (1 size byte + data bytes). When + * the TypeBits size is in long encoding range(>127), all the bytes are used for the long + * encoding format (first byte + 4 size bytes + data bytes). + */ + StackBufBuilder _buf; }; enum Discriminator { diff --git a/src/mongo/db/storage/key_string_test.cpp b/src/mongo/db/storage/key_string_test.cpp index 9e500879ba5..423f72ea6f0 100644 --- a/src/mongo/db/storage/key_string_test.cpp +++ b/src/mongo/db/storage/key_string_test.cpp @@ -96,6 +96,112 @@ protected: KeyString::Version version; }; +template <typename T> +void checkSizeWhileAppendingTypeBits(int numOfBitsUsedForType, T&& appendBitsFunc) { + KeyString::TypeBits typeBits(KeyString::Version::V1); + const int kItems = 10000; // Pick an arbitrary large number. + for (int i = 0; i < kItems; i++) { + appendBitsFunc(typeBits); + size_t currentRawSize = ((i + 1) * numOfBitsUsedForType - 1) / 8 + 1; + size_t currentSize = currentRawSize; + if (currentRawSize > KeyString::TypeBits::kMaxBytesForShortEncoding) { + // Case 4: plus 1 signal byte + 4 size bytes. + currentSize += 5; + ASSERT(typeBits.isLongEncoding()); + } else { + ASSERT(!typeBits.isLongEncoding()); + if (currentRawSize == 1 && !(typeBits.getBuffer()[0] & 0x80)) { // Case 2 + currentSize = 1; + } else { + // Case 3: plus 1 size byte. + currentSize += 1; + } + } + ASSERT_EQ(typeBits.getSize(), currentSize); + } +} + +TEST(TypeBitsTest, AppendSymbol) { + checkSizeWhileAppendingTypeBits( + 1, [](KeyString::TypeBits& typeBits) -> void { typeBits.appendSymbol(); }); +} +TEST(TypeBitsTest, AppendString) { + // The typeBits should be all zeros, so numOfBitsUsedForType is set to 0 for + // passing the test although it technically uses 1 bit. + checkSizeWhileAppendingTypeBits( + 0, [](KeyString::TypeBits& typeBits) -> void { typeBits.appendString(); }); +} +TEST(typebitstest, appendDouble) { + checkSizeWhileAppendingTypeBits( + 2, [](KeyString::TypeBits& typeBits) -> void { typeBits.appendNumberDouble(); }); +} +TEST(TypeBitsTest, AppendNumberLong) { + checkSizeWhileAppendingTypeBits( + 2, [](KeyString::TypeBits& typeBits) -> void { typeBits.appendNumberLong(); }); +} +TEST(TypeBitsTest, AppendNumberInt) { + // The typeBits should be all zeros, so numOfBitsUsedForType is set to 0 for + // passing the test although it technically uses 2 bits. + checkSizeWhileAppendingTypeBits( + 0, [](KeyString::TypeBits& typeBits) -> void { typeBits.appendNumberInt(); }); +} +TEST(TypeBitsTest, AppendNumberDecimal) { + checkSizeWhileAppendingTypeBits( + 2, [](KeyString::TypeBits& typeBits) -> void { typeBits.appendNumberDecimal(); }); +} +TEST(TypeBitsTest, AppendLongZero) { + checkSizeWhileAppendingTypeBits(2, [](KeyString::TypeBits& typeBits) -> void { + typeBits.appendZero(KeyString::TypeBits::kLong); + }); +} +TEST(TypeBitsTest, AppendDecimalZero) { + checkSizeWhileAppendingTypeBits(12 + 5, [](KeyString::TypeBits& typeBits) -> void { + typeBits.appendDecimalZero(KeyString::TypeBits::kDecimalZero1xxx); + }); +} +TEST(TypeBitsTest, AppendDecimalExponent) { + checkSizeWhileAppendingTypeBits( + KeyString::TypeBits::kStoredDecimalExponentBits, + [](KeyString::TypeBits& typeBits) -> void { typeBits.appendDecimalExponent(1); }); +} + +TEST(TypeBitsTest, UninitializedTypeBits) { + KeyString::TypeBits typeBits(KeyString::Version::V1); + ASSERT_EQ(typeBits.getSize(), 1u); + ASSERT_EQ(typeBits.getBuffer()[0], 0); + ASSERT(typeBits.isAllZeros()); +} + +TEST(TypeBitsTest, AllZerosTypeBits) { + { + std::string emptyBuffer = ""; + BufReader reader(emptyBuffer.c_str(), 0); + KeyString::TypeBits typeBits = + KeyString::TypeBits::fromBuffer(KeyString::Version::V1, &reader); + ASSERT_EQ(typeBits.getSize(), 1u); + ASSERT_EQ(typeBits.getBuffer()[0], 0); + ASSERT(typeBits.isAllZeros()); + } + + { + std::string allZerosBuffer = "0000000000000000"; + BufReader reader(allZerosBuffer.c_str(), 0); + KeyString::TypeBits typeBits = + KeyString::TypeBits::fromBuffer(KeyString::Version::V1, &reader); + ASSERT_EQ(typeBits.getSize(), 1u); + ASSERT_EQ(typeBits.getBuffer()[0], 0); + ASSERT(typeBits.isAllZeros()); + } +} + +TEST(TypeBitsTest, AppendLotsOfZeroTypeBits) { + KeyString::TypeBits typeBits(KeyString::Version::V1); + for (int i = 0; i < 100000; i++) { + typeBits.appendString(); + } + // TypeBits should still be in short encoding format. + ASSERT(!typeBits.isLongEncoding()); +} TEST_F(KeyStringTest, Simple1) { BSONObj a = BSON("" << 5); @@ -1171,21 +1277,80 @@ TEST_F(KeyStringTest, RecordIds) { } } -TEST_F(KeyStringTest, KeyWithTooManyTypeBitsCausesUassert) { +TEST_F(KeyStringTest, KeyWithLotsOfTypeBits) { BSONObj obj; { BSONObjBuilder builder; { + int kArrSize = 54321; BSONArrayBuilder array(builder.subarrayStart("x")); auto zero = BSON("" << 0.0); - for (int i = 0; i < 1016; i++) + for (int i = 0; i < kArrSize; i++) array.append(zero.firstElement()); } - obj = builder.obj(); + obj = BSON("" << builder.obj()); + } + ROUNDTRIP(version, obj); +} + +BSONObj buildKeyWhichWillHaveNByteOfTypeBits(size_t n, bool allZeros) { + int numItems = n * 8 / 2 /* kInt/kDouble needs two bits */; + + BSONObj obj; + BSONArrayBuilder array; + for (int i = 0; i < numItems; i++) + if (allZeros) + array.append(123); /* kInt uses 00 */ + else + array.append(1.2); /* kDouble uses 10 */ + + obj = BSON("" << array.arr()); + return obj; +} + +void checkKeyWithNByteOfTypeBits(KeyString::Version version, size_t n, bool allZeros) { + const BSONObj orig = buildKeyWhichWillHaveNByteOfTypeBits(n, allZeros); + const KeyString ks(version, orig, ALL_ASCENDING); + const size_t typeBitsSize = ks.getTypeBits().getSize(); + if (n == 1 || allZeros) { + // Case 1&2 + // Case 2: Since we use kDouble, TypeBits="01010101" when n=1. The size + // is thus 1. + ASSERT_EQ(1u, typeBitsSize); + } else if (n <= 127) { + // Case 3 + ASSERT_EQ(n + 1, typeBitsSize); + } else { + // Case 4 + ASSERT_EQ(n + 5, typeBitsSize); } - KeyString key(version); - ASSERT_THROWS_CODE(key.resetToKey(obj, ONE_ASCENDING), DBException, ErrorCodes::KeyTooLong); + const BSONObj converted = toBsonAndCheckKeySize(ks, ALL_ASCENDING); + ASSERT_BSONOBJ_EQ(converted, orig); + ASSERT(converted.binaryEqual(orig)); + + // Also test TypeBits::fromBuffer() + BufReader bufReader(ks.getTypeBits().getBuffer(), typeBitsSize); + KeyString::TypeBits newTypeBits = KeyString::TypeBits::fromBuffer(version, &bufReader); + ASSERT_EQ(toHex(newTypeBits.getBuffer(), newTypeBits.getSize()), + toHex(ks.getTypeBits().getBuffer(), ks.getTypeBits().getSize())); +} + +TEST_F(KeyStringTest, KeysWithNBytesTypeBits) { + checkKeyWithNByteOfTypeBits(version, 0, false); + checkKeyWithNByteOfTypeBits(version, 1, false); + checkKeyWithNByteOfTypeBits(version, 1, true); + checkKeyWithNByteOfTypeBits(version, 127, false); + checkKeyWithNByteOfTypeBits(version, 127, true); + checkKeyWithNByteOfTypeBits(version, 128, false); + checkKeyWithNByteOfTypeBits(version, 128, true); + checkKeyWithNByteOfTypeBits(version, 129, false); + checkKeyWithNByteOfTypeBits(version, 129, true); +} + +TEST_F(KeyStringTest, VeryLargeString) { + BSONObj obj = BSON("" << std::string(123456, 'x')); + ROUNDTRIP(version, obj); } TEST_F(KeyStringTest, ToBsonSafeShouldNotTerminate) { @@ -1266,29 +1431,37 @@ TEST_F(KeyStringTest, InvalidDecimalContinuation) { TEST_F(KeyStringTest, RandomizedInputsForToBsonSafe) { std::mt19937 gen(newSeed()); - std::uniform_int_distribution<> randomByte(std::numeric_limits<unsigned char>::min(), - std::numeric_limits<unsigned char>::max()); + std::uniform_int_distribution<uint32_t> randomNum(std::numeric_limits<uint32_t>::min(), + std::numeric_limits<uint32_t>::max()); + std::uniform_int_distribution<uint8_t> randomByte(std::numeric_limits<uint8_t>::min(), + std::numeric_limits<uint8_t>::max()); const auto interestingElements = getInterestingElements(KeyString::Version::V1); for (auto elem : interestingElements) { const KeyString ks(KeyString::Version::V1, elem, ALL_ASCENDING); - char* ksBuffer = (char*)ks.getBuffer(); - char* typeBits = (char*)ks.getTypeBits().getBuffer(); + auto ksBuffer = SharedBuffer::allocate(ks.getSize()); + memcpy(ksBuffer.get(), ks.getBuffer(), ks.getSize()); + auto tbBuffer = SharedBuffer::allocate(ks.getTypeBits().getSize()); + memcpy(tbBuffer.get(), ks.getTypeBits().getBuffer(), ks.getTypeBits().getSize()); // Select a random byte to change, except for the first byte as it will likely become an // invalid CType and not test anything interesting. - auto offset = randomByte(gen); + auto offset = randomNum(gen) % (ks.getSize() - 1); auto newValue = randomByte(gen); - ksBuffer[offset % ks.getSize() + 1] = newValue; + ksBuffer.get()[offset + 1] = newValue; // Ditto for the type bits buffer. - offset = randomByte(gen); + offset = randomNum(gen) % ks.getTypeBits().getSize(); newValue = randomByte(gen); - typeBits[offset % ks.getTypeBits().getSize() + 1] = newValue; + tbBuffer.get()[offset] = newValue; + + // Build the new TypeBits. + BufReader reader(tbBuffer.get(), ks.getTypeBits().getSize()); try { - KeyString::toBsonSafe(ksBuffer, ks.getSize(), ALL_ASCENDING, ks.getTypeBits()); + auto newTypeBits = KeyString::TypeBits::fromBuffer(KeyString::Version::V1, &reader); + KeyString::toBsonSafe(ksBuffer.get(), ks.getSize(), ALL_ASCENDING, newTypeBits); } catch (const AssertionException&) { // The expectation is that the randomized buffer is likely an invalid KeyString, // however attempting to decode it should fail gracefully. @@ -1296,7 +1469,8 @@ TEST_F(KeyStringTest, RandomizedInputsForToBsonSafe) { // Retest with descending. try { - KeyString::toBsonSafe(ksBuffer, ks.getSize(), ONE_DESCENDING, ks.getTypeBits()); + auto newTypeBits = KeyString::TypeBits::fromBuffer(KeyString::Version::V1, &reader); + KeyString::toBsonSafe(ksBuffer.get(), ks.getSize(), ONE_DESCENDING, newTypeBits); } catch (const AssertionException&) { // The expectation is that the randomized buffer is likely an invalid KeyString, // however attempting to decode it should fail gracefully. |