From d436599ab1793a93e15c6fa13b6829f438a7a445 Mon Sep 17 00:00:00 2001 From: lukebhan Date: Fri, 20 Aug 2021 15:01:12 +0000 Subject: SERVER-58558 Add binary support to simple8b --- src/mongo/bson/util/bsoncolumn_test.cpp | 167 ++++++++++++++++++++++++ src/mongo/bson/util/bsoncolumnbuilder.cpp | 59 ++++++--- src/mongo/bson/util/bsoncolumnbuilder.h | 3 +- src/mongo/bson/util/simple8b_type_util.cpp | 23 ++++ src/mongo/bson/util/simple8b_type_util.h | 6 + src/mongo/bson/util/simple8b_type_util_test.cpp | 52 ++++++++ 6 files changed, 293 insertions(+), 17 deletions(-) (limited to 'src/mongo/bson') diff --git a/src/mongo/bson/util/bsoncolumn_test.cpp b/src/mongo/bson/util/bsoncolumn_test.cpp index ce8accf2e84..f1aa7228dfa 100644 --- a/src/mongo/bson/util/bsoncolumn_test.cpp +++ b/src/mongo/bson/util/bsoncolumn_test.cpp @@ -130,6 +130,22 @@ public: return _elementMemory.front().firstElement(); } + BSONElement createElementBinData(const std::vector& val) { + BSONObjBuilder ob; + ob.appendBinData("f", val.size(), BinDataGeneral, val.data()); + _elementMemory.emplace_front(ob.obj()); + return _elementMemory.front().firstElement(); + } + + static uint128_t deltaBinData(BSONElement val, BSONElement prev) { + if (val.binaryEqualValues(prev)) { + return 0; + } + return Simple8bTypeUtil::encodeInt128( + Simple8bTypeUtil::encodeBinary(val.valuestr(), val.valuestrsize()) - + Simple8bTypeUtil::encodeBinary(prev.valuestr(), prev.valuestrsize())); + } + static uint64_t deltaInt32(BSONElement val, BSONElement prev) { return Simple8bTypeUtil::encodeInt64(val.Int() - prev.Int()); } @@ -1542,5 +1558,156 @@ TEST_F(BSONColumnTest, SymbolAfterChangeBack) { verifyBinary(cb.finalize(), expected); } +TEST_F(BSONColumnTest, BinDataBase) { + BSONColumnBuilder cb("test"_sd); + std::vector input{'1', '2', '3', '4'}; + auto elemBinData = createElementBinData(input); + + cb.append(elemBinData); + + BufBuilder expected; + appendLiteral(expected, elemBinData); + appendEOO(expected); + + 
verifyBinary(cb.finalize(), expected); +} + +TEST_F(BSONColumnTest, BinDataOdd) { + BSONColumnBuilder cb("test"_sd); + std::vector input{'\n', '2', '\n', '4'}; + auto elemBinData = createElementBinData(input); + + cb.append(elemBinData); + + BufBuilder expected; + appendLiteral(expected, elemBinData); + appendEOO(expected); + + verifyBinary(cb.finalize(), expected); +} + +TEST_F(BSONColumnTest, BinDataDelta) { + BSONColumnBuilder cb("test"_sd); + std::vector input{'1', '2', '3', '4'}; + auto elemBinData = createElementBinData(input); + + cb.append(elemBinData); + cb.append(elemBinData); + + BufBuilder expected; + appendLiteral(expected, elemBinData); + appendSimple8bControl(expected, 0b1000, 0b0000); + appendSimple8bBlock128(expected, deltaBinData(elemBinData, elemBinData)); + appendEOO(expected); + + verifyBinary(cb.finalize(), expected); +} + +TEST_F(BSONColumnTest, BinDataDeltaShouldFail) { + BSONColumnBuilder cb("test"_sd); + std::vector input{'1', '2', '3', '4'}; + auto elemBinData = createElementBinData(input); + + cb.append(elemBinData); + + std::vector inputLong{'1', '2', '3', '4', '5'}; + auto elemBinDataLong = createElementBinData(inputLong); + cb.append(elemBinDataLong); + + BufBuilder expected; + appendLiteral(expected, elemBinData); + appendLiteral(expected, elemBinDataLong); + appendEOO(expected); + + verifyBinary(cb.finalize(), expected); +} + +TEST_F(BSONColumnTest, BinDataDeltaCheckSkips) { + BSONColumnBuilder cb("test"_sd); + std::vector input{'1', '2', '3', '4'}; + auto elemBinData = createElementBinData(input); + + cb.append(elemBinData); + + std::vector inputLong{'1', '2', '3', '3'}; + auto elemBinDataLong = createElementBinData(inputLong); + cb.append(elemBinDataLong); + cb.skip(); + cb.append(elemBinData); + + BufBuilder expected; + appendLiteral(expected, elemBinData); + appendSimple8bControl(expected, 0b1000, 0b0000); + std::vector> expectedValues = { + deltaBinData(elemBinDataLong, elemBinData), + boost::none, + deltaBinData(elemBinData, 
elemBinDataLong)}; + appendSimple8bBlocks128(expected, expectedValues, 1); + appendEOO(expected); + + verifyBinary(cb.finalize(), expected); +} + +TEST_F(BSONColumnTest, BinDataLargerThan16) { + BSONColumnBuilder cb("test"_sd); + std::vector input{ + '1', '2', '3', '4', '5', '6', '7', '8', '9', '1', '2', '3', '4', '5', '6', '7', '8'}; + auto elemBinData = createElementBinData(input); + + cb.append(elemBinData); + + std::vector inputLong{ + '1', '2', '3', '4', '5', '6', '7', '8', '9', '1', '2', '3', '4', '5', '6', '7', '9'}; + auto elemBinDataLong = createElementBinData(inputLong); + cb.append(elemBinDataLong); + + BufBuilder expected; + appendLiteral(expected, elemBinData); + appendLiteral(expected, elemBinDataLong); + appendEOO(expected); + + verifyBinary(cb.finalize(), expected); +} + +TEST_F(BSONColumnTest, BinDataEqualTo16) { + BSONColumnBuilder cb("test"_sd); + std::vector input{ + '1', '2', '3', '4', '5', '6', '7', '8', '9', '1', '2', '3', '4', '5', '6', '7'}; + auto elemBinData = createElementBinData(input); + + cb.append(elemBinData); + + std::vector inputLong{ + '1', '2', '3', '4', '5', '6', '7', '8', '9', '1', '2', '3', '4', '5', '6', '8'}; + auto elemBinDataLong = createElementBinData(inputLong); + cb.append(elemBinDataLong); + + BufBuilder expected; + appendLiteral(expected, elemBinData); + appendSimple8bControl(expected, 0b1000, 0b0000); + appendSimple8bBlock128(expected, deltaBinData(elemBinDataLong, elemBinData)); + appendEOO(expected); + + verifyBinary(cb.finalize(), expected); +} + +TEST_F(BSONColumnTest, BinDataLargerThan16SameValue) { + BSONColumnBuilder cb("test"_sd); + std::vector input{ + '1', '2', '3', '4', '5', '6', '7', '8', '9', '1', '2', '3', '4', '5', '6', '7', '8'}; + auto elemBinData = createElementBinData(input); + + cb.append(elemBinData); + cb.append(elemBinData); + + BufBuilder expected; + appendLiteral(expected, elemBinData); + appendSimple8bControl(expected, 0b1000, 0b0000); + appendSimple8bBlock128(expected, 
deltaBinData(elemBinData, elemBinData)); + appendEOO(expected); + + verifyBinary(cb.finalize(), expected); +} + } // namespace } // namespace mongo diff --git a/src/mongo/bson/util/bsoncolumnbuilder.cpp b/src/mongo/bson/util/bsoncolumnbuilder.cpp index 7a40cddb603..73a2f4b7431 100644 --- a/src/mongo/bson/util/bsoncolumnbuilder.cpp +++ b/src/mongo/bson/util/bsoncolumnbuilder.cpp @@ -91,7 +91,7 @@ BSONColumnBuilder& BSONColumnBuilder::append(BSONElement elem) { _storePrevious(elem); _simple8bBuilder128.flush(); _simple8bBuilder64.flush(); - _storeWith128 = elem.type() == NumberDecimal; + _storeWith128 = elem.type() == NumberDecimal || elem.type() == BinData; _writeLiteralFromPrevious(); return *this; } @@ -106,23 +106,36 @@ BSONColumnBuilder& BSONColumnBuilder::append(BSONElement elem) { } } + bool encodingPossible = true; if (!compressed) { if (_storeWith128) { int128_t delta = 0; switch (type) { - case NumberDecimal: - delta = (Simple8bTypeUtil::encodeDecimal128(elem._numberDecimal()) - - Simple8bTypeUtil::encodeDecimal128(previous._numberDecimal())); - break; + case BinData: { + encodingPossible = + elem.valuestrsize() == previous.valuestrsize() && elem.valuestrsize() <= 16; + if (!encodingPossible) + break; + int128_t curEncoded = + Simple8bTypeUtil::encodeBinary(elem.valuestr(), elem.valuestrsize()); + delta = curEncoded - _prevEncoded128; + _prevEncoded128 = curEncoded; + } break; + case NumberDecimal: { + int128_t curEncoded = Simple8bTypeUtil::encodeDecimal128(elem._numberDecimal()); + delta = curEncoded - _prevEncoded128; + _prevEncoded128 = curEncoded; + } break; default: // Nothing else is implemented yet invariant(false); }; - compressed = _simple8bBuilder128.append(Simple8bTypeUtil::encodeInt128(delta)); + if (encodingPossible) { + compressed = _simple8bBuilder128.append(Simple8bTypeUtil::encodeInt128(delta)); + } } else if (type == NumberDouble) { compressed = _appendDouble(elem._numberDouble(), previous._numberDouble()); } else { - bool 
encodingPossible = true; int64_t value = 0; switch (type) { case NumberInt: @@ -253,7 +266,7 @@ bool BSONColumnBuilder::_appendDouble(double value, double previous) { if (rescaled) { // Re-scale possible, use this Simple8b builder std::swap(_simple8bBuilder64, *rescaled); - _prevEncoded = encoded; + _prevEncoded64 = encoded; _scaleIndex = scaleIndex; return true; } @@ -264,10 +277,10 @@ bool BSONColumnBuilder::_appendDouble(double value, double previous) { // Make sure value and previous are using the same scale factor. uint8_t prevScaleIndex; - std::tie(_prevEncoded, prevScaleIndex) = scaleAndEncodeDouble(previous, scaleIndex); + std::tie(_prevEncoded64, prevScaleIndex) = scaleAndEncodeDouble(previous, scaleIndex); if (scaleIndex != prevScaleIndex) { std::tie(encoded, scaleIndex) = scaleAndEncodeDouble(value, prevScaleIndex); - std::tie(_prevEncoded, prevScaleIndex) = scaleAndEncodeDouble(previous, scaleIndex); + std::tie(_prevEncoded64, prevScaleIndex) = scaleAndEncodeDouble(previous, scaleIndex); } // Record our new scale factor @@ -277,14 +290,15 @@ bool BSONColumnBuilder::_appendDouble(double value, double previous) { // Append delta and check if we wrote a Simple8b block. If we did we may be able to reduce the // scale factor when starting a new block auto before = _bufBuilder.len(); - if (!_simple8bBuilder64.append(Simple8bTypeUtil::encodeInt64(calcDelta(encoded, _prevEncoded)))) + if (!_simple8bBuilder64.append( + Simple8bTypeUtil::encodeInt64(calcDelta(encoded, _prevEncoded64)))) return false; if (_bufBuilder.len() != before) { // Reset the scale factor to 0 and append all pending values to a new Simple8bBuilder. In // the worse case we will end up with an identical scale factor. auto prevScale = _scaleIndex; - std::tie(_prevEncoded, _scaleIndex) = scaleAndEncodeDouble(_lastValueInPrevBlock, 0); + std::tie(_prevEncoded64, _scaleIndex) = scaleAndEncodeDouble(_lastValueInPrevBlock, 0); // Create a new Simple8bBuilder. 
Simple8bBuilder builder(_createBufferWriter()); @@ -306,7 +320,7 @@ bool BSONColumnBuilder::_appendDouble(double value, double previous) { } } - _prevEncoded = encoded; + _prevEncoded64 = encoded; return true; } @@ -319,7 +333,7 @@ BSONColumnBuilder& BSONColumnBuilder::skip() { } // Rescale previous known value if this skip caused Simple-8b blocks to be written if (before != _bufBuilder.len() && _previous().type() == NumberDouble) { - std::tie(_prevEncoded, _scaleIndex) = scaleAndEncodeDouble(_lastValueInPrevBlock, 0); + std::tie(_prevEncoded64, _scaleIndex) = scaleAndEncodeDouble(_lastValueInPrevBlock, 0); } return *this; } @@ -358,15 +372,28 @@ void BSONColumnBuilder::_storePrevious(BSONElement elem) { void BSONColumnBuilder::_writeLiteralFromPrevious() { // Write literal without field name and reset control byte to force new one to be written when // appending next value. + auto prevElem = _previous(); _bufBuilder.appendBuf(_prev.get(), _prevSize); _controlByteOffset = 0; // There is no previous timestamp delta. Set to default. 
_prevDelta = 0; + switch (prevElem.type()) { + case BinData: + if (prevElem.valuestrsize() <= 16) + _prevEncoded128 = + Simple8bTypeUtil::encodeBinary(prevElem.valuestr(), prevElem.valuestrsize()); + break; + case NumberDecimal: + _prevEncoded128 = Simple8bTypeUtil::encodeDecimal128(prevElem._numberDecimal()); + break; + default: + break; + } // Set scale factor for this literal and values needed to append values if (_prev[0] == NumberDouble) { - _lastValueInPrevBlock = _previous()._numberDouble(); - std::tie(_prevEncoded, _scaleIndex) = scaleAndEncodeDouble(_lastValueInPrevBlock, 0); + _lastValueInPrevBlock = prevElem._numberDouble(); + std::tie(_prevEncoded64, _scaleIndex) = scaleAndEncodeDouble(_lastValueInPrevBlock, 0); } else { _scaleIndex = Simple8bTypeUtil::kMemoryAsInteger; } diff --git a/src/mongo/bson/util/bsoncolumnbuilder.h b/src/mongo/bson/util/bsoncolumnbuilder.h index 2d7774830a1..d4d76c0e89a 100644 --- a/src/mongo/bson/util/bsoncolumnbuilder.h +++ b/src/mongo/bson/util/bsoncolumnbuilder.h @@ -109,7 +109,8 @@ private: std::ptrdiff_t _controlByteOffset = 0; // Additional variables needed for previous state - int64_t _prevEncoded; + int64_t _prevEncoded64 = 0; + int128_t _prevEncoded128 = 0; double _lastValueInPrevBlock = 0; uint8_t _scaleIndex; diff --git a/src/mongo/bson/util/simple8b_type_util.cpp b/src/mongo/bson/util/simple8b_type_util.cpp index 61787104f9b..32b9fe4b558 100644 --- a/src/mongo/bson/util/simple8b_type_util.cpp +++ b/src/mongo/bson/util/simple8b_type_util.cpp @@ -185,4 +185,27 @@ Decimal128 Simple8bTypeUtil::decodeDecimal128(int128_t val) { return res; } +int128_t Simple8bTypeUtil::encodeBinary(const char* val, size_t size) { + char arr[16] = {}; + memcpy(arr, val, size); + uint64_t low = ConstDataView(arr).read>(); + uint64_t high = ConstDataView(arr + 8).read>(); + return absl::MakeInt128(high, low); +} + +void Simple8bTypeUtil::decodeBinary(int128_t val, char* result, size_t size) { + uint64_t low = absl::Int128Low64(val); + 
uint64_t high = absl::Int128High64(val); + if (size > 8) { + memcpy(result, &low, 8); + memcpy(result + 8, &high, size - 8); + } else { + memcpy(result, &low, size); + } + if (size < 16) { + // Set the position at end of binary to be always one. + result[size] = 1; + } +} + } // namespace mongo diff --git a/src/mongo/bson/util/simple8b_type_util.h b/src/mongo/bson/util/simple8b_type_util.h index 644c63dbdc6..345e66ce81a 100644 --- a/src/mongo/bson/util/simple8b_type_util.h +++ b/src/mongo/bson/util/simple8b_type_util.h @@ -78,6 +78,12 @@ public: static int128_t encodeDecimal128(Decimal128 val); static Decimal128 decodeDecimal128(int128_t val); + // These methods allow encoding binary with simple8b. We do not make any + // assumptions about the data other than the fact that the data is valid up to the size + // provided. The max size must be less than or equal to 16 bytes. + static int128_t encodeBinary(const char* val, size_t size); + static void decodeBinary(int128_t val, char* result, size_t size); + // Array is a double as it will always be multiplied by a double and we don't want to do an // extra cast for static constexpr uint8_t kMemoryAsInteger = 5; diff --git a/src/mongo/bson/util/simple8b_type_util_test.cpp b/src/mongo/bson/util/simple8b_type_util_test.cpp index 5d4e3051dce..5f5d82d192f 100644 --- a/src/mongo/bson/util/simple8b_type_util_test.cpp +++ b/src/mongo/bson/util/simple8b_type_util_test.cpp @@ -54,6 +54,17 @@ void assertDecimal128Equal(Decimal128 val) { ASSERT_TRUE(decodeResult == val); } +void assertBinaryEqual(char* val, size_t size, int128_t expected) { + int128_t encodeResult = Simple8bTypeUtil::encodeBinary(val, size); + ASSERT_EQUALS(encodeResult, expected); + char charPtr[16] = {1}; + Simple8bTypeUtil::decodeBinary(encodeResult, charPtr, size); + ASSERT_EQUALS(std::memcmp(charPtr, val, size), 0); + if (size <= 16) { + ASSERT_EQUALS(charPtr[size], 1); + } +} + TEST(Simple8bTypeUtil, EncodeAndDecodePositiveSignedInt) { int64_t signedVal = 1; 
uint64_t unsignedVal = Simple8bTypeUtil::encodeInt64(signedVal); @@ -392,3 +403,44 @@ TEST(Simple8bTypeUtil, Decimal128Min) { TEST(Simple8bTypeUtil, Decimal128Lowest) { assertDecimal128Equal(Decimal128(std::numeric_limits::lowest())); } + +TEST(Simple8bTypeUtil, EmptyBinary) { + char arr[0]; + assertBinaryEqual(arr, 0, 0); +} + +TEST(Simple8bTypeUtil, SingleLetterBinary) { + char arr[1] = {'a'}; + assertBinaryEqual(arr, 1, 97); +} + +TEST(Simple8bTypeUtil, MultiLetterBinary) { + // a = 97 = 01100001 + // b = 98 = 01100010 + // c = 99 = 01100011 + // abc = 011000110110001001100001 = 6513249 + char arr[3] = {'a', 'b', 'c'}; + assertBinaryEqual(arr, 3, 6513249); +} + +TEST(Simple8bTypeUtil, MultiCharWithOddValues) { + char arr[5] = {'a', char(1), '\n'}; + // a = 97 = 01100001 + // 1 = 00000001 + // \n = 00001010 + // a1\n = 000010100000000101100001 = 655713 + assertBinaryEqual(arr, 5, 655713); +} + +TEST(Simple8bTypeUtil, LargeChar) { + char arr[15] = "abcdefghijklmn"; + assertBinaryEqual(arr, 15, absl::MakeInt128(0x6E6D6C6B6A69, 0x6867666564636261)); +} + +TEST(Simple8bTypeUtil, LeadingAndTrailingZeros) { + char arr[7] = {'0', '0', '0', 'a', '0', '0', '0'}; + // 0 = 48 = 00110000 + // Our result should be + // 00110000 00110000 00110000 01100001 00110000 00110000 00110000 + assertBinaryEqual(arr, 7, absl::MakeInt128(0, 0x30303061303030)); +} -- cgit v1.2.1