summaryrefslogtreecommitdiff
path: root/src/mongo/bson
diff options
context:
space:
mode:
authorlukebhan <luke.bhan@vanderbilt.edu>2021-08-20 15:01:12 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-08-20 21:11:25 +0000
commitd436599ab1793a93e15c6fa13b6829f438a7a445 (patch)
tree47b3866567974581d90635e73e4c7deda7e0df2e /src/mongo/bson
parent681521f2d475abaa40c668596344f269c750386b (diff)
downloadmongo-d436599ab1793a93e15c6fa13b6829f438a7a445.tar.gz
SERVER-58558 Add binary support to simple8b
Diffstat (limited to 'src/mongo/bson')
-rw-r--r--src/mongo/bson/util/bsoncolumn_test.cpp167
-rw-r--r--src/mongo/bson/util/bsoncolumnbuilder.cpp59
-rw-r--r--src/mongo/bson/util/bsoncolumnbuilder.h3
-rw-r--r--src/mongo/bson/util/simple8b_type_util.cpp23
-rw-r--r--src/mongo/bson/util/simple8b_type_util.h6
-rw-r--r--src/mongo/bson/util/simple8b_type_util_test.cpp52
6 files changed, 293 insertions, 17 deletions
diff --git a/src/mongo/bson/util/bsoncolumn_test.cpp b/src/mongo/bson/util/bsoncolumn_test.cpp
index ce8accf2e84..f1aa7228dfa 100644
--- a/src/mongo/bson/util/bsoncolumn_test.cpp
+++ b/src/mongo/bson/util/bsoncolumn_test.cpp
@@ -130,6 +130,22 @@ public:
return _elementMemory.front().firstElement();
}
+    // Wraps 'val' in a BinDataGeneral field named "f", keeps the owning object alive in
+    // '_elementMemory' and hands back the first element of that stored object.
+    BSONElement createElementBinData(const std::vector<uint8_t>& val) {
+        BSONObjBuilder binDataBuilder;
+        binDataBuilder.appendBinData("f", val.size(), BinDataGeneral, val.data());
+        _elementMemory.emplace_front(binDataBuilder.obj());
+        auto& storedObj = _elementMemory.front();
+        return storedObj.firstElement();
+    }
+
+    // Expected Simple-8b payload for a BinData delta: zero when the two elements compare
+    // binary-equal, otherwise encodeInt128() of the difference between their encodeBinary()
+    // values.
+    // NOTE(review): the byte range passed to encodeBinary() is valuestr()/valuestrsize();
+    // confirm that for BinData this range covers exactly the payload bytes.
+    static uint128_t deltaBinData(BSONElement val, BSONElement prev) {
+        if (!val.binaryEqualValues(prev)) {
+            const auto currentEncoded =
+                Simple8bTypeUtil::encodeBinary(val.valuestr(), val.valuestrsize());
+            const auto previousEncoded =
+                Simple8bTypeUtil::encodeBinary(prev.valuestr(), prev.valuestrsize());
+            return Simple8bTypeUtil::encodeInt128(currentEncoded - previousEncoded);
+        }
+        return 0;
+    }
+
// Expected Simple-8b value for an int32 delta: encodeInt64() applied to val.Int() - prev.Int().
static uint64_t deltaInt32(BSONElement val, BSONElement prev) {
return Simple8bTypeUtil::encodeInt64(val.Int() - prev.Int());
}
@@ -1542,5 +1558,156 @@ TEST_F(BSONColumnTest, SymbolAfterChangeBack) {
verifyBinary(cb.finalize(), expected);
}
+// A single BinData value is expected to be written as a literal followed by EOO.
+TEST_F(BSONColumnTest, BinDataBase) {
+    const std::vector<uint8_t> payload{'1', '2', '3', '4'};
+    auto binElem = createElementBinData(payload);
+
+    BSONColumnBuilder builder("test"_sd);
+    builder.append(binElem);
+
+    BufBuilder expectedBuf;
+    appendLiteral(expectedBuf, binElem);
+    appendEOO(expectedBuf);
+
+    verifyBinary(builder.finalize(), expectedBuf);
+}
+
+// Same as the base case, but the payload contains newline bytes; still expected to be written
+// as a literal followed by EOO.
+TEST_F(BSONColumnTest, BinDataOdd) {
+    const std::vector<uint8_t> payload{'\n', '2', '\n', '4'};
+    auto binElem = createElementBinData(payload);
+
+    BSONColumnBuilder builder("test"_sd);
+    builder.append(binElem);
+
+    BufBuilder expectedBuf;
+    appendLiteral(expectedBuf, binElem);
+    appendEOO(expectedBuf);
+
+    verifyBinary(builder.finalize(), expectedBuf);
+}
+
+// Appending the same BinData value twice: the expected output is the literal, a Simple-8b
+// control byte, one 128-bit block holding the delta of the value against itself, then EOO.
+TEST_F(BSONColumnTest, BinDataDelta) {
+    const std::vector<uint8_t> payload{'1', '2', '3', '4'};
+    auto binElem = createElementBinData(payload);
+
+    BSONColumnBuilder builder("test"_sd);
+    for (int i = 0; i < 2; ++i) {
+        builder.append(binElem);
+    }
+
+    BufBuilder expectedBuf;
+    appendLiteral(expectedBuf, binElem);
+    appendSimple8bControl(expectedBuf, 0b1000, 0b0000);
+    const auto selfDelta = deltaBinData(binElem, binElem);
+    appendSimple8bBlock128(expectedBuf, selfDelta);
+    appendEOO(expectedBuf);
+
+    verifyBinary(builder.finalize(), expectedBuf);
+}
+
+// A 4-byte value followed by a 5-byte value: the test expects no delta block, i.e. both values
+// are written as literals.
+TEST_F(BSONColumnTest, BinDataDeltaShouldFail) {
+    const std::vector<uint8_t> shortPayload{'1', '2', '3', '4'};
+    const std::vector<uint8_t> longPayload{'1', '2', '3', '4', '5'};
+    auto shortElem = createElementBinData(shortPayload);
+    auto longElem = createElementBinData(longPayload);
+
+    BSONColumnBuilder builder("test"_sd);
+    builder.append(shortElem);
+    builder.append(longElem);
+
+    BufBuilder expectedBuf;
+    appendLiteral(expectedBuf, shortElem);
+    appendLiteral(expectedBuf, longElem);
+    appendEOO(expectedBuf);
+
+    verifyBinary(builder.finalize(), expectedBuf);
+}
+
+// Sequence: value, same-length variant, skip, original value. The expected output is the first
+// literal, a control byte, and Simple-8b blocks for {delta, none, delta}, then EOO.
+TEST_F(BSONColumnTest, BinDataDeltaCheckSkips) {
+    const std::vector<uint8_t> basePayload{'1', '2', '3', '4'};
+    const std::vector<uint8_t> variantPayload{'1', '2', '3', '3'};
+    auto baseElem = createElementBinData(basePayload);
+    auto variantElem = createElementBinData(variantPayload);
+
+    BSONColumnBuilder builder("test"_sd);
+    builder.append(baseElem);
+    builder.append(variantElem);
+    builder.skip();
+    builder.append(baseElem);
+
+    BufBuilder expectedBuf;
+    appendLiteral(expectedBuf, baseElem);
+    appendSimple8bControl(expectedBuf, 0b1000, 0b0000);
+
+    const auto forwardDelta = deltaBinData(variantElem, baseElem);
+    const auto backwardDelta = deltaBinData(baseElem, variantElem);
+    std::vector<boost::optional<uint128_t>> deltas = {forwardDelta, boost::none, backwardDelta};
+    appendSimple8bBlocks128(expectedBuf, deltas, 1);
+    appendEOO(expectedBuf);
+
+    verifyBinary(builder.finalize(), expectedBuf);
+}
+
+// Two different 17-byte values (they differ in the last byte): the test expects both to be
+// written as literals rather than as a delta.
+TEST_F(BSONColumnTest, BinDataLargerThan16) {
+    const std::vector<uint8_t> firstPayload{
+        '1', '2', '3', '4', '5', '6', '7', '8', '9', '1', '2', '3', '4', '5', '6', '7', '8'};
+    const std::vector<uint8_t> secondPayload{
+        '1', '2', '3', '4', '5', '6', '7', '8', '9', '1', '2', '3', '4', '5', '6', '7', '9'};
+    auto firstElem = createElementBinData(firstPayload);
+    auto secondElem = createElementBinData(secondPayload);
+
+    BSONColumnBuilder builder("test"_sd);
+    builder.append(firstElem);
+    builder.append(secondElem);
+
+    BufBuilder expectedBuf;
+    appendLiteral(expectedBuf, firstElem);
+    appendLiteral(expectedBuf, secondElem);
+    appendEOO(expectedBuf);
+
+    verifyBinary(builder.finalize(), expectedBuf);
+}
+
+// Two different 16-byte values (they differ in the last byte): the test expects the first as a
+// literal and the second as a single 128-bit Simple-8b delta block.
+TEST_F(BSONColumnTest, BinDataEqualTo16) {
+    const std::vector<uint8_t> firstPayload{
+        '1', '2', '3', '4', '5', '6', '7', '8', '9', '1', '2', '3', '4', '5', '6', '7'};
+    const std::vector<uint8_t> secondPayload{
+        '1', '2', '3', '4', '5', '6', '7', '8', '9', '1', '2', '3', '4', '5', '6', '8'};
+    auto firstElem = createElementBinData(firstPayload);
+    auto secondElem = createElementBinData(secondPayload);
+
+    BSONColumnBuilder builder("test"_sd);
+    builder.append(firstElem);
+    builder.append(secondElem);
+
+    BufBuilder expectedBuf;
+    appendLiteral(expectedBuf, firstElem);
+    appendSimple8bControl(expectedBuf, 0b1000, 0b0000);
+    const auto delta = deltaBinData(secondElem, firstElem);
+    appendSimple8bBlock128(expectedBuf, delta);
+    appendEOO(expectedBuf);
+
+    verifyBinary(builder.finalize(), expectedBuf);
+}
+
+// The same 17-byte value appended twice: the test expects the literal followed by one 128-bit
+// Simple-8b block holding the delta of the value against itself.
+TEST_F(BSONColumnTest, BinDataLargerThan16SameValue) {
+    const std::vector<uint8_t> payload{
+        '1', '2', '3', '4', '5', '6', '7', '8', '9', '1', '2', '3', '4', '5', '6', '7', '8'};
+    auto binElem = createElementBinData(payload);
+
+    BSONColumnBuilder builder("test"_sd);
+    for (int i = 0; i < 2; ++i) {
+        builder.append(binElem);
+    }
+
+    BufBuilder expectedBuf;
+    appendLiteral(expectedBuf, binElem);
+    appendSimple8bControl(expectedBuf, 0b1000, 0b0000);
+    const auto selfDelta = deltaBinData(binElem, binElem);
+    appendSimple8bBlock128(expectedBuf, selfDelta);
+    appendEOO(expectedBuf);
+
+    verifyBinary(builder.finalize(), expectedBuf);
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/bson/util/bsoncolumnbuilder.cpp b/src/mongo/bson/util/bsoncolumnbuilder.cpp
index 7a40cddb603..73a2f4b7431 100644
--- a/src/mongo/bson/util/bsoncolumnbuilder.cpp
+++ b/src/mongo/bson/util/bsoncolumnbuilder.cpp
@@ -91,7 +91,7 @@ BSONColumnBuilder& BSONColumnBuilder::append(BSONElement elem) {
_storePrevious(elem);
_simple8bBuilder128.flush();
_simple8bBuilder64.flush();
- _storeWith128 = elem.type() == NumberDecimal;
+ _storeWith128 = elem.type() == NumberDecimal || elem.type() == BinData;
_writeLiteralFromPrevious();
return *this;
}
@@ -106,23 +106,36 @@ BSONColumnBuilder& BSONColumnBuilder::append(BSONElement elem) {
}
}
+ bool encodingPossible = true;
if (!compressed) {
if (_storeWith128) {
int128_t delta = 0;
switch (type) {
- case NumberDecimal:
- delta = (Simple8bTypeUtil::encodeDecimal128(elem._numberDecimal()) -
- Simple8bTypeUtil::encodeDecimal128(previous._numberDecimal()));
- break;
+ case BinData: {
+ encodingPossible =
+ elem.valuestrsize() == previous.valuestrsize() && elem.valuestrsize() <= 16;
+ if (!encodingPossible)
+ break;
+ int128_t curEncoded =
+ Simple8bTypeUtil::encodeBinary(elem.valuestr(), elem.valuestrsize());
+ delta = curEncoded - _prevEncoded128;
+ _prevEncoded128 = curEncoded;
+ } break;
+ case NumberDecimal: {
+ int128_t curEncoded = Simple8bTypeUtil::encodeDecimal128(elem._numberDecimal());
+ delta = curEncoded - _prevEncoded128;
+ _prevEncoded128 = curEncoded;
+ } break;
default:
// Nothing else is implemented yet
invariant(false);
};
- compressed = _simple8bBuilder128.append(Simple8bTypeUtil::encodeInt128(delta));
+ if (encodingPossible) {
+ compressed = _simple8bBuilder128.append(Simple8bTypeUtil::encodeInt128(delta));
+ }
} else if (type == NumberDouble) {
compressed = _appendDouble(elem._numberDouble(), previous._numberDouble());
} else {
- bool encodingPossible = true;
int64_t value = 0;
switch (type) {
case NumberInt:
@@ -253,7 +266,7 @@ bool BSONColumnBuilder::_appendDouble(double value, double previous) {
if (rescaled) {
// Re-scale possible, use this Simple8b builder
std::swap(_simple8bBuilder64, *rescaled);
- _prevEncoded = encoded;
+ _prevEncoded64 = encoded;
_scaleIndex = scaleIndex;
return true;
}
@@ -264,10 +277,10 @@ bool BSONColumnBuilder::_appendDouble(double value, double previous) {
// Make sure value and previous are using the same scale factor.
uint8_t prevScaleIndex;
- std::tie(_prevEncoded, prevScaleIndex) = scaleAndEncodeDouble(previous, scaleIndex);
+ std::tie(_prevEncoded64, prevScaleIndex) = scaleAndEncodeDouble(previous, scaleIndex);
if (scaleIndex != prevScaleIndex) {
std::tie(encoded, scaleIndex) = scaleAndEncodeDouble(value, prevScaleIndex);
- std::tie(_prevEncoded, prevScaleIndex) = scaleAndEncodeDouble(previous, scaleIndex);
+ std::tie(_prevEncoded64, prevScaleIndex) = scaleAndEncodeDouble(previous, scaleIndex);
}
// Record our new scale factor
@@ -277,14 +290,15 @@ bool BSONColumnBuilder::_appendDouble(double value, double previous) {
// Append delta and check if we wrote a Simple8b block. If we did we may be able to reduce the
// scale factor when starting a new block
auto before = _bufBuilder.len();
- if (!_simple8bBuilder64.append(Simple8bTypeUtil::encodeInt64(calcDelta(encoded, _prevEncoded))))
+ if (!_simple8bBuilder64.append(
+ Simple8bTypeUtil::encodeInt64(calcDelta(encoded, _prevEncoded64))))
return false;
if (_bufBuilder.len() != before) {
// Reset the scale factor to 0 and append all pending values to a new Simple8bBuilder. In
// the worst case we will end up with an identical scale factor.
auto prevScale = _scaleIndex;
- std::tie(_prevEncoded, _scaleIndex) = scaleAndEncodeDouble(_lastValueInPrevBlock, 0);
+ std::tie(_prevEncoded64, _scaleIndex) = scaleAndEncodeDouble(_lastValueInPrevBlock, 0);
// Create a new Simple8bBuilder.
Simple8bBuilder<uint64_t> builder(_createBufferWriter());
@@ -306,7 +320,7 @@ bool BSONColumnBuilder::_appendDouble(double value, double previous) {
}
}
- _prevEncoded = encoded;
+ _prevEncoded64 = encoded;
return true;
}
@@ -319,7 +333,7 @@ BSONColumnBuilder& BSONColumnBuilder::skip() {
}
// Rescale previous known value if this skip caused Simple-8b blocks to be written
if (before != _bufBuilder.len() && _previous().type() == NumberDouble) {
- std::tie(_prevEncoded, _scaleIndex) = scaleAndEncodeDouble(_lastValueInPrevBlock, 0);
+ std::tie(_prevEncoded64, _scaleIndex) = scaleAndEncodeDouble(_lastValueInPrevBlock, 0);
}
return *this;
}
@@ -358,15 +372,28 @@ void BSONColumnBuilder::_storePrevious(BSONElement elem) {
void BSONColumnBuilder::_writeLiteralFromPrevious() {
// Write literal without field name and reset control byte to force new one to be written when
// appending next value.
+ auto prevElem = _previous();
_bufBuilder.appendBuf(_prev.get(), _prevSize);
_controlByteOffset = 0;
// There is no previous timestamp delta. Set to default.
_prevDelta = 0;
+ switch (prevElem.type()) {
+ case BinData:
+ if (prevElem.valuestrsize() <= 16)
+ _prevEncoded128 =
+ Simple8bTypeUtil::encodeBinary(prevElem.valuestr(), prevElem.valuestrsize());
+ break;
+ case NumberDecimal:
+ _prevEncoded128 = Simple8bTypeUtil::encodeDecimal128(prevElem._numberDecimal());
+ break;
+ default:
+ break;
+ }
// Set scale factor for this literal and values needed to append values
if (_prev[0] == NumberDouble) {
- _lastValueInPrevBlock = _previous()._numberDouble();
- std::tie(_prevEncoded, _scaleIndex) = scaleAndEncodeDouble(_lastValueInPrevBlock, 0);
+ _lastValueInPrevBlock = prevElem._numberDouble();
+ std::tie(_prevEncoded64, _scaleIndex) = scaleAndEncodeDouble(_lastValueInPrevBlock, 0);
} else {
_scaleIndex = Simple8bTypeUtil::kMemoryAsInteger;
}
diff --git a/src/mongo/bson/util/bsoncolumnbuilder.h b/src/mongo/bson/util/bsoncolumnbuilder.h
index 2d7774830a1..d4d76c0e89a 100644
--- a/src/mongo/bson/util/bsoncolumnbuilder.h
+++ b/src/mongo/bson/util/bsoncolumnbuilder.h
@@ -109,7 +109,8 @@ private:
std::ptrdiff_t _controlByteOffset = 0;
// Additional variables needed for previous state
- int64_t _prevEncoded;
+ int64_t _prevEncoded64 = 0;
+ int128_t _prevEncoded128 = 0;
double _lastValueInPrevBlock = 0;
uint8_t _scaleIndex;
diff --git a/src/mongo/bson/util/simple8b_type_util.cpp b/src/mongo/bson/util/simple8b_type_util.cpp
index 61787104f9b..32b9fe4b558 100644
--- a/src/mongo/bson/util/simple8b_type_util.cpp
+++ b/src/mongo/bson/util/simple8b_type_util.cpp
@@ -185,4 +185,27 @@ Decimal128 Simple8bTypeUtil::decodeDecimal128(int128_t val) {
return res;
}
+// Packs the first 'size' bytes of 'val' into an int128_t. The bytes are zero-padded to 16 and
+// read as two little-endian 64-bit words, so val[0] ends up as the least significant byte of
+// the result.
+//
+// 'size' must be at most 16. Anything larger would overrun the local staging buffer, so the
+// precondition is enforced with an invariant instead of silently corrupting the stack.
+int128_t Simple8bTypeUtil::encodeBinary(const char* val, size_t size) {
+    char arr[16] = {};
+    invariant(size <= sizeof(arr));
+    // Skip the copy for empty input so a null 'val' is never handed to memcpy.
+    if (size > 0) {
+        memcpy(arr, val, size);
+    }
+    uint64_t low = ConstDataView(arr).read<LittleEndian<uint64_t>>();
+    uint64_t high = ConstDataView(arr + 8).read<LittleEndian<uint64_t>>();
+    return absl::MakeInt128(high, low);
+}
+
+// Inverse of encodeBinary(): writes the 'size' least significant bytes of 'val' to 'result',
+// least significant byte first.
+//
+// The two 64-bit halves are serialized explicitly as little-endian, mirroring the little-endian
+// reads in encodeBinary(), so the round trip does not depend on the byte order of the host.
+//
+// 'size' must be at most 16. When 'size' is less than 16 an extra byte with value 1 is written
+// at result[size], so 'result' must have room for size + 1 bytes in that case.
+void Simple8bTypeUtil::decodeBinary(int128_t val, char* result, size_t size) {
+    uint64_t low = absl::Int128Low64(val);
+    uint64_t high = absl::Int128High64(val);
+
+    char arr[16];
+    invariant(size <= sizeof(arr));
+    DataView(arr).write(tagLittleEndian(low));
+    DataView(arr + 8).write(tagLittleEndian(high));
+    memcpy(result, arr, size);
+
+    if (size < 16) {
+        // Set the position at end of binary to be always one.
+        result[size] = 1;
+    }
+}
+
} // namespace mongo
diff --git a/src/mongo/bson/util/simple8b_type_util.h b/src/mongo/bson/util/simple8b_type_util.h
index 644c63dbdc6..345e66ce81a 100644
--- a/src/mongo/bson/util/simple8b_type_util.h
+++ b/src/mongo/bson/util/simple8b_type_util.h
@@ -78,6 +78,12 @@ public:
static int128_t encodeDecimal128(Decimal128 val);
static Decimal128 decodeDecimal128(int128_t val);
+ // These methods allow encoding binary with simple8b. We do not make any
+ // assumptions about the data other than the fact that the data is valid up to the size
+ // provided. The max size must be less than or equal to 16 bytes.
+ //
+ // encodeBinary() zero-pads the input to 16 bytes and reads it as two little-endian 64-bit
+ // words. decodeBinary() copies 'size' bytes back into 'result' and, when size < 16, also
+ // writes a byte with value 1 at result[size]; in that case 'result' needs size + 1 bytes.
+ static int128_t encodeBinary(const char* val, size_t size);
+ static void decodeBinary(int128_t val, char* result, size_t size);
+
// Array is a double as it will always be multiplied by a double and we don't want to do an
// extra cast for
static constexpr uint8_t kMemoryAsInteger = 5;
diff --git a/src/mongo/bson/util/simple8b_type_util_test.cpp b/src/mongo/bson/util/simple8b_type_util_test.cpp
index 5d4e3051dce..5f5d82d192f 100644
--- a/src/mongo/bson/util/simple8b_type_util_test.cpp
+++ b/src/mongo/bson/util/simple8b_type_util_test.cpp
@@ -54,6 +54,17 @@ void assertDecimal128Equal(Decimal128 val) {
ASSERT_TRUE(decodeResult == val);
}
+// Round-trip check for the binary codec: encodes 'size' bytes of 'val', compares the result with
+// 'expected', decodes it again and verifies that the first 'size' bytes match the input.
+void assertBinaryEqual(char* val, size_t size, int128_t expected) {
+    int128_t encodeResult = Simple8bTypeUtil::encodeBinary(val, size);
+    ASSERT_EQUALS(encodeResult, expected);
+    char charPtr[16] = {1};
+    Simple8bTypeUtil::decodeBinary(encodeResult, charPtr, size);
+    ASSERT_EQUALS(std::memcmp(charPtr, val, size), 0);
+    // decodeBinary() writes the trailing 1 only when size < 16. Index 16 is one past the end of
+    // 'charPtr', so it must not be read for a full 16-byte value.
+    if (size < 16) {
+        ASSERT_EQUALS(charPtr[size], 1);
+    }
+}
+
TEST(Simple8bTypeUtil, EncodeAndDecodePositiveSignedInt) {
int64_t signedVal = 1;
uint64_t unsignedVal = Simple8bTypeUtil::encodeInt64(signedVal);
@@ -392,3 +403,44 @@ TEST(Simple8bTypeUtil, Decimal128Min) {
TEST(Simple8bTypeUtil, Decimal128Lowest) {
assertDecimal128Equal(Decimal128(std::numeric_limits<Decimal128>::lowest()));
}
+
+// Zero bytes of input must encode to 0.
+TEST(Simple8bTypeUtil, EmptyBinary) {
+    // Zero-length arrays are not valid ISO C++, so use a one-byte buffer and pass a size of 0.
+    char arr[1] = {};
+    assertBinaryEqual(arr, 0, 0);
+}
+
+// A single byte encodes to its own numeric value ('a' == 97).
+TEST(Simple8bTypeUtil, SingleLetterBinary) {
+    char letter[] = {'a'};
+    assertBinaryEqual(letter, sizeof(letter), 97);
+}
+
+// The first input byte becomes the least significant byte of the encoded value:
+//   'a' = 0x61, 'b' = 0x62, 'c' = 0x63  ->  0x636261 = 6513249
+TEST(Simple8bTypeUtil, MultiLetterBinary) {
+    char letters[] = {'a', 'b', 'c'};
+    assertBinaryEqual(letters, sizeof(letters), 0x636261);
+}
+
+// Non-printable bytes are handled like any other byte. The buffer is five bytes long; the two
+// bytes without an initializer are zero and therefore do not contribute to the value:
+//   'a' = 0x61, 1 = 0x01, '\n' = 0x0A  ->  0x0A0161 = 655713
+TEST(Simple8bTypeUtil, MultiCharWithOddValues) {
+    char bytes[5] = {'a', '\x01', '\n'};
+    assertBinaryEqual(bytes, sizeof(bytes), 0x0A0161);
+}
+
+// 15 bytes (14 letters plus the string terminator) span both 64-bit halves: "abcdefgh" forms the
+// low word and "ijklmn" followed by the zero terminator forms the high word.
+TEST(Simple8bTypeUtil, LargeChar) {
+    char text[15] = "abcdefghijklmn";
+    const auto expectedValue = absl::MakeInt128(0x6E6D6C6B6A69, 0x6867666564636261);
+    assertBinaryEqual(text, sizeof(text), expectedValue);
+}
+
+TEST(Simple8bTypeUtil, LeadingAndTrailingZeros) {
+ char arr[7] = {'0', '0', '0', 'a', '0', '0', '0'};
+ // '0' = 48 = 00110000, 'a' = 97 = 01100001
+ // Our result should be
+ // 00110000 00110000 00110000 01100001 00110000 00110000 00110000
+ assertBinaryEqual(arr, 7, absl::MakeInt128(0, 0x30303061303030));
+}