summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJustin Seyster <justin.seyster@mongodb.com>2021-01-21 18:51:18 -0500
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-01-27 04:19:52 +0000
commit657fd55617da405757b94bc9973df40394a18e5b (patch)
tree008bfef400ffaa7183156a7b5ef2881ec1bdd5f6
parent84a07cc16f411f0cc5897a410ba0248b35d5d293 (diff)
downloadmongo-657fd55617da405757b94bc9973df40394a18e5b.tar.gz
SERVER-50386 Testing and documentation for ByteArrayDeprecated subtype
-rw-r--r--src/mongo/db/exec/sbe/SConscript1
-rw-r--r--src/mongo/db/exec/sbe/sbe_test.cpp154
-rw-r--r--src/mongo/db/exec/sbe/values/value.cpp12
-rw-r--r--src/mongo/db/exec/sbe/values/value.h41
-rw-r--r--src/mongo/db/exec/sbe/values/value_serialize_for_sorter_test.cpp129
-rw-r--r--src/mongo/db/exec/sbe/values/write_value_to_stream_test.cpp11
6 files changed, 339 insertions, 9 deletions
diff --git a/src/mongo/db/exec/sbe/SConscript b/src/mongo/db/exec/sbe/SConscript
index 8b2e7f1eb51..4f4b5a201fd 100644
--- a/src/mongo/db/exec/sbe/SConscript
+++ b/src/mongo/db/exec/sbe/SConscript
@@ -146,6 +146,7 @@ env.CppUnitTest(
'sbe_sorted_merge_test.cpp',
'sbe_test.cpp',
'sbe_unique_test.cpp',
+ 'values/value_serialize_for_sorter_test.cpp',
'values/write_value_to_stream_test.cpp'
],
LIBDEPS=[
diff --git a/src/mongo/db/exec/sbe/sbe_test.cpp b/src/mongo/db/exec/sbe/sbe_test.cpp
index 31a4e53eac5..74460db8e2c 100644
--- a/src/mongo/db/exec/sbe/sbe_test.cpp
+++ b/src/mongo/db/exec/sbe/sbe_test.cpp
@@ -27,6 +27,7 @@
* it in the license file.
*/
+#include "mongo/db/exec/sbe/values/bson.h"
#include "mongo/db/exec/sbe/values/value.h"
#include "mongo/db/exec/sbe/vm/vm.h"
#include "mongo/unittest/unittest.h"
@@ -146,6 +147,32 @@ TEST(SBEValues, Hash) {
value::hashValue(tagDecimalBig, valDecimalBig));
value::releaseValue(tagDecimalBig, valDecimalBig);
+
+ uint8_t byteArray1[] = {1, 2, 3, 4, 5, 6, 7, 8};
+ uint8_t byteArray2[] = {4, 3, 2, 1, 5, 6, 7, 8};
+ auto binDataOperands =
+ BSON_ARRAY(BSONBinData(byteArray1, sizeof(byteArray1), BinDataGeneral)
+ << BSONBinData(byteArray1, sizeof(byteArray1), ByteArrayDeprecated)
+ << BSONBinData(byteArray2, sizeof(byteArray2), ByteArrayDeprecated));
+
+ // Two BinData values with the same data but different subtypes should hash differently.
+ auto tagGeneralBinData = value::TypeTags::bsonBinData;
+ auto valGeneralBinData = value::bitcastFrom<const char*>(binDataOperands[0].value());
+
+ auto tagDeprecatedBinData1 = value::TypeTags::bsonBinData;
+ auto valDeprecatedBinData1 = value::bitcastFrom<const char*>(binDataOperands[1].value());
+
+ ASSERT_NE(value::hashValue(tagGeneralBinData, valGeneralBinData),
+ value::hashValue(tagDeprecatedBinData1, valDeprecatedBinData1));
+
+ // Two ByteArrayDeprecated BinData values with different values in the leading four bytes should
+ // hash differently, even though those four bytes are technically not part of the binary data
+ // payload.
+ auto tagDeprecatedBinData2 = value::TypeTags::bsonBinData;
+ auto valDeprecatedBinData2 = value::bitcastFrom<const char*>(binDataOperands[2].value());
+
+ ASSERT_NE(value::hashValue(tagDeprecatedBinData1, valDeprecatedBinData1),
+ value::hashValue(tagDeprecatedBinData2, valDeprecatedBinData2));
}
TEST(SBEValues, HashCompound) {
@@ -248,4 +275,131 @@ TEST(SBEVM, Add) {
}
}
+TEST(SBEVM, CompareBinData) {
+ {
+ uint8_t byteArray1[] = {1, 2, 3, 4};
+ uint8_t byteArray2[] = {1, 2, 3, 10};
+ auto operands = BSON_ARRAY(BSONBinData(byteArray1, sizeof(byteArray1), BinDataGeneral)
+ << BSONBinData(byteArray2, sizeof(byteArray2), BinDataGeneral));
+
+ vm::CodeFragment code;
+ code.appendConstVal(value::TypeTags::bsonBinData,
+ value::bitcastFrom<const char*>(operands[0].value()));
+ code.appendConstVal(value::TypeTags::bsonBinData,
+ value::bitcastFrom<const char*>(operands[1].value()));
+ code.appendCmp3w();
+
+ vm::ByteCode interpreter;
+ auto [owned, tag, val] = interpreter.run(&code);
+
+ ASSERT_EQ(tag, value::TypeTags::NumberInt32);
+ ASSERT_LT(value::bitcastTo<int32_t>(val), 0);
+ ASSERT_FALSE(owned);
+ }
+ {
+ uint8_t byteArray1[] = {1, 2, 3, 4};
+ uint8_t byteArray2[] = {1, 2, 3, 4};
+ auto operands = BSON_ARRAY(BSONBinData(byteArray1, sizeof(byteArray1), BinDataGeneral)
+ << BSONBinData(byteArray2, sizeof(byteArray2), BinDataGeneral));
+
+ vm::CodeFragment code;
+ code.appendConstVal(value::TypeTags::bsonBinData,
+ value::bitcastFrom<const char*>(operands[0].value()));
+ code.appendConstVal(value::TypeTags::bsonBinData,
+ value::bitcastFrom<const char*>(operands[1].value()));
+ code.appendCmp3w();
+
+ vm::ByteCode interpreter;
+ auto [owned, tag, val] = interpreter.run(&code);
+
+ ASSERT_EQ(tag, value::TypeTags::NumberInt32);
+ ASSERT_EQ(value::bitcastTo<int32_t>(val), 0);
+ ASSERT_FALSE(owned);
+ }
+ {
+ uint8_t byteArray1[] = {1, 2, 10, 4};
+ uint8_t byteArray2[] = {1, 2, 3, 4};
+ auto operands = BSON_ARRAY(BSONBinData(byteArray1, sizeof(byteArray1), BinDataGeneral)
+ << BSONBinData(byteArray2, sizeof(byteArray2), BinDataGeneral));
+
+ vm::CodeFragment code;
+ code.appendConstVal(value::TypeTags::bsonBinData,
+ value::bitcastFrom<const char*>(operands[0].value()));
+ code.appendConstVal(value::TypeTags::bsonBinData,
+ value::bitcastFrom<const char*>(operands[1].value()));
+ code.appendCmp3w();
+
+ vm::ByteCode interpreter;
+ auto [owned, tag, val] = interpreter.run(&code);
+
+ ASSERT_EQ(tag, value::TypeTags::NumberInt32);
+ ASSERT_GT(value::bitcastTo<int32_t>(val), 0);
+ ASSERT_FALSE(owned);
+ }
+
+ // BinData values are ordered by subtype. Values with different subtypes should compare as not
+ // equal, even if they have the same data.
+ {
+ uint8_t byteArray1[] = {1, 2, 3, 4};
+ uint8_t byteArray2[] = {1, 2, 3, 4};
+ auto operands =
+ BSON_ARRAY(BSONBinData(byteArray1, sizeof(byteArray1), BinDataGeneral)
+ << BSONBinData(byteArray2, sizeof(byteArray2), ByteArrayDeprecated));
+
+ vm::CodeFragment code;
+ code.appendConstVal(value::TypeTags::bsonBinData,
+ value::bitcastFrom<const char*>(operands[0].value()));
+ code.appendConstVal(value::TypeTags::bsonBinData,
+ value::bitcastFrom<const char*>(operands[1].value()));
+ code.appendCmp3w();
+
+ vm::ByteCode interpreter;
+ auto [owned, tag, val] = interpreter.run(&code);
+
+ ASSERT_EQ(tag, value::TypeTags::NumberInt32);
+ ASSERT_LT(value::bitcastTo<int32_t>(val), 0);
+ ASSERT_FALSE(owned);
+ }
+
+ // Comparison of 'ByteArrayDeprecated' BinData values should consider the leading four bytes,
+ // even though those bytes are not part of the data payload, according to the standard.
+ {
+ uint8_t byteArray1[] = {1, 2, 3, 4, 5, 6, 7, 8};
+ uint8_t byteArray2[] = {11, 12, 13, 14, 5, 6, 7, 8};
+ auto operands =
+ BSON_ARRAY(BSONBinData(byteArray1, sizeof(byteArray1), ByteArrayDeprecated)
+ << BSONBinData(byteArray2, sizeof(byteArray2), ByteArrayDeprecated));
+
+ vm::CodeFragment code;
+ code.appendConstVal(value::TypeTags::bsonBinData,
+ value::bitcastFrom<const char*>(operands[0].value()));
+ code.appendConstVal(value::TypeTags::bsonBinData,
+ value::bitcastFrom<const char*>(operands[1].value()));
+ code.appendCmp3w();
+
+ vm::ByteCode interpreter;
+ auto [owned, tag, val] = interpreter.run(&code);
+
+ ASSERT_EQ(tag, value::TypeTags::NumberInt32);
+ ASSERT_LT(value::bitcastTo<int32_t>(val), 0);
+ ASSERT_FALSE(owned);
+ }
+}
+
+TEST(SBEVM, ConvertBinDataToBsonObj) {
+ uint8_t byteArray[] = {1, 2, 3, 4, 5, 6, 7, 8};
+ auto originalBinData =
+ BSON_ARRAY(BSONBinData(byteArray, sizeof(byteArray), ByteArrayDeprecated));
+
+ value::Array array;
+ auto [binDataTag, binDataVal] = value::copyValue(
+ value::TypeTags::bsonBinData, value::bitcastFrom<const char*>(originalBinData[0].value()));
+ array.push_back(binDataTag, binDataVal);
+
+ BSONArrayBuilder builder;
+ bson::convertToBsonObj(builder, &array);
+ auto convertedBinData = builder.done();
+
+ ASSERT_EQ(originalBinData.woCompare(convertedBinData), 0);
+}
} // namespace mongo::sbe
diff --git a/src/mongo/db/exec/sbe/values/value.cpp b/src/mongo/db/exec/sbe/values/value.cpp
index 613539b881a..4debb14a7e2 100644
--- a/src/mongo/db/exec/sbe/values/value.cpp
+++ b/src/mongo/db/exec/sbe/values/value.cpp
@@ -387,17 +387,11 @@ void writeValueToStream(T& stream, TypeTags tag, Value val) {
stream << "---===*** bsonObjectId ***===---";
break;
case value::TypeTags::bsonBinData: {
- auto data =
- reinterpret_cast<const char*>(getBSONBinData(value::TypeTags::bsonBinData, val));
- auto len = getBSONBinDataSize(value::TypeTags::bsonBinData, val);
+ auto data = reinterpret_cast<const char*>(
+ getBSONBinDataCompat(value::TypeTags::bsonBinData, val));
+ auto len = getBSONBinDataSizeCompat(value::TypeTags::bsonBinData, val);
auto type = getBSONBinDataSubtype(value::TypeTags::bsonBinData, val);
- if (type == ByteArrayDeprecated) {
- // Skip extra size
- len -= 4;
- data += 4;
- }
-
// If the BinData is a correctly sized newUUID, display it as such.
if (type == newUUID && len == kNewUUIDLength) {
using namespace fmt::literals;
diff --git a/src/mongo/db/exec/sbe/values/value.h b/src/mongo/db/exec/sbe/values/value.h
index c34e8219c87..a6875441e19 100644
--- a/src/mongo/db/exec/sbe/values/value.h
+++ b/src/mongo/db/exec/sbe/values/value.h
@@ -696,6 +696,47 @@ inline uint8_t* getBSONBinData(TypeTags tag, Value val) {
return reinterpret_cast<uint8_t*>(getRawPointerView(val) + sizeof(uint32_t) + 1);
}
+/**
+ * Same as 'getBSONBinDataSize()' except when the BinData has the 'ByteArrayDeprecated' subtype,
+ * in which case it returns the size of the payload, rather than the size of the entire BinData.
+ *
+ * The BSON spec originally stipulated that BinData values with the "binary" subtype (named
+ * 'ByteArrayDeprecated' here) should structure their contents so that the first four bytes store
+ * the length of the payload, which occupies the remaining bytes. That subtype is now deprecated,
+ * but there are some callers that remain aware of it and operate on the payload rather than the
+ * whole BinData byte array. Most callers, however, should use the regular 'getBSONBinDataSize()'
+ * and 'getBSONBinData()' and remain oblivious to the BinData subtype.
+ *
+ * Note that this payload size is computed by subtracting the size of the length bytes from the
+ * overall size of BinData. Even though this function supports the deprecated subtype, it still
+ * ignores the payload length value. A BinData too short to hold the length bytes yields 0.
+ */
+inline size_t getBSONBinDataSizeCompat(TypeTags tag, Value val) {
+ auto size = getBSONBinDataSize(tag, val);
+ if (getBSONBinDataSubtype(tag, val) != ByteArrayDeprecated) {
+ return size;
+ } else {
+ return (size >= sizeof(uint32_t)) ? size - sizeof(uint32_t) : 0;
+ }
+}
+
+/**
+ * Same as 'getBSONBinData()' except when the BinData has the 'ByteArrayDeprecated' subtype, in
+ * which case it returns a pointer to the payload, rather than a pointer to the beginning of the
+ * BinData. No bounds check is made, so pair it with the size from 'getBSONBinDataSizeCompat()'.
+ *
+ * See the 'getBSONBinDataSizeCompat()' documentation for an explanation of the
+ * 'ByteArrayDeprecated' subtype.
+ */
+inline uint8_t* getBSONBinDataCompat(TypeTags tag, Value val) {
+ auto binData = getBSONBinData(tag, val);
+ if (getBSONBinDataSubtype(tag, val) != ByteArrayDeprecated) {
+ return binData;
+ } else {
+ return binData + sizeof(uint32_t);
+ }
+}
+
inline bool canUseSmallString(std::string_view input) {
auto length = input.size();
auto ptr = input.data();
diff --git a/src/mongo/db/exec/sbe/values/value_serialize_for_sorter_test.cpp b/src/mongo/db/exec/sbe/values/value_serialize_for_sorter_test.cpp
new file mode 100644
index 00000000000..2b4b97e53d0
--- /dev/null
+++ b/src/mongo/db/exec/sbe/values/value_serialize_for_sorter_test.cpp
@@ -0,0 +1,129 @@
+/**
+ * Copyright (C) 2020-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+/**
+ * This file contains tests for serializing sbe::value::MaterializedRow for the sorter.
+ */
+
+#include "mongo/db/exec/sbe/values/slot.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo::sbe {
+TEST(ValueSerializeForSorter, Serialize) {
+ value::MaterializedRow originalRow(21);
+
+ originalRow.reset(0, true, value::TypeTags::Nothing, 0);
+ originalRow.reset(1, true, value::TypeTags::NumberInt32, value::bitcastFrom<int32_t>(33550336));
+ originalRow.reset(2, true, value::TypeTags::RecordId, value::bitcastFrom<int64_t>(8589869056));
+ originalRow.reset(
+ 3, true, value::TypeTags::NumberInt64, value::bitcastFrom<int64_t>(137438691328));
+ originalRow.reset(4, true, value::TypeTags::NumberDouble, value::bitcastFrom<double>(2.305e18));
+
+ auto [decimalTag, decimalVal] =
+ value::makeCopyDecimal(Decimal128("2658455991569831744654692615953842176"));
+ originalRow.reset(5, true, decimalTag, decimalVal);
+
+ originalRow.reset(6, true, value::TypeTags::Date, value::bitcastFrom<int64_t>(1234));
+ originalRow.reset(7, true, value::TypeTags::Timestamp, value::bitcastFrom<uint64_t>(5678));
+ originalRow.reset(8, true, value::TypeTags::Boolean, value::bitcastFrom<bool>(true));
+ originalRow.reset(9, true, value::TypeTags::Null, 0);
+ originalRow.reset(10, true, value::TypeTags::MinKey, 0);
+ originalRow.reset(11, true, value::TypeTags::MaxKey, 0);
+ originalRow.reset(12, true, value::TypeTags::bsonUndefined, 0);
+
+ auto [stringTag, stringVal] = value::makeNewString("perfect");
+ originalRow.reset(13, true, stringTag, stringVal);
+
+ auto [objectTag, objectVal] = value::makeNewObject();
+ originalRow.reset(14, true, objectTag, objectVal);
+
+ auto object = value::getObjectView(objectVal);
+ object->push_back("num", value::TypeTags::NumberInt64, value::bitcastFrom<int64_t>(1));
+
+ auto [arrayTag, arrayVal] = value::makeNewArray();
+ object->push_back("arr", arrayTag, arrayVal);
+
+ auto array = value::getArrayView(arrayVal);
+ array->push_back(value::TypeTags::NumberInt64, value::bitcastFrom<int64_t>(2));
+ array->push_back(value::TypeTags::NumberInt64, value::bitcastFrom<int64_t>(3));
+
+ auto [arraySetTag, arraySetVal] = value::makeNewArraySet();
+ object->push_back("set", arraySetTag, arraySetVal);
+
+ auto arraySet = value::getArraySetView(arraySetVal);
+ arraySet->push_back(value::TypeTags::NumberInt64, value::bitcastFrom<int64_t>(4));
+ arraySet->push_back(value::TypeTags::NumberInt64, value::bitcastFrom<int64_t>(5));
+
+ auto [oidTag, oidVal] = value::makeCopyObjectId({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
+ originalRow.reset(15, true, oidTag, oidVal);
+
+ uint8_t byteArray[] = {8, 7, 6, 5, 4, 3, 2, 1};
+ auto bson =
+ BSON("obj" << BSON("a" << 1 << "b" << 2) << "arr" << BSON_ARRAY(1 << 2 << 3) //
+ << "binDataGeneral" << BSONBinData(byteArray, sizeof(byteArray), BinDataGeneral)
+ << "binDataDeprecated"
+ << BSONBinData(byteArray, sizeof(byteArray), ByteArrayDeprecated)
+ << "malformedBinDataDeprecated" << BSONBinData(nullptr, 0, ByteArrayDeprecated));
+
+ auto [bsonObjTag, bsonObjVal] = value::copyValue(
+ value::TypeTags::bsonObject, value::bitcastFrom<const char*>(bson["obj"].value()));
+ originalRow.reset(16, true, bsonObjTag, bsonObjVal);
+
+
+ auto [bsonArrayTag, bsonArrayVal] = value::copyValue(
+ value::TypeTags::bsonArray, value::bitcastFrom<const char*>(bson["arr"].value()));
+ originalRow.reset(17, true, bsonArrayTag, bsonArrayVal);
+
+ auto [bsonBinDataGeneralTag, bsonBinDataGeneralVal] =
+ value::copyValue(value::TypeTags::bsonBinData,
+ value::bitcastFrom<const char*>(bson["binDataGeneral"].value()));
+ originalRow.reset(18, true, bsonBinDataGeneralTag, bsonBinDataGeneralVal);
+
+ auto [bsonBinDataDeprecatedTag, bsonBinDataDeprecatedVal] =
+ value::copyValue(value::TypeTags::bsonBinData,
+ value::bitcastFrom<const char*>(bson["binDataDeprecated"].value()));
+ originalRow.reset(19, true, bsonBinDataDeprecatedTag, bsonBinDataDeprecatedVal);
+
+ KeyString::Builder keyStringBuilder(KeyString::Version::V1);
+ keyStringBuilder.appendNumberLong(1);
+ keyStringBuilder.appendNumberLong(2);
+ keyStringBuilder.appendNumberLong(3);
+ auto [keyStringTag, keyStringVal] = value::makeCopyKeyString(keyStringBuilder.getValueCopy());
+ originalRow.reset(20, true, keyStringTag, keyStringVal);
+
+ BufBuilder builder;
+ originalRow.serializeForSorter(builder);
+ auto buffer = builder.release();
+
+ BufReader reader(buffer.get(), buffer.capacity());
+ value::MaterializedRow roundTripRow = value::MaterializedRow::deserializeForSorter(reader, {});
+
+ ASSERT(originalRow == roundTripRow);
+}
+} // namespace mongo::sbe
diff --git a/src/mongo/db/exec/sbe/values/write_value_to_stream_test.cpp b/src/mongo/db/exec/sbe/values/write_value_to_stream_test.cpp
index b3e9a1da51b..7ab10c360e7 100644
--- a/src/mongo/db/exec/sbe/values/write_value_to_stream_test.cpp
+++ b/src/mongo/db/exec/sbe/values/write_value_to_stream_test.cpp
@@ -92,6 +92,17 @@ TEST(WriteValueToStream, ByteArrayDeprecatedBSONBinDataTest) {
ASSERT_EQUALS(expectedString, oss.str());
}
+TEST(WriteValueToStream, MalformedByteArrayDeprecatedBSONBinDataTest) {
+ uint8_t array[] = {0, 1};
+ auto bsonString = BSON("binData" << BSONBinData(array, 2, ByteArrayDeprecated));
+ auto val = value::bitcastFrom<const char*>(bsonString["binData"].value());
+ const std::pair<value::TypeTags, value::Value> value(value::TypeTags::bsonBinData, val);
+ std::ostringstream oss;
+ writeToStream(oss, value);
+ auto expectedString = "BinData(2, )";
+ ASSERT_EQUALS(expectedString, oss.str());
+}
+
TEST(WriteValueToStream, ShortStringBigTest) {
auto [tag, val] = value::makeNewString(kStringShort);
value::ValueGuard guard{tag, val};