diff options
author | Justin Seyster <justin.seyster@mongodb.com> | 2021-01-21 18:51:18 -0500 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-01-27 04:19:52 +0000 |
commit | 657fd55617da405757b94bc9973df40394a18e5b (patch) | |
tree | 008bfef400ffaa7183156a7b5ef2881ec1bdd5f6 | |
parent | 84a07cc16f411f0cc5897a410ba0248b35d5d293 (diff) | |
download | mongo-657fd55617da405757b94bc9973df40394a18e5b.tar.gz |
SERVER-50386 Testing and documentation for ByteArrayDeprecated subtype
-rw-r--r-- | src/mongo/db/exec/sbe/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/sbe_test.cpp | 154 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/values/value.cpp | 12 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/values/value.h | 41 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/values/value_serialize_for_sorter_test.cpp | 129 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/values/write_value_to_stream_test.cpp | 11 |
6 files changed, 339 insertions, 9 deletions
diff --git a/src/mongo/db/exec/sbe/SConscript b/src/mongo/db/exec/sbe/SConscript index 8b2e7f1eb51..4f4b5a201fd 100644 --- a/src/mongo/db/exec/sbe/SConscript +++ b/src/mongo/db/exec/sbe/SConscript @@ -146,6 +146,7 @@ env.CppUnitTest( 'sbe_sorted_merge_test.cpp', 'sbe_test.cpp', 'sbe_unique_test.cpp', + 'values/value_serialize_for_sorter_test.cpp', 'values/write_value_to_stream_test.cpp' ], LIBDEPS=[ diff --git a/src/mongo/db/exec/sbe/sbe_test.cpp b/src/mongo/db/exec/sbe/sbe_test.cpp index 31a4e53eac5..74460db8e2c 100644 --- a/src/mongo/db/exec/sbe/sbe_test.cpp +++ b/src/mongo/db/exec/sbe/sbe_test.cpp @@ -27,6 +27,7 @@ * it in the license file. */ +#include "mongo/db/exec/sbe/values/bson.h" #include "mongo/db/exec/sbe/values/value.h" #include "mongo/db/exec/sbe/vm/vm.h" #include "mongo/unittest/unittest.h" @@ -146,6 +147,32 @@ TEST(SBEValues, Hash) { value::hashValue(tagDecimalBig, valDecimalBig)); value::releaseValue(tagDecimalBig, valDecimalBig); + + uint8_t byteArray1[] = {1, 2, 3, 4, 5, 6, 7, 8}; + uint8_t byteArray2[] = {4, 3, 2, 1, 5, 6, 7, 8}; + auto binDataOperands = + BSON_ARRAY(BSONBinData(byteArray1, sizeof(byteArray1), BinDataGeneral) + << BSONBinData(byteArray1, sizeof(byteArray1), ByteArrayDeprecated) + << BSONBinData(byteArray2, sizeof(byteArray2), ByteArrayDeprecated)); + + // Two BinData values with the same data but different subtypes should hash differently. 
+ auto tagGeneralBinData = value::TypeTags::bsonBinData; + auto valGeneralBinData = value::bitcastFrom<const char*>(binDataOperands[0].value()); + + auto tagDeprecatedBinData1 = value::TypeTags::bsonBinData; + auto valDeprecatedBinData1 = value::bitcastFrom<const char*>(binDataOperands[1].value()); + + ASSERT_NE(value::hashValue(tagGeneralBinData, valGeneralBinData), + value::hashValue(tagDeprecatedBinData1, valDeprecatedBinData1)); + + // Two ByteArrayDeprecated BinData values with different values in the leading four bytes should + // hash differently, even though those four bytes are technically not part of the binary data + // payload. + auto tagDeprecatedBinData2 = value::TypeTags::bsonBinData; + auto valDeprecatedBinData2 = value::bitcastFrom<const char*>(binDataOperands[2].value()); + + ASSERT_NE(value::hashValue(tagDeprecatedBinData1, valDeprecatedBinData1), + value::hashValue(tagDeprecatedBinData2, valDeprecatedBinData2)); } TEST(SBEValues, HashCompound) { @@ -248,4 +275,131 @@ TEST(SBEVM, Add) { } } +TEST(SBEVM, CompareBinData) { + { + uint8_t byteArray1[] = {1, 2, 3, 4}; + uint8_t byteArray2[] = {1, 2, 3, 10}; + auto operands = BSON_ARRAY(BSONBinData(byteArray1, sizeof(byteArray1), BinDataGeneral) + << BSONBinData(byteArray2, sizeof(byteArray2), BinDataGeneral)); + + vm::CodeFragment code; + code.appendConstVal(value::TypeTags::bsonBinData, + value::bitcastFrom<const char*>(operands[0].value())); + code.appendConstVal(value::TypeTags::bsonBinData, + value::bitcastFrom<const char*>(operands[1].value())); + code.appendCmp3w(); + + vm::ByteCode interpreter; + auto [owned, tag, val] = interpreter.run(&code); + + ASSERT_EQ(tag, value::TypeTags::NumberInt32); + ASSERT_LT(value::bitcastTo<int32_t>(val), 0); + ASSERT_FALSE(owned); + } + { + uint8_t byteArray1[] = {1, 2, 3, 4}; + uint8_t byteArray2[] = {1, 2, 3, 4}; + auto operands = BSON_ARRAY(BSONBinData(byteArray1, sizeof(byteArray1), BinDataGeneral) + << BSONBinData(byteArray2, sizeof(byteArray2), 
BinDataGeneral)); + + vm::CodeFragment code; + code.appendConstVal(value::TypeTags::bsonBinData, + value::bitcastFrom<const char*>(operands[0].value())); + code.appendConstVal(value::TypeTags::bsonBinData, + value::bitcastFrom<const char*>(operands[1].value())); + code.appendCmp3w(); + + vm::ByteCode interpreter; + auto [owned, tag, val] = interpreter.run(&code); + + ASSERT_EQ(tag, value::TypeTags::NumberInt32); + ASSERT_EQ(value::bitcastTo<int32_t>(val), 0); + ASSERT_FALSE(owned); + } + { + uint8_t byteArray1[] = {1, 2, 10, 4}; + uint8_t byteArray2[] = {1, 2, 3, 4}; + auto operands = BSON_ARRAY(BSONBinData(byteArray1, sizeof(byteArray1), BinDataGeneral) + << BSONBinData(byteArray2, sizeof(byteArray2), BinDataGeneral)); + + vm::CodeFragment code; + code.appendConstVal(value::TypeTags::bsonBinData, + value::bitcastFrom<const char*>(operands[0].value())); + code.appendConstVal(value::TypeTags::bsonBinData, + value::bitcastFrom<const char*>(operands[1].value())); + code.appendCmp3w(); + + vm::ByteCode interpreter; + auto [owned, tag, val] = interpreter.run(&code); + + ASSERT_EQ(tag, value::TypeTags::NumberInt32); + ASSERT_GT(value::bitcastTo<int32_t>(val), 0); + ASSERT_FALSE(owned); + } + + // BinData values are ordered by subtype. Values with different subtypes should compare as not + // equal, even if they have the same data. 
+ { + uint8_t byteArray1[] = {1, 2, 3, 4}; + uint8_t byteArray2[] = {1, 2, 3, 4}; + auto operands = + BSON_ARRAY(BSONBinData(byteArray1, sizeof(byteArray1), BinDataGeneral) + << BSONBinData(byteArray2, sizeof(byteArray2), ByteArrayDeprecated)); + + vm::CodeFragment code; + code.appendConstVal(value::TypeTags::bsonBinData, + value::bitcastFrom<const char*>(operands[0].value())); + code.appendConstVal(value::TypeTags::bsonBinData, + value::bitcastFrom<const char*>(operands[1].value())); + code.appendCmp3w(); + + vm::ByteCode interpreter; + auto [owned, tag, val] = interpreter.run(&code); + + ASSERT_EQ(tag, value::TypeTags::NumberInt32); + ASSERT_LT(value::bitcastTo<int32_t>(val), 0); + ASSERT_FALSE(owned); + } + + // Comparison of 'ByteArrayDeprecated' BinData values should consider the leading four bytes, + // even though those bytes are not part of the data payload, according to the standard. + { + uint8_t byteArray1[] = {1, 2, 3, 4, 5, 6, 7, 8}; + uint8_t byteArray2[] = {11, 12, 13, 14, 5, 6, 7, 8}; + auto operands = + BSON_ARRAY(BSONBinData(byteArray1, sizeof(byteArray1), ByteArrayDeprecated) + << BSONBinData(byteArray2, sizeof(byteArray2), ByteArrayDeprecated)); + + vm::CodeFragment code; + code.appendConstVal(value::TypeTags::bsonBinData, + value::bitcastFrom<const char*>(operands[0].value())); + code.appendConstVal(value::TypeTags::bsonBinData, + value::bitcastFrom<const char*>(operands[1].value())); + code.appendCmp3w(); + + vm::ByteCode interpreter; + auto [owned, tag, val] = interpreter.run(&code); + + ASSERT_EQ(tag, value::TypeTags::NumberInt32); + ASSERT_LT(value::bitcastTo<int32_t>(val), 0); + ASSERT_FALSE(owned); + } +} + +TEST(SBEVM, ConvertBinDataToBsonObj) { + uint8_t byteArray[] = {1, 2, 3, 4, 5, 6, 7, 8}; + auto originalBinData = + BSON_ARRAY(BSONBinData(byteArray, sizeof(byteArray), ByteArrayDeprecated)); + + value::Array array; + auto [binDataTag, binDataVal] = value::copyValue( + value::TypeTags::bsonBinData, value::bitcastFrom<const 
char*>(originalBinData[0].value())); + array.push_back(binDataTag, binDataVal); + + BSONArrayBuilder builder; + bson::convertToBsonObj(builder, &array); + auto convertedBinData = builder.done(); + + ASSERT_EQ(originalBinData.woCompare(convertedBinData), 0); +} } // namespace mongo::sbe diff --git a/src/mongo/db/exec/sbe/values/value.cpp b/src/mongo/db/exec/sbe/values/value.cpp index 613539b881a..4debb14a7e2 100644 --- a/src/mongo/db/exec/sbe/values/value.cpp +++ b/src/mongo/db/exec/sbe/values/value.cpp @@ -387,17 +387,11 @@ void writeValueToStream(T& stream, TypeTags tag, Value val) { stream << "---===*** bsonObjectId ***===---"; break; case value::TypeTags::bsonBinData: { - auto data = - reinterpret_cast<const char*>(getBSONBinData(value::TypeTags::bsonBinData, val)); - auto len = getBSONBinDataSize(value::TypeTags::bsonBinData, val); + auto data = reinterpret_cast<const char*>( + getBSONBinDataCompat(value::TypeTags::bsonBinData, val)); + auto len = getBSONBinDataSizeCompat(value::TypeTags::bsonBinData, val); auto type = getBSONBinDataSubtype(value::TypeTags::bsonBinData, val); - if (type == ByteArrayDeprecated) { - // Skip extra size - len -= 4; - data += 4; - } - // If the BinData is a correctly sized newUUID, display it as such. if (type == newUUID && len == kNewUUIDLength) { using namespace fmt::literals; diff --git a/src/mongo/db/exec/sbe/values/value.h b/src/mongo/db/exec/sbe/values/value.h index c34e8219c87..a6875441e19 100644 --- a/src/mongo/db/exec/sbe/values/value.h +++ b/src/mongo/db/exec/sbe/values/value.h @@ -696,6 +696,47 @@ inline uint8_t* getBSONBinData(TypeTags tag, Value val) { return reinterpret_cast<uint8_t*>(getRawPointerView(val) + sizeof(uint32_t) + 1); } +/** + * Same as 'getBSONBinDataSize()' except when the BinData has the 'ByteArrayDeprecated' subtype, + * in which case it returns the size of the payload, rather than the size of the entire BinData. 
+ * + * The BSON spec originally stipulated that BinData values with the "binary" subtype (named + * 'ByteArrayDeprecated' here) should structure their contents so that the first four bytes store + * the length of the payload, which occupies the remaining bytes. That subtype is now deprecated, + * but there are some callers that remain aware of it and operate on the payload rather than the + * whole BinData byte array. Most callers, however, should use the regular 'getBSONBinDataSize()' + * and 'getBSONBinData()' and remain oblivious to the BinData subtype. + * + * Note that this payload size is computed by subtracting the size of the length bytes from the + * overall size of BinData. Even though this function supports the deprecated subtype, it still + * ignores the payload length value. + */ +inline size_t getBSONBinDataSizeCompat(TypeTags tag, Value val) { + auto size = getBSONBinDataSize(tag, val); + if (getBSONBinDataSubtype(tag, val) != ByteArrayDeprecated) { + return size; + } else { + return (size >= sizeof(uint32_t)) ? size - sizeof(uint32_t) : 0; + } +} + +/** + * Same as 'getBSONBinData()' except when the BinData has the 'ByteArrayDeprecated' subtype, in + * which case it returns a pointer to the payload, rather than a pointer to the beginning of the + * BinData. + * + * See the 'getBSONBinDataSizeCompat()' documentation for an explanation of the + * 'ByteArrayDeprecated' subtype. 
+ */ +inline uint8_t* getBSONBinDataCompat(TypeTags tag, Value val) { + auto binData = getBSONBinData(tag, val); + if (getBSONBinDataSubtype(tag, val) != ByteArrayDeprecated) { + return binData; + } else { + return binData + sizeof(uint32_t); + } +} + inline bool canUseSmallString(std::string_view input) { auto length = input.size(); auto ptr = input.data(); diff --git a/src/mongo/db/exec/sbe/values/value_serialize_for_sorter_test.cpp b/src/mongo/db/exec/sbe/values/value_serialize_for_sorter_test.cpp new file mode 100644 index 00000000000..2b4b97e53d0 --- /dev/null +++ b/src/mongo/db/exec/sbe/values/value_serialize_for_sorter_test.cpp @@ -0,0 +1,129 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. 
If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +/** + * This file contains tests for sbe::value::MaterializedRow::serializeForSorter. + */ + +#include "mongo/db/exec/sbe/values/slot.h" +#include "mongo/unittest/unittest.h" + +namespace mongo::sbe { +TEST(ValueSerializeForSorter, Serialize) { + value::MaterializedRow originalRow(21); + + originalRow.reset(0, true, value::TypeTags::Nothing, 0); + originalRow.reset(1, true, value::TypeTags::NumberInt32, value::bitcastFrom<int32_t>(33550336)); + originalRow.reset(2, true, value::TypeTags::RecordId, value::bitcastFrom<int64_t>(8589869056)); + originalRow.reset( + 3, true, value::TypeTags::NumberInt64, value::bitcastFrom<int64_t>(137438691328)); + originalRow.reset(4, true, value::TypeTags::NumberDouble, value::bitcastFrom<double>(2.305e18)); + + auto [decimalTag, decimalVal] = + value::makeCopyDecimal(Decimal128("2658455991569831744654692615953842176")); + originalRow.reset(5, true, decimalTag, decimalVal); + + originalRow.reset(6, true, value::TypeTags::Date, value::bitcastFrom<int64_t>(1234)); + originalRow.reset(7, true, value::TypeTags::Timestamp, value::bitcastFrom<uint64_t>(5678)); + originalRow.reset(8, true, value::TypeTags::Boolean, value::bitcastFrom<bool>(true)); + originalRow.reset(9, true, value::TypeTags::Null, 0); + originalRow.reset(10, true, value::TypeTags::MinKey, 0); + originalRow.reset(11, true, value::TypeTags::MaxKey, 0); + originalRow.reset(12, true, value::TypeTags::bsonUndefined, 0); + + auto [stringTag, stringVal] = value::makeNewString("perfect"); + originalRow.reset(13, true, stringTag, stringVal); + + auto [objectTag, objectVal] = value::makeNewObject(); + originalRow.reset(14, true, objectTag, objectVal); + + auto object = value::getObjectView(objectVal); + object->push_back("num", value::TypeTags::NumberInt64, value::bitcastFrom<int64_t>(1)); + + auto [arrayTag, arrayVal] = value::makeNewArray(); + 
object->push_back("arr", arrayTag, arrayVal); + + auto array = value::getArrayView(arrayVal); + array->push_back(value::TypeTags::NumberInt64, value::bitcastFrom<int64_t>(2)); + array->push_back(value::TypeTags::NumberInt64, value::bitcastFrom<int64_t>(3)); + + auto [arraySetTag, arraySetVal] = value::makeNewArraySet(); + object->push_back("set", arraySetTag, arraySetVal); + + auto arraySet = value::getArraySetView(arraySetVal); + arraySet->push_back(value::TypeTags::NumberInt64, value::bitcastFrom<int64_t>(4)); + arraySet->push_back(value::TypeTags::NumberInt64, value::bitcastFrom<int64_t>(5)); + + auto [oidTag, oidVal] = value::makeCopyObjectId({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + originalRow.reset(15, true, oidTag, oidVal); + + uint8_t byteArray[] = {8, 7, 6, 5, 4, 3, 2, 1}; + auto bson = + BSON("obj" << BSON("a" << 1 << "b" << 2) << "arr" << BSON_ARRAY(1 << 2 << 3) // + << "binDataGeneral" << BSONBinData(byteArray, sizeof(byteArray), BinDataGeneral) + << "binDataDeprecated" + << BSONBinData(byteArray, sizeof(byteArray), ByteArrayDeprecated) + << "malformedBinDataDeprecated" << BSONBinData(nullptr, 0, ByteArrayDeprecated)); + + auto [bsonObjTag, bsonObjVal] = value::copyValue( + value::TypeTags::bsonObject, value::bitcastFrom<const char*>(bson["obj"].value())); + originalRow.reset(16, true, bsonObjTag, bsonObjVal); + + + auto [bsonArrayTag, bsonArrayVal] = value::copyValue( + value::TypeTags::bsonArray, value::bitcastFrom<const char*>(bson["arr"].value())); + originalRow.reset(17, true, bsonArrayTag, bsonArrayVal); + + auto [bsonBinDataGeneralTag, bsonBinDataGeneralVal] = + value::copyValue(value::TypeTags::bsonBinData, + value::bitcastFrom<const char*>(bson["binDataGeneral"].value())); + originalRow.reset(18, true, bsonBinDataGeneralTag, bsonBinDataGeneralVal); + + auto [bsonBinDataDeprecatedTag, bsonBinDataDeprecatedVal] = + value::copyValue(value::TypeTags::bsonBinData, + value::bitcastFrom<const char*>(bson["binDataDeprecated"].value())); + 
originalRow.reset(19, true, bsonBinDataDeprecatedTag, bsonBinDataDeprecatedVal); + + KeyString::Builder keyStringBuilder(KeyString::Version::V1); + keyStringBuilder.appendNumberLong(1); + keyStringBuilder.appendNumberLong(2); + keyStringBuilder.appendNumberLong(3); + auto [keyStringTag, keyStringVal] = value::makeCopyKeyString(keyStringBuilder.getValueCopy()); + originalRow.reset(20, true, keyStringTag, keyStringVal); + + BufBuilder builder; + originalRow.serializeForSorter(builder); + auto buffer = builder.release(); + + BufReader reader(buffer.get(), buffer.capacity()); + value::MaterializedRow roundTripRow = value::MaterializedRow::deserializeForSorter(reader, {}); + + ASSERT(originalRow == roundTripRow); +} +} // namespace mongo::sbe diff --git a/src/mongo/db/exec/sbe/values/write_value_to_stream_test.cpp b/src/mongo/db/exec/sbe/values/write_value_to_stream_test.cpp index b3e9a1da51b..7ab10c360e7 100644 --- a/src/mongo/db/exec/sbe/values/write_value_to_stream_test.cpp +++ b/src/mongo/db/exec/sbe/values/write_value_to_stream_test.cpp @@ -92,6 +92,17 @@ TEST(WriteValueToStream, ByteArrayDeprecatedBSONBinDataTest) { ASSERT_EQUALS(expectedString, oss.str()); } +TEST(WriteValueToStream, MalformedByteArrayDeprecatedBSONBinDataTest) { + uint8_t array[] = {0, 1}; + auto bsonString = BSON("binData" << BSONBinData(array, 2, ByteArrayDeprecated)); + auto val = value::bitcastFrom<const char*>(bsonString["binData"].value()); + const std::pair<value::TypeTags, value::Value> value(value::TypeTags::bsonBinData, val); + std::ostringstream oss; + writeToStream(oss, value); + auto expectedString = "BinData(2, )"; + ASSERT_EQUALS(expectedString, oss.str()); +} + TEST(WriteValueToStream, ShortStringBigTest) { auto [tag, val] = value::makeNewString(kStringShort); value::ValueGuard guard{tag, val}; |