diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/exec/sbe/values/bson.cpp | 52 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/values/slot.cpp | 43 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/values/value.cpp | 28 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/values/value.h | 17 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/values/value_builder.h | 15 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/values/value_serialize_for_sorter_test.cpp | 100 |
6 files changed, 197 insertions, 58 deletions
diff --git a/src/mongo/db/exec/sbe/values/bson.cpp b/src/mongo/db/exec/sbe/values/bson.cpp index 4b459a3b4d4..ea5f50b7e58 100644 --- a/src/mongo/db/exec/sbe/values/bson.cpp +++ b/src/mongo/db/exec/sbe/values/bson.cpp @@ -34,24 +34,33 @@ namespace mongo { namespace sbe { namespace bson { + +/** + * Advance table specifies how to change the pointer to skip current BSON value (so that pointer + * points to the next byte after the BSON value): + * - For values less than 128 (0x80), pointer is advanced by this value + * - 255 (0xff) - pointer is advanced by the 32-bit integer stored in the buffer plus 4 bytes + * - 254 (0xfe) - pointer is advanced by the 32-bit integer stored in the buffer + * - 128 (0x80) - the type is either unsupported or handled explicitly + */ // clang-format off static uint8_t advanceTable[] = { - 0xff, // End - 8, // double - 0xff, // string - 0xfe, // document - 0xfe, // document - 0x80, // binary ??? +1 ? - 0, // Undefined(value) - Deprecated + 0xff, // EOO + 8, // Double + 0xff, // String + 0xfe, // Object + 0xfe, // Array + 0x80, // BinData + 0, // Undefined - Deprecated 12, // ObjectId 1, // Boolean 8, // UTC datetime 0, // Null value 0x80, // Regular expression - 0x80, // DBPointer - 0x80, // JavaScript code - 0x80, // Symbol - 0x80, // JavaScript code w/ scope ???? + 0x80, // DBPointer - Deprecated + 0xff, // JavaScript code + 0x80, // Symbol - Deprecated + 0x80, // JavaScript code with scope - Deprecated 4, // 32-bit integer 8, // Timestamp 8, // 64-bit integer @@ -227,13 +236,18 @@ std::pair<value::TypeTags, value::Value> convertFrom(bool view, case BSONType::Undefined: return {value::TypeTags::bsonUndefined, 0}; case BSONType::RegEx: { + auto value = value::bitcastFrom<const char*>(be); if (view) { - return {value::TypeTags::bsonRegex, value::bitcastFrom<const char*>(be)}; + return {value::TypeTags::bsonRegex, value}; } - - value::BsonRegex bsonRegex{be}; - auto [_, strVal] = value::makeBigString(bsonRegex.dataView()); - return {value::TypeTags::bsonRegex, strVal}; + return value::makeCopyBsonRegex(value::getBsonRegexView(value)); + } + case BSONType::Code: { + auto value = value::bitcastFrom<const char*>(be); + if (view) { + return {value::TypeTags::bsonJavascript, value}; + } + return value::makeCopyBsonJavascript(value::getBsonJavascriptView(value)); } default: return {value::TypeTags::Nothing, 0}; @@ -331,6 +345,9 @@ void convertToBsonObj(ArrayBuilder& builder, value::ArrayEnumerator arr) { builder.appendRegex(regex.pattern, regex.flags); break; } + case value::TypeTags::bsonJavascript: + builder.appendCode(value::getBsonJavascriptView(val)); + break; default: MONGO_UNREACHABLE; } @@ -459,6 +476,9 @@ void appendValueToBsonObj(ObjBuilder& builder, builder.appendRegex(name, regex.pattern, regex.flags); break; } + case value::TypeTags::bsonJavascript: + builder.appendCode(name, value::getBsonJavascriptView(val)); + break; default: MONGO_UNREACHABLE; } diff --git a/src/mongo/db/exec/sbe/values/slot.cpp b/src/mongo/db/exec/sbe/values/slot.cpp index 73dd4ee9564..e11727d8cf0 100644 --- a/src/mongo/db/exec/sbe/values/slot.cpp +++ b/src/mongo/db/exec/sbe/values/slot.cpp @@ -72,13 +72,15 @@ static std::pair<TypeTags, Value> deserializeTagVal(BufReader& buf) { case TypeTags::bsonUndefined: val = 0; break; - case TypeTags::StringSmall: + case TypeTags::StringSmall: { + std::tie(tag, val) = makeNewString(buf.readCStr()); + break; + } case TypeTags::StringBig: case TypeTags::bsonString: { - auto str = buf.readCStr(); - auto [strTag, strVal] = makeNewString({str.rawData(), str.size()}); - tag = strTag; - val = strVal; + auto stringLength = buf.read<LittleEndian<uint32_t>>(); + auto stringStart = reinterpret_cast<const char*>(buf.skip(stringLength)); + std::tie(tag, val) = makeNewString({stringStart, stringLength}); break; } case TypeTags::Array: { @@ -156,9 +158,13 @@ static std::pair<TypeTags, Value> deserializeTagVal(BufReader& buf) { case TypeTags::bsonRegex: { auto pattern = buf.readCStr(); auto flags = buf.readCStr(); - BsonRegex bsonRegex{pattern, flags}; - auto [_, strVal] = makeBigString(bsonRegex.dataView()); - val = strVal; + std::tie(tag, val) = value::makeCopyBsonRegex({pattern, flags}); + break; + } + case TypeTags::bsonJavascript: { + auto codeLength = buf.read<LittleEndian<uint32_t>>(); + auto codeStart = reinterpret_cast<const char*>(buf.skip(codeLength)); + std::tie(tag, val) = makeCopyBsonJavascript({codeStart, codeLength}); break; } default: @@ -217,11 +223,17 @@ static void serializeTagValue(BufBuilder& buf, TypeTags tag, Value val) { break; case TypeTags::bsonUndefined: break; - case TypeTags::StringSmall: + case TypeTags::StringSmall: { + // Small strings cannot contain null bytes, so it is safe to serialize them as plain + // C-strings. Null byte is implicitly added at the end by 'buf.appendStr'. + buf.appendStr(getStringView(tag, val)); + break; + } case TypeTags::StringBig: case TypeTags::bsonString: { auto sv = getStringView(tag, val); - buf.appendStr({sv.data(), sv.size()}); + buf.appendNum(static_cast<uint32_t>(sv.size())); + buf.appendStr(sv, false /* includeEndingNull */); break; } case TypeTags::Array: { @@ -292,6 +304,12 @@ static void serializeTagValue(BufBuilder& buf, TypeTags tag, Value val) { buf.appendStr(regex.flags); break; } + case TypeTags::bsonJavascript: { + auto javascriptCode = getBsonJavascriptView(val); + buf.appendNum(static_cast<uint32_t>(javascriptCode.size())); + buf.appendStr(javascriptCode, false /* includeEndingNull */); + break; + } default: MONGO_UNREACHABLE; } @@ -385,6 +403,11 @@ int getApproximateSize(TypeTags tag, Value val) { result += regex.byteSize(); break; } + case TypeTags::bsonJavascript: { + auto code = getBsonJavascriptView(val); + result += sizeof(uint32_t) + code.size() + sizeof(char); + break; + } default: MONGO_UNREACHABLE; } diff --git a/src/mongo/db/exec/sbe/values/value.cpp b/src/mongo/db/exec/sbe/values/value.cpp index f2dc9c1723a..52f49ede825 100644 --- a/src/mongo/db/exec/sbe/values/value.cpp +++ b/src/mongo/db/exec/sbe/values/value.cpp @@ -44,6 +44,17 @@ namespace mongo { namespace sbe { namespace value { +std::pair<TypeTags, Value> makeCopyBsonRegex(const BsonRegex& regex) { + auto buffer = new char[regex.byteSize()]; + memcpy(buffer, regex.data(), regex.byteSize()); + return {TypeTags::bsonRegex, bitcastFrom<char*>(buffer)}; +} + +std::pair<TypeTags, Value> makeCopyBsonJavascript(std::string_view code) { + auto [_, strVal] = makeBigString(code); + return {TypeTags::bsonJavascript, strVal}; +} + std::pair<TypeTags, Value> makeCopyKeyString(const KeyString::Value& inKey) { auto k = new KeyString::Value(inKey); return {TypeTags::ksValue, bitcastFrom<KeyString::Value*>(k)}; @@ -115,6 +126,8 @@ void releaseValue(TypeTags tag, Value val) noexcept { case TypeTags::StringBig: case TypeTags::bsonObjectId: case TypeTags::bsonBinData: + case TypeTags::bsonRegex: + case TypeTags::bsonJavascript: delete[] getRawPointerView(val); break; @@ -236,6 +249,9 @@ void writeTagToStream(T& stream, const TypeTags tag) { case TypeTags::bsonRegex: stream << "bsonRegex"; break; + case TypeTags::bsonJavascript: + stream << "bsonJavascript"; + break; default: stream << "unknown tag"; break; @@ -455,6 +471,9 @@ void writeValueToStream(T& stream, TypeTags tag, Value val) { stream << '/' << regex.pattern << '/' << regex.flags; break; } + case value::TypeTags::bsonJavascript: + stream << "Javascript(" << getBsonJavascriptView(val) << ")"; + break; default: MONGO_UNREACHABLE; } @@ -534,6 +553,8 @@ BSONType tagToType(TypeTags tag) noexcept { return BSONType::EOO; case TypeTags::bsonRegex: return BSONType::RegEx; + case TypeTags::bsonJavascript: + return BSONType::Code; default: MONGO_UNREACHABLE; } @@ -648,6 +669,8 @@ std::size_t hashValue(TypeTags tag, Value val, const CollatorInterface* collator auto regex = getBsonRegexView(val); return absl::Hash<std::string_view>{}(regex.dataView()); } + case TypeTags::bsonJavascript: + return absl::Hash<std::string_view>{}(getBsonJavascriptView(val)); default: break; } @@ -823,6 +846,11 @@ std::pair<TypeTags, Value> compareValue(TypeTags lhsTag, auto rhsRegex = getBsonRegexView(rhsValue); auto result = compareHelper(lhsRegex.dataView(), rhsRegex.dataView()); return {TypeTags::NumberInt32, bitcastFrom<int32_t>(result)}; + } else if (lhsTag == TypeTags::bsonJavascript && rhsTag == TypeTags::bsonJavascript) { + auto lhsCode = getBsonJavascriptView(lhsValue); + auto rhsCode = getBsonJavascriptView(rhsValue); + auto result = compareHelper(lhsCode, rhsCode); + return {TypeTags::NumberInt32, result}; } else { // Different types. auto lhsType = tagToType(lhsTag); diff --git a/src/mongo/db/exec/sbe/values/value.h b/src/mongo/db/exec/sbe/values/value.h index ddf502bb35f..86f1ebef155 100644 --- a/src/mongo/db/exec/sbe/values/value.h +++ b/src/mongo/db/exec/sbe/values/value.h @@ -115,6 +115,7 @@ enum class TypeTags : uint8_t { // or from user over the wire). It is never created or manipulated by SBE. bsonUndefined, bsonRegex, + bsonJavascript, // KeyString::Value ksValue, @@ -250,13 +251,13 @@ public: ValueGuard(TypeTags tag, Value val) : _tag(tag), _value(val) {} ValueGuard() = delete; ValueGuard(const ValueGuard&) = delete; - ValueGuard(ValueGuard&&) = delete; + ValueGuard(ValueGuard&& other) = delete; ~ValueGuard() { releaseValue(_tag, _value); } ValueGuard& operator=(const ValueGuard&) = delete; - ValueGuard& operator=(ValueGuard&&) = delete; + ValueGuard& operator=(ValueGuard&& other) = delete; void reset() { _tag = TypeTags::Nothing; @@ -944,6 +945,14 @@ inline BsonRegex getBsonRegexView(Value val) noexcept { return BsonRegex(getRawPointerView(val)); } +std::pair<TypeTags, Value> makeCopyBsonRegex(const BsonRegex& regex); + +inline std::string_view getBsonJavascriptView(Value val) noexcept { + return getStringView(TypeTags::StringBig, val); +} + +std::pair<TypeTags, Value> makeCopyBsonJavascript(std::string_view code); + std::pair<TypeTags, Value> makeCopyKeyString(const KeyString::Value& inKey); std::pair<TypeTags, Value> makeCopyJsFunction(const JsFunction&); @@ -1002,6 +1011,10 @@ inline std::pair<TypeTags, Value> copyValue(TypeTags tag, Value val) { return makeCopyJsFunction(*getJsFunctionView(val)); case TypeTags::shardFilterer: return makeCopyShardFilterer(*getShardFiltererView(val)); + case TypeTags::bsonRegex: + return makeCopyBsonRegex(getBsonRegexView(val)); + case TypeTags::bsonJavascript: + return makeCopyBsonJavascript(getBsonJavascriptView(val)); default: break; } diff --git a/src/mongo/db/exec/sbe/values/value_builder.h b/src/mongo/db/exec/sbe/values/value_builder.h index dc58c501a25..8288a1375e2 100644 --- a/src/mongo/db/exec/sbe/values/value_builder.h +++ b/src/mongo/db/exec/sbe/values/value_builder.h @@ -122,7 +122,10 @@ public: } void append(const BSONCode& in) { - unsupportedType("javascript"); + appendValueBufferOffset(TypeTags::bsonJavascript); + // Add one to account null byte at the end. + _valueBufferBuilder->appendNum(static_cast<uint32_t>(in.code.size() + 1)); + _valueBufferBuilder->appendStr(in.code); } void append(const BSONCodeWScope& in) { @@ -137,7 +140,9 @@ public: } void append(const BSONRegEx& in) { - unsupportedType("regex"); + appendValueBufferOffset(TypeTags::bsonRegex); + _valueBufferBuilder->appendStr(in.pattern); + _valueBufferBuilder->appendStr(in.flags); } void append(const BSONDBRef& in) { @@ -206,7 +211,9 @@ public: case TypeTags::NumberDecimal: case TypeTags::bsonObject: case TypeTags::bsonArray: - case TypeTags::bsonBinData: { + case TypeTags::bsonBinData: + case TypeTags::bsonRegex: + case TypeTags::bsonJavascript: { auto offset = bitcastTo<decltype(bufferLen)>(val); invariant(offset < bufferLen); val = bitcastFrom<const char*>(_valueBufferBuilder->buf() + offset); @@ -245,7 +252,7 @@ private: // // During the building process, pointers into that memory can become invalidated, so instead of // storing a pointer, we store an _offset_ into the under-construction buffer. Translation from - // offset to pointer occurs as part of the 'releaseValues()' function. + // offset to pointer occurs as part of the 'readValues()' function. void appendValueBufferOffset(TypeTags tag) { _tagList[_numValues] = tag; _valList[_numValues] = value::bitcastFrom<int32_t>(_valueBufferBuilder->len()); diff --git a/src/mongo/db/exec/sbe/values/value_serialize_for_sorter_test.cpp b/src/mongo/db/exec/sbe/values/value_serialize_for_sorter_test.cpp index d95fd3ba771..766bdac983a 100644 --- a/src/mongo/db/exec/sbe/values/value_serialize_for_sorter_test.cpp +++ b/src/mongo/db/exec/sbe/values/value_serialize_for_sorter_test.cpp @@ -36,32 +36,55 @@ namespace mongo::sbe { TEST(ValueSerializeForSorter, Serialize) { - value::MaterializedRow originalRow(21); + auto [testDataTag, testDataVal] = sbe::value::makeNewArray(); + sbe::value::ValueGuard testDataGuard{testDataTag, testDataVal}; + auto testData = sbe::value::getArrayView(testDataVal); - originalRow.reset(0, true, value::TypeTags::Nothing, 0); - originalRow.reset(1, true, value::TypeTags::NumberInt32, value::bitcastFrom<int32_t>(33550336)); - originalRow.reset(2, true, value::TypeTags::RecordId, value::bitcastFrom<int64_t>(8589869056)); - originalRow.reset( - 3, true, value::TypeTags::NumberInt64, value::bitcastFrom<int64_t>(137438691328)); - originalRow.reset(4, true, value::TypeTags::NumberDouble, value::bitcastFrom<double>(2.305e18)); + testData->push_back(value::TypeTags::Nothing, 0); + testData->push_back(value::TypeTags::NumberInt32, value::bitcastFrom<int32_t>(33550336)); + testData->push_back(value::TypeTags::RecordId, value::bitcastFrom<int64_t>(8589869056)); + testData->push_back(value::TypeTags::NumberInt64, value::bitcastFrom<int64_t>(137438691328)); + testData->push_back(value::TypeTags::NumberDouble, value::bitcastFrom<double>(2.305e18)); auto [decimalTag, decimalVal] = value::makeCopyDecimal(Decimal128("2658455991569831744654692615953842176")); - originalRow.reset(5, true, decimalTag, decimalVal); - - originalRow.reset(6, true, value::TypeTags::Date, value::bitcastFrom<int64_t>(1234)); - originalRow.reset(7, true, value::TypeTags::Timestamp, value::bitcastFrom<uint64_t>(5678)); - originalRow.reset(8, true, value::TypeTags::Boolean, value::bitcastFrom<bool>(true)); - originalRow.reset(9, true, value::TypeTags::Null, 0); - originalRow.reset(10, true, value::TypeTags::MinKey, 0); - originalRow.reset(11, true, value::TypeTags::MaxKey, 0); - originalRow.reset(12, true, value::TypeTags::bsonUndefined, 0); - - auto [stringTag, stringVal] = value::makeNewString("perfect"); - originalRow.reset(13, true, stringTag, stringVal); + testData->push_back(decimalTag, decimalVal); + + testData->push_back(value::TypeTags::Date, value::bitcastFrom<int64_t>(1234)); + testData->push_back(value::TypeTags::Timestamp, value::bitcastFrom<uint64_t>(5678)); + testData->push_back(value::TypeTags::Boolean, value::bitcastFrom<bool>(true)); + testData->push_back(value::TypeTags::Null, 0); + testData->push_back(value::TypeTags::MinKey, 0); + testData->push_back(value::TypeTags::MaxKey, 0); + testData->push_back(value::TypeTags::bsonUndefined, 0); + + std::string_view smallString = "perfect"; + invariant(sbe::value::canUseSmallString(smallString)); + std::string_view bigString = "too big string to fit into value"; + invariant(!sbe::value::canUseSmallString(bigString)); + std::string_view smallStringWithNull = "a\0b"; + invariant(smallStringWithNull.size() <= sbe::value::kSmallStringMaxLength); + std::string_view bigStringWithNull = "too big string \0 to fit into value"; + invariant(bigStringWithNull.size() > sbe::value::kSmallStringMaxLength); + + std::vector<std::string_view> stringCases = { + smallString, + smallStringWithNull, + bigString, + bigStringWithNull, + "", + "a", + "a\0", + "\0", + "\0\0\0", + }; + for (const auto& stringCase : stringCases) { + auto [stringTag, stringVal] = value::makeNewString(stringCase); + testData->push_back(stringTag, stringVal); + } auto [objectTag, objectVal] = value::makeNewObject(); - originalRow.reset(14, true, objectTag, objectVal); + testData->push_back(objectTag, objectVal); auto object = value::getObjectView(objectVal); object->push_back("num", value::TypeTags::NumberInt64, value::bitcastFrom<int64_t>(1)); @@ -81,7 +104,7 @@ TEST(ValueSerializeForSorter, Serialize) { arraySet->push_back(value::TypeTags::NumberInt64, value::bitcastFrom<int64_t>(5)); auto [oidTag, oidVal] = value::makeCopyObjectId({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); - originalRow.reset(15, true, oidTag, oidVal); + testData->push_back(oidTag, oidVal); uint8_t byteArray[] = {8, 7, 6, 5, 4, 3, 2, 1}; auto bson = @@ -93,29 +116,54 @@ TEST(ValueSerializeForSorter, Serialize) { auto [bsonObjTag, bsonObjVal] = value::copyValue( value::TypeTags::bsonObject, value::bitcastFrom<const char*>(bson["obj"].value())); - originalRow.reset(16, true, bsonObjTag, bsonObjVal); + testData->push_back(bsonObjTag, bsonObjVal); auto [bsonArrayTag, bsonArrayVal] = value::copyValue( value::TypeTags::bsonArray, value::bitcastFrom<const char*>(bson["arr"].value())); - originalRow.reset(17, true, bsonArrayTag, bsonArrayVal); + testData->push_back(bsonArrayTag, bsonArrayVal); auto [bsonBinDataGeneralTag, bsonBinDataGeneralVal] = value::copyValue(value::TypeTags::bsonBinData, value::bitcastFrom<const char*>(bson["binDataGeneral"].value())); - originalRow.reset(18, true, bsonBinDataGeneralTag, bsonBinDataGeneralVal); + testData->push_back(bsonBinDataGeneralTag, bsonBinDataGeneralVal); auto [bsonBinDataDeprecatedTag, bsonBinDataDeprecatedVal] = value::copyValue(value::TypeTags::bsonBinData, value::bitcastFrom<const char*>(bson["binDataDeprecated"].value())); - originalRow.reset(19, true, bsonBinDataDeprecatedTag, bsonBinDataDeprecatedVal); + testData->push_back(bsonBinDataDeprecatedTag, bsonBinDataDeprecatedVal); KeyString::Builder keyStringBuilder(KeyString::Version::V1); keyStringBuilder.appendNumberLong(1); keyStringBuilder.appendNumberLong(2); keyStringBuilder.appendNumberLong(3); auto [keyStringTag, keyStringVal] = value::makeCopyKeyString(keyStringBuilder.getValueCopy()); - originalRow.reset(20, true, keyStringTag, keyStringVal); + testData->push_back(keyStringTag, keyStringVal); + + auto [plainCodeTag, plainCodeVal] = + value::makeCopyBsonJavascript("function test() { return 'Hello world!'; }"); + testData->push_back(value::TypeTags::bsonJavascript, plainCodeVal); + + auto [codeWithNullTag, codeWithNullVal] = + value::makeCopyBsonJavascript("function test() { return 'Danger\0us!'; }"); + testData->push_back(value::TypeTags::bsonJavascript, codeWithNullVal); + + auto regexBson = + BSON("noOptions" << BSONRegEx("[a-z]+") << "withOptions" << BSONRegEx(".*", "i") + << "emptyPatternNoOptions" << BSONRegEx("") << "emptyPatternWithOptions" + << BSONRegEx("", "s")); + + for (const auto& element : regexBson) { + auto [copyTag, copyVal] = value::copyValue( + value::TypeTags::bsonRegex, value::bitcastFrom<const char*>(element.value())); + testData->push_back(copyTag, copyVal); + } + + value::MaterializedRow originalRow{testData->size()}; + for (size_t i = 0; i < testData->size(); i++) { + auto [tag, value] = testData->getAt(i); + originalRow.reset(i, false, tag, value); + } BufBuilder builder; originalRow.serializeForSorter(builder); |