diff options
author | Milena Ivanova <milena.ivanova@mongodb.com> | 2020-09-22 16:16:18 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-11-23 14:07:45 +0000 |
commit | f39eb409f4f3e32b3d77b4b05bb977113c52f48c (patch) | |
tree | 1bdf8735570aa1ad6e0d195be867636996cacb0e | |
parent | afe5d5357afb60ebf1c5ba09fecc9e712d56eacb (diff) | |
download | mongo-f39eb409f4f3e32b3d77b4b05bb977113c52f48c.tar.gz |
SERVER-50734 Support regexp expressions in SBE
-rw-r--r-- | src/mongo/db/exec/sbe/SConscript | 3 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/expressions/expression.cpp | 3 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/expressions/sbe_regex_test.cpp | 203 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/values/value.cpp | 52 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/values/value.h | 94 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/vm/vm.cpp | 316 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/vm/vm.h | 6 | ||||
-rw-r--r-- | src/mongo/db/pipeline/expression.cpp | 56 | ||||
-rw-r--r-- | src/mongo/db/pipeline/expression.h | 10 | ||||
-rw-r--r-- | src/mongo/db/query/get_executor.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder_expression.cpp | 107 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder_filter.cpp | 14 |
12 files changed, 813 insertions, 53 deletions
diff --git a/src/mongo/db/exec/sbe/SConscript b/src/mongo/db/exec/sbe/SConscript
index 719e6012188..71bd9879d9d 100644
--- a/src/mongo/db/exec/sbe/SConscript
+++ b/src/mongo/db/exec/sbe/SConscript
@@ -22,7 +22,7 @@ env.Library(
         '$BUILD_DIR/mongo/base',
         '$BUILD_DIR/mongo/db/query/datetime/date_time_support',
         '$BUILD_DIR/mongo/db/storage/key_string',
-        '$BUILD_DIR/third_party/shim_pcrecpp',
+        '$BUILD_DIR/mongo/util/regex_util',
     ]
 )
 
@@ -124,6 +124,7 @@ env.CppUnitTest(
         'expressions/sbe_is_member_builtin_test.cpp',
         'expressions/sbe_iso_date_to_parts_test.cpp',
         'expressions/sbe_mod_expression_test.cpp',
+        'expressions/sbe_regex_test.cpp',
         'expressions/sbe_set_expressions_test.cpp',
         'expressions/sbe_to_upper_to_lower_test.cpp',
         'expressions/sbe_trigonometric_expressions_test.cpp',
diff --git a/src/mongo/db/exec/sbe/expressions/expression.cpp b/src/mongo/db/exec/sbe/expressions/expression.cpp
index 35fa4ee3597..292efa3147f 100644
--- a/src/mongo/db/exec/sbe/expressions/expression.cpp
+++ b/src/mongo/db/exec/sbe/expressions/expression.cpp
@@ -416,6 +416,9 @@ static stdx::unordered_map<std::string, BuiltinFn> kBuiltinFunctions = {
      BuiltinFn{[](size_t n) { return n == 2; }, vm::Builtin::setDifference, false}},
     {"runJsPredicate",
      BuiltinFn{[](size_t n) { return n == 2; }, vm::Builtin::runJsPredicate, false}},
+    {"regexCompile", BuiltinFn{[](size_t n) { return n == 2; }, vm::Builtin::regexCompile, false}},
+    {"regexFind", BuiltinFn{[](size_t n) { return n == 2; }, vm::Builtin::regexFind, false}},
+    {"regexFindAll", BuiltinFn{[](size_t n) { return n == 2; }, vm::Builtin::regexFindAll, false}},
 };
 
 /**
diff --git a/src/mongo/db/exec/sbe/expressions/sbe_regex_test.cpp b/src/mongo/db/exec/sbe/expressions/sbe_regex_test.cpp
new file mode 100644
index 00000000000..cd601c52590
--- /dev/null
+++ b/src/mongo/db/exec/sbe/expressions/sbe_regex_test.cpp
@@ -0,0 +1,203 @@
+/**
+ *    Copyright (C) 2020-present MongoDB, Inc.
+ *
+ *    This program is free software: you can redistribute it and/or modify
+ *    it under the terms of the Server Side Public License, version 1,
+ *    as published by MongoDB, Inc.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *    Server Side Public License for more details.
+ *
+ *    You should have received a copy of the Server Side Public License
+ *    along with this program. If not, see
+ *    <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ *    As a special exception, the copyright holders give permission to link the
+ *    code of portions of this program with the OpenSSL library under certain
+ *    conditions as described in each individual source file and distribute
+ *    linked combinations including the program with the OpenSSL library. You
+ *    must comply with the Server Side Public License in all respects for
+ *    all of the code used other than as permitted herein. If you modify file(s)
+ *    with this exception, you may extend this exception to your version of the
+ *    file(s), but you are not obligated to do so. If you do not wish to do so,
+ *    delete this exception statement from your version. If you delete this
+ *    exception statement from all source files in the program, then also delete
+ *    it in the license file.
+ */
+
+#include "mongo/db/exec/sbe/expression_test_base.h"
+
+namespace mongo::sbe {
+class SBERegexTest : public EExpressionTestFixture {
+protected:
+    void runAndAssertRegexCompile(const vm::CodeFragment* compiledExpr,
+                                  std::string_view regexString) {
+        auto [tag, val] = runCompiledExpression(compiledExpr);
+        value::ValueGuard guard(tag, val);
+
+        ASSERT_EQUALS(value::TypeTags::pcreRegex, tag);
+
+        auto regex = value::getPcreRegexView(val);
+        std::string res = str::stream() << "/" << regex->pattern() << "/" << regex->options();
+        ASSERT_EQUALS(res, regexString);
+    }
+
+    void runAndAssertMatchExpression(const vm::CodeFragment* compiledExpr, bool expected) {
+        auto [tag, val] = runCompiledExpression(compiledExpr);
+        value::ValueGuard guard(tag, val);
+
+        ASSERT(tag == value::TypeTags::Boolean);
+        ASSERT_EQUALS(value::bitcastTo<bool>(val), expected);
+    }
+
+    void runAndAssertFindExpression(const vm::CodeFragment* compiledExpr,
+                                    std::string_view expectedMatch,
+                                    int idx) {
+        auto [tag, val] = runCompiledExpression(compiledExpr);
+        value::ValueGuard guard(tag, val);
+
+        ASSERT(tag == value::TypeTags::Object);
+        auto obj = value::getObjectView(val);
+
+        auto [matchTag, matchVal] = obj->getField("match");
+        // The field is a view owned by 'obj' (released by 'guard'), so it needs no guard here.
+        ASSERT(value::isString(matchTag));
+        ASSERT_EQUALS(value::getStringView(matchTag, matchVal), expectedMatch);
+
+        auto [idxTag, idxVal] = obj->getField("idx");
+        // Same as above: 'idx' still belongs to 'obj' and must not be released separately.
+        ASSERT_EQUALS(idxTag, value::TypeTags::NumberInt32);
+        ASSERT_EQUALS(value::numericCast<int32_t>(idxTag, idxVal), idx);
+    }
+
+    void addMatchResult(value::Array* arrayPtr, std::string_view matchStr, int32_t idx) {
+        auto [objTag, objVal] = value::makeNewObject();
+        value::ValueGuard objGuard{objTag, objVal};
+        auto obj = value::getObjectView(objVal);
+
+        auto [matchStrTag, matchStrVal] = value::makeNewString(matchStr);
+        auto [capturesTag, capturesVal] = value::makeNewArray();
+        obj->push_back("match", matchStrTag, matchStrVal);
+        obj->push_back("idx", value::TypeTags::NumberInt32, value::bitcastFrom<int32_t>(idx));
+        obj->push_back("captures", capturesTag, capturesVal);
+        objGuard.reset();
+        arrayPtr->push_back(objTag, objVal);
+    }
+
+    void runAndAssertFindAllExpression(const vm::CodeFragment* compiledExpr,
+                                       value::Array* expected) {
+        auto [tag, val] = runCompiledExpression(compiledExpr);
+        value::ValueGuard guard(tag, val);
+
+        ASSERT(tag == value::TypeTags::Array);
+        auto arr = value::getArrayView(val);
+
+        ASSERT_EQUALS(arr->size(), expected->size());
+
+        for (size_t idx = 0; idx < arr->size(); ++idx) {
+            auto [objTag, objVal] = arr->getAt(idx);
+            ASSERT(objTag == value::TypeTags::Object);
+            auto [expObjTag, expObjVal] = expected->getAt(idx);
+            ASSERT(expObjTag == value::TypeTags::Object);
+
+            auto [matchTag, matchVal] = value::getObjectView(objVal)->getField("match");
+            auto [expMatchTag, expMatchVal] = value::getObjectView(expObjVal)->getField("match");
+            ASSERT_EQUALS(matchTag, expMatchTag);
+            ASSERT_EQUALS(value::getStringView(matchTag, matchVal),
+                          value::getStringView(expMatchTag, expMatchVal));
+
+            auto [idxTag, idxVal] = value::getObjectView(objVal)->getField("idx");
+            auto [expIdxTag, expIdxVal] = value::getObjectView(expObjVal)->getField("idx");
+            ASSERT_EQUALS(idxTag, expIdxTag);
+            ASSERT_EQUALS(value::numericCast<int64_t>(idxTag, idxVal),
+                          value::numericCast<int64_t>(expIdxTag, expIdxVal));
+        }
+    }
+};
+
+TEST_F(SBERegexTest, ComputesRegexCompile) {
+    value::OwnedValueAccessor slotAccessor1;
+    value::OwnedValueAccessor slotAccessor2;
+    auto patternSlot = bindAccessor(&slotAccessor1);
+    auto optionsSlot = bindAccessor(&slotAccessor2);
+    auto regexExpr = sbe::makeE<sbe::EFunction>(
+        "regexCompile", sbe::makeEs(makeE<EVariable>(patternSlot), makeE<EVariable>(optionsSlot)));
+    auto compiledExpr = compileExpression(*regexExpr);
+
+    auto [patternTag, patternVal] = value::makeNewString("^Many");
+    auto [optionsTag, optionsVal] = value::makeNewString("i");
+    slotAccessor1.reset(patternTag, patternVal);
+    slotAccessor2.reset(optionsTag, optionsVal);
+    runAndAssertRegexCompile(compiledExpr.get(), "/^Many/i");
+}
+
+TEST_F(SBERegexTest, ComputesRegexMatch) {
+    value::OwnedValueAccessor slotAccessor1;
+    value::OwnedValueAccessor slotAccessor2;
+    auto regexSlot = bindAccessor(&slotAccessor1);
+    auto inputSlot = bindAccessor(&slotAccessor2);
+    auto regexExpr = sbe::makeE<sbe::EFunction>(
+        "regexMatch", sbe::makeEs(makeE<EVariable>(regexSlot), makeE<EVariable>(inputSlot)));
+    auto compiledExpr = compileExpression(*regexExpr);
+
+    auto [regexTag, regexVal] = value::makeNewPcreRegex("line", "");
+    auto [inputTag, inputVal] = value::makeNewString("Many lines of code");
+    slotAccessor1.reset(regexTag, regexVal);
+    slotAccessor2.reset(inputTag, inputVal);
+    runAndAssertMatchExpression(compiledExpr.get(), true);
+
+    std::tie(regexTag, regexVal) = value::makeNewPcreRegex("link", "");
+    std::tie(inputTag, inputVal) = value::makeNewString("Example text");
+    slotAccessor1.reset(regexTag, regexVal);
+    slotAccessor2.reset(inputTag, inputVal);
+    runAndAssertMatchExpression(compiledExpr.get(), false);
+}
+
+TEST_F(SBERegexTest, ComputesRegexFind) {
+    value::OwnedValueAccessor slotAccessor1;
+    value::OwnedValueAccessor slotAccessor2;
+    auto regexSlot = bindAccessor(&slotAccessor1);
+    auto inputSlot = bindAccessor(&slotAccessor2);
+    auto regexExpr = sbe::makeE<sbe::EFunction>(
+        "regexFind", sbe::makeEs(makeE<EVariable>(regexSlot), makeE<EVariable>(inputSlot)));
+    auto compiledExpr = compileExpression(*regexExpr);
+
+    auto [regexTag, regexVal] = value::makeNewPcreRegex("line", "");
+    auto [inputTag, inputVal] = value::makeNewString("Many lines of code");
+    slotAccessor1.reset(regexTag, regexVal);
+    slotAccessor2.reset(inputTag, inputVal);
+    runAndAssertFindExpression(compiledExpr.get(), "line", 5);
+
+    std::tie(regexTag, regexVal) = value::makeNewPcreRegex("line", "i");
+    std::tie(inputTag, inputVal) = value::makeNewString("Many LINES of code");
+    slotAccessor1.reset(regexTag, regexVal);
+    slotAccessor2.reset(inputTag, inputVal);
+    runAndAssertFindExpression(compiledExpr.get(), "LINE", 5);
+}
+
+TEST_F(SBERegexTest, ComputesRegexFindAll) {
+    value::OwnedValueAccessor slotAccessor1;
+    value::OwnedValueAccessor slotAccessor2;
+    auto regexSlot = bindAccessor(&slotAccessor1);
+    auto inputSlot = bindAccessor(&slotAccessor2);
+    auto regexExpr = sbe::makeE<sbe::EFunction>(
+        "regexFindAll", sbe::makeEs(makeE<EVariable>(regexSlot), makeE<EVariable>(inputSlot)));
+    auto compiledExpr = compileExpression(*regexExpr);
+
+    auto [arrTag, arrVal] = value::makeNewArray();
+    value::ValueGuard arrGuard{arrTag, arrVal};
+    auto arrayView = value::getArrayView(arrVal);
+
+    addMatchResult(arrayView, "line", 4);
+    addMatchResult(arrayView, "line", 16);
+
+    auto [regexTag, regexVal] = value::makeNewPcreRegex("line", "");
+    auto [inputTag, inputVal] = value::makeNewString("One line or two lines of code");
+    slotAccessor1.reset(regexTag, regexVal);
+    slotAccessor2.reset(inputTag, inputVal);
+    runAndAssertFindAllExpression(compiledExpr.get(), arrayView);
+}
+
+}  // namespace mongo::sbe
diff --git a/src/mongo/db/exec/sbe/values/value.cpp b/src/mongo/db/exec/sbe/values/value.cpp
index b2107e7a127..56e092237f8 100644
--- a/src/mongo/db/exec/sbe/values/value.cpp
+++ b/src/mongo/db/exec/sbe/values/value.cpp
@@ -31,13 +31,12 @@
 
 #include "mongo/db/exec/sbe/values/value.h"
 
-#include <pcrecpp.h>
-
 #include "mongo/db/exec/js_function.h"
 #include "mongo/db/exec/sbe/values/bson.h"
 #include "mongo/db/exec/sbe/values/value_builder.h"
 #include "mongo/db/query/datetime/date_time_support.h"
 #include "mongo/db/storage/key_string.h"
+#include "mongo/util/regex_util.h"
 
 namespace mongo {
 namespace sbe {
@@ -48,9 +47,49 @@ std::pair<TypeTags, Value> makeCopyKeyString(const KeyString::Value& inKey) {
     return {TypeTags::ksValue, bitcastFrom<KeyString::Value*>(k)};
 }
 
-std::pair<TypeTags, Value> makeCopyPcreRegex(const pcrecpp::RE& regex) {
-    auto ownedRegexVal = sbe::value::bitcastFrom<pcrecpp::RE*>(new pcrecpp::RE(regex));
-    return {TypeTags::pcreRegex, ownedRegexVal};
+std::pair<TypeTags, Value> makeNewPcreRegex(std::string_view pattern, std::string_view options) {
+    auto regex = std::make_unique<PcreRegex>(pattern, options);
+    if (regex->isValid()) {
+        return {TypeTags::pcreRegex, bitcastFrom<PcreRegex*>(regex.release())};
+    }
+    return {TypeTags::Nothing, 0};
+}
+
+std::pair<TypeTags, Value> makeCopyPcreRegex(const PcreRegex& regex) {
+    if (regex.isValid()) {
+        auto regexCopy = std::make_unique<PcreRegex>(regex);
+        invariant(regexCopy->isValid());
+        return {TypeTags::pcreRegex, bitcastFrom<PcreRegex*>(regexCopy.release())};
+    }
+    return {TypeTags::Nothing, 0};
+}
+
+void PcreRegex::_compile() {
+    const auto pcreOptions = regex_util::flagsToPcreOptions(_options.c_str(), false).all_options();
+    const char* compile_error;
+    int eoffset;
+    _pcrePtr = pcre_compile(_pattern.c_str(), pcreOptions, &compile_error, &eoffset, nullptr);
+    _isValid = (_pcrePtr != nullptr);
+}
+
+int PcreRegex::execute(std::string_view stringView, int startPos, std::vector<int>& buf) {
+    invariant(_isValid);
+    return pcre_exec(_pcrePtr,
+                     nullptr,
+                     stringView.data(),
+                     stringView.length(),
+                     startPos,
+                     0,
+                     buf.data(),  // Unlike front(), well-defined even for an empty 'buf'.
+                     buf.size());
+}
+
+size_t PcreRegex::getNumberCaptures() const {
+    int numCaptures = -1;  // Stays -1, failing the invariant below, if pcre_fullinfo() fails.
+    invariant(_isValid);
+    pcre_fullinfo(_pcrePtr, nullptr, PCRE_INFO_CAPTURECOUNT, &numCaptures);
+    invariant(numCaptures >= 0);
+    return static_cast<size_t>(numCaptures);
 }
 
 std::pair<TypeTags, Value> makeCopyJsFunction(const JsFunction& jsFunction) {
@@ -374,8 +413,7 @@ void writeValueToStream(T& stream, TypeTags tag, Value val) {
         }
         case value::TypeTags::pcreRegex: {
             auto regex = getPcreRegexView(val);
-            // TODO: Also include the regex flags.
-            stream << "/" << regex->pattern() << "/";
+            stream << "/" << regex->pattern() << "/" << regex->options();
             break;
         }
         case value::TypeTags::timeZoneDB: {
diff --git a/src/mongo/db/exec/sbe/values/value.h b/src/mongo/db/exec/sbe/values/value.h
index c853e6ec4b2..087e693d83f 100644
--- a/src/mongo/db/exec/sbe/values/value.h
+++ b/src/mongo/db/exec/sbe/values/value.h
@@ -35,6 +35,7 @@
 #include <bitset>
 #include <cstdint>
 #include <ostream>
+#include <pcre.h>
 #include <string>
 #include <utility>
 #include <vector>
@@ -47,10 +48,6 @@
 #include "mongo/util/assert_util.h"
 #include "mongo/util/represent_as.h"
 
-namespace pcrecpp {
-class RE;
-}  // namespace pcrecpp
-
 namespace mongo {
 /**
  * Forward declaration.
@@ -155,6 +152,10 @@ inline constexpr bool isRecordId(TypeTags tag) noexcept {
     return tag == TypeTags::RecordId;
 }
 
+inline constexpr bool isPcreRegex(TypeTags tag) noexcept {
+    return tag == TypeTags::pcreRegex;
+}
+
 BSONType tagToType(TypeTags tag) noexcept;
 
 /**
@@ -529,6 +530,81 @@ private:
     ValueSetType _values;
 };
 
+/**
+ * Owning wrapper of a compiled PCRE regular expression. The pattern and the options are stored so
+ * that copying an sbe::value::PcreRegex recompiles them into an independent pcre* handle; the
+ * handle itself allows for direct usage of the pcre C library functionality. A default-constructed
+ * or failed-to-compile instance holds no handle and reports isValid() == false.
+ */
+class PcreRegex {
+public:
+    PcreRegex() = default;
+
+    PcreRegex(std::string_view pattern, std::string_view options)
+        : _pattern(pattern), _options(options), _pcrePtr(nullptr) {
+        _compile();
+    }
+
+    PcreRegex(std::string_view pattern) : PcreRegex(pattern, "") {}
+
+    PcreRegex(const PcreRegex& other) : PcreRegex(other._pattern, other._options) {}
+
+    PcreRegex& operator=(const PcreRegex& other) {
+        if (this != &other) {
+            if (_pcrePtr != nullptr) {
+                (*pcre_free)(_pcrePtr);
+            }
+            _pattern = other._pattern;
+            _options = other._options;
+            _isValid = false;
+            _compile();
+        }
+        return *this;
+    }
+
+    ~PcreRegex() {
+        if (_pcrePtr != nullptr) {
+            (*pcre_free)(_pcrePtr);
+        }
+    }
+
+    bool isValid() const {
+        return _isValid;
+    }
+
+    const std::string& pattern() const {
+        return _pattern;
+    }
+
+    const std::string& options() const {
+        return _options;
+    }
+
+    /**
+     * Wrapper function for pcre_exec(). Must only be called on a valid (compiled) regex.
+     * - input: The input string.
+     * - startPos: The position from where the search should start.
+     * - buf: Array populated with the found matched string and capture groups.
+     * Returns the number of matches or an error code:
+     *         < -1 error
+     *         = -1 no match
+     *         = 0  there was a match, but not enough space in the buffer
+     *         > 0  the number of matches
+     */
+    int execute(std::string_view input, int startPos, std::vector<int>& buf);
+
+    size_t getNumberCaptures() const;
+
+private:
+    void _compile();
+
+    std::string _pattern;
+    std::string _options;
+
+    pcre* _pcrePtr = nullptr;  // Null unless _compile() succeeded; keeps the destructor safe.
+    bool _isValid = false;
+};
+
 constexpr size_t kSmallStringThreshold = 8;
 using ObjectIdType = std::array<uint8_t, 12>;
 static_assert(sizeof(ObjectIdType) == 12);
@@ -694,8 +770,12 @@ inline KeyString::Value* getKeyStringView(Value val) noexcept {
     return reinterpret_cast<KeyString::Value*>(val);
 }
 
-inline pcrecpp::RE* getPcreRegexView(Value val) noexcept {
-    return reinterpret_cast<pcrecpp::RE*>(val);
+std::pair<TypeTags, Value> makeNewPcreRegex(std::string_view pattern, std::string_view options);
+
+std::pair<TypeTags, Value> makeCopyPcreRegex(const PcreRegex& regex);
+
+inline PcreRegex* getPcreRegexView(Value val) noexcept {
+    return reinterpret_cast<PcreRegex*>(val);
 }
 
 inline JsFunction* getJsFunctionView(Value val) noexcept {
@@ -708,8 +788,6 @@ inline TimeZoneDatabase* getTimeZoneDBView(Value val) noexcept {
 
 std::pair<TypeTags, Value> makeCopyKeyString(const KeyString::Value& inKey);
 
-std::pair<TypeTags, Value> makeCopyPcreRegex(const pcrecpp::RE&);
-
 std::pair<TypeTags, Value> makeCopyJsFunction(const JsFunction&);
 
 void releaseValue(TypeTags tag, Value val) noexcept;
diff --git a/src/mongo/db/exec/sbe/vm/vm.cpp b/src/mongo/db/exec/sbe/vm/vm.cpp
index 50b0b70a3ec..9ad1c9116e3 100644
--- a/src/mongo/db/exec/sbe/vm/vm.cpp
+++ b/src/mongo/db/exec/sbe/vm/vm.cpp
@@ -27,13 +27,15 @@
  *    it in the license file.
*/ +#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery + #include "mongo/platform/basic.h" #include "mongo/db/exec/sbe/expressions/expression.h" #include "mongo/db/exec/sbe/vm/vm.h" #include <boost/algorithm/string.hpp> -#include <pcrecpp.h> +#include <pcre.h> #include "mongo/bson/oid.h" #include "mongo/db/client.h" @@ -43,7 +45,9 @@ #include "mongo/db/exec/sbe/vm/datetime.h" #include "mongo/db/query/datetime/date_time_support.h" #include "mongo/db/storage/key_string.h" +#include "mongo/logv2/log.h" #include "mongo/util/fail_point.h" +#include "mongo/util/str.h" #include "mongo/util/summation.h" MONGO_FAIL_POINT_DEFINE(failOnPoisonedFieldLookup); @@ -974,25 +978,6 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinAddToSet(ArityT return {ownAgg, tagAgg, valAgg}; } -std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinRegexMatch(ArityType arity) { - invariant(arity == 2); - - auto [ownedPcreRegex, typeTagPcreRegex, valuePcreRegex] = getFromStack(0); - auto [ownedInputStr, typeTagInputStr, valueInputStr] = getFromStack(1); - - if (!value::isString(typeTagInputStr) || typeTagPcreRegex != value::TypeTags::pcreRegex) { - return {false, value::TypeTags::Nothing, 0}; - } - - auto stringView = value::getStringView(typeTagInputStr, valueInputStr); - pcrecpp::StringPiece pcreStringView{stringView.data(), static_cast<int>(stringView.size())}; - - auto pcreRegex = value::getPcreRegexView(valuePcreRegex); - auto regexMatchResult = pcreRegex->PartialMatch(pcreStringView); - - return {false, value::TypeTags::Boolean, value::bitcastFrom<bool>(regexMatchResult)}; -} - std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinRunJsPredicate(ArityType arity) { invariant(arity == 2); @@ -1932,6 +1917,291 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinSetDifference(A return {true, resTag, resVal}; } +namespace { +/** + * A helper function to create the result object {"match" : .., "idx" : ..., "captures" : 
+ * ...} from the result of pcre_exec(). + */ +std::tuple<bool, value::TypeTags, value::Value> buildRegexMatchResultObject( + std::string_view inputString, + const std::vector<int>& capturesBuffer, + size_t numCaptures, + uint32_t& startBytePos, + uint32_t& codePointPos) { + + auto verifyBounds = [&inputString](auto startPos, auto limitPos, auto isCapture) { + // If a capture group was not matched, then the 'startPos' and 'limitPos' will both be -1. + // These bounds cannot occur for a match on the full string. + if (startPos == -1 && limitPos == -1 && isCapture) { + return true; + } + if (startPos == -1 || limitPos == -1) { + LOGV2_ERROR(5073412, + "Unexpected error occurred while executing regexFind.", + "startPos"_attr = startPos, + "limitPos"_attr = limitPos); + return false; + } + if (startPos < 0 || static_cast<size_t>(startPos) > inputString.size() || limitPos < 0 || + static_cast<size_t>(limitPos) > inputString.size() || startPos > limitPos) { + LOGV2_ERROR(5073413, + "Unexpected error occurred while executing regexFind.", + "startPos"_attr = startPos, + "limitPos"_attr = limitPos); + return false; + } + return true; + }; + + // Extract the matched string: its start and (end+1) indices are in the first two elements of + // capturesBuffer. + if (!verifyBounds(capturesBuffer[0], capturesBuffer[1], false)) { + return {false, value::TypeTags::Nothing, 0}; + } + auto matchStartIdx = capturesBuffer[0]; + auto matchedString = inputString.substr(matchStartIdx, capturesBuffer[1] - matchStartIdx); + auto [matchedTag, matchedVal] = value::makeNewString(matchedString); + value::ValueGuard matchedGuard{matchedTag, matchedVal}; + + // We iterate through the input string's contents preceding the match index, in order to convert + // the byte offset to a code point offset. 
+ for (auto byteIdx = startBytePos; byteIdx < static_cast<uint32_t>(matchStartIdx); + ++codePointPos) { + byteIdx += str::getCodePointLength(inputString[byteIdx]); + } + startBytePos = matchStartIdx; + + auto [arrTag, arrVal] = value::makeNewArray(); + value::ValueGuard arrGuard{arrTag, arrVal}; + auto arrayView = value::getArrayView(arrVal); + // The next '2 * numCaptures' entries (after the first two entries) of 'capturesBuffer' + // hold the (start, limit) pairs of indexes, for each of the capture groups. We skip the first + // two elements and start iteration from 3rd element so that we only construct the strings for + // capture groups. + for (size_t i = 0; i < numCaptures; ++i) { + const auto start = capturesBuffer[2 * (i + 1)]; + const auto limit = capturesBuffer[2 * (i + 1) + 1]; + if (!verifyBounds(start, limit, true)) { + return {false, value::TypeTags::Nothing, 0}; + } + + if (start == -1 && limit == -1) { + arrayView->push_back(value::TypeTags::Null, 0); + } else { + auto captureString = inputString.substr(start, limit - start); + auto [tag, val] = value::makeNewString(captureString); + arrayView->push_back(tag, val); + } + } + + auto [resTag, resVal] = value::makeNewObject(); + value::ValueGuard resGuard{resTag, resVal}; + auto resObjectView = value::getObjectView(resVal); + resObjectView->reserve(3); + matchedGuard.reset(); + resObjectView->push_back("match", matchedTag, matchedVal); + resObjectView->push_back( + "idx", value::TypeTags::NumberInt32, value::bitcastFrom<int32_t>(codePointPos)); + arrGuard.reset(); + resObjectView->push_back("captures", arrTag, arrVal); + resGuard.reset(); + return {true, resTag, resVal}; +} + +/** + * A helper function to extract the next match in the subject string using the compiled regex + * pattern. + * - pcre: The wrapper object containing the compiled pcre expression + * - inputString: The subject string. + * - capturesBuffer: Array to be populated with the found matched string and capture groups. 
+ * - startBytePos: The position from where the search should start given in bytes. + * - codePointPos: The same position in terms of code points. + * - isMatch: Boolean flag to mark if the caller function is $regexMatch, in which case the result + * returned is true/false. + */ +std::tuple<bool, value::TypeTags, value::Value> pcreNextMatch(value::PcreRegex* pcre, + std::string_view inputString, + std::vector<int>& capturesBuffer, + uint32_t& startBytePos, + uint32_t& codePointPos, + bool isMatch = false) { + auto execResult = pcre->execute(inputString, startBytePos, capturesBuffer); + + auto numCaptures = pcre->getNumberCaptures(); + if (execResult < -1 || execResult > static_cast<int>(numCaptures) + 1) { + LOGV2_ERROR(5073414, + "Error occurred while executing regular expression.", + "execResult"_attr = execResult); + return {false, value::TypeTags::Nothing, 0}; + } + + if (isMatch) { + // $regexMatch returns true or false. + bool match = (execResult != PCRE_ERROR_NOMATCH); + return {false, value::TypeTags::Boolean, value::bitcastFrom<bool>(match)}; + } else { + // $regexFind and $regexFindAll build result object or return null. + if (execResult == PCRE_ERROR_NOMATCH) { + return {false, value::TypeTags::Null, 0}; + } + return buildRegexMatchResultObject( + inputString, capturesBuffer, numCaptures, startBytePos, codePointPos); + } +} + +/** + * A helper function to extract the first match in the subject string using the compiled regex + * pattern. See 'pcreNextMatch' function for parameters description. + */ +std::tuple<bool, value::TypeTags, value::Value> pcreFirstMatch( + value::PcreRegex* pcre, + std::string_view inputString, + bool isMatch = false, + std::vector<int>* capturesBuffer = nullptr, + uint32_t* startBytePos = nullptr, + uint32_t* codePointPos = nullptr) { + std::vector<int> tmpCapturesBuffer; + uint32_t tmpStartBytePos = 0; + uint32_t tmpCodePointPos = 0; + + capturesBuffer = capturesBuffer ? 
capturesBuffer : &tmpCapturesBuffer; + startBytePos = startBytePos ? startBytePos : &tmpStartBytePos; + codePointPos = codePointPos ? codePointPos : &tmpCodePointPos; + + // The first two-thirds of the capturesBuffer is used to pass back captured substrings' start + // and (end+1) indexes. The remaining third of the vector is used as workspace by pcre_exec() + // while matching capturing subpatterns, and is not available for passing back information. + auto numCaptures = pcre->getNumberCaptures(); + capturesBuffer->resize((1 + numCaptures) * 3); + + return pcreNextMatch(pcre, inputString, *capturesBuffer, *startBytePos, *codePointPos, isMatch); +} + +/** + * A helper function with common logic for $regexMatch and $regexFind functions. Both extract only + * the first match to a regular expression, but return different result objects. + */ +std::tuple<bool, value::TypeTags, value::Value> genericPcreRegexSingleMatch( + value::TypeTags typeTagPcreRegex, + value::Value valuePcreRegex, + value::TypeTags typeTagInputStr, + value::Value valueInputStr, + bool isMatch) { + if (!value::isString(typeTagInputStr) || !value::isPcreRegex(typeTagPcreRegex)) { + return {false, value::TypeTags::Nothing, 0}; + } + + auto inputString = value::getStringView(typeTagInputStr, valueInputStr); + auto pcreRegex = value::getPcreRegexView(valuePcreRegex); + + return pcreFirstMatch(pcreRegex, inputString, isMatch); +} +} // namespace + +std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinRegexCompile(ArityType arity) { + invariant(arity == 2); + + auto [patternOwned, patternTypeTag, patternValue] = getFromStack(0); + auto [optionsOwned, optionsTypeTag, optionsValue] = getFromStack(1); + + if (patternTypeTag == value::TypeTags::Null) { + return {false, value::TypeTags::Null, 0}; + } + if (!value::isString(patternTypeTag) || !value::isString(optionsTypeTag)) { + return {false, value::TypeTags::Nothing, 0}; + } + // At the moment we support only string patterns. 
+ // TODO SERVER-51266 : complete the following items once BSONType::RegEx is supported in SBE + // - Handle the case when patternTypeTag == TypeTags::bsonRegex. + // - Ensure that regex options are specified either in the options argument or in bsonRegex + // value. + auto pattern = value::getStringView(patternTypeTag, patternValue); + auto options = value::getStringView(optionsTypeTag, optionsValue); + + if (pattern.find('\0', 0) != std::string::npos || options.find('\0', 0) != std::string::npos) { + return {false, value::TypeTags::Nothing, 0}; + } + + auto [pcreTag, pcreValue] = value::makeNewPcreRegex(pattern, options); + return {true, pcreTag, pcreValue}; +} + +std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinRegexMatch(ArityType arity) { + invariant(arity == 2); + auto [ownedPcreRegex, typeTagPcreRegex, valuePcreRegex] = getFromStack(0); + auto [ownedInputStr, typeTagInputStr, valueInputStr] = getFromStack(1); + + return genericPcreRegexSingleMatch( + typeTagPcreRegex, valuePcreRegex, typeTagInputStr, valueInputStr, true); +} + +std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinRegexFind(ArityType arity) { + invariant(arity == 2); + auto [ownedPcreRegex, typeTagPcreRegex, valuePcreRegex] = getFromStack(0); + auto [ownedInputStr, typeTagInputStr, valueInputStr] = getFromStack(1); + + return genericPcreRegexSingleMatch( + typeTagPcreRegex, valuePcreRegex, typeTagInputStr, valueInputStr, false); +} + +std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinRegexFindAll(ArityType arity) { + invariant(arity == 2); + auto [ownedPcre, typeTagPcreRegex, valuePcreRegex] = getFromStack(0); + auto [ownedStr, typeTagInputStr, valueInputStr] = getFromStack(1); + + if (!value::isString(typeTagInputStr) || typeTagPcreRegex != value::TypeTags::pcreRegex) { + return {false, value::TypeTags::Nothing, 0}; + } + + auto inputString = value::getStringView(typeTagInputStr, valueInputStr); + auto pcre = value::getPcreRegexView(valuePcreRegex); 
+ + std::vector<int> capturesBuffer; + uint32_t startBytePos = 0; + uint32_t codePointPos = 0; + bool isFirstMatch = true; + + // Prepare the result array of matching objects. + auto [arrTag, arrVal] = value::makeNewArray(); + value::ValueGuard arrGuard{arrTag, arrVal}; + auto arrayView = value::getArrayView(arrVal); + + do { + auto [owned, matchTag, matchVal] = [&]() { + if (isFirstMatch) { + isFirstMatch = false; + return pcreFirstMatch( + pcre, inputString, false, &capturesBuffer, &startBytePos, &codePointPos); + } + return pcreNextMatch(pcre, inputString, capturesBuffer, startBytePos, codePointPos); + }(); + + if (matchTag == value::TypeTags::Null) { + break; + } + if (matchTag != value::TypeTags::Object) { + return {false, value::TypeTags::Nothing, 0}; + } + arrayView->push_back(matchTag, matchVal); + + // Move indexes after the current matched string to prepare for the next search. + auto [mstrTag, mstrVal] = value::getObjectView(matchVal)->getField("match"); + auto matchString = value::getStringView(mstrTag, mstrVal); + if (matchString.empty()) { + startBytePos += str::getCodePointLength(inputString[startBytePos]); + ++codePointPos; + } else { + startBytePos += matchString.length(); + for (size_t byteIdx = 0; byteIdx < matchString.length(); ++codePointPos) { + byteIdx += str::getCodePointLength(matchString[byteIdx]); + } + } + } while (startBytePos < inputString.size()); + + arrGuard.reset(); + return {true, arrTag, arrVal}; +} + std::tuple<bool, value::TypeTags, value::Value> ByteCode::dispatchBuiltin(Builtin f, ArityType arity) { switch (f) { @@ -2045,6 +2315,12 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::dispatchBuiltin(Builti return builtinSetDifference(arity); case Builtin::runJsPredicate: return builtinRunJsPredicate(arity); + case Builtin::regexCompile: + return builtinRegexCompile(arity); + case Builtin::regexFind: + return builtinRegexFind(arity); + case Builtin::regexFindAll: + return builtinRegexFindAll(arity); } 
MONGO_UNREACHABLE; diff --git a/src/mongo/db/exec/sbe/vm/vm.h b/src/mongo/db/exec/sbe/vm/vm.h index 1b24904b08c..236681a9a02 100644 --- a/src/mongo/db/exec/sbe/vm/vm.h +++ b/src/mongo/db/exec/sbe/vm/vm.h @@ -231,6 +231,9 @@ enum class Builtin : uint8_t { setIntersection, setDifference, runJsPredicate, + regexCompile, // compile <pattern, options> into value::pcreRegex + regexFind, + regexFindAll, }; using SmallArityType = uint8_t; @@ -582,6 +585,9 @@ private: std::tuple<bool, value::TypeTags, value::Value> builtinSetIntersection(ArityType arity); std::tuple<bool, value::TypeTags, value::Value> builtinSetDifference(ArityType arity); std::tuple<bool, value::TypeTags, value::Value> builtinRunJsPredicate(ArityType arity); + std::tuple<bool, value::TypeTags, value::Value> builtinRegexCompile(ArityType arity); + std::tuple<bool, value::TypeTags, value::Value> builtinRegexFind(ArityType arity); + std::tuple<bool, value::TypeTags, value::Value> builtinRegexFindAll(ArityType arity); std::tuple<bool, value::TypeTags, value::Value> dispatchBuiltin(Builtin f, ArityType arity); diff --git a/src/mongo/db/pipeline/expression.cpp b/src/mongo/db/pipeline/expression.cpp index 49f55dda847..8ccff46430e 100644 --- a/src/mongo/db/pipeline/expression.cpp +++ b/src/mongo/db/pipeline/expression.cpp @@ -3132,6 +3132,7 @@ Value ExpressionIndexOfCP::evaluate(const Document& root, Variables* variables) if (stringHasTokenAtIndex(byteIx, input, token)) { return Value(static_cast<int>(currentCodePointIndex)); } + byteIx += str::getCodePointLength(input[byteIx]); } @@ -6335,6 +6336,61 @@ void ExpressionRegex::_doAddDependencies(DepsTracker* deps) const { } } +std::pair<boost::optional<std::string>, std::string> ExpressionRegex::getConstantPatternAndOptions() + const { + if (!ExpressionConstant::isNullOrConstant(_regex) || + !ExpressionConstant::isNullOrConstant(_options)) { + return {boost::none, ""}; + } + auto patternValue = static_cast<ExpressionConstant*>(_regex.get())->getValue(); + 
uassert(5073405, + str::stream() << _opName << " needs 'regex' to be of type string or regex", + patternValue.nullish() || patternValue.getType() == BSONType::RegEx || + patternValue.getType() == BSONType::String); + auto patternStr = [&]() -> boost::optional<std::string> { + if (patternValue.getType() == BSONType::RegEx) { + StringData flags = patternValue.getRegexFlags(); + uassert(5073406, + str::stream() + << _opName + << ": found regex options specified in both 'regex' and 'options' fields", + _options.get() == nullptr || flags.empty()); + return std::string(patternValue.getRegex()); + } else if (patternValue.getType() == BSONType::String) { + return patternValue.getString(); + } else { + return boost::none; + } + }(); + + auto optionsStr = [&]() -> std::string { + if (_options.get() != nullptr) { + auto optValue = static_cast<ExpressionConstant*>(_options.get())->getValue(); + if (optValue.getType() == BSONType::String) { + return optValue.getString(); + } + } + if (patternValue.getType() == BSONType::RegEx) { + StringData flags = patternValue.getRegexFlags(); + if (!flags.empty()) { + return flags.toString(); + } + } + return {}; + }(); + + uassert(5073407, + str::stream() << _opName << ": regular expression cannot contain an embedded null byte", + patternStr->find('\0', 0) == std::string::npos); + + uassert(5073408, + str::stream() << _opName + << ": regular expression options cannot contain an embedded null byte", + optionsStr.find('\0', 0) == std::string::npos); + + return {patternStr, optionsStr}; +} + /* -------------------------- ExpressionRegexFind ------------------------------ */ REGISTER_EXPRESSION(regexFind, ExpressionRegexFind::parse); diff --git a/src/mongo/db/pipeline/expression.h b/src/mongo/db/pipeline/expression.h index b9c221bf71e..0e614dfb8be 100644 --- a/src/mongo/db/pipeline/expression.h +++ b/src/mongo/db/pipeline/expression.h @@ -2898,6 +2898,16 @@ public: return _initialExecStateForConstantRegex.has_value(); } + bool hasOptions() 
const { + return (_options.get() != nullptr); + } + + /** + * Return regex pattern and options in case they are constants. Return pattern boost::none in + * case the pattern or options are not constants, or if the pattern is null. + */ + std::pair<boost::optional<std::string>, std::string> getConstantPatternAndOptions() const; + Value serialize(bool explain) const; const std::string& getOpName() const { diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index d28dd909032..fa90e211989 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -907,7 +907,7 @@ protected: std::unique_ptr<SlotBasedPrepareExecutionResult> buildIdHackPlan( const IndexDescriptor* descriptor, QueryPlannerParams* plannerParams) final { uassert(4822862, - "IDHack plan is not supprted by SBE yet", + "IDHack plan is not supported by SBE yet", !(_cq->metadataDeps()[DocumentMetadataFields::kSortKey] || _cq->getQueryRequest().returnKey() || _cq->getProj())); diff --git a/src/mongo/db/query/sbe_stage_builder_expression.cpp b/src/mongo/db/query/sbe_stage_builder_expression.cpp index 31f7c87ac8a..42b7a086e90 100644 --- a/src/mongo/db/query/sbe_stage_builder_expression.cpp +++ b/src/mongo/db/query/sbe_stage_builder_expression.cpp @@ -298,10 +298,6 @@ void generateStringCaseConversionExpression(ExpressionVisitorContext* _context, sbe::makeE<sbe::ELocalBind>(frameId, std::move(str), std::move(totalCaseConversionExpr))); } -std::unique_ptr<sbe::EExpression> makeNot(std::unique_ptr<sbe::EExpression> e) { - return sbe::makeE<sbe::EPrimUnary>(sbe::EPrimUnary::logicNot, std::move(e)); -} - void buildArrayAccessByConstantIndex(ExpressionVisitorContext* context, const std::string& exprName, int32_t index) { @@ -330,6 +326,21 @@ void buildArrayAccessByConstantIndex(ExpressionVisitorContext* context, sbe::makeE<sbe::ELocalBind>(frameId, std::move(binds), std::move(resultExpr))); } +/** + * Generate an EExpression representing a Regex 
function result upon null argument(s) depending on + * the type of the function: $regexMatch - false, $regexFind - null, $regexFindAll - []. + */ +std::unique_ptr<sbe::EExpression> generateRegexNullResponse(StringData exprName) { + if (exprName.toString().compare(std::string("regexMatch")) == 0) { + return sbe::makeE<sbe::EConstant>(sbe::value::TypeTags::Boolean, + sbe::value::bitcastFrom<bool>(false)); + } else if (exprName.toString().compare("regexFindAll") == 0) { + auto [arrTag, arrVal] = sbe::value::makeNewArray(); + return sbe::makeE<sbe::EConstant>(arrTag, arrVal); + } + return sbe::makeE<sbe::EConstant>(sbe::value::TypeTags::Null, 0); +} + class ExpressionPreVisitor final : public ExpressionVisitor { public: ExpressionPreVisitor(ExpressionVisitorContext* context) : _context{context} {} @@ -1136,7 +1147,7 @@ public: // // 2) Check if the value in a given slot is an integral int64. This test is done by // computing a lossless conversion of the value in s1 to an int64. The exposed - // conversion function by the vm returns a value if there is no loss of precsision, + // conversion function by the vm returns a value if there is no loss of precision, // otherwise it returns Nothing. In both the valid or Nothing case, we can store the result // of the conversion in l2.0 of the inner let binding and test for existence. If the // existence check fails we know the conversion is lossy and we can fail the query.
@@ -2116,13 +2127,13 @@ public: unsupportedExpression("$convert"); } void visit(ExpressionRegexFind* expr) final { - unsupportedExpression("$regexFind"); + generateRegexExpression(expr, "regexFind"); } void visit(ExpressionRegexFindAll* expr) final { - unsupportedExpression("$regexFind"); + generateRegexExpression(expr, "regexFindAll"); } void visit(ExpressionRegexMatch* expr) final { - unsupportedExpression("$regexFind"); + generateRegexExpression(expr, "regexMatch"); } void visit(ExpressionCosine* expr) final { generateTrigonometricExpressionWithBounds( @@ -2701,6 +2712,86 @@ private: sbe::makeE<sbe::ELocalBind>(frameId, std::move(binds), std::move(setExpr))); } + /** + * Shared expression building logic for regex expressions. + */ + void generateRegexExpression(ExpressionRegex* expr, StringData exprName) { + size_t arity = (expr->hasOptions()) ? 3 : 2; + _context->ensureArity(arity); + + std::unique_ptr<sbe::EExpression> options = + (arity == 3) ? _context->popExpr() : sbe::makeE<sbe::EConstant>(""); + auto pattern = _context->popExpr(); + auto input = _context->popExpr(); + + auto pcreRegexExpr = [&]() { + auto [patternStr, optStr] = expr->getConstantPatternAndOptions(); + if (patternStr) { + // Create the compiled Regex from constant pattern and options. + auto [regexTag, regexVal] = sbe::value::makeNewPcreRegex(patternStr.get(), optStr); + return sbe::makeE<sbe::EConstant>(regexTag, regexVal); + } else { + // Build a call to regexCompile function. 
+ auto frameId = _context->frameIdGenerator->generate(); + auto binds = sbe::makeEs(std::move(pattern)); + sbe::EVariable patternRef(frameId, 0); + + return sbe::makeE<sbe::ELocalBind>( + frameId, + std::move(binds), + buildMultiBranchConditional( + CaseValuePair{generateNullOrMissing(patternRef), + sbe::makeE<sbe::EConstant>(sbe::value::TypeTags::Null, 0)}, + CaseValuePair{generateNonStringCheck(patternRef), + sbe::makeE<sbe::EFail>(ErrorCodes::Error{5073400}, + str::stream() + << "$" << exprName.toString() + << " expects string pattern")}, + sbe::makeE<sbe::EFunction>( + "regexCompile", sbe::makeEs(patternRef.clone(), std::move(options))))); + } + }(); + + auto outerFrameId = _context->frameIdGenerator->generate(); + auto outerBinds = sbe::makeEs(std::move(pcreRegexExpr), std::move(input)); + sbe::EVariable regexRef(outerFrameId, 0); + sbe::EVariable inputRef(outerFrameId, 1); + auto innerFrameId = _context->frameIdGenerator->generate(); + sbe::EVariable resRef(innerFrameId, 0); + + auto regexWithErrorCheck = buildMultiBranchConditional( + CaseValuePair{sbe::makeE<sbe::EPrimBinary>( + sbe::EPrimBinary::logicOr, + generateNullOrMissing(inputRef), + sbe::makeE<sbe::EFunction>("isNull", sbe::makeEs(regexRef.clone()))), + generateRegexNullResponse(exprName)}, + CaseValuePair{generateNonStringCheck(inputRef), + sbe::makeE<sbe::EFail>(ErrorCodes::Error{5073401}, + str::stream() << "$" << exprName.toString() + << " expects input of type string")}, + + CaseValuePair{ + sbe::makeE<sbe::EPrimUnary>( + sbe::EPrimUnary::logicNot, + sbe::makeE<sbe::EFunction>("exists", sbe::makeEs(regexRef.clone()))), + sbe::makeE<sbe::EFail>(ErrorCodes::Error{5073402}, "Invalid regular expression")}, + sbe::makeE<sbe::ELocalBind>( + innerFrameId, + sbe::makeEs(sbe::makeE<sbe::EFunction>( + exprName.toString(), sbe::makeEs(regexRef.clone(), inputRef.clone()))), + sbe::makeE<sbe::EIf>( + sbe::makeE<sbe::EFunction>("exists", sbe::makeEs(resRef.clone())), + resRef.clone(), + 
sbe::makeE<sbe::EFail>(ErrorCodes::Error{5073403}, + str::stream() + << "Unexpected error occurred while executing " + << exprName.toString() + << ". For more details see the error logs.")))); + + _context->pushExpr(sbe::makeE<sbe::ELocalBind>( + outerFrameId, std::move(outerBinds), std::move(regexWithErrorCheck))); + } + void unsupportedExpression(const char* op) const { uasserted(ErrorCodes::InternalErrorNotSupported, str::stream() << "Expression is not supported in SBE: " << op); diff --git a/src/mongo/db/query/sbe_stage_builder_filter.cpp b/src/mongo/db/query/sbe_stage_builder_filter.cpp index df2f2e3e096..e9080637b95 100644 --- a/src/mongo/db/query/sbe_stage_builder_filter.cpp +++ b/src/mongo/db/query/sbe_stage_builder_filter.cpp @@ -1042,9 +1042,9 @@ public: arr->reserve(regexes.size()); for (auto&& r : regexes) { - auto regex = RegexMatchExpression::makeRegex(r->getString(), r->getFlags()); - arr->push_back(sbe::value::TypeTags::pcreRegex, - sbe::value::bitcastFrom<pcrecpp::RE*>(regex.release())); + auto [regexTag, regexVal] = + sbe::value::makeNewPcreRegex(r->getString(), r->getFlags()); + arr->push_back(regexTag, regexVal); } auto makePredicate = @@ -1214,16 +1214,14 @@ public: void visit(const RegexMatchExpression* expr) final { auto makePredicate = [expr](sbe::value::SlotId inputSlot, EvalStage inputStage) -> EvalExprStagePair { - auto regex = RegexMatchExpression::makeRegex(expr->getString(), expr->getFlags()); - auto ownedRegexVal = sbe::value::bitcastFrom<pcrecpp::RE*>(regex.release()); - + auto [regexTag, regexVal] = + sbe::value::makeNewPcreRegex(expr->getString(), expr->getFlags()); // TODO: In the future, this needs to account for the fact that the regex match // expression matches strings, but also matches stored regexes. For example, // {$match: {a: /foo/}} matches the document {a: /foo/} in addition to {a: "foobar"}. 
return {makeFillEmptyFalse(sbe::makeE<sbe::EFunction>( "regexMatch", - sbe::makeEs(sbe::makeE<sbe::EConstant>(sbe::value::TypeTags::pcreRegex, - ownedRegexVal), + sbe::makeEs(sbe::makeE<sbe::EConstant>(regexTag, regexVal), sbe::makeE<sbe::EVariable>(inputSlot)))), std::move(inputStage)}; }; |