summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Milena Ivanova <milena.ivanova@mongodb.com> 2020-09-22 16:16:18 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-11-23 14:07:45 +0000
commit f39eb409f4f3e32b3d77b4b05bb977113c52f48c (patch)
tree 1bdf8735570aa1ad6e0d195be867636996cacb0e
parent afe5d5357afb60ebf1c5ba09fecc9e712d56eacb (diff)
download mongo-f39eb409f4f3e32b3d77b4b05bb977113c52f48c.tar.gz
SERVER-50734 Support regexp expressions in SBE
-rw-r--r-- src/mongo/db/exec/sbe/SConscript 3
-rw-r--r-- src/mongo/db/exec/sbe/expressions/expression.cpp 3
-rw-r--r-- src/mongo/db/exec/sbe/expressions/sbe_regex_test.cpp 203
-rw-r--r-- src/mongo/db/exec/sbe/values/value.cpp 52
-rw-r--r-- src/mongo/db/exec/sbe/values/value.h 94
-rw-r--r-- src/mongo/db/exec/sbe/vm/vm.cpp 316
-rw-r--r-- src/mongo/db/exec/sbe/vm/vm.h 6
-rw-r--r-- src/mongo/db/pipeline/expression.cpp 56
-rw-r--r-- src/mongo/db/pipeline/expression.h 10
-rw-r--r-- src/mongo/db/query/get_executor.cpp 2
-rw-r--r-- src/mongo/db/query/sbe_stage_builder_expression.cpp 107
-rw-r--r-- src/mongo/db/query/sbe_stage_builder_filter.cpp 14
12 files changed, 813 insertions, 53 deletions
diff --git a/src/mongo/db/exec/sbe/SConscript b/src/mongo/db/exec/sbe/SConscript
index 719e6012188..71bd9879d9d 100644
--- a/src/mongo/db/exec/sbe/SConscript
+++ b/src/mongo/db/exec/sbe/SConscript
@@ -22,7 +22,7 @@ env.Library(
'$BUILD_DIR/mongo/base',
'$BUILD_DIR/mongo/db/query/datetime/date_time_support',
'$BUILD_DIR/mongo/db/storage/key_string',
- '$BUILD_DIR/third_party/shim_pcrecpp',
+ '$BUILD_DIR/mongo/util/regex_util',
]
)
@@ -124,6 +124,7 @@ env.CppUnitTest(
'expressions/sbe_is_member_builtin_test.cpp',
'expressions/sbe_iso_date_to_parts_test.cpp',
'expressions/sbe_mod_expression_test.cpp',
+ 'expressions/sbe_regex_test.cpp',
'expressions/sbe_set_expressions_test.cpp',
'expressions/sbe_to_upper_to_lower_test.cpp',
'expressions/sbe_trigonometric_expressions_test.cpp',
diff --git a/src/mongo/db/exec/sbe/expressions/expression.cpp b/src/mongo/db/exec/sbe/expressions/expression.cpp
index 35fa4ee3597..292efa3147f 100644
--- a/src/mongo/db/exec/sbe/expressions/expression.cpp
+++ b/src/mongo/db/exec/sbe/expressions/expression.cpp
@@ -416,6 +416,9 @@ static stdx::unordered_map<std::string, BuiltinFn> kBuiltinFunctions = {
BuiltinFn{[](size_t n) { return n == 2; }, vm::Builtin::setDifference, false}},
{"runJsPredicate",
BuiltinFn{[](size_t n) { return n == 2; }, vm::Builtin::runJsPredicate, false}},
+ {"regexCompile", BuiltinFn{[](size_t n) { return n == 2; }, vm::Builtin::regexCompile, false}},
+ {"regexFind", BuiltinFn{[](size_t n) { return n == 2; }, vm::Builtin::regexFind, false}},
+ {"regexFindAll", BuiltinFn{[](size_t n) { return n == 2; }, vm::Builtin::regexFindAll, false}},
};
/**
diff --git a/src/mongo/db/exec/sbe/expressions/sbe_regex_test.cpp b/src/mongo/db/exec/sbe/expressions/sbe_regex_test.cpp
new file mode 100644
index 00000000000..cd601c52590
--- /dev/null
+++ b/src/mongo/db/exec/sbe/expressions/sbe_regex_test.cpp
@@ -0,0 +1,203 @@
+/**
+ * Copyright (C) 2020-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/exec/sbe/expression_test_base.h"
+
+namespace mongo::sbe {
+/**
+ * Test fixture with helpers that execute a compiled regex builtin expression and verify the value
+ * it produces.
+ */
+class SBERegexTest : public EExpressionTestFixture {
+protected:
+    /**
+     * Runs 'compiledExpr' and asserts that it yields a pcreRegex value whose
+     * "/<pattern>/<options>" rendering equals 'regexString'.
+     */
+    void runAndAssertRegexCompile(const vm::CodeFragment* compiledExpr,
+                                  std::string_view regexString) {
+        auto [tag, val] = runCompiledExpression(compiledExpr);
+        value::ValueGuard guard(tag, val);
+
+        ASSERT_EQUALS(value::TypeTags::pcreRegex, tag);
+
+        auto regex = value::getPcreRegexView(val);
+        std::string res = str::stream() << "/" << regex->pattern() << "/" << regex->options();
+        ASSERT_EQUALS(res, regexString);
+    }
+
+    /**
+     * Runs 'compiledExpr' and asserts that it yields the boolean 'expected'.
+     */
+    void runAndAssertMatchExpression(const vm::CodeFragment* compiledExpr, bool expected) {
+        auto [tag, val] = runCompiledExpression(compiledExpr);
+        value::ValueGuard guard(tag, val);
+
+        ASSERT(tag == value::TypeTags::Boolean);
+        ASSERT_EQUALS(value::bitcastTo<bool>(val), expected);
+    }
+
+    /**
+     * Runs 'compiledExpr' and asserts that it yields an object whose "match" field is the string
+     * 'expectedMatch' and whose "idx" field is the 32-bit integer 'idx'.
+     */
+    void runAndAssertFindExpression(const vm::CodeFragment* compiledExpr,
+                                    std::string_view expectedMatch,
+                                    int idx) {
+        auto [tag, val] = runCompiledExpression(compiledExpr);
+        value::ValueGuard guard(tag, val);
+
+        ASSERT(tag == value::TypeTags::Object);
+        auto obj = value::getObjectView(val);
+
+        // The field values read below stay owned by 'obj' and are released together with it by
+        // 'guard'. They deliberately get no guards of their own: a guard (in particular a
+        // temporary one) would release a value that the object still holds.
+        auto [matchTag, matchVal] = obj->getField("match");
+        ASSERT(value::isString(matchTag));
+        ASSERT_EQUALS(value::getStringView(matchTag, matchVal), expectedMatch);
+
+        auto [idxTag, idxVal] = obj->getField("idx");
+        ASSERT_EQUALS(idxTag, value::TypeTags::NumberInt32);
+        ASSERT_EQUALS(value::numericCast<int32_t>(idxTag, idxVal), idx);
+    }
+
+    /**
+     * Appends to 'arrayPtr' an expected result object of the form
+     * {match: 'matchStr', idx: 'idx', captures: []}.
+     */
+    void addMatchResult(value::Array* arrayPtr, std::string_view matchStr, int32_t idx) {
+        auto [objTag, objVal] = value::makeNewObject();
+        value::ValueGuard objGuard{objTag, objVal};
+        auto obj = value::getObjectView(objVal);
+
+        auto [matchStrTag, matchStrVal] = value::makeNewString(matchStr);
+        auto [capturesTag, capturesVal] = value::makeNewArray();
+        obj->push_back("match", matchStrTag, matchStrVal);
+        obj->push_back("idx", value::TypeTags::NumberInt32, value::bitcastFrom<int32_t>(idx));
+        obj->push_back("captures", capturesTag, capturesVal);
+
+        // Hand the finished object over to the array.
+        objGuard.reset();
+        arrayPtr->push_back(objTag, objVal);
+    }
+
+    /**
+     * Runs 'compiledExpr' and asserts that it yields an array of objects whose "match" and "idx"
+     * fields agree, element by element, with those of the objects in 'expected'.
+     */
+    void runAndAssertFindAllExpression(const vm::CodeFragment* compiledExpr,
+                                       value::Array* expected) {
+        auto [tag, val] = runCompiledExpression(compiledExpr);
+        value::ValueGuard guard(tag, val);
+
+        ASSERT(tag == value::TypeTags::Array);
+        auto arr = value::getArrayView(val);
+
+        ASSERT_EQUALS(arr->size(), expected->size());
+
+        for (size_t idx = 0; idx < arr->size(); ++idx) {
+            auto [objTag, objVal] = arr->getAt(idx);
+            ASSERT(objTag == value::TypeTags::Object);
+            auto [expObjTag, expObjVal] = expected->getAt(idx);
+            ASSERT(expObjTag == value::TypeTags::Object);
+
+            auto actualObj = value::getObjectView(objVal);
+            auto expectedObj = value::getObjectView(expObjVal);
+
+            auto [matchTag, matchVal] = actualObj->getField("match");
+            auto [expMatchTag, expMatchVal] = expectedObj->getField("match");
+            ASSERT_EQUALS(matchTag, expMatchTag);
+            ASSERT_EQUALS(value::getStringView(matchTag, matchVal),
+                          value::getStringView(expMatchTag, expMatchVal));
+
+            auto [idxTag, idxVal] = actualObj->getField("idx");
+            auto [expIdxTag, expIdxVal] = expectedObj->getField("idx");
+            ASSERT_EQUALS(idxTag, expIdxTag);
+            ASSERT_EQUALS(value::numericCast<int64_t>(idxTag, idxVal),
+                          value::numericCast<int64_t>(expIdxTag, expIdxVal));
+        }
+    }
+};
+
+// regexCompile(pattern, options) must produce a pcreRegex value that remembers both arguments.
+TEST_F(SBERegexTest, ComputesRegexCompile) {
+    // One slot feeds the pattern argument, the other the options argument.
+    value::OwnedValueAccessor patternAccessor;
+    value::OwnedValueAccessor optionsAccessor;
+    auto patternSlot = bindAccessor(&patternAccessor);
+    auto optionsSlot = bindAccessor(&optionsAccessor);
+
+    auto compileCall = sbe::makeE<sbe::EFunction>(
+        "regexCompile", sbe::makeEs(makeE<EVariable>(patternSlot), makeE<EVariable>(optionsSlot)));
+    auto code = compileExpression(*compileCall);
+
+    auto [patTag, patVal] = value::makeNewString("^Many");
+    auto [optTag, optVal] = value::makeNewString("i");
+    patternAccessor.reset(patTag, patVal);
+    optionsAccessor.reset(optTag, optVal);
+
+    runAndAssertRegexCompile(code.get(), "/^Many/i");
+}
+
+// regexMatch(regex, input) must report whether the input contains a match.
+TEST_F(SBERegexTest, ComputesRegexMatch) {
+    value::OwnedValueAccessor regexAccessor;
+    value::OwnedValueAccessor inputAccessor;
+    auto regexSlot = bindAccessor(&regexAccessor);
+    auto inputSlot = bindAccessor(&inputAccessor);
+
+    auto matchCall = sbe::makeE<sbe::EFunction>(
+        "regexMatch", sbe::makeEs(makeE<EVariable>(regexSlot), makeE<EVariable>(inputSlot)));
+    auto code = compileExpression(*matchCall);
+
+    // Loads a fresh regex and input string into the slots and checks the outcome.
+    auto checkCase = [&](std::string_view pattern, std::string_view input, bool expected) {
+        auto [reTag, reVal] = value::makeNewPcreRegex(pattern, "");
+        auto [strTag, strVal] = value::makeNewString(input);
+        regexAccessor.reset(reTag, reVal);
+        inputAccessor.reset(strTag, strVal);
+        runAndAssertMatchExpression(code.get(), expected);
+    };
+
+    checkCase("line", "Many lines of code", true);
+    checkCase("link", "Example text", false);
+}
+
+// regexFind(regex, input) must return the first match together with its code point index.
+TEST_F(SBERegexTest, ComputesRegexFind) {
+    value::OwnedValueAccessor regexAccessor;
+    value::OwnedValueAccessor inputAccessor;
+    auto regexSlot = bindAccessor(&regexAccessor);
+    auto inputSlot = bindAccessor(&inputAccessor);
+
+    auto findCall = sbe::makeE<sbe::EFunction>(
+        "regexFind", sbe::makeEs(makeE<EVariable>(regexSlot), makeE<EVariable>(inputSlot)));
+    auto code = compileExpression(*findCall);
+
+    // Loads a fresh regex and input string into the slots and checks the reported match.
+    auto checkCase = [&](std::string_view pattern,
+                         std::string_view options,
+                         std::string_view input,
+                         std::string_view expectedMatch,
+                         int expectedIdx) {
+        auto [reTag, reVal] = value::makeNewPcreRegex(pattern, options);
+        auto [strTag, strVal] = value::makeNewString(input);
+        regexAccessor.reset(reTag, reVal);
+        inputAccessor.reset(strTag, strVal);
+        runAndAssertFindExpression(code.get(), expectedMatch, expectedIdx);
+    };
+
+    // Case-sensitive search.
+    checkCase("line", "", "Many lines of code", "line", 5);
+    // Case-insensitive search: the match is reported as it appears in the input.
+    checkCase("line", "i", "Many LINES of code", "LINE", 5);
+}
+
+// regexFindAll(regex, input) must return every match, in order, with its code point index.
+TEST_F(SBERegexTest, ComputesRegexFindAll) {
+    value::OwnedValueAccessor regexAccessor;
+    value::OwnedValueAccessor inputAccessor;
+    auto regexSlot = bindAccessor(&regexAccessor);
+    auto inputSlot = bindAccessor(&inputAccessor);
+
+    auto findAllCall = sbe::makeE<sbe::EFunction>(
+        "regexFindAll", sbe::makeEs(makeE<EVariable>(regexSlot), makeE<EVariable>(inputSlot)));
+    auto code = compileExpression(*findAllCall);
+
+    // Build the expected result: "line" occurs at code points 4 and 16 of the input below.
+    auto [expectedTag, expectedVal] = value::makeNewArray();
+    value::ValueGuard expectedGuard{expectedTag, expectedVal};
+    auto expectedMatches = value::getArrayView(expectedVal);
+    addMatchResult(expectedMatches, "line", 4);
+    addMatchResult(expectedMatches, "line", 16);
+
+    auto [reTag, reVal] = value::makeNewPcreRegex("line", "");
+    auto [strTag, strVal] = value::makeNewString("One line or two lines of code");
+    regexAccessor.reset(reTag, reVal);
+    inputAccessor.reset(strTag, strVal);
+
+    runAndAssertFindAllExpression(code.get(), expectedMatches);
+}
+
+} // namespace mongo::sbe
diff --git a/src/mongo/db/exec/sbe/values/value.cpp b/src/mongo/db/exec/sbe/values/value.cpp
index b2107e7a127..56e092237f8 100644
--- a/src/mongo/db/exec/sbe/values/value.cpp
+++ b/src/mongo/db/exec/sbe/values/value.cpp
@@ -31,13 +31,12 @@
#include "mongo/db/exec/sbe/values/value.h"
-#include <pcrecpp.h>
-
#include "mongo/db/exec/js_function.h"
#include "mongo/db/exec/sbe/values/bson.h"
#include "mongo/db/exec/sbe/values/value_builder.h"
#include "mongo/db/query/datetime/date_time_support.h"
#include "mongo/db/storage/key_string.h"
+#include "mongo/util/regex_util.h"
namespace mongo {
namespace sbe {
@@ -48,9 +47,49 @@ std::pair<TypeTags, Value> makeCopyKeyString(const KeyString::Value& inKey) {
return {TypeTags::ksValue, bitcastFrom<KeyString::Value*>(k)};
}
-std::pair<TypeTags, Value> makeCopyPcreRegex(const pcrecpp::RE& regex) {
- auto ownedRegexVal = sbe::value::bitcastFrom<pcrecpp::RE*>(new pcrecpp::RE(regex));
- return {TypeTags::pcreRegex, ownedRegexVal};
+// Compiles 'pattern' with 'options' into a heap-allocated PcreRegex. Returns a pcreRegex value
+// holding the new object when compilation succeeded and Nothing otherwise.
+std::pair<TypeTags, Value> makeNewPcreRegex(std::string_view pattern, std::string_view options) {
+    auto compiled = std::make_unique<PcreRegex>(pattern, options);
+    if (!compiled->isValid()) {
+        // The unique_ptr disposes of the unusable object.
+        return {TypeTags::Nothing, 0};
+    }
+    return {TypeTags::pcreRegex, bitcastFrom<PcreRegex*>(compiled.release())};
+}
+
+// Makes a heap-allocated copy of 'regex'. Returns a pcreRegex value holding the copy, or Nothing
+// when 'regex' itself is not valid.
+std::pair<TypeTags, Value> makeCopyPcreRegex(const PcreRegex& regex) {
+    if (!regex.isValid()) {
+        return {TypeTags::Nothing, 0};
+    }
+
+    auto duplicate = std::make_unique<PcreRegex>(regex);
+    // Copying a valid regex is expected to yield a valid regex.
+    invariant(duplicate->isValid());
+    return {TypeTags::pcreRegex, bitcastFrom<PcreRegex*>(duplicate.release())};
+}
+
+// Compiles '_pattern' using the PCRE options derived from '_options' and records the outcome:
+// '_pcrePtr' receives the result of pcre_compile() and '_isValid' is true only when that result
+// is non-null.
+void PcreRegex::_compile() {
+ const auto pcreOptions = regex_util::flagsToPcreOptions(_options.c_str(), false).all_options();
+ // Out-parameters passed to pcre_compile(); their contents are not inspected afterwards.
+ const char* compile_error;
+ int eoffset;
+ _pcrePtr = pcre_compile(_pattern.c_str(), pcreOptions, &compile_error, &eoffset, nullptr);
+ _isValid = (_pcrePtr != nullptr);
+}
+
+// Runs the compiled expression against 'stringView', starting the search at byte offset
+// 'startPos'. 'buf' is handed to pcre_exec() as the output vector for the match and capture
+// offsets. Returns the pcre_exec() result unchanged. The regex must be valid.
+int PcreRegex::execute(std::string_view stringView, int startPos, std::vector<int>& buf) {
+    invariant(_isValid);
+    // data() is well-defined even for an empty vector, unlike taking the address of front().
+    // The lengths are narrowed explicitly because pcre_exec() takes them as int.
+    return pcre_exec(_pcrePtr,
+                     nullptr,
+                     stringView.data(),
+                     static_cast<int>(stringView.length()),
+                     startPos,
+                     0,
+                     buf.data(),
+                     static_cast<int>(buf.size()));
+}
+
+// Returns the number of capture groups of the compiled expression, as reported by
+// pcre_fullinfo(PCRE_INFO_CAPTURECOUNT). The regex must be valid.
+size_t PcreRegex::getNumberCaptures() const {
+    invariant(_isValid);
+    // Initialized so that a failed query can never expose an indeterminate value.
+    int numCaptures = 0;
+    // pcre_fullinfo() returns 0 on success and a negative error code otherwise.
+    const int rc = pcre_fullinfo(_pcrePtr, nullptr, PCRE_INFO_CAPTURECOUNT, &numCaptures);
+    invariant(rc == 0);
+    invariant(numCaptures >= 0);
+    return static_cast<size_t>(numCaptures);
 }
std::pair<TypeTags, Value> makeCopyJsFunction(const JsFunction& jsFunction) {
@@ -374,8 +413,7 @@ void writeValueToStream(T& stream, TypeTags tag, Value val) {
}
case value::TypeTags::pcreRegex: {
auto regex = getPcreRegexView(val);
- // TODO: Also include the regex flags.
- stream << "/" << regex->pattern() << "/";
+ stream << "/" << regex->pattern() << "/" << regex->options();
break;
}
case value::TypeTags::timeZoneDB: {
diff --git a/src/mongo/db/exec/sbe/values/value.h b/src/mongo/db/exec/sbe/values/value.h
index c853e6ec4b2..087e693d83f 100644
--- a/src/mongo/db/exec/sbe/values/value.h
+++ b/src/mongo/db/exec/sbe/values/value.h
@@ -35,6 +35,7 @@
#include <bitset>
#include <cstdint>
#include <ostream>
+#include <pcre.h>
#include <string>
#include <utility>
#include <vector>
@@ -47,10 +48,6 @@
#include "mongo/util/assert_util.h"
#include "mongo/util/represent_as.h"
-namespace pcrecpp {
-class RE;
-} // namespace pcrecpp
-
namespace mongo {
/**
* Forward declaration.
@@ -155,6 +152,10 @@ inline constexpr bool isRecordId(TypeTags tag) noexcept {
return tag == TypeTags::RecordId;
}
+// Returns true iff 'tag' is TypeTags::pcreRegex.
+inline constexpr bool isPcreRegex(TypeTags tag) noexcept {
+ return tag == TypeTags::pcreRegex;
+}
+
BSONType tagToType(TypeTags tag) noexcept;
/**
@@ -529,6 +530,81 @@ private:
ValueSetType _values;
};
+/**
+ * Implements a wrapper of a PCRE regular expression.
+ * The pattern and the options are stored next to the compiled expression so that a
+ * sbe::value::PcreRegex can be copied; copying recompiles the expression from these strings.
+ * The compiled expression (pcre*) allows for direct usage of the pcre C library functionality.
+ *
+ * An instance owns its compiled expression and frees it on destruction. Use isValid() to find out
+ * whether compilation succeeded; a default-constructed instance is not valid.
+ */
+class PcreRegex {
+public:
+    PcreRegex() = default;
+
+    /**
+     * Stores 'pattern' and 'options' and compiles them right away.
+     */
+    PcreRegex(std::string_view pattern, std::string_view options)
+        : _pattern(pattern), _options(options), _pcrePtr(nullptr) {
+        _compile();
+    }
+
+    PcreRegex(std::string_view pattern) : PcreRegex(pattern, "") {}
+
+    /**
+     * Copies by recompiling the pattern and options of 'other'.
+     */
+    PcreRegex(const PcreRegex& other) : PcreRegex(other._pattern, other._options) {}
+
+    /**
+     * Frees the currently held compiled expression, takes over the pattern and options of 'other'
+     * and recompiles them.
+     */
+    PcreRegex& operator=(const PcreRegex& other) {
+        if (this != &other) {
+            if (_pcrePtr != nullptr) {
+                (*pcre_free)(_pcrePtr);
+                // Never keep a pointer to freed memory around: the string assignments below may
+                // throw, and the destructor must not free the expression a second time.
+                _pcrePtr = nullptr;
+            }
+            _isValid = false;
+            _pattern = other._pattern;
+            _options = other._options;
+            _compile();
+        }
+        return *this;
+    }
+
+    ~PcreRegex() {
+        if (_pcrePtr != nullptr) {
+            (*pcre_free)(_pcrePtr);
+        }
+    }
+
+    bool isValid() const {
+        return _isValid;
+    }
+
+    const std::string& pattern() const {
+        return _pattern;
+    }
+
+    const std::string& options() const {
+        return _options;
+    }
+
+    /**
+     * Wrapper function for pcre_exec().
+     * - input: The input string.
+     * - startPos: The position from where the search should start.
+     * - buf: Array populated with the found matched string and capture groups.
+     * Returns the number of matches or an error code:
+     * < -1 error
+     * = -1 no match
+     * = 0 there was a match, but not enough space in the buffer
+     * > 0 the number of matches
+     */
+    int execute(std::string_view input, int startPos, std::vector<int>& buf);
+
+    /**
+     * Returns the number of capture groups in the compiled expression.
+     */
+    size_t getNumberCaptures() const;
+
+private:
+    void _compile();
+
+    std::string _pattern;
+    std::string _options;
+
+    // Initialized in-class so that a default-constructed instance holds no compiled expression
+    // and its destructor has nothing to free.
+    pcre* _pcrePtr = nullptr;
+    bool _isValid = false;
+};
+
constexpr size_t kSmallStringThreshold = 8;
using ObjectIdType = std::array<uint8_t, 12>;
static_assert(sizeof(ObjectIdType) == 12);
@@ -694,8 +770,12 @@ inline KeyString::Value* getKeyStringView(Value val) noexcept {
return reinterpret_cast<KeyString::Value*>(val);
}
-inline pcrecpp::RE* getPcreRegexView(Value val) noexcept {
- return reinterpret_cast<pcrecpp::RE*>(val);
+std::pair<TypeTags, Value> makeNewPcreRegex(std::string_view pattern, std::string_view options);
+
+std::pair<TypeTags, Value> makeCopyPcreRegex(const PcreRegex& regex);
+
+// Reinterprets 'val' as a pointer to a PcreRegex. No check of the accompanying type tag is
+// performed here.
+inline PcreRegex* getPcreRegexView(Value val) noexcept {
+ return reinterpret_cast<PcreRegex*>(val);
 }
inline JsFunction* getJsFunctionView(Value val) noexcept {
@@ -708,8 +788,6 @@ inline TimeZoneDatabase* getTimeZoneDBView(Value val) noexcept {
std::pair<TypeTags, Value> makeCopyKeyString(const KeyString::Value& inKey);
-std::pair<TypeTags, Value> makeCopyPcreRegex(const pcrecpp::RE&);
-
std::pair<TypeTags, Value> makeCopyJsFunction(const JsFunction&);
void releaseValue(TypeTags tag, Value val) noexcept;
diff --git a/src/mongo/db/exec/sbe/vm/vm.cpp b/src/mongo/db/exec/sbe/vm/vm.cpp
index 50b0b70a3ec..9ad1c9116e3 100644
--- a/src/mongo/db/exec/sbe/vm/vm.cpp
+++ b/src/mongo/db/exec/sbe/vm/vm.cpp
@@ -27,13 +27,15 @@
* it in the license file.
*/
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery
+
#include "mongo/platform/basic.h"
#include "mongo/db/exec/sbe/expressions/expression.h"
#include "mongo/db/exec/sbe/vm/vm.h"
#include <boost/algorithm/string.hpp>
-#include <pcrecpp.h>
+#include <pcre.h>
#include "mongo/bson/oid.h"
#include "mongo/db/client.h"
@@ -43,7 +45,9 @@
#include "mongo/db/exec/sbe/vm/datetime.h"
#include "mongo/db/query/datetime/date_time_support.h"
#include "mongo/db/storage/key_string.h"
+#include "mongo/logv2/log.h"
#include "mongo/util/fail_point.h"
+#include "mongo/util/str.h"
#include "mongo/util/summation.h"
MONGO_FAIL_POINT_DEFINE(failOnPoisonedFieldLookup);
@@ -974,25 +978,6 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinAddToSet(ArityT
return {ownAgg, tagAgg, valAgg};
}
-std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinRegexMatch(ArityType arity) {
- invariant(arity == 2);
-
- auto [ownedPcreRegex, typeTagPcreRegex, valuePcreRegex] = getFromStack(0);
- auto [ownedInputStr, typeTagInputStr, valueInputStr] = getFromStack(1);
-
- if (!value::isString(typeTagInputStr) || typeTagPcreRegex != value::TypeTags::pcreRegex) {
- return {false, value::TypeTags::Nothing, 0};
- }
-
- auto stringView = value::getStringView(typeTagInputStr, valueInputStr);
- pcrecpp::StringPiece pcreStringView{stringView.data(), static_cast<int>(stringView.size())};
-
- auto pcreRegex = value::getPcreRegexView(valuePcreRegex);
- auto regexMatchResult = pcreRegex->PartialMatch(pcreStringView);
-
- return {false, value::TypeTags::Boolean, value::bitcastFrom<bool>(regexMatchResult)};
-}
-
std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinRunJsPredicate(ArityType arity) {
invariant(arity == 2);
@@ -1932,6 +1917,291 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinSetDifference(A
return {true, resTag, resVal};
}
+namespace {
+/**
+ * A helper function to create the result object {"match" : .., "idx" : ..., "captures" :
+ * ...} from the result of pcre_exec().
+ * - inputString: The subject string that was searched.
+ * - capturesBuffer: The offsets written by pcre_exec(): entries 0 and 1 delimit the whole match,
+ *   the following pairs delimit the capture groups.
+ * - numCaptures: The number of capture groups to read from 'capturesBuffer'.
+ * - startBytePos: In/out. On entry the byte offset that 'codePointPos' corresponds to; on
+ *   successful bounds verification of the match it is set to the byte offset where the match
+ *   starts.
+ * - codePointPos: In/out. Advanced by the number of code points between the old 'startBytePos'
+ *   and the start of the match.
+ * Returns an owned Object on success, or Nothing when any reported offset pair fails the bounds
+ * verification (an error is logged in that case).
+ */
+std::tuple<bool, value::TypeTags, value::Value> buildRegexMatchResultObject(
+ std::string_view inputString,
+ const std::vector<int>& capturesBuffer,
+ size_t numCaptures,
+ uint32_t& startBytePos,
+ uint32_t& codePointPos) {
+
+ // Returns true when [startPos, limitPos) is a usable range inside 'inputString', or when it
+ // denotes an unmatched capture group; logs an error and returns false otherwise.
+ auto verifyBounds = [&inputString](auto startPos, auto limitPos, auto isCapture) {
+ // If a capture group was not matched, then the 'startPos' and 'limitPos' will both be -1.
+ // These bounds cannot occur for a match on the full string.
+ if (startPos == -1 && limitPos == -1 && isCapture) {
+ return true;
+ }
+ if (startPos == -1 || limitPos == -1) {
+ LOGV2_ERROR(5073412,
+ "Unexpected error occurred while executing regexFind.",
+ "startPos"_attr = startPos,
+ "limitPos"_attr = limitPos);
+ return false;
+ }
+ if (startPos < 0 || static_cast<size_t>(startPos) > inputString.size() || limitPos < 0 ||
+ static_cast<size_t>(limitPos) > inputString.size() || startPos > limitPos) {
+ LOGV2_ERROR(5073413,
+ "Unexpected error occurred while executing regexFind.",
+ "startPos"_attr = startPos,
+ "limitPos"_attr = limitPos);
+ return false;
+ }
+ return true;
+ };
+
+ // Extract the matched string: its start and (end+1) indices are in the first two elements of
+ // capturesBuffer.
+ if (!verifyBounds(capturesBuffer[0], capturesBuffer[1], false)) {
+ return {false, value::TypeTags::Nothing, 0};
+ }
+ auto matchStartIdx = capturesBuffer[0];
+ auto matchedString = inputString.substr(matchStartIdx, capturesBuffer[1] - matchStartIdx);
+ auto [matchedTag, matchedVal] = value::makeNewString(matchedString);
+ // Releases the string on the early-return paths below, until it is moved into the result.
+ value::ValueGuard matchedGuard{matchedTag, matchedVal};
+
+ // We iterate through the input string's contents preceding the match index, in order to convert
+ // the byte offset to a code point offset.
+ for (auto byteIdx = startBytePos; byteIdx < static_cast<uint32_t>(matchStartIdx);
+ ++codePointPos) {
+ byteIdx += str::getCodePointLength(inputString[byteIdx]);
+ }
+ startBytePos = matchStartIdx;
+
+ auto [arrTag, arrVal] = value::makeNewArray();
+ value::ValueGuard arrGuard{arrTag, arrVal};
+ auto arrayView = value::getArrayView(arrVal);
+ // The next '2 * numCaptures' entries (after the first two entries) of 'capturesBuffer'
+ // hold the (start, limit) pairs of indexes, for each of the capture groups. We skip the first
+ // two elements and start iteration from 3rd element so that we only construct the strings for
+ // capture groups.
+ for (size_t i = 0; i < numCaptures; ++i) {
+ const auto start = capturesBuffer[2 * (i + 1)];
+ const auto limit = capturesBuffer[2 * (i + 1) + 1];
+ if (!verifyBounds(start, limit, true)) {
+ return {false, value::TypeTags::Nothing, 0};
+ }
+
+ // An unmatched capture group is represented by a null entry in the "captures" array.
+ if (start == -1 && limit == -1) {
+ arrayView->push_back(value::TypeTags::Null, 0);
+ } else {
+ auto captureString = inputString.substr(start, limit - start);
+ auto [tag, val] = value::makeNewString(captureString);
+ arrayView->push_back(tag, val);
+ }
+ }
+
+ auto [resTag, resVal] = value::makeNewObject();
+ value::ValueGuard resGuard{resTag, resVal};
+ auto resObjectView = value::getObjectView(resVal);
+ resObjectView->reserve(3);
+ // Each guard is disarmed right before its value is handed over to the result object.
+ matchedGuard.reset();
+ resObjectView->push_back("match", matchedTag, matchedVal);
+ resObjectView->push_back(
+ "idx", value::TypeTags::NumberInt32, value::bitcastFrom<int32_t>(codePointPos));
+ arrGuard.reset();
+ resObjectView->push_back("captures", arrTag, arrVal);
+ resGuard.reset();
+ return {true, resTag, resVal};
+}
+
+/**
+ * Extracts the next match of the compiled regex pattern in the subject string.
+ * - pcre: The wrapper object containing the compiled pcre expression.
+ * - inputString: The subject string.
+ * - capturesBuffer: Array to be populated with the found matched string and capture groups.
+ * - startBytePos: The position, in bytes, from where the search should start.
+ * - codePointPos: The same position expressed in code points.
+ * - isMatch: Set when the caller implements $regexMatch, in which case the result is a boolean
+ *   instead of a match object.
+ * Returns Nothing when the regex execution reports an unexpected result.
+ */
+std::tuple<bool, value::TypeTags, value::Value> pcreNextMatch(value::PcreRegex* pcre,
+                                                              std::string_view inputString,
+                                                              std::vector<int>& capturesBuffer,
+                                                              uint32_t& startBytePos,
+                                                              uint32_t& codePointPos,
+                                                              bool isMatch = false) {
+    const auto rc = pcre->execute(inputString, startBytePos, capturesBuffer);
+    const auto captureCount = pcre->getNumberCaptures();
+
+    // Anything from "no match" (-1) up to "whole match plus every capture group" is acceptable.
+    const bool rcInRange = rc >= -1 && rc <= static_cast<int>(captureCount) + 1;
+    if (!rcInRange) {
+        LOGV2_ERROR(5073414,
+                    "Error occurred while executing regular expression.",
+                    "execResult"_attr = rc);
+        return {false, value::TypeTags::Nothing, 0};
+    }
+
+    const bool found = (rc != PCRE_ERROR_NOMATCH);
+
+    // $regexMatch returns true or false.
+    if (isMatch) {
+        return {false, value::TypeTags::Boolean, value::bitcastFrom<bool>(found)};
+    }
+
+    // $regexFind and $regexFindAll build result object or return null.
+    if (!found) {
+        return {false, value::TypeTags::Null, 0};
+    }
+    return buildRegexMatchResultObject(
+        inputString, capturesBuffer, captureCount, startBytePos, codePointPos);
+}
+
+/**
+ * Extracts the first match of the compiled regex pattern in the subject string. The optional
+ * pointer arguments let a caller keep the search state for follow-up calls to 'pcreNextMatch';
+ * when one is omitted, a local stand-in is used. See 'pcreNextMatch' for the description of the
+ * parameters.
+ */
+std::tuple<bool, value::TypeTags, value::Value> pcreFirstMatch(
+    value::PcreRegex* pcre,
+    std::string_view inputString,
+    bool isMatch = false,
+    std::vector<int>* capturesBuffer = nullptr,
+    uint32_t* startBytePos = nullptr,
+    uint32_t* codePointPos = nullptr) {
+    // Local state used for whichever pieces the caller did not supply.
+    std::vector<int> localCaptures;
+    uint32_t localBytePos = 0;
+    uint32_t localCodePointPos = 0;
+
+    auto& captures = capturesBuffer ? *capturesBuffer : localCaptures;
+    auto& bytePos = startBytePos ? *startBytePos : localBytePos;
+    auto& cpPos = codePointPos ? *codePointPos : localCodePointPos;
+
+    // The first two-thirds of the captures buffer is used to pass back captured substrings' start
+    // and (end+1) indexes. The remaining third of the vector is used as workspace by pcre_exec()
+    // while matching capturing subpatterns, and is not available for passing back information.
+    captures.resize((1 + pcre->getNumberCaptures()) * 3);
+
+    return pcreNextMatch(pcre, inputString, captures, bytePos, cpPos, isMatch);
+}
+
+/**
+ * Shared implementation of the $regexMatch and $regexFind builtins. Both look only for the first
+ * match of a regular expression and differ in the shape of the result, which 'isMatch' selects.
+ * Returns Nothing unless the input is a string and the regex is a pcreRegex value.
+ */
+std::tuple<bool, value::TypeTags, value::Value> genericPcreRegexSingleMatch(
+    value::TypeTags typeTagPcreRegex,
+    value::Value valuePcreRegex,
+    value::TypeTags typeTagInputStr,
+    value::Value valueInputStr,
+    bool isMatch) {
+    const bool argumentsUsable =
+        value::isString(typeTagInputStr) && value::isPcreRegex(typeTagPcreRegex);
+    if (!argumentsUsable) {
+        return {false, value::TypeTags::Nothing, 0};
+    }
+
+    return pcreFirstMatch(value::getPcreRegexView(valuePcreRegex),
+                          value::getStringView(typeTagInputStr, valueInputStr),
+                          isMatch);
+}
+} // namespace
+
+// Builtin 'regexCompile': compiles the <pattern, options> pair found on the stack into a pcreRegex
+// value. A null pattern yields null; non-string arguments or strings with embedded null bytes
+// yield Nothing.
+std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinRegexCompile(ArityType arity) {
+    invariant(arity == 2);
+
+    auto [patOwned, patTag, patVal] = getFromStack(0);
+    auto [optOwned, optTag, optVal] = getFromStack(1);
+
+    if (patTag == value::TypeTags::Null) {
+        return {false, value::TypeTags::Null, 0};
+    }
+
+    const bool bothStrings = value::isString(patTag) && value::isString(optTag);
+    if (!bothStrings) {
+        return {false, value::TypeTags::Nothing, 0};
+    }
+
+    // At the moment we support only string patterns.
+    // TODO SERVER-51266 : complete the following items once BSONType::RegEx is supported in SBE
+    // - Handle the case when patternTypeTag == TypeTags::bsonRegex.
+    // - Ensure that regex options are specified either in the options argument or in bsonRegex
+    // value.
+    const auto patternStr = value::getStringView(patTag, patVal);
+    const auto optionsStr = value::getStringView(optTag, optVal);
+
+    // Embedded null bytes are rejected in both arguments.
+    const auto hasNullByte = [](std::string_view s) {
+        return s.find('\0', 0) != std::string::npos;
+    };
+    if (hasNullByte(patternStr) || hasNullByte(optionsStr)) {
+        return {false, value::TypeTags::Nothing, 0};
+    }
+
+    auto [regexTag, regexVal] = value::makeNewPcreRegex(patternStr, optionsStr);
+    return {true, regexTag, regexVal};
+}
+
+// Builtin 'regexMatch': reports as a boolean whether the regex on the stack matches the input
+// string on the stack.
+std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinRegexMatch(ArityType arity) {
+    invariant(arity == 2);
+
+    auto [regexOwned, regexTag, regexVal] = getFromStack(0);
+    auto [inputOwned, inputTag, inputVal] = getFromStack(1);
+
+    constexpr bool kBooleanResult = true;
+    return genericPcreRegexSingleMatch(regexTag, regexVal, inputTag, inputVal, kBooleanResult);
+}
+
+// Builtin 'regexFind': returns the result object describing the first match of the regex on the
+// stack in the input string on the stack, or null when there is no match.
+std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinRegexFind(ArityType arity) {
+    invariant(arity == 2);
+
+    auto [regexOwned, regexTag, regexVal] = getFromStack(0);
+    auto [inputOwned, inputTag, inputVal] = getFromStack(1);
+
+    constexpr bool kBooleanResult = false;
+    return genericPcreRegexSingleMatch(regexTag, regexVal, inputTag, inputVal, kBooleanResult);
+}
+
+// Builtin 'regexFindAll': returns an array with one result object per match of the regex on the
+// stack in the input string on the stack, in order of occurrence. Returns Nothing when the
+// arguments have unexpected types or when a match attempt fails.
+std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinRegexFindAll(ArityType arity) {
+    invariant(arity == 2);
+    auto [ownedPcre, typeTagPcreRegex, valuePcreRegex] = getFromStack(0);
+    auto [ownedStr, typeTagInputStr, valueInputStr] = getFromStack(1);
+
+    if (!value::isString(typeTagInputStr) || !value::isPcreRegex(typeTagPcreRegex)) {
+        return {false, value::TypeTags::Nothing, 0};
+    }
+
+    auto inputString = value::getStringView(typeTagInputStr, valueInputStr);
+    auto pcre = value::getPcreRegexView(valuePcreRegex);
+
+    // Search state shared by all match attempts.
+    std::vector<int> capturesBuffer;
+    uint32_t startBytePos = 0;
+    uint32_t codePointPos = 0;
+    bool isFirstMatch = true;
+
+    // Prepare the result array of matching objects.
+    auto [arrTag, arrVal] = value::makeNewArray();
+    value::ValueGuard arrGuard{arrTag, arrVal};
+    auto arrayView = value::getArrayView(arrVal);
+
+    do {
+        // The first attempt also sizes the captures buffer; later attempts reuse it.
+        auto [owned, matchTag, matchVal] = [&]() {
+            if (isFirstMatch) {
+                isFirstMatch = false;
+                return pcreFirstMatch(
+                    pcre, inputString, false, &capturesBuffer, &startBytePos, &codePointPos);
+            }
+            return pcreNextMatch(pcre, inputString, capturesBuffer, startBytePos, codePointPos);
+        }();
+
+        if (matchTag == value::TypeTags::Null) {
+            // No further match.
+            break;
+        }
+        if (matchTag != value::TypeTags::Object) {
+            // The match attempt failed; 'arrGuard' releases what has been collected so far.
+            return {false, value::TypeTags::Nothing, 0};
+        }
+        arrayView->push_back(matchTag, matchVal);
+
+        // Move indexes after the current matched string to prepare for the next search.
+        auto [mstrTag, mstrVal] = value::getObjectView(matchVal)->getField("match");
+        auto matchString = value::getStringView(mstrTag, mstrVal);
+        if (matchString.empty()) {
+            // An empty match may be reported at the very end of the input (for example by the
+            // pattern "$"). There is no code point left to step over then, and indexing the
+            // string view at its size would be out of bounds, so the search is complete.
+            if (startBytePos >= inputString.size()) {
+                break;
+            }
+            startBytePos += str::getCodePointLength(inputString[startBytePos]);
+            ++codePointPos;
+        } else {
+            startBytePos += matchString.length();
+            for (size_t byteIdx = 0; byteIdx < matchString.length(); ++codePointPos) {
+                byteIdx += str::getCodePointLength(matchString[byteIdx]);
+            }
+        }
+    } while (startBytePos < inputString.size());
+
+    arrGuard.reset();
+    return {true, arrTag, arrVal};
+}
+
std::tuple<bool, value::TypeTags, value::Value> ByteCode::dispatchBuiltin(Builtin f,
ArityType arity) {
switch (f) {
@@ -2045,6 +2315,12 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::dispatchBuiltin(Builti
return builtinSetDifference(arity);
case Builtin::runJsPredicate:
return builtinRunJsPredicate(arity);
+ case Builtin::regexCompile:
+ return builtinRegexCompile(arity);
+ case Builtin::regexFind:
+ return builtinRegexFind(arity);
+ case Builtin::regexFindAll:
+ return builtinRegexFindAll(arity);
}
MONGO_UNREACHABLE;
diff --git a/src/mongo/db/exec/sbe/vm/vm.h b/src/mongo/db/exec/sbe/vm/vm.h
index 1b24904b08c..236681a9a02 100644
--- a/src/mongo/db/exec/sbe/vm/vm.h
+++ b/src/mongo/db/exec/sbe/vm/vm.h
@@ -231,6 +231,9 @@ enum class Builtin : uint8_t {
setIntersection,
setDifference,
runJsPredicate,
+ regexCompile, // compile <pattern, options> into value::pcreRegex
+ regexFind,
+ regexFindAll,
};
using SmallArityType = uint8_t;
@@ -582,6 +585,9 @@ private:
std::tuple<bool, value::TypeTags, value::Value> builtinSetIntersection(ArityType arity);
std::tuple<bool, value::TypeTags, value::Value> builtinSetDifference(ArityType arity);
std::tuple<bool, value::TypeTags, value::Value> builtinRunJsPredicate(ArityType arity);
+ std::tuple<bool, value::TypeTags, value::Value> builtinRegexCompile(ArityType arity);
+ std::tuple<bool, value::TypeTags, value::Value> builtinRegexFind(ArityType arity);
+ std::tuple<bool, value::TypeTags, value::Value> builtinRegexFindAll(ArityType arity);
std::tuple<bool, value::TypeTags, value::Value> dispatchBuiltin(Builtin f, ArityType arity);
diff --git a/src/mongo/db/pipeline/expression.cpp b/src/mongo/db/pipeline/expression.cpp
index 49f55dda847..8ccff46430e 100644
--- a/src/mongo/db/pipeline/expression.cpp
+++ b/src/mongo/db/pipeline/expression.cpp
@@ -3132,6 +3132,7 @@ Value ExpressionIndexOfCP::evaluate(const Document& root, Variables* variables)
if (stringHasTokenAtIndex(byteIx, input, token)) {
return Value(static_cast<int>(currentCodePointIndex));
}
+
byteIx += str::getCodePointLength(input[byteIx]);
}
@@ -6335,6 +6336,61 @@ void ExpressionRegex::_doAddDependencies(DepsTracker* deps) const {
}
}
+/**
+ * Extracts the regex pattern and options when both are constant expressions.
+ *
+ * Returns {boost::none, ""} if either '_regex' or '_options' is not a constant. If the constant
+ * pattern is nullish, the returned pattern is boost::none and only the options string is filled.
+ * Fails with a uassert when the constant pattern is neither nullish, a string nor a regex; when a
+ * regex pattern carries flags while an 'options' expression is also present; or when the pattern
+ * or the options contain an embedded null byte.
+ */
+std::pair<boost::optional<std::string>, std::string> ExpressionRegex::getConstantPatternAndOptions()
+    const {
+    if (!ExpressionConstant::isNullOrConstant(_regex) ||
+        !ExpressionConstant::isNullOrConstant(_options)) {
+        return {boost::none, ""};
+    }
+    auto patternValue = static_cast<ExpressionConstant*>(_regex.get())->getValue();
+    uassert(5073405,
+            str::stream() << _opName << " needs 'regex' to be of type string or regex",
+            patternValue.nullish() || patternValue.getType() == BSONType::RegEx ||
+                patternValue.getType() == BSONType::String);
+    auto patternStr = [&]() -> boost::optional<std::string> {
+        if (patternValue.getType() == BSONType::RegEx) {
+            StringData flags = patternValue.getRegexFlags();
+            uassert(5073406,
+                    str::stream()
+                        << _opName
+                        << ": found regex options specified in both 'regex' and 'options' fields",
+                    _options.get() == nullptr || flags.empty());
+            return std::string(patternValue.getRegex());
+        } else if (patternValue.getType() == BSONType::String) {
+            return patternValue.getString();
+        } else {
+            // Nullish pattern: there is no pattern string to report.
+            return boost::none;
+        }
+    }();
+
+    // An explicit string 'options' constant wins; otherwise fall back to the flags embedded in a
+    // regex pattern, and finally to the empty string.
+    auto optionsStr = [&]() -> std::string {
+        if (_options.get() != nullptr) {
+            auto optValue = static_cast<ExpressionConstant*>(_options.get())->getValue();
+            if (optValue.getType() == BSONType::String) {
+                return optValue.getString();
+            }
+        }
+        if (patternValue.getType() == BSONType::RegEx) {
+            StringData flags = patternValue.getRegexFlags();
+            if (!flags.empty()) {
+                return flags.toString();
+            }
+        }
+        return {};
+    }();
+
+    // 'patternStr' is boost::none for a nullish pattern, so it must not be dereferenced
+    // unconditionally; only validate the pattern when one is actually present.
+    uassert(5073407,
+            str::stream() << _opName << ": regular expression cannot contain an embedded null byte",
+            !patternStr || patternStr->find('\0', 0) == std::string::npos);
+
+    uassert(5073408,
+            str::stream() << _opName
+                          << ": regular expression options cannot contain an embedded null byte",
+            optionsStr.find('\0', 0) == std::string::npos);
+
+    return {patternStr, optionsStr};
+}
+
/* -------------------------- ExpressionRegexFind ------------------------------ */
REGISTER_EXPRESSION(regexFind, ExpressionRegexFind::parse);
diff --git a/src/mongo/db/pipeline/expression.h b/src/mongo/db/pipeline/expression.h
index b9c221bf71e..0e614dfb8be 100644
--- a/src/mongo/db/pipeline/expression.h
+++ b/src/mongo/db/pipeline/expression.h
@@ -2898,6 +2898,16 @@ public:
return _initialExecStateForConstantRegex.has_value();
}
+    /**
+     * Reports whether an 'options' sub-expression is attached to this regex expression.
+     */
+    bool hasOptions() const {
+        const bool optionsPresent = _options.get() != nullptr;
+        return optionsPresent;
+    }
+
+ /**
+ * Return regex pattern and options in case they are constants. Return pattern boost::none in
+ * case the pattern or options are not constants, or if the pattern is null.
+ */
+ std::pair<boost::optional<std::string>, std::string> getConstantPatternAndOptions() const;
+
Value serialize(bool explain) const;
const std::string& getOpName() const {
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
index d28dd909032..fa90e211989 100644
--- a/src/mongo/db/query/get_executor.cpp
+++ b/src/mongo/db/query/get_executor.cpp
@@ -907,7 +907,7 @@ protected:
std::unique_ptr<SlotBasedPrepareExecutionResult> buildIdHackPlan(
const IndexDescriptor* descriptor, QueryPlannerParams* plannerParams) final {
uassert(4822862,
- "IDHack plan is not supprted by SBE yet",
+ "IDHack plan is not supported by SBE yet",
!(_cq->metadataDeps()[DocumentMetadataFields::kSortKey] ||
_cq->getQueryRequest().returnKey() || _cq->getProj()));
diff --git a/src/mongo/db/query/sbe_stage_builder_expression.cpp b/src/mongo/db/query/sbe_stage_builder_expression.cpp
index 31f7c87ac8a..42b7a086e90 100644
--- a/src/mongo/db/query/sbe_stage_builder_expression.cpp
+++ b/src/mongo/db/query/sbe_stage_builder_expression.cpp
@@ -298,10 +298,6 @@ void generateStringCaseConversionExpression(ExpressionVisitorContext* _context,
sbe::makeE<sbe::ELocalBind>(frameId, std::move(str), std::move(totalCaseConversionExpr)));
}
-std::unique_ptr<sbe::EExpression> makeNot(std::unique_ptr<sbe::EExpression> e) {
- return sbe::makeE<sbe::EPrimUnary>(sbe::EPrimUnary::logicNot, std::move(e));
-}
-
void buildArrayAccessByConstantIndex(ExpressionVisitorContext* context,
const std::string& exprName,
int32_t index) {
@@ -330,6 +326,21 @@ void buildArrayAccessByConstantIndex(ExpressionVisitorContext* context,
sbe::makeE<sbe::ELocalBind>(frameId, std::move(binds), std::move(resultExpr)));
}
+/**
+ * Generate an EExpression representing a regex function result upon null argument(s). The value
+ * depends on the function named by 'exprName': $regexMatch - false, $regexFindAll - [], and
+ * $regexFind (or any other name) - null.
+ */
+std::unique_ptr<sbe::EExpression> generateRegexNullResponse(StringData exprName) {
+    // Compare the StringData directly instead of materializing temporary std::string copies.
+    if (exprName == "regexMatch") {
+        return sbe::makeE<sbe::EConstant>(sbe::value::TypeTags::Boolean,
+                                          sbe::value::bitcastFrom<bool>(false));
+    }
+    if (exprName == "regexFindAll") {
+        auto [arrTag, arrVal] = sbe::value::makeNewArray();
+        return sbe::makeE<sbe::EConstant>(arrTag, arrVal);
+    }
+    return sbe::makeE<sbe::EConstant>(sbe::value::TypeTags::Null, 0);
+}
+
class ExpressionPreVisitor final : public ExpressionVisitor {
public:
ExpressionPreVisitor(ExpressionVisitorContext* context) : _context{context} {}
@@ -1136,7 +1147,7 @@ public:
//
// 2) Check if the value in a given slot is an integral int64. This test is done by
// computing a lossless conversion of the value in s1 to an int64. The exposed
- // conversion function by the vm returns a value if there is no loss of precsision,
+ // conversion function by the vm returns a value if there is no loss of precision,
// otherwise it returns Nothing. In both the valid or Nothing case, we can store the result
// of the conversion in l2.0 of the inner let binding and test for existence. If the
// existence check fails we know the conversion is lossy and we can fail the query.
@@ -2116,13 +2127,13 @@ public:
unsupportedExpression("$convert");
}
void visit(ExpressionRegexFind* expr) final {
- unsupportedExpression("$regexFind");
+ generateRegexExpression(expr, "regexFind");
}
void visit(ExpressionRegexFindAll* expr) final {
- unsupportedExpression("$regexFind");
+ generateRegexExpression(expr, "regexFindAll");
}
void visit(ExpressionRegexMatch* expr) final {
- unsupportedExpression("$regexFind");
+ generateRegexExpression(expr, "regexMatch");
}
void visit(ExpressionCosine* expr) final {
generateTrigonometricExpressionWithBounds(
@@ -2701,6 +2712,86 @@ private:
sbe::makeE<sbe::ELocalBind>(frameId, std::move(binds), std::move(setExpr)));
}
+ /**
+ * Shared expression building logic for regex expressions.
+ *
+ * 'exprName' is the name of the SBE function to call ("regexFind", "regexFindAll" or
+ * "regexMatch" at the visible call sites) and is also used in the error messages.
+ *
+ * Expects three expressions on the context stack when 'expr' has options and two otherwise.
+ * They are popped in the order options (an EConstant("") stands in when absent), pattern,
+ * input.
+ *
+ * If getConstantPatternAndOptions() yields a pattern, the regex is created here with
+ * makeNewPcreRegex() and embedded as a constant. Otherwise a call to "regexCompile" is
+ * emitted, guarded so that a pattern matching generateNullOrMissing() produces null and a
+ * pattern matching generateNonStringCheck() fails with error 5073400.
+ *
+ * The expression finally pushed onto the context evaluates, in this order:
+ *  - generateRegexNullResponse(exprName) if the input matches generateNullOrMissing() or the
+ *    regex value is null;
+ *  - a failure with error 5073401 if the input matches generateNonStringCheck();
+ *  - a failure with error 5073402 if the regex value does not exist;
+ *  - otherwise the result of calling 'exprName' with (regex, input), or a failure with error
+ *    5073403 if that result does not exist.
+ */
+ void generateRegexExpression(ExpressionRegex* expr, StringData exprName) {
+ size_t arity = (expr->hasOptions()) ? 3 : 2;
+ _context->ensureArity(arity);
+
+ std::unique_ptr<sbe::EExpression> options =
+ (arity == 3) ? _context->popExpr() : sbe::makeE<sbe::EConstant>("");
+ auto pattern = _context->popExpr();
+ auto input = _context->popExpr();
+
+ auto pcreRegexExpr = [&]() {
+ auto [patternStr, optStr] = expr->getConstantPatternAndOptions();
+ if (patternStr) {
+ // Create the compiled Regex from constant pattern and options.
+ auto [regexTag, regexVal] = sbe::value::makeNewPcreRegex(patternStr.get(), optStr);
+ return sbe::makeE<sbe::EConstant>(regexTag, regexVal);
+ } else {
+ // Build a call to regexCompile function.
+ auto frameId = _context->frameIdGenerator->generate();
+ auto binds = sbe::makeEs(std::move(pattern));
+ sbe::EVariable patternRef(frameId, 0);
+
+ return sbe::makeE<sbe::ELocalBind>(
+ frameId,
+ std::move(binds),
+ buildMultiBranchConditional(
+ CaseValuePair{generateNullOrMissing(patternRef),
+ sbe::makeE<sbe::EConstant>(sbe::value::TypeTags::Null, 0)},
+ CaseValuePair{generateNonStringCheck(patternRef),
+ sbe::makeE<sbe::EFail>(ErrorCodes::Error{5073400},
+ str::stream()
+ << "$" << exprName.toString()
+ << " expects string pattern")},
+ sbe::makeE<sbe::EFunction>(
+ "regexCompile", sbe::makeEs(patternRef.clone(), std::move(options)))));
+ }
+ }();
+
+ auto outerFrameId = _context->frameIdGenerator->generate();
+ auto outerBinds = sbe::makeEs(std::move(pcreRegexExpr), std::move(input));
+ sbe::EVariable regexRef(outerFrameId, 0);
+ sbe::EVariable inputRef(outerFrameId, 1);
+ auto innerFrameId = _context->frameIdGenerator->generate();
+ sbe::EVariable resRef(innerFrameId, 0);
+
+ auto regexWithErrorCheck = buildMultiBranchConditional(
+ CaseValuePair{sbe::makeE<sbe::EPrimBinary>(
+ sbe::EPrimBinary::logicOr,
+ generateNullOrMissing(inputRef),
+ sbe::makeE<sbe::EFunction>("isNull", sbe::makeEs(regexRef.clone()))),
+ generateRegexNullResponse(exprName)},
+ CaseValuePair{generateNonStringCheck(inputRef),
+ sbe::makeE<sbe::EFail>(ErrorCodes::Error{5073401},
+ str::stream() << "$" << exprName.toString()
+ << " expects input of type string")},
+
+ CaseValuePair{
+ sbe::makeE<sbe::EPrimUnary>(
+ sbe::EPrimUnary::logicNot,
+ sbe::makeE<sbe::EFunction>("exists", sbe::makeEs(regexRef.clone()))),
+ sbe::makeE<sbe::EFail>(ErrorCodes::Error{5073402}, "Invalid regular expression")},
+ sbe::makeE<sbe::ELocalBind>(
+ innerFrameId,
+ sbe::makeEs(sbe::makeE<sbe::EFunction>(
+ exprName.toString(), sbe::makeEs(regexRef.clone(), inputRef.clone()))),
+ sbe::makeE<sbe::EIf>(
+ sbe::makeE<sbe::EFunction>("exists", sbe::makeEs(resRef.clone())),
+ resRef.clone(),
+ sbe::makeE<sbe::EFail>(ErrorCodes::Error{5073403},
+ str::stream()
+ << "Unexpected error occurred while executing "
+ << exprName.toString()
+ << ". For more details see the error logs."))));
+
+ _context->pushExpr(sbe::makeE<sbe::ELocalBind>(
+ outerFrameId, std::move(outerBinds), std::move(regexWithErrorCheck)));
+ }
+
void unsupportedExpression(const char* op) const {
uasserted(ErrorCodes::InternalErrorNotSupported,
str::stream() << "Expression is not supported in SBE: " << op);
diff --git a/src/mongo/db/query/sbe_stage_builder_filter.cpp b/src/mongo/db/query/sbe_stage_builder_filter.cpp
index df2f2e3e096..e9080637b95 100644
--- a/src/mongo/db/query/sbe_stage_builder_filter.cpp
+++ b/src/mongo/db/query/sbe_stage_builder_filter.cpp
@@ -1042,9 +1042,9 @@ public:
arr->reserve(regexes.size());
for (auto&& r : regexes) {
- auto regex = RegexMatchExpression::makeRegex(r->getString(), r->getFlags());
- arr->push_back(sbe::value::TypeTags::pcreRegex,
- sbe::value::bitcastFrom<pcrecpp::RE*>(regex.release()));
+ auto [regexTag, regexVal] =
+ sbe::value::makeNewPcreRegex(r->getString(), r->getFlags());
+ arr->push_back(regexTag, regexVal);
}
auto makePredicate =
@@ -1214,16 +1214,14 @@ public:
void visit(const RegexMatchExpression* expr) final {
auto makePredicate = [expr](sbe::value::SlotId inputSlot,
EvalStage inputStage) -> EvalExprStagePair {
- auto regex = RegexMatchExpression::makeRegex(expr->getString(), expr->getFlags());
- auto ownedRegexVal = sbe::value::bitcastFrom<pcrecpp::RE*>(regex.release());
-
+ auto [regexTag, regexVal] =
+ sbe::value::makeNewPcreRegex(expr->getString(), expr->getFlags());
// TODO: In the future, this needs to account for the fact that the regex match
// expression matches strings, but also matches stored regexes. For example,
// {$match: {a: /foo/}} matches the document {a: /foo/} in addition to {a: "foobar"}.
return {makeFillEmptyFalse(sbe::makeE<sbe::EFunction>(
"regexMatch",
- sbe::makeEs(sbe::makeE<sbe::EConstant>(sbe::value::TypeTags::pcreRegex,
- ownedRegexVal),
+ sbe::makeEs(sbe::makeE<sbe::EConstant>(regexTag, regexVal),
sbe::makeE<sbe::EVariable>(inputSlot)))),
std::move(inputStage)};
};