diff options
author | Jennifer Peshansky <jennifer.peshansky@mongodb.com> | 2022-06-27 13:30:46 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-06-27 14:52:18 +0000 |
commit | 468f41278b6b30aa602e81010cf7ef7973d97e4d (patch) | |
tree | 82c1c168aa27fce91b4e39759ced055401f05de2 /src | |
parent | 16924398f1f7ebc78b94a42371f156de4a3b10ae (diff) | |
download | mongo-468f41278b6b30aa602e81010cf7ef7973d97e4d.tar.gz |
SERVER-67162 Integrate new PCRE2 wrapper
Diffstat (limited to 'src')
54 files changed, 367 insertions, 647 deletions
diff --git a/src/mongo/SConscript b/src/mongo/SConscript index 6cb7d85b326..9ef161c8bd6 100644 --- a/src/mongo/SConscript +++ b/src/mongo/SConscript @@ -233,7 +233,6 @@ baseEnv.Library( ], LIBDEPS_PRIVATE=[ '$BUILD_DIR/third_party/shim_intel_decimal128', - '$BUILD_DIR/third_party/shim_pcrecpp', '$BUILD_DIR/third_party/shim_unwind' if use_libunwind else [], 'stdx/stdx', 'util/boost_assert_shim', diff --git a/src/mongo/db/catalog/SConscript b/src/mongo/db/catalog/SConscript index d16f0c9a8a8..53461bbf1e9 100644 --- a/src/mongo/db/catalog/SConscript +++ b/src/mongo/db/catalog/SConscript @@ -665,6 +665,7 @@ if wiredtiger: '$BUILD_DIR/mongo/unittest/unittest', '$BUILD_DIR/mongo/util/clock_source_mock', '$BUILD_DIR/mongo/util/fail_point', + '$BUILD_DIR/mongo/util/pcre_wrapper', 'catalog_control', 'catalog_helpers', 'catalog_test_fixture', diff --git a/src/mongo/db/catalog/database_test.cpp b/src/mongo/db/catalog/database_test.cpp index 8346d0f1a29..1df4bf7fa1b 100644 --- a/src/mongo/db/catalog/database_test.cpp +++ b/src/mongo/db/catalog/database_test.cpp @@ -31,7 +31,6 @@ #include <boost/optional/optional_io.hpp> #include <memory> -#include <pcrecpp.h> #include "mongo/bson/util/builder.h" #include "mongo/db/catalog/collection_catalog.h" @@ -57,6 +56,7 @@ #include "mongo/db/repl/storage_interface_mock.h" #include "mongo/db/service_context_d_test_fixture.h" #include "mongo/unittest/unittest.h" +#include "mongo/util/pcre.h" #include "mongo/util/scopeguard.h" namespace mongo { @@ -350,10 +350,11 @@ TEST_F(DatabaseTest, MakeUniqueCollectionNamespaceReplacesPercentSignsWithRandom ASSERT_TRUE(db); auto model = "tmp%%%%"_sd; - pcrecpp::RE re(_nss.db() + "\\.tmp[0-9A-Za-z][0-9A-Za-z][0-9A-Za-z][0-9A-Za-z]"); + pcre::Regex re(_nss.db() + "\\.tmp[0-9A-Za-z][0-9A-Za-z][0-9A-Za-z][0-9A-Za-z]", + pcre::ANCHORED | pcre::ENDANCHORED); auto nss1 = unittest::assertGet(db->makeUniqueCollectionNamespace(_opCtx.get(), model)); - if (!re.FullMatch(nss1.ns())) { + if (!re.matchView(nss1.ns())) { FAIL((StringBuilder() << "First generated namespace \"" << nss1.ns() << "\" does not match regular expression \"" << re.pattern() << "\"") @@ -370,7 +371,7 @@ TEST_F(DatabaseTest, MakeUniqueCollectionNamespaceReplacesPercentSignsWithRandom } auto nss2 = unittest::assertGet(db->makeUniqueCollectionNamespace(_opCtx.get(), model)); - if (!re.FullMatch(nss2.ns())) { + if (!re.matchView(nss2.ns())) { FAIL((StringBuilder() << "Second generated namespace \"" << nss2.ns() << "\" does not match regular expression \"" << re.pattern() << "\"") diff --git a/src/mongo/db/exec/sbe/SConscript b/src/mongo/db/exec/sbe/SConscript index 6ee97450f2b..485ab55fa82 100644 --- a/src/mongo/db/exec/sbe/SConscript +++ b/src/mongo/db/exec/sbe/SConscript @@ -28,7 +28,7 @@ env.Library( '$BUILD_DIR/mongo/db/query/datetime/date_time_support', '$BUILD_DIR/mongo/db/query/query_index_bounds', '$BUILD_DIR/mongo/db/storage/key_string', - '$BUILD_DIR/mongo/util/regex_util', + '$BUILD_DIR/mongo/util/pcre_util', ], ) diff --git a/src/mongo/db/exec/sbe/expressions/sbe_regex_test.cpp b/src/mongo/db/exec/sbe/expressions/sbe_regex_test.cpp index 8486a7c65f8..e81dcd5627e 100644 --- a/src/mongo/db/exec/sbe/expressions/sbe_regex_test.cpp +++ b/src/mongo/db/exec/sbe/expressions/sbe_regex_test.cpp @@ -28,6 +28,7 @@ */ #include "mongo/db/exec/sbe/expression_test_base.h" +#include "mongo/util/pcre_util.h" namespace mongo::sbe { class SBERegexTest : public EExpressionTestFixture { @@ -39,7 +40,8 @@ protected: ASSERT_EQUALS(value::TypeTags::pcreRegex, tag); auto regex = value::getPcreRegexView(val); - std::string res = str::stream() << "/" << regex->pattern() << "/" << regex->options(); + std::string res = str::stream() + << "/" << regex->pattern() << "/" << pcre_util::optionsToFlags(regex->options()); ASSERT_EQUALS(res, regexString); } diff --git a/src/mongo/db/exec/sbe/values/slot.cpp b/src/mongo/db/exec/sbe/values/slot.cpp index 45cbc977980..0274df1b6e7 100644 --- a/src/mongo/db/exec/sbe/values/slot.cpp +++ b/src/mongo/db/exec/sbe/values/slot.cpp @@ -683,7 +683,7 @@ int getApproximateSize(TypeTags tag, Value val) { result += ConstDataView(getRawPointerView(val)).read<LittleEndian<uint32_t>>(); break; case TypeTags::pcreRegex: - result += getPcreRegexView(val)->getApproximateSize(); + result += getPcreRegexView(val)->codeSize(); break; case TypeTags::timeZoneDB: // This type points to a block of memory that it doesn't own, so we don't acccount diff --git a/src/mongo/db/exec/sbe/values/value.cpp b/src/mongo/db/exec/sbe/values/value.cpp index 5bbdc40170e..bff73fc7046 100644 --- a/src/mongo/db/exec/sbe/values/value.cpp +++ b/src/mongo/db/exec/sbe/values/value.cpp @@ -40,7 +40,8 @@ #include "mongo/db/query/collation/collator_interface.h" #include "mongo/db/query/datetime/date_time_support.h" #include "mongo/db/storage/key_string.h" -#include "mongo/util/regex_util.h" +#include "mongo/util/errno_util.h" +#include "mongo/util/pcre_util.h" namespace mongo { namespace sbe { @@ -134,45 +135,15 @@ std::pair<TypeTags, Value> makeCopyKeyString(const KeyString::Value& inKey) { } std::pair<TypeTags, Value> makeNewPcreRegex(StringData pattern, StringData options) { - auto regex = std::make_unique<PcreRegex>(pattern, options); - return {TypeTags::pcreRegex, bitcastFrom<PcreRegex*>(regex.release())}; + auto regex = + std::make_unique<pcre::Regex>(std::string{pattern}, pcre_util::flagsToOptions(options)); + uassert(5073402, str::stream() << "Invalid Regex: " << errorMessage(regex->error()), *regex); + return {TypeTags::pcreRegex, bitcastFrom<pcre::Regex*>(regex.release())}; } -std::pair<TypeTags, Value> makeCopyPcreRegex(const PcreRegex& regex) { - auto regexCopy = std::make_unique<PcreRegex>(regex); - return {TypeTags::pcreRegex, bitcastFrom<PcreRegex*>(regexCopy.release())}; -} - -void PcreRegex::_compile() { - const auto pcreOptions = regex_util::flagsToPcreOptions(_options.c_str()).all_options(); - const char* compile_error; - int eoffset; - _pcrePtr = pcre_compile(_pattern.c_str(), pcreOptions, &compile_error, &eoffset, nullptr); - uassert(5073402, str::stream() << "Invalid Regex: " << compile_error, _pcrePtr != nullptr); -} - -int PcreRegex::execute(StringData stringView, int startPos, std::vector<int>& buf) { - return pcre_exec(_pcrePtr, - nullptr, - stringView.rawData(), - stringView.size(), - startPos, - 0, - &(buf.front()), - buf.size()); -} - -size_t PcreRegex::getNumberCaptures() const { - int numCaptures; - pcre_fullinfo(_pcrePtr, nullptr, PCRE_INFO_CAPTURECOUNT, &numCaptures); - invariant(numCaptures >= 0); - return static_cast<size_t>(numCaptures); -} - -size_t PcreRegex::getApproximateSize() const { - size_t pcreSize; - pcre_fullinfo(_pcrePtr, nullptr, PCRE_INFO_SIZE, &pcreSize); - return sizeof(PcreRegex) + _pattern.size() + 1 + _options.size() + 1 + pcreSize; +std::pair<TypeTags, Value> makeCopyPcreRegex(const pcre::Regex& regex) { + auto regexCopy = std::make_unique<pcre::Regex>(regex); + return {TypeTags::pcreRegex, bitcastFrom<pcre::Regex*>(regexCopy.release())}; } KeyString::Value SortSpec::generateSortKey(const BSONObj& obj, const CollatorInterface* collator) { diff --git a/src/mongo/db/exec/sbe/values/value.h b/src/mongo/db/exec/sbe/values/value.h index d0202b0f1c0..6ec0652cdf5 100644 --- a/src/mongo/db/exec/sbe/values/value.h +++ b/src/mongo/db/exec/sbe/values/value.h @@ -36,7 +36,6 @@ #include <boost/predef/hardware/simd.h> #include <cstdint> #include <ostream> -#include <pcre.h> #include <string> #include <utility> #include <vector> @@ -52,6 +51,7 @@ #include "mongo/platform/decimal128.h" #include "mongo/platform/endian.h" #include "mongo/util/assert_util.h" +#include "mongo/util/pcre.h" #include "mongo/util/represent_as.h" namespace mongo { @@ -874,68 +874,6 @@ private: bool operator==(const ArraySet& lhs, const ArraySet& rhs); bool operator!=(const ArraySet& lhs, const ArraySet& rhs); -/** - * Implements a wrapper of PCRE regular expression. - * Storing the pattern and the options allows for copying of the sbe::value::PcreRegex expression, - * which includes recompilation. - * The compiled expression pcre* allows for direct usage of the pcre C library functionality. - */ -class PcreRegex { -public: - PcreRegex(StringData pattern, StringData options) : _pattern(pattern), _options(options) { - _compile(); - } - - PcreRegex(const PcreRegex& other) : PcreRegex(other._pattern, other._options) {} - - PcreRegex& operator=(const PcreRegex& other) { - if (this != &other) { - (*pcre_free)(_pcrePtr); - _pattern = other._pattern; - _options = other._options; - _compile(); - } - return *this; - } - - ~PcreRegex() { - (*pcre_free)(_pcrePtr); - } - - const std::string& pattern() const { - return _pattern; - } - - const std::string& options() const { - return _options; - } - - /** - * Wrapper function for pcre_exec(). - * - input: The input string. - * - startPos: The position from where the search should start. - * - buf: Array populated with the found matched string and capture groups. - * Returns the number of matches or an error code: - * < -1 error - * = -1 no match - * = 0 there was a match, but not enough space in the buffer - * > 0 the number of matches - */ - int execute(StringData input, int startPos, std::vector<int>& buf); - - size_t getNumberCaptures() const; - - size_t getApproximateSize() const; - -private: - void _compile(); - - std::string _pattern; - std::string _options; - - pcre* _pcrePtr = nullptr; -}; - constexpr size_t kSmallStringMaxLength = 7; using ObjectIdType = std::array<uint8_t, 12>; static_assert(sizeof(ObjectIdType) == 12); @@ -1218,10 +1156,10 @@ inline KeyString::Value* getKeyStringView(Value val) noexcept { std::pair<TypeTags, Value> makeNewPcreRegex(StringData pattern, StringData options); -std::pair<TypeTags, Value> makeCopyPcreRegex(const PcreRegex& regex); +std::pair<TypeTags, Value> makeCopyPcreRegex(const pcre::Regex& regex); -inline PcreRegex* getPcreRegexView(Value val) noexcept { - return reinterpret_cast<PcreRegex*>(val); +inline pcre::Regex* getPcreRegexView(Value val) noexcept { + return reinterpret_cast<pcre::Regex*>(val); } inline JsFunction* getJsFunctionView(Value val) noexcept { diff --git a/src/mongo/db/exec/sbe/values/value_printer.cpp b/src/mongo/db/exec/sbe/values/value_printer.cpp index 90a43442329..a04d1407930 100644 --- a/src/mongo/db/exec/sbe/values/value_printer.cpp +++ b/src/mongo/db/exec/sbe/values/value_printer.cpp @@ -30,6 +30,7 @@ #include "mongo/db/exec/sbe/values/sort_spec.h" #include "mongo/db/exec/sbe/values/value.h" #include "mongo/platform/basic.h" +#include "mongo/util/pcre_util.h" namespace mongo::sbe::value { @@ -405,7 +406,8 @@ void ValuePrinter<T>::writeValueToStream(TypeTags tag, Value val, size_t depth) } case TypeTags::pcreRegex: { auto regex = getPcreRegexView(val); - stream << "PcreRegex(/" << regex->pattern() << "/" << regex->options() << ")"; + stream << "PcreRegex(/" << regex->pattern() << "/" + << pcre_util::optionsToFlags(regex->options()) << ")"; break; } case TypeTags::timeZoneDB: { diff --git a/src/mongo/db/exec/sbe/vm/vm.cpp b/src/mongo/db/exec/sbe/vm/vm.cpp index 4f9329e7ed6..6b207f89d85 100644 --- a/src/mongo/db/exec/sbe/vm/vm.cpp +++ b/src/mongo/db/exec/sbe/vm/vm.cpp @@ -33,7 +33,6 @@ #include "mongo/db/exec/sbe/vm/vm.h" #include <boost/algorithm/string.hpp> -#include <pcre.h> #include "mongo/bson/oid.h" #include "mongo/db/client.h" @@ -52,6 +51,7 @@ #include "mongo/db/storage/key_string.h" #include "mongo/logv2/log.h" #include "mongo/util/fail_point.h" +#include "mongo/util/pcre.h" #include "mongo/util/str.h" #include "mongo/util/summation.h" @@ -1160,7 +1160,8 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::getArraySize(value::Ty } break; } - default: { return {false, value::TypeTags::Nothing, 0}; } + default: + return {false, value::TypeTags::Nothing, 0}; } return {false, value::TypeTags::NumberInt64, value::bitcastFrom<int64_t>(result)}; @@ -3670,81 +3671,57 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinSetEquals(Arity namespace { /** - * A helper function to create the result object {"match" : .., "idx" : ..., "captures" : - * ...} from the result of pcre_exec(). + * A helper function to extract the next match in the subject string using the compiled regex + * pattern. + * - pcre: The wrapper object containing the compiled pcre expression + * - inputString: The subject string. + * - startBytePos: The position from where the search should start given in bytes. + * - codePointPos: The same position in terms of code points. + * - isMatch: Boolean flag to mark if the caller function is $regexMatch, in which case the result + * returned is true/false. */ -std::tuple<bool, value::TypeTags, value::Value> buildRegexMatchResultObject( - StringData inputString, - const std::vector<int>& capturesBuffer, - size_t numCaptures, - uint32_t& startBytePos, - uint32_t& codePointPos) { - - auto verifyBounds = [&inputString](auto startPos, auto limitPos, auto isCapture) { - // If a capture group was not matched, then the 'startPos' and 'limitPos' will both be -1. - // These bounds cannot occur for a match on the full string. - if (startPos == -1 && limitPos == -1 && isCapture) { - return true; - } - if (startPos == -1 || limitPos == -1) { - LOGV2_ERROR(5073412, - "Unexpected error occurred while executing regexFind.", - "startPos"_attr = startPos, - "limitPos"_attr = limitPos); - return false; - } - if (startPos < 0 || static_cast<size_t>(startPos) > inputString.size() || limitPos < 0 || - static_cast<size_t>(limitPos) > inputString.size() || startPos > limitPos) { - LOGV2_ERROR(5073413, - "Unexpected error occurred while executing regexFind.", - "startPos"_attr = startPos, - "limitPos"_attr = limitPos); - return false; - } - return true; - }; - - // Extract the matched string: its start and (end+1) indices are in the first two elements of - // capturesBuffer. - if (!verifyBounds(capturesBuffer[0], capturesBuffer[1], false)) { +std::tuple<bool, value::TypeTags, value::Value> pcreNextMatch(pcre::Regex* pcre, + StringData inputString, + uint32_t& startBytePos, + uint32_t& codePointPos, + bool isMatch) { + pcre::MatchData m = pcre->matchView(inputString, {}, startBytePos); + if (!m && m.error() != pcre::Errc::ERROR_NOMATCH) { + LOGV2_ERROR(5073414, + "Error occurred while executing regular expression.", + "execResult"_attr = errorMessage(m.error())); return {false, value::TypeTags::Nothing, 0}; } - auto matchStartIdx = capturesBuffer[0]; - auto matchedString = inputString.substr(matchStartIdx, capturesBuffer[1] - matchStartIdx); - auto [matchedTag, matchedVal] = value::makeNewString(matchedString); - value::ValueGuard matchedGuard{matchedTag, matchedVal}; - // We iterate through the input string's contents preceding the match index, in order to convert - // the byte offset to a code point offset. - for (auto byteIdx = startBytePos; byteIdx < static_cast<uint32_t>(matchStartIdx); - ++codePointPos) { - byteIdx += str::getCodePointLength(inputString[byteIdx]); + if (isMatch) { + // $regexMatch returns true or false. + return {false, value::TypeTags::Boolean, value::bitcastFrom<bool>(!!m)}; + } + // $regexFind and $regexFindAll build result object or return null. + if (!m) { + return {false, value::TypeTags::Null, 0}; } - startBytePos = matchStartIdx; + + // Create the result object {"match" : .., "idx" : ..., "captures" : ...} + // from the pcre::MatchData. + auto [matchedTag, matchedVal] = value::makeNewString(m[0]); + value::ValueGuard matchedGuard{matchedTag, matchedVal}; + + StringData precedesMatch(m.input().begin() + m.startPos(), m[0].begin()); + codePointPos += str::lengthInUTF8CodePoints(precedesMatch); + startBytePos += precedesMatch.size(); auto [arrTag, arrVal] = value::makeNewArray(); value::ValueGuard arrGuard{arrTag, arrVal}; auto arrayView = value::getArrayView(arrVal); - // The next '2 * numCaptures' entries (after the first two entries) of 'capturesBuffer' - // hold the (start, limit) pairs of indexes, for each of the capture groups. We skip the first - // two elements and start iteration from 3rd element so that we only construct the strings for - // capture groups. - if (numCaptures) { - arrayView->reserve(numCaptures); - for (size_t i = 0; i < numCaptures; ++i) { - const auto start = capturesBuffer[2 * (i + 1)]; - const auto limit = capturesBuffer[2 * (i + 1) + 1]; - if (!verifyBounds(start, limit, true)) { - return {false, value::TypeTags::Nothing, 0}; - } - - if (start == -1 && limit == -1) { - arrayView->push_back(value::TypeTags::Null, 0); - } else { - auto captureString = inputString.substr(start, limit - start); - auto [tag, val] = value::makeNewString(captureString); - arrayView->push_back(tag, val); - } + arrayView->reserve(m.captureCount()); + for (size_t i = 0; i < m.captureCount(); ++i) { + StringData cap = m[i + 1]; + if (!cap.rawData()) { + arrayView->push_back(value::TypeTags::Null, 0); + } else { + auto [tag, val] = value::makeNewString(cap); + arrayView->push_back(tag, val); } } @@ -3763,75 +3740,6 @@ std::tuple<bool, value::TypeTags, value::Value> buildRegexMatchResultObject( } /** - * A helper function to extract the next match in the subject string using the compiled regex - * pattern. - * - pcre: The wrapper object containing the compiled pcre expression - * - inputString: The subject string. - * - capturesBuffer: Array to be populated with the found matched string and capture groups. - * - startBytePos: The position from where the search should start given in bytes. - * - codePointPos: The same position in terms of code points. - * - isMatch: Boolean flag to mark if the caller function is $regexMatch, in which case the result - * returned is true/false. - */ -std::tuple<bool, value::TypeTags, value::Value> pcreNextMatch(value::PcreRegex* pcre, - StringData inputString, - std::vector<int>& capturesBuffer, - uint32_t& startBytePos, - uint32_t& codePointPos, - bool isMatch = false) { - auto execResult = pcre->execute(inputString, startBytePos, capturesBuffer); - - auto numCaptures = pcre->getNumberCaptures(); - if (execResult < -1 || execResult > static_cast<int>(numCaptures) + 1) { - LOGV2_ERROR(5073414, - "Error occurred while executing regular expression.", - "execResult"_attr = execResult); - return {false, value::TypeTags::Nothing, 0}; - } - - if (isMatch) { - // $regexMatch returns true or false. - bool match = (execResult != PCRE_ERROR_NOMATCH); - return {false, value::TypeTags::Boolean, value::bitcastFrom<bool>(match)}; - } else { - // $regexFind and $regexFindAll build result object or return null. - if (execResult == PCRE_ERROR_NOMATCH) { - return {false, value::TypeTags::Null, 0}; - } - return buildRegexMatchResultObject( - inputString, capturesBuffer, numCaptures, startBytePos, codePointPos); - } -} - -/** - * A helper function to extract the first match in the subject string using the compiled regex - * pattern. See 'pcreNextMatch' function for parameters description. - */ -std::tuple<bool, value::TypeTags, value::Value> pcreFirstMatch( - value::PcreRegex* pcre, - StringData inputString, - bool isMatch = false, - std::vector<int>* capturesBuffer = nullptr, - uint32_t* startBytePos = nullptr, - uint32_t* codePointPos = nullptr) { - std::vector<int> tmpCapturesBuffer; - uint32_t tmpStartBytePos = 0; - uint32_t tmpCodePointPos = 0; - - capturesBuffer = capturesBuffer ? capturesBuffer : &tmpCapturesBuffer; - startBytePos = startBytePos ? startBytePos : &tmpStartBytePos; - codePointPos = codePointPos ? codePointPos : &tmpCodePointPos; - - // The first two-thirds of the capturesBuffer is used to pass back captured substrings' start - // and (end+1) indexes. The remaining third of the vector is used as workspace by pcre_exec() - // while matching capturing subpatterns, and is not available for passing back information. - auto numCaptures = pcre->getNumberCaptures(); - capturesBuffer->resize((1 + numCaptures) * 3); - - return pcreNextMatch(pcre, inputString, *capturesBuffer, *startBytePos, *codePointPos, isMatch); -} - -/** * A helper function with common logic for $regexMatch and $regexFind functions. Both extract only * the first match to a regular expression, but return different result objects. */ @@ -3848,7 +3756,9 @@ std::tuple<bool, value::TypeTags, value::Value> genericPcreRegexSingleMatch( auto inputString = value::getStringOrSymbolView(typeTagInputStr, valueInputStr); auto pcreRegex = value::getPcreRegexView(valuePcreRegex); - return pcreFirstMatch(pcreRegex, inputString, isMatch); + uint32_t startBytePos = 0; + uint32_t codePointPos = 0; + return pcreNextMatch(pcreRegex, inputString, startBytePos, codePointPos, isMatch); } std::pair<value::TypeTags, value::Value> collComparisonKey(value::TypeTags tag, @@ -3934,10 +3844,8 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinRegexFindAll(Ar auto inputString = value::getStringView(typeTagInputStr, valueInputStr); auto pcre = value::getPcreRegexView(valuePcreRegex); - std::vector<int> capturesBuffer; uint32_t startBytePos = 0; uint32_t codePointPos = 0; - bool isFirstMatch = true; // Prepare the result array of matching objects. auto [arrTag, arrVal] = value::makeNewArray(); @@ -3946,14 +3854,8 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinRegexFindAll(Ar int resultSize = 0; do { - auto [_, matchTag, matchVal] = [&]() { - if (isFirstMatch) { - isFirstMatch = false; - return pcreFirstMatch( - pcre, inputString, false, &capturesBuffer, &startBytePos, &codePointPos); - } - return pcreNextMatch(pcre, inputString, capturesBuffer, startBytePos, codePointPos); - }(); + auto [_, matchTag, matchVal] = + pcreNextMatch(pcre, inputString, startBytePos, codePointPos, false); value::ValueGuard matchGuard{matchTag, matchVal}; if (matchTag == value::TypeTags::Null) { diff --git a/src/mongo/db/matcher/SConscript b/src/mongo/db/matcher/SConscript index a53845a04f4..115a323091f 100644 --- a/src/mongo/db/matcher/SConscript +++ b/src/mongo/db/matcher/SConscript @@ -75,8 +75,7 @@ env.Library( '$BUILD_DIR/mongo/db/query/query_knobs', '$BUILD_DIR/mongo/db/stats/counters', '$BUILD_DIR/mongo/idl/idl_parser', - '$BUILD_DIR/mongo/util/regex_util', - '$BUILD_DIR/third_party/shim_pcrecpp', + '$BUILD_DIR/mongo/util/pcre_wrapper', 'path', ], ) diff --git a/src/mongo/db/matcher/doc_validation_error.cpp b/src/mongo/db/matcher/doc_validation_error.cpp index 3810b190132..85b4943f538 100644 --- a/src/mongo/db/matcher/doc_validation_error.cpp +++ b/src/mongo/db/matcher/doc_validation_error.cpp @@ -530,7 +530,8 @@ BSONArray findAdditionalProperties(const BSONObj& doc, if (!properties.contains(fieldName)) { bool additional = true; for (auto&& pattern : patternProperties) { - if (pattern.first.regex->PartialMatch(fieldName.toString())) { + auto&& re = pattern.first.regex; + if (re && re->matchView(fieldName)) { additional = false; break; } @@ -583,7 +584,8 @@ BSONElement findFailingProperty(const InternalSchemaAllowedPropertiesMatchExpres auto filter = patternSchema.second->getFilter(); for (auto&& elem : ctx->getCurrentDocument()) { auto field = elem.fieldNameStringData(); - if (pattern.regex->PartialMatch(field.toString()) && !filter->matchesBSONElement(elem)) { + auto&& re = pattern.regex; + if (re && *re && re->matchView(field) && !filter->matchesBSONElement(elem)) { return elem; } } diff --git a/src/mongo/db/matcher/expression_leaf.cpp b/src/mongo/db/matcher/expression_leaf.cpp index 31157666e92..b5a6c0b1d8e 100644 --- a/src/mongo/db/matcher/expression_leaf.cpp +++ b/src/mongo/db/matcher/expression_leaf.cpp @@ -33,7 +33,6 @@ #include <cmath> #include <memory> -#include <pcrecpp.h> #include "mongo/bson/bsonelement_comparator.h" #include "mongo/bson/bsonmisc.h" @@ -44,7 +43,9 @@ #include "mongo/db/matcher/expression_parser.h" #include "mongo/db/matcher/path.h" #include "mongo/db/query/collation/collator_interface.h" -#include "mongo/util/regex_util.h" +#include "mongo/util/errno_util.h" +#include "mongo/util/pcre.h" +#include "mongo/util/pcre_util.h" #include "mongo/util/represent_as.h" #include "mongo/util/str.h" @@ -226,9 +227,9 @@ constexpr StringData GTEMatchExpression::kName; const std::set<char> RegexMatchExpression::kValidRegexFlags = {'i', 'm', 's', 'x'}; -std::unique_ptr<pcrecpp::RE> RegexMatchExpression::makeRegex(const std::string& regex, +std::unique_ptr<pcre::Regex> RegexMatchExpression::makeRegex(const std::string& regex, const std::string& flags) { - return std::make_unique<pcrecpp::RE>(regex.c_str(), regex_util::flagsToPcreOptions(flags)); + return std::make_unique<pcre::Regex>(regex, pcre_util::flagsToOptions(flags)); } RegexMatchExpression::RegexMatchExpression(StringData path, @@ -238,15 +239,15 @@ RegexMatchExpression::RegexMatchExpression(StringData path, : LeafMatchExpression(REGEX, path, std::move(annotation)), _regex(regex.toString()), _flags(options.toString()), - _re(new pcrecpp::RE(_regex.c_str(), regex_util::flagsToPcreOptions(_flags))) { + _re(makeRegex(_regex, _flags)) { uassert(ErrorCodes::BadValue, "Regular expression cannot contain an embedded null byte", _regex.find('\0') == std::string::npos); uassert(51091, - str::stream() << "Regular expression is invalid: " << _re->error(), - _re->error().empty()); + str::stream() << "Regular expression is invalid: " << errorMessage(_re->error()), + *_re); } RegexMatchExpression::~RegexMatchExpression() {} @@ -263,14 +264,8 @@ bool RegexMatchExpression::equivalent(const MatchExpression* other) const { bool RegexMatchExpression::matchesSingleElement(const BSONElement& e, MatchDetails* details) const { switch (e.type()) { case String: - case Symbol: { - // String values stored in documents can contain embedded NUL bytes. We construct a - // pcrecpp::StringPiece instance using the full length of the string to avoid truncating - // 'data' early. - auto stringData = e.valueStringData(); - pcrecpp::StringPiece data{stringData.rawData(), static_cast<int>(stringData.size())}; - return _re->PartialMatch(data); - } + case Symbol: + return !!_re->matchView(e.valueStringData()); case RegEx: return _regex == e.regex() && _flags == e.regexFlags(); default: diff --git a/src/mongo/db/matcher/expression_leaf.h b/src/mongo/db/matcher/expression_leaf.h index 46a80aa5e91..14fceae1f5f 100644 --- a/src/mongo/db/matcher/expression_leaf.h +++ b/src/mongo/db/matcher/expression_leaf.h @@ -42,10 +42,7 @@ #include "mongo/db/query/util/make_data_structure.h" #include "mongo/stdx/unordered_map.h" #include "mongo/util/assert_util.h" - -namespace pcrecpp { -class RE; -} // namespace pcrecpp +#include "mongo/util/pcre.h" namespace mongo { @@ -469,7 +466,7 @@ class RegexMatchExpression : public LeafMatchExpression { public: static const std::set<char> kValidRegexFlags; - static std::unique_ptr<pcrecpp::RE> makeRegex(const std::string& regex, + static std::unique_ptr<pcre::Regex> makeRegex(const std::string& regex, const std::string& flags); RegexMatchExpression(StringData path, Value e, clonable_ptr<ErrorAnnotation> annotation) @@ -554,7 +551,7 @@ private: std::string _regex; std::string _flags; - std::unique_ptr<pcrecpp::RE> _re; + std::unique_ptr<pcre::Regex> _re; boost::optional<InputParamId> _sourceRegexInputParamId; boost::optional<InputParamId> _compiledRegexInputParamId; diff --git a/src/mongo/db/matcher/expression_parser.cpp b/src/mongo/db/matcher/expression_parser.cpp index e6529908910..caef2981ef1 100644 --- a/src/mongo/db/matcher/expression_parser.cpp +++ b/src/mongo/db/matcher/expression_parser.cpp @@ -32,7 +32,6 @@ #include "mongo/db/matcher/expression_parser.h" #include <memory> -#include <pcrecpp.h> #include "mongo/base/init.h" #include "mongo/bson/bsonmisc.h" @@ -74,10 +73,9 @@ #include "mongo/util/str.h" #include "mongo/util/string_map.h" +namespace mongo { namespace { -using namespace mongo; - /** * Returns true if subtree contains MatchExpression 'type'. */ @@ -125,8 +123,6 @@ void addExpressionToRoot(const boost::intrusive_ptr<ExpressionContext>& expCtx, } } // namespace -namespace mongo { - using ErrorAnnotation = MatchExpression::ErrorAnnotation; using AnnotationMode = ErrorAnnotation::Mode; diff --git a/src/mongo/db/matcher/expression_with_placeholder.cpp b/src/mongo/db/matcher/expression_with_placeholder.cpp index ca0adfba08b..c4bc9d05dca 100644 --- a/src/mongo/db/matcher/expression_with_placeholder.cpp +++ b/src/mongo/db/matcher/expression_with_placeholder.cpp @@ -31,10 +31,9 @@ #include "mongo/db/matcher/expression_with_placeholder.h" -#include <pcrecpp.h> - #include "mongo/base/string_data.h" #include "mongo/db/matcher/expression_parser.h" +#include "mongo/util/pcre.h" #include "mongo/util/static_immortal.h" namespace mongo { @@ -43,8 +42,8 @@ namespace { bool matchesPlaceholderPattern(StringData placeholder) { // The placeholder must begin with a lowercase letter and contain no special characters. - static StaticImmortal<pcrecpp::RE> kRe("[[:lower:]][[:alnum:]]*"); - return kRe->FullMatch(pcrecpp::StringPiece(placeholder.rawData(), placeholder.size())); + static StaticImmortal<pcre::Regex> kRe("^[[:lower:]][[:alnum:]]*$"); + return !!kRe->matchView(placeholder); } /** diff --git a/src/mongo/db/matcher/schema/expression_internal_schema_allowed_properties.cpp b/src/mongo/db/matcher/schema/expression_internal_schema_allowed_properties.cpp index 64b34aafc3a..861bdc2d989 100644 --- a/src/mongo/db/matcher/schema/expression_internal_schema_allowed_properties.cpp +++ b/src/mongo/db/matcher/schema/expression_internal_schema_allowed_properties.cpp @@ -30,6 +30,7 @@ #include "mongo/platform/basic.h" #include "mongo/db/matcher/schema/expression_internal_schema_allowed_properties.h" +#include "mongo/util/errno_util.h" namespace mongo { constexpr StringData InternalSchemaAllowedPropertiesMatchExpression::kName; @@ -47,10 +48,10 @@ InternalSchemaAllowedPropertiesMatchExpression::InternalSchemaAllowedPropertiesM _otherwise(std::move(otherwise)) { for (auto&& constraint : _patternProperties) { - const auto& errorStr = constraint.first.regex->error(); + const auto& re = constraint.first.regex; uassert(ErrorCodes::BadValue, - str::stream() << "Invalid regular expression: " << errorStr, - errorStr.empty()); + str::stream() << "Invalid regular expression: " << errorMessage(re->error()), + *re); } } @@ -107,7 +108,7 @@ bool InternalSchemaAllowedPropertiesMatchExpression::_matchesBSONObj(const BSONO for (auto&& property : obj) { bool checkOtherwise = true; for (auto&& constraint : _patternProperties) { - if (constraint.first.regex->PartialMatch(property.fieldName())) { + if (constraint.first.regex->matchView(property.fieldName())) { checkOtherwise = false; if (!constraint.second->matchesBSONElement(property)) { return false; diff --git a/src/mongo/db/matcher/schema/expression_internal_schema_allowed_properties.h b/src/mongo/db/matcher/schema/expression_internal_schema_allowed_properties.h index e95d0582d15..08e1a1affc6 100644 --- a/src/mongo/db/matcher/schema/expression_internal_schema_allowed_properties.h +++ b/src/mongo/db/matcher/schema/expression_internal_schema_allowed_properties.h @@ -31,12 +31,12 @@ #include <boost/optional.hpp> #include <memory> -#include <pcrecpp.h> #include <utility> #include <vector> #include "mongo/db/matcher/expression.h" #include "mongo/db/matcher/expression_with_placeholder.h" +#include "mongo/util/pcre.h" namespace mongo { @@ -90,15 +90,15 @@ namespace mongo { class InternalSchemaAllowedPropertiesMatchExpression final : public MatchExpression { public: /** - * A container for regular expression data. Holds a pcrecpp::RE object, as well as the original + * A container for regular expression data. Holds a regex object, as well as the original * string pattern, which is used for comparisons and serialization. */ struct Pattern { explicit Pattern(StringData pattern) - : rawRegex(pattern), regex(std::make_unique<pcrecpp::RE>(pattern.toString())) {} + : rawRegex(pattern), regex(std::make_unique<pcre::Regex>(std::string{rawRegex})) {} StringData rawRegex; - std::unique_ptr<pcrecpp::RE> regex; + std::unique_ptr<pcre::Regex> regex; }; /** diff --git a/src/mongo/db/matcher/schema/json_schema_parser.cpp b/src/mongo/db/matcher/schema/json_schema_parser.cpp index dab65bab837..7b664daac63 100644 --- a/src/mongo/db/matcher/schema/json_schema_parser.cpp +++ b/src/mongo/db/matcher/schema/json_schema_parser.cpp @@ -739,8 +739,7 @@ StatusWithMatchExpression parseAllowedProperties( // that can't match documents. if (requiredMissingID) { for (const auto& pattern : patternPropertiesVec) { - // for (int i = 0; i < patternPropertiesVec.size(); ++i) { - if (pattern.first.regex->FullMatch("_id")) { + if (pattern.first.regex->matchView("_id", pcre::ANCHORED | pcre::ENDANCHORED)) { requiredMissingID = false; break; } diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript index 96c7d59a025..68fd5936a8d 100644 --- a/src/mongo/db/pipeline/SConscript +++ b/src/mongo/db/pipeline/SConscript @@ -121,7 +121,7 @@ env.Library( '$BUILD_DIR/mongo/scripting/scripting', '$BUILD_DIR/mongo/scripting/scripting_common', '$BUILD_DIR/mongo/util/intrusive_counter', - '$BUILD_DIR/mongo/util/regex_util', + '$BUILD_DIR/mongo/util/pcre_util', '$BUILD_DIR/mongo/util/summation', 'aggregation_request_helper', 'dependencies', diff --git a/src/mongo/db/pipeline/expression.cpp b/src/mongo/db/pipeline/expression.cpp index 464d2ad6953..774a337f740 100644 --- a/src/mongo/db/pipeline/expression.cpp +++ b/src/mongo/db/pipeline/expression.cpp @@ -35,7 +35,6 @@ #include <algorithm> #include <boost/algorithm/string.hpp> #include <cstdio> -#include <pcrecpp.h> #include <utility> #include <vector> @@ -57,7 +56,9 @@ #include "mongo/db/stats/counters.h" #include "mongo/platform/bits.h" #include "mongo/platform/decimal128.h" -#include "mongo/util/regex_util.h" +#include "mongo/util/errno_util.h" +#include "mongo/util/pcre.h" +#include "mongo/util/pcre_util.h" #include "mongo/util/str.h" #include "mongo/util/string_map.h" #include "mongo/util/summation.h" @@ -7038,105 +7039,46 @@ ExpressionRegex::RegexExecutionState ExpressionRegex::buildInitialState( return executionState; } -int ExpressionRegex::execute(RegexExecutionState* regexState) const { +pcre::MatchData ExpressionRegex::execute(RegexExecutionState* regexState) const { invariant(regexState); invariant(!regexState->nullish()); invariant(regexState->pcrePtr); - int execResult = pcre_exec(regexState->pcrePtr.get(), - nullptr, - regexState->input->c_str(), - regexState->input->size(), - regexState->startBytePos, - 0, // No need to overwrite the options set during pcre_compile. - &(regexState->capturesBuffer.front()), - regexState->capturesBuffer.size()); - // The 'execResult' will be -1 if there is no match, 0 < execResult <= (numCaptures + 1) - // depending on how many capture groups match, negative (other than -1) if there is an error - // during execution, and zero if capturesBuffer's capacity is not sufficient to hold all the - // results. The latter scenario should never occur. + StringData in = *regexState->input; + auto m = regexState->pcrePtr->matchView(in, {}, regexState->startBytePos); uassert(51156, str::stream() << "Error occurred while executing the regular expression in " << _opName - << ". Result code: " << execResult, - execResult == -1 || (execResult > 0 && execResult <= (regexState->numCaptures + 1))); - return execResult; + << ". Result code: " << errorMessage(m.error()), + m || m.error() == pcre::Errc::ERROR_NOMATCH); + return m; } Value ExpressionRegex::nextMatch(RegexExecutionState* regexState) const { - int execResult = execute(regexState); - - // No match. - if (execResult < 0) { + auto m = execute(regexState); + if (!m) + // No match. return Value(BSONNULL); - } - - // Use 'input' as StringData throughout the function to avoid copying the string on 'substr' - // calls. - StringData input = *(regexState->input); - - auto verifyBounds = [&input, this](auto startPos, auto limitPos, auto isCapture) { - // If a capture group was not matched, then the 'startPos' and 'limitPos' will both be -1. - // These bounds cannot occur for a match on the full string. - if (startPos == -1 || limitPos == -1) { - massert(31304, - str::stream() << "Unexpected error occurred while executing " << _opName - << ". startPos: " << startPos << ", limitPos: " << limitPos, - isCapture && startPos == -1 && limitPos == -1); - return; - } - massert(31305, - str::stream() << "Unexpected error occurred while executing " << _opName - << ". startPos: " << startPos, - (startPos >= 0 && static_cast<size_t>(startPos) <= input.size())); - massert(31306, - str::stream() << "Unexpected error occurred while executing " << _opName - << ". limitPos: " << limitPos, - (limitPos >= 0 && static_cast<size_t>(limitPos) <= input.size())); - massert(31307, - str::stream() << "Unexpected error occurred while executing " << _opName - << ". startPos: " << startPos << ", limitPos: " << limitPos, - startPos <= limitPos); - }; - - // The first and second entries of the 'capturesBuffer' will have the start and (end+1) indices - // of the matched string, as byte offsets. '(limit - startIndex)' would be the length of the - // captured string. - verifyBounds(regexState->capturesBuffer[0], regexState->capturesBuffer[1], false); - const int matchStartByteIndex = regexState->capturesBuffer[0]; - StringData matchedStr = - input.substr(matchStartByteIndex, regexState->capturesBuffer[1] - matchStartByteIndex); - - // We iterate through the input string's contents preceding the match index, in order to convert - // the byte offset to a code point offset. - for (int byteIx = regexState->startBytePos; byteIx < matchStartByteIndex; - ++(regexState->startCodePointPos)) { - byteIx += str::getCodePointLength(input[byteIx]); - } + StringData beforeMatch(m.input().begin() + m.startPos(), m[0].begin()); + regexState->startCodePointPos += str::lengthInUTF8CodePoints(beforeMatch); // Set the start index for match to the new one. - regexState->startBytePos = matchStartByteIndex; + regexState->startBytePos = m[0].begin() - m.input().begin(); std::vector<Value> captures; - captures.reserve(regexState->numCaptures); + captures.reserve(m.captureCount()); - // The next '2 * numCaptures' entries (after the first two entries) of 'capturesBuffer' will - // hold the start index and limit pairs, for each of the capture groups. We skip the first two - // elements and start iteration from 3rd element so that we only construct the strings for - // capture groups. - for (int i = 0; i < regexState->numCaptures; ++i) { - const int start = regexState->capturesBuffer[2 * (i + 1)]; - const int limit = regexState->capturesBuffer[2 * (i + 1) + 1]; - verifyBounds(start, limit, true); - - // The 'start' and 'limit' will be set to -1, if the 'input' didn't match the current - // capture group. In this case we put a 'null' placeholder in place of the capture group. - captures.push_back(start == -1 && limit == -1 ? Value(BSONNULL) - : Value(input.substr(start, limit - start))); + for (size_t i = 1; i < m.captureCount() + 1; ++i) { + if (StringData cap = m[i]; !cap.rawData()) { + // Use BSONNULL placeholder for unmatched capture groups. + captures.push_back(Value(BSONNULL)); + } else { + captures.push_back(Value(cap)); + } } MutableDocument match; - match.addField("match", Value(matchedStr)); + match.addField("match", Value(m[0])); match.addField("idx", Value(regexState->startCodePointPos)); match.addField("captures", Value(captures)); return match.freezeToValue(); @@ -7161,41 +7103,20 @@ boost::intrusive_ptr<Expression> ExpressionRegex::optimize() { } void ExpressionRegex::_compile(RegexExecutionState* executionState) const { - - const auto pcreOptions = - regex_util::flagsToPcreOptions(executionState->options.value_or(""), _opName).all_options(); - if (!executionState->pattern) { return; } - const char* compile_error; - int eoffset; - - // The C++ interface pcreccp.h doesn't have a way to capture the matched string (or the index of - // the match). So we are using the C interface. First we compile all the regex options to - // generate pcre object, which will later be used to match against the input string. - executionState->pcrePtr = std::shared_ptr<pcre>( - pcre_compile( - executionState->pattern->c_str(), pcreOptions, &compile_error, &eoffset, nullptr), - pcre_free); + auto re = std::make_shared<pcre::Regex>( + *executionState->pattern, + pcre_util::flagsToOptions(executionState->options.value_or(""), _opName)); uassert(51111, - str::stream() << "Invalid Regex in " << _opName << ": " << compile_error, - executionState->pcrePtr); + str::stream() << "Invalid Regex in " << _opName << ": " << errorMessage(re->error()), + *re); + executionState->pcrePtr = std::move(re); // Calculate the number of capture groups present in 'pattern' and store in 'numCaptures'. - const int pcre_retval = pcre_fullinfo(executionState->pcrePtr.get(), - nullptr, - PCRE_INFO_CAPTURECOUNT, - &executionState->numCaptures); - invariant(pcre_retval == 0); - - // The first two-thirds of the vector is used to pass back captured substrings' start and - // (end+1) indexes. The remaining third of the vector is used as workspace by pcre_exec() while - // matching capturing subpatterns, and is not available for passing back information. - // pcre_compile will error if there are too many capture groups in the pattern. As long as this - // memory is allocated after compile, the amount of memory allocated will not be too high. - executionState->capturesBuffer.resize((1 + executionState->numCaptures) * 3); + executionState->numCaptures = executionState->pcrePtr->captureCount(); } Value ExpressionRegex::serialize(bool explain) const { @@ -7420,9 +7341,11 @@ boost::intrusive_ptr<Expression> ExpressionRegexMatch::parse(ExpressionContext* } Value ExpressionRegexMatch::evaluate(const Document& root, Variables* variables) const { - auto executionState = buildInitialState(root, variables); - // Return output of execute only if regex is not nullish. - return executionState.nullish() ? Value(false) : Value(execute(&executionState) > 0); + auto state = buildInitialState(root, variables); + if (state.nullish()) + return Value(false); + pcre::MatchData m = execute(&state); + return Value(!!m); } /* -------------------------- ExpressionRandom ------------------------------ */ diff --git a/src/mongo/db/pipeline/expression.h b/src/mongo/db/pipeline/expression.h index 4b5745bb2b6..837513770fb 100644 --- a/src/mongo/db/pipeline/expression.h +++ b/src/mongo/db/pipeline/expression.h @@ -36,7 +36,6 @@ #include <boost/intrusive_ptr.hpp> #include <functional> #include <map> -#include <pcre.h> #include <string> #include <utility> #include <vector> @@ -57,6 +56,7 @@ #include "mongo/db/server_options.h" #include "mongo/db/update/pattern_cmp.h" #include "mongo/util/intrusive_counter.h" +#include "mongo/util/pcre.h" #include "mongo/util/str.h" namespace mongo { @@ -3719,7 +3719,7 @@ public: * and '_initialExecStateForConstantRegex'. If not, then the active RegexExecutionState is * the sole owner. */ - std::shared_ptr<pcre> pcrePtr; + std::shared_ptr<pcre::Regex> pcrePtr; /** * The input text and starting position for the current execution context. @@ -3744,11 +3744,11 @@ public: RegexExecutionState buildInitialState(const Document& root, Variables* variables) const; /** - * Checks if there is a match for the given input and pattern that are part of 'executionState'. - * The method will return a positive number if there is a match and '-1' if there is no match. - * Throws 'uassert()' for any errors. + * Checks if there is a match for the input, options, and pattern of 'executionState'. + * Returns the pcre::MatchData yielded by that match operation. + * Will uassert for any errors other than `pcre::Errc::ERROR_NOMATCH`. */ - int execute(RegexExecutionState* executionState) const; + pcre::MatchData execute(RegexExecutionState* executionState) const; /** * Finds the next possible match for the given input and pattern that are part of diff --git a/src/mongo/db/query/SConscript b/src/mongo/db/query/SConscript index ed8d4605af1..2f3a46adb83 100644 --- a/src/mongo/db/query/SConscript +++ b/src/mongo/db/query/SConscript @@ -268,7 +268,7 @@ env.Library( "$BUILD_DIR/mongo/db/service_context", '$BUILD_DIR/mongo/idl/feature_flag', '$BUILD_DIR/mongo/idl/server_parameter', - '$BUILD_DIR/third_party/shim_pcrecpp', + '$BUILD_DIR/mongo/util/pcre_wrapper', ], ) diff --git a/src/mongo/db/query/plan_cache_size_parameter.cpp b/src/mongo/db/query/plan_cache_size_parameter.cpp index 46e42efafbf..5f1f66bcaf8 100644 --- a/src/mongo/db/query/plan_cache_size_parameter.cpp +++ b/src/mongo/db/query/plan_cache_size_parameter.cpp @@ -29,9 +29,8 @@ #include "mongo/db/query/plan_cache_size_parameter.h" -#include <pcrecpp.h> - #include "mongo/db/query/query_knobs_gen.h" +#include "mongo/util/pcre.h" namespace mongo::plan_cache_util { @@ -52,16 +51,14 @@ StatusWith<PlanCacheSizeUnits> parseUnitString(const std::string& strUnit) { } StatusWith<PlanCacheSizeParameter> PlanCacheSizeParameter::parse(const std::string& str) { - pcrecpp::RE_Options opt; - opt.set_caseless(true); // Looks for a floating point number with followed by a unit suffix (MB, GB, %). - pcrecpp::RE re("\\s*(\\d+\\.?\\d*)\\s*(MB|GB|%)\\s*", opt); - - double size{}; - std::string strUnit{}; - if (!re.FullMatch(str, &size, &strUnit)) { + static auto& re = *new pcre::Regex(R"re((?i)^\s*(\d+\.?\d*)\s*(MB|GB|%)\s*$)re"); + auto m = re.matchView(str); + if (!m) { return {ErrorCodes::Error{6007012}, "Unable to parse plan cache size string"}; } + double size = std::stod(std::string{m[1]}); + std::string strUnit{m[2]}; auto statusWithUnit = parseUnitString(strUnit); if (!statusWithUnit.isOK()) { diff --git a/src/mongo/db/s/SConscript b/src/mongo/db/s/SConscript index 679586aab68..0e9631f111b 100644 --- a/src/mongo/db/s/SConscript +++ b/src/mongo/db/s/SConscript @@ -317,6 +317,7 @@ env.Library( '$BUILD_DIR/mongo/s/coreshard', '$BUILD_DIR/mongo/s/query/cluster_aggregate', '$BUILD_DIR/mongo/util/log_and_backoff', + '$BUILD_DIR/mongo/util/pcre_wrapper', 'forwardable_operation_metadata', 'sharding_logging', 'user_writes_recoverable_critical_section', diff --git a/src/mongo/db/s/balancer/balancer.cpp b/src/mongo/db/s/balancer/balancer.cpp index fc2c42a59c1..22d0d7faa45 100644 --- a/src/mongo/db/s/balancer/balancer.cpp +++ b/src/mongo/db/s/balancer/balancer.cpp @@ -32,7 +32,6 @@ #include <algorithm> #include <memory> -#include <pcrecpp.h> #include <string> #include "mongo/base/status_with.h" @@ -64,6 +63,7 @@ #include "mongo/util/concurrency/idle_thread_block.h" #include "mongo/util/exit.h" #include "mongo/util/fail_point.h" +#include "mongo/util/pcre.h" #include "mongo/util/timer.h" #include "mongo/util/version.h" @@ -143,13 +143,12 @@ private: * in the cluster. */ void warnOnMultiVersion(const vector<ClusterStatistics::ShardStatistics>& clusterStats) { - static const auto& majorMinorRE = *new pcrecpp::RE(R"re(^(\d+)\.(\d+)\.)re"); + static const auto& majorMinorRE = *new pcre::Regex(R"re(^(\d+)\.(\d+)\.)re"); auto&& vii = VersionInfoInterface::instance(); auto hasMyVersion = [&](auto&& stat) { - int major; - int minor; - return majorMinorRE.PartialMatch(pcrecpp::StringPiece(stat.mongoVersion), &major, &minor) && - major == vii.majorVersion() && minor == vii.minorVersion(); + auto m = majorMinorRE.match(stat.mongoVersion); + return m && std::stoi(std::string{m[1]}) == vii.majorVersion() && + std::stoi(std::string{m[2]}) == vii.minorVersion(); }; // If we're all the same version, don't message diff --git a/src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp index 83462451f76..2af9f43bc2d 100644 --- a/src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp +++ b/src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp @@ -30,7 +30,7 @@ #include "mongo/db/s/config/sharding_catalog_manager.h" -#include <pcrecpp.h> +#include <fmt/format.h> #include "mongo/bson/util/bson_extract.h" #include "mongo/db/dbdirectclient.h" @@ -48,6 +48,8 @@ #include "mongo/s/grid.h" #include "mongo/s/shard_util.h" #include "mongo/s/sharding_feature_flags_gen.h" +#include "mongo/util/pcre.h" +#include "mongo/util/pcre_util.h" #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding @@ -55,6 +57,8 @@ namespace mongo { namespace { +using namespace fmt::literals; + /** * Selects an optimal shard on which to place a newly created database from the set of available * shards. Will return ShardNotFound if shard could not be found. @@ -150,9 +154,8 @@ DatabaseType ShardingCatalogManager::createDatabase( // Check if a database already exists with the same name (case sensitive), and if so, return the // existing entry. BSONObjBuilder queryBuilder; - queryBuilder.appendRegex(DatabaseType::kNameFieldName, - (std::string) "^" + pcrecpp::RE::QuoteMeta(dbName.toString()) + "$", - "i"); + queryBuilder.appendRegex( + DatabaseType::kNameFieldName, "^{}$"_format(pcre_util::quoteMeta(dbName)), "i"); auto dbDoc = client.findOne(NamespaceString::kConfigDatabasesNamespace, queryBuilder.obj()); auto const [primaryShardPtr, database] = [&] { diff --git a/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp index 947ec9fb3c2..27bbbdb74b5 100644 --- a/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp +++ b/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp @@ -33,7 +33,6 @@ #include "mongo/db/s/config/sharding_catalog_manager.h" #include <iomanip> -#include <pcrecpp.h> #include <set> #include "mongo/base/status_with.h" diff --git a/src/mongo/rpc/SConscript b/src/mongo/rpc/SConscript index 1184b58d111..a2c8fc5473f 100644 --- a/src/mongo/rpc/SConscript +++ b/src/mongo/rpc/SConscript @@ -91,7 +91,7 @@ env.Library( '$BUILD_DIR/mongo/bson/mutable/mutable_bson', '$BUILD_DIR/mongo/db/service_context', '$BUILD_DIR/mongo/s/is_mongos', - '$BUILD_DIR/third_party/shim_pcrecpp', + '$BUILD_DIR/mongo/util/pcre_wrapper', 'message', ], ) diff --git a/src/mongo/rpc/rewrite_state_change_errors.cpp b/src/mongo/rpc/rewrite_state_change_errors.cpp index 7ee8621307c..7f09b36806e 100644 --- a/src/mongo/rpc/rewrite_state_change_errors.cpp +++ b/src/mongo/rpc/rewrite_state_change_errors.cpp @@ -37,7 +37,6 @@ #include <boost/optional.hpp> #include <fmt/format.h> -#include <pcrecpp.h> #include "mongo/bson/mutable/document.h" #include "mongo/bson/mutable/element.h" @@ -51,6 +50,7 @@ #include "mongo/rpc/rewrite_state_change_errors_server_parameter_gen.h" #include "mongo/s/is_mongos.h" #include "mongo/util/assert_util.h" +#include "mongo/util/pcre.h" #include "mongo/util/static_immortal.h" #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kNetwork @@ -80,12 +80,13 @@ auto enabledForOperation = OperationContext::declareDecoration<RewriteEnabled>() */ boost::optional<std::string> scrubErrmsg(StringData val) { struct Scrub { - pcrecpp::RE pat; + Scrub(std::string pat, std::string sub) : pat(std::move(pat)), sub(std::move(sub)) {} + pcre::Regex pat; std::string sub; }; static const StaticImmortal scrubs = std::array{ - Scrub{pcrecpp::RE("not master"), "(NOT_PRIMARY)"}, - Scrub{pcrecpp::RE("node is recovering"), "(NODE_IS_RECOVERING)"}, + Scrub{"not master", "(NOT_PRIMARY)"}, + Scrub{"node is recovering", "(NODE_IS_RECOVERING)"}, }; // Fast scan for the common case that no key phrase is present. static const StaticImmortal fastScan = [] { @@ -96,16 +97,14 @@ boost::optional<std::string> scrubErrmsg(StringData val) { out = format_to(out, FMT_STRING("{}({})"), sep, scrub.pat.pattern()); sep = "|"_sd; } - return pcrecpp::RE(pat); + return pcre::Regex(pat); }(); - pcrecpp::StringPiece pcreVal(val.rawData(), val.size()); - - if (fastScan->PartialMatch(pcreVal)) { + if (fastScan->matchView(val)) { std::string s{val}; bool didSub = false; for (auto&& scrub : *scrubs) { - bool subOk = scrub.pat.GlobalReplace(scrub.sub, &s); + bool subOk = scrub.pat.substitute(scrub.sub, &s, pcre::SUBSTITUTE_GLOBAL); didSub = (didSub || subOk); } if (didSub) diff --git a/src/mongo/s/catalog/SConscript b/src/mongo/s/catalog/SConscript index 6b6d5a43151..53ce04524d4 100644 --- a/src/mongo/s/catalog/SConscript +++ b/src/mongo/s/catalog/SConscript @@ -30,6 +30,7 @@ env.Library( ], LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/db/logical_session_id_helpers', + '$BUILD_DIR/mongo/util/pcre_wrapper', ], ) diff --git a/src/mongo/s/catalog/sharding_catalog_client_impl.cpp b/src/mongo/s/catalog/sharding_catalog_client_impl.cpp index 55ae5007f48..5941d8b3499 100644 --- a/src/mongo/s/catalog/sharding_catalog_client_impl.cpp +++ b/src/mongo/s/catalog/sharding_catalog_client_impl.cpp @@ -32,8 +32,8 @@ #include "mongo/s/catalog/sharding_catalog_client_impl.h" +#include <fmt/format.h> #include <iomanip> -#include <pcrecpp.h> #include "mongo/bson/bsonobjbuilder.h" #include "mongo/bson/util/bson_extract.h" @@ -72,6 +72,8 @@ #include "mongo/s/write_ops/batched_command_response.h" #include "mongo/util/assert_util.h" #include "mongo/util/net/hostandport.h" +#include "mongo/util/pcre.h" +#include "mongo/util/pcre_util.h" #include "mongo/util/str.h" #include "mongo/util/time_support.h" @@ -90,6 +92,8 @@ using str::stream; namespace { +using namespace fmt::literals; + const ReadPreferenceSetting kConfigReadSelector(ReadPreference::Nearest, TagSet{}); const ReadPreferenceSetting kConfigPrimaryPreferredSelector(ReadPreference::PrimaryPreferred, TagSet{}); @@ -455,9 +459,7 @@ std::vector<CollectionType> ShardingCatalogClientImpl::getCollections( OperationContext* opCtx, StringData dbName, repl::ReadConcernLevel readConcernLevel) { BSONObjBuilder b; if (!dbName.empty()) - b.appendRegex(CollectionType::kNssFieldName, - std::string(str::stream() - << "^" << pcrecpp::RE::QuoteMeta(dbName.toString()) << "\\.")); + b.appendRegex(CollectionType::kNssFieldName, "^{}\\."_format(pcre_util::quoteMeta(dbName))); auto collDocs = uassertStatusOK(_exhaustiveFindOnConfig(opCtx, kConfigReadSelector, diff --git a/src/mongo/s/catalog/sharding_catalog_client_test.cpp b/src/mongo/s/catalog/sharding_catalog_client_test.cpp index 92f2ce78a8d..be537a1341a 100644 --- a/src/mongo/s/catalog/sharding_catalog_client_test.cpp +++ b/src/mongo/s/catalog/sharding_catalog_client_test.cpp @@ -27,7 +27,7 @@ * it in the license file. */ -#include <pcrecpp.h> +#include "mongo/platform/basic.h" #include "mongo/bson/json.h" #include "mongo/client/remote_command_targeter_mock.h" diff --git a/src/mongo/shell/SConscript b/src/mongo/shell/SConscript index 650f8f72f1f..9a7ea68c5e9 100644 --- a/src/mongo/shell/SConscript +++ b/src/mongo/shell/SConscript @@ -23,7 +23,8 @@ env.Library( '$BUILD_DIR/mongo/scripting/bson_template_evaluator', ], LIBDEPS_PRIVATE=[ - '$BUILD_DIR/third_party/shim_pcrecpp', + '$BUILD_DIR/mongo/util/pcre_util', + '$BUILD_DIR/mongo/util/pcre_wrapper', ], ) @@ -277,7 +278,6 @@ if not has_option('noshell') and jsEngine: "$BUILD_DIR/mongo/util/processinfo", "$BUILD_DIR/mongo/util/signal_handlers", "$BUILD_DIR/mongo/util/version_impl", - "$BUILD_DIR/third_party/shim_pcrecpp", "benchrun", "encrypted_dbclient" if get_option('ssl') == 'on' else '', "kms_shell" if get_option('ssl') == 'on' else '', @@ -305,8 +305,8 @@ if not has_option('noshell') and jsEngine: "$BUILD_DIR/mongo/s/write_ops/batch_write_types", "$BUILD_DIR/mongo/transport/transport_layer", "$BUILD_DIR/mongo/util/net/ssl_manager", + "$BUILD_DIR/mongo/util/pcre_wrapper", "$BUILD_DIR/mongo/util/signal_handlers", - "$BUILD_DIR/third_party/shim_pcrecpp", "linenoise", "mongo_initializers", "shell_utils", diff --git a/src/mongo/shell/bench.cpp b/src/mongo/shell/bench.cpp index d0c02886304..0c7b257e5de 100644 --- a/src/mongo/shell/bench.cpp +++ b/src/mongo/shell/bench.cpp @@ -32,7 +32,6 @@ #include "mongo/shell/bench.h" -#include <pcrecpp.h> #include <string> #include "mongo/base/shim.h" @@ -45,6 +44,8 @@ #include "mongo/scripting/bson_template_evaluator.h" #include "mongo/stdx/thread.h" #include "mongo/util/md5.h" +#include "mongo/util/pcre.h" +#include "mongo/util/pcre_util.h" #include "mongo/util/time_support.h" #include "mongo/util/timer.h" #include "mongo/util/version.h" @@ -95,21 +96,6 @@ private: BenchRunState& _brState; }; -pcrecpp::RE_Options flags2options(const char* flags) { - pcrecpp::RE_Options options; - options.set_utf8(true); - while (flags && *flags) { - if (*flags == 'i') - options.set_caseless(true); - else if (*flags == 'm') - options.set_multiline(true); - else if (*flags == 'x') - options.set_extended(true); - flags++; - } - return options; -} - bool hasSpecial(const BSONObj& obj) { BSONObjIterator i(obj); while (i.more()) { @@ -673,6 +659,11 @@ BenchRunOp opFromBson(const BSONObj& op) { void BenchRunConfig::initializeFromBson(const BSONObj& args) { initializeToDefaults(); + auto argToRegex = [](auto&& arg) { + return std::make_shared<pcre::Regex>(arg.regex(), + pcre_util::flagsToOptions(arg.regexFlags())); + }; + for (auto arg : args) { auto name = arg.fieldNameStringData(); if (name == "host") { @@ -750,25 +741,13 @@ void BenchRunConfig::initializeFromBson(const BSONObj& args) { } else if (name == "breakOnTrap") { breakOnTrap = arg.trueValue(); } else if (name == "trapPattern") { - const char* regex = arg.regex(); - const char* flags = arg.regexFlags(); - trapPattern = - std::shared_ptr<pcrecpp::RE>(new pcrecpp::RE(regex, flags2options(flags))); + trapPattern = argToRegex(arg); } else if (name == "noTrapPattern") { - const char* regex = arg.regex(); - const char* flags = arg.regexFlags(); - noTrapPattern = - std::shared_ptr<pcrecpp::RE>(new pcrecpp::RE(regex, flags2options(flags))); + noTrapPattern = argToRegex(arg); } else if (name == "watchPattern") { - const char* regex = arg.regex(); - const char* flags = arg.regexFlags(); - watchPattern = - std::shared_ptr<pcrecpp::RE>(new pcrecpp::RE(regex, flags2options(flags))); + watchPattern = argToRegex(arg); } else if (name == "noWatchPattern") { - const char* regex = arg.regex(); - const char* flags = arg.regexFlags(); - noWatchPattern = - std::shared_ptr<pcrecpp::RE>(new pcrecpp::RE(regex, flags2options(flags))); + noWatchPattern = argToRegex(arg); } else if (name == "ops") { // iterate through the objects in ops // create an BenchRunOp per @@ -946,10 +925,12 @@ void BenchRunWorker::generateLoadOnConnection(DBClientBase* conn) { op.executeOnce(conn, lsid, *_config, &opState); } catch (const DBException& ex) { if (!_config->hideErrors || op.showError) { - bool yesWatch = - (_config->watchPattern && _config->watchPattern->FullMatch(ex.what())); - bool noWatch = - (_config->noWatchPattern && _config->noWatchPattern->FullMatch(ex.what())); + bool yesWatch = (_config->watchPattern && + _config->watchPattern->matchView( + ex.what(), pcre::ANCHORED | pcre::ENDANCHORED)); + bool noWatch = (_config->noWatchPattern && + _config->noWatchPattern->matchView( + ex.what(), pcre::ANCHORED | pcre::ENDANCHORED)); if ((!_config->watchPattern && _config->noWatchPattern && !noWatch) || // If we're just ignoring things @@ -962,9 +943,12 @@ void BenchRunWorker::generateLoadOnConnection(DBClientBase* conn) { "error"_attr = causedBy(ex)); } - bool yesTrap = (_config->trapPattern && _config->trapPattern->FullMatch(ex.what())); - bool noTrap = - (_config->noTrapPattern && _config->noTrapPattern->FullMatch(ex.what())); + bool yesTrap = (_config->trapPattern && + _config->trapPattern->matchView( + ex.what(), pcre::ANCHORED | pcre::ENDANCHORED)); + bool noTrap = (_config->noTrapPattern && + _config->noTrapPattern->matchView( + ex.what(), pcre::ANCHORED | pcre::ENDANCHORED)); if ((!_config->trapPattern && _config->noTrapPattern && !noTrap) || (!_config->noTrapPattern && _config->trapPattern && yesTrap) || diff --git a/src/mongo/shell/bench.h b/src/mongo/shell/bench.h index a7d1f70c0ed..90831a8ab53 100644 --- a/src/mongo/shell/bench.h +++ b/src/mongo/shell/bench.h @@ -40,12 +40,9 @@ #include "mongo/platform/mutex.h" #include "mongo/stdx/condition_variable.h" #include "mongo/stdx/thread.h" +#include "mongo/util/pcre.h" #include "mongo/util/timer.h" -namespace pcrecpp { -class RE; -} // namespace pcrecpp - namespace mongo { enum class OpType { @@ -238,10 +235,10 @@ public: bool handleErrors; bool hideErrors; - std::shared_ptr<pcrecpp::RE> trapPattern; - std::shared_ptr<pcrecpp::RE> noTrapPattern; - std::shared_ptr<pcrecpp::RE> watchPattern; - std::shared_ptr<pcrecpp::RE> noWatchPattern; + std::shared_ptr<pcre::Regex> trapPattern; + std::shared_ptr<pcre::Regex> noTrapPattern; + std::shared_ptr<pcre::Regex> watchPattern; + std::shared_ptr<pcre::Regex> noWatchPattern; /** * Operation description. A list of BenchRunOps, each describing a single diff --git a/src/mongo/shell/mongo_main.cpp b/src/mongo/shell/mongo_main.cpp index dd9231caa27..51ccb760ee7 100644 --- a/src/mongo/shell/mongo_main.cpp +++ b/src/mongo/shell/mongo_main.cpp @@ -39,7 +39,6 @@ #include <boost/log/sinks.hpp> #include <fstream> #include <iostream> -#include <pcrecpp.h> #include <signal.h> #include <stdio.h> #include <string.h> @@ -79,6 +78,7 @@ #include "mongo/util/net/ocsp/ocsp_manager.h" #include "mongo/util/net/ssl_options.h" #include "mongo/util/password.h" +#include "mongo/util/pcre.h" #include "mongo/util/quick_exit.h" #include "mongo/util/scopeguard.h" #include "mongo/util/signal_handlers.h" @@ -278,16 +278,16 @@ void shellHistoryAdd(const char* line) { // be able to add things like `.author`, so be smart about how this is // detected by using regular expresions. This is so we can avoid storing passwords // in the history file in plaintext. - static pcrecpp::RE hiddenHelpers( + static pcre::Regex hiddenHelpers( "\\.\\s*(auth|createUser|updateUser|changeUserPassword)\\s*\\("); // Also don't want the raw user management commands to show in the shell when run directly // via runCommand. - static pcrecpp::RE hiddenCommands( + static pcre::Regex hiddenCommands( "(run|admin)Command\\s*\\(\\s*{\\s*(createUser|updateUser)\\s*:"); - static pcrecpp::RE hiddenFLEConstructor(".*Mongo\\(([\\s\\S]*)secretAccessKey([\\s\\S]*)"); - if (!hiddenHelpers.PartialMatch(line) && !hiddenCommands.PartialMatch(line) && - !hiddenFLEConstructor.PartialMatch(line)) { + static pcre::Regex hiddenFLEConstructor(".*Mongo\\(([\\s\\S]*)secretAccessKey([\\s\\S]*)"); + if (!hiddenHelpers.matchView(line) && !hiddenCommands.matchView(line) && + !hiddenFLEConstructor.matchView(line)) { linenoiseHistoryAdd(line); } } diff --git a/src/mongo/unittest/SConscript b/src/mongo/unittest/SConscript index 7abf76854de..b625fcd23ab 100644 --- a/src/mongo/unittest/SConscript +++ b/src/mongo/unittest/SConscript @@ -24,7 +24,8 @@ utEnv.Library( LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/db/server_options_core', '$BUILD_DIR/mongo/util/options_parser/options_parser', - '$BUILD_DIR/third_party/shim_pcrecpp', + '$BUILD_DIR/mongo/util/pcre_util', + '$BUILD_DIR/mongo/util/pcre_wrapper', '$BUILD_DIR/third_party/shim_yaml', ], AIB_COMPONENT='unittests', diff --git a/src/mongo/unittest/death_test.cpp b/src/mongo/unittest/death_test.cpp index e7a778c97ea..40dc410aa22 100644 --- a/src/mongo/unittest/death_test.cpp +++ b/src/mongo/unittest/death_test.cpp @@ -30,7 +30,6 @@ #include "mongo/platform/basic.h" #include <fmt/format.h> -#include <pcrecpp.h> #include <stdio.h> #include "mongo/bson/json.h" @@ -58,6 +57,7 @@ #include "mongo/logv2/log.h" #include "mongo/util/assert_util.h" #include "mongo/util/debugger.h" +#include "mongo/util/pcre_util.h" #include "mongo/util/quick_exit.h" #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest @@ -233,7 +233,7 @@ void DeathTestBase::Subprocess::execChild(std::string tempPath) { stripOption(av, "tempPath"); const TestInfo* info = UnitTest::getInstance()->currentTestInfo(); av.push_back("--suite={}"_format(info->suiteName())); - av.push_back("--filter=^{}$"_format(pcrecpp::RE::QuoteMeta(std::string{info->testName()}))); + av.push_back("--filter=^{}$"_format(pcre_util::quoteMeta(info->testName()))); av.push_back("--tempPath={}"_format(tempPath)); // The presence of this flag is how the test body in the child process knows it's in the // child process, and therefore to not exec again. Its value is ignored. diff --git a/src/mongo/unittest/golden_test.cpp b/src/mongo/unittest/golden_test.cpp index d2174e8f067..890be64d01b 100644 --- a/src/mongo/unittest/golden_test.cpp +++ b/src/mongo/unittest/golden_test.cpp @@ -38,7 +38,6 @@ #include <boost/program_options.hpp> #include <fmt/format.h> #include <fmt/ostream.h> -#include <pcrecpp.h> #include <yaml-cpp/yaml.h> #include "mongo/base/init.h" @@ -46,6 +45,7 @@ #include "mongo/logv2/log.h" #include "mongo/unittest/golden_test.h" #include "mongo/util/ctype.h" +#include "mongo/util/pcre.h" #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest @@ -56,7 +56,7 @@ namespace po = ::boost::program_options; using namespace fmt::literals; -static const pcrecpp::RE validNameRegex(R"([[:alnum:]_\-]*)"); +static const pcre::Regex validNameRegex(R"(^[[:alnum:]_\-]*$)"); std::string readFile(const fs::path& path) { ASSERT_FALSE(is_directory(path)); @@ -115,7 +115,7 @@ std::string GoldenTestContext::toSnakeCase(const std::string& str) { } std::string GoldenTestContext::sanitizeName(const std::string& str) { - if (!validNameRegex.FullMatch(str)) { + if (!validNameRegex.matchView(str)) { FAIL("Unsupported characters in name '{}'"_format(str)); } diff --git a/src/mongo/unittest/matcher.cpp b/src/mongo/unittest/matcher.cpp index da0bbcb5a6c..db1a2d62139 100644 --- a/src/mongo/unittest/matcher.cpp +++ b/src/mongo/unittest/matcher.cpp @@ -29,19 +29,19 @@ #include "mongo/unittest/matcher.h" +#include <fmt/format.h> #include <memory> #include <utility> -#include <fmt/format.h> -#include <pcrecpp.h> +#include "mongo/util/pcre.h" namespace mongo::unittest::match { using namespace fmt::literals; struct ContainsRegex::Impl { - explicit Impl(pcrecpp::RE pat) : re(std::move(pat)) {} - pcrecpp::RE re; + explicit Impl(std::string pat) : re(std::move(pat)) {} + pcre::Regex re; }; ContainsRegex::ContainsRegex(std::string pattern) @@ -50,9 +50,7 @@ ContainsRegex::ContainsRegex(std::string pattern) ContainsRegex::~ContainsRegex() = default; MatchResult ContainsRegex::match(StringData x) const { - bool res = - _impl->re.PartialMatch(pcrecpp::StringPiece{x.rawData(), static_cast<int>(x.size())}); - if (res) + if (_impl->re.matchView(x)) return {}; return MatchResult(false, ""); } diff --git a/src/mongo/unittest/unittest.cpp b/src/mongo/unittest/unittest.cpp index 01bfb7c756f..073f09ca7c5 100644 --- a/src/mongo/unittest/unittest.cpp +++ b/src/mongo/unittest/unittest.cpp @@ -39,7 +39,6 @@ #include <iostream> #include <map> #include <memory> -#include <pcrecpp.h> #include "mongo/base/checked_cast.h" #include "mongo/base/init.h" @@ -55,6 +54,7 @@ #include "mongo/logv2/plain_formatter.h" #include "mongo/platform/mutex.h" #include "mongo/util/assert_util.h" +#include "mongo/util/pcre.h" #include "mongo/util/signal_handlers_synchronous.h" #include "mongo/util/stacktrace.h" #include "mongo/util/timer.h" @@ -79,7 +79,7 @@ auto& suitesMap() { } // namespace bool searchRegex(const std::string& pattern, const std::string& string) { - return pcrecpp::RE(pattern).PartialMatch(string); + return !!pcre::Regex(pattern).matchView(string); } class Result { @@ -396,20 +396,20 @@ std::unique_ptr<Result> Suite::run(const std::string& filter, Timer timer; auto r = std::make_unique<Result>(_name); - boost::optional<pcrecpp::RE> filterRe; - boost::optional<pcrecpp::RE> fileNameFilterRe; + boost::optional<pcre::Regex> filterRe; + boost::optional<pcre::Regex> fileNameFilterRe; if (!filter.empty()) filterRe.emplace(filter); if (!fileNameFilter.empty()) fileNameFilterRe.emplace(fileNameFilter); for (const auto& tc : _tests) { - if (filterRe && !filterRe->PartialMatch(tc.name)) { + if (filterRe && !filterRe->matchView(tc.name)) { LOGV2_DEBUG(23057, 1, "skipped due to filter", "test"_attr = tc.name); continue; } - if (fileNameFilterRe && !fileNameFilterRe->PartialMatch(tc.fileName)) { + if (fileNameFilterRe && !fileNameFilterRe->matchView(tc.fileName)) { LOGV2_DEBUG(23058, 1, "skipped due to fileNameFilter", "testFile"_attr = tc.fileName); continue; } diff --git a/src/mongo/unittest/unittest.h b/src/mongo/unittest/unittest.h index 514a8e7d8d9..3b37a565261 100644 --- a/src/mongo/unittest/unittest.h +++ b/src/mongo/unittest/unittest.h @@ -38,7 +38,6 @@ #include <cmath> #include <fmt/format.h> #include <functional> -#include <pcrecpp.h> #include <sstream> #include <string> #include <tuple> diff --git a/src/mongo/util/SConscript b/src/mongo/util/SConscript index 308e3777a44..8f3c500a76e 100644 --- a/src/mongo/util/SConscript +++ b/src/mongo/util/SConscript @@ -131,17 +131,6 @@ env.Library( ) env.Library( - target='regex_util', - source=[ - 'regex_util.cpp', - ], - LIBDEPS=[ - '$BUILD_DIR/mongo/base', - '$BUILD_DIR/third_party/shim_pcrecpp', - ], -) - -env.Library( target='summation', source=[ 'summation.cpp', @@ -201,7 +190,7 @@ env.Library( "$BUILD_DIR/mongo/base", ], LIBDEPS_PRIVATE=[ - '$BUILD_DIR/third_party/shim_pcrecpp', + 'pcre_wrapper', ], ) @@ -615,7 +604,7 @@ if env.TargetOSIs('linux'): '$BUILD_DIR/mongo/base', ], LIBDEPS_PRIVATE=[ - '$BUILD_DIR/third_party/shim_pcrecpp', + 'pcre_wrapper', ], ) @@ -838,7 +827,7 @@ if use_libunwind: ) stacktrace_test_LIBDEPS = stacktraceEnv.get('LIBDEPS', []).copy() -insort_wrapper(stacktrace_test_LIBDEPS, '$BUILD_DIR/third_party/shim_pcrecpp') +insort_wrapper(stacktrace_test_LIBDEPS, 'pcre_wrapper') stacktraceEnv.CppUnitTest( target='stacktrace_test', diff --git a/src/mongo/util/pcre.cpp b/src/mongo/util/pcre.cpp index fe0f71fa640..00ced908410 100644 --- a/src/mongo/util/pcre.cpp +++ b/src/mongo/util/pcre.cpp @@ -123,10 +123,8 @@ namespace detail { class MatchDataImpl; -// Global. Value is historical carryover from pcre1 and pcrecpp. -// It's user-facing, so record and enforce its value even if the -// engine can now support longer patterns. -inline constexpr size_t kMaxPatternLength = 32761; +// Global. +inline constexpr size_t kMaxPatternLength = 16384; /** Wrapper around a pcre2_compile_context. */ class CompileContext { diff --git a/src/mongo/util/pcre.h b/src/mongo/util/pcre.h index ff9f705f4fc..5f5652918c9 100644 --- a/src/mongo/util/pcre.h +++ b/src/mongo/util/pcre.h @@ -488,24 +488,6 @@ public: MatchData matchView(StringData input) const; /** - * True if all of `input` matches. - * If possible, add '^' and '$' to the `Regex` pattern instead, as this - * optimizes better than match-supplied options. - * - * Legacy: prefer `Regex::matchView` with `ANCHOR|ENDANCHOR` options. - */ - bool fullMatch(StringData input) const; - - /** - * True if a substring of `input` matches. - * Note that PCRE2 documentation uses the term "partial match" to mean - * something very different. - * - * Legacy: prefer `Regex::matchView`. - */ - bool partialMatch(StringData input) const; - - /** * Replaces occurrences in `str` of this pattern with `replacement`. * Additional substitute `options` can change behavior. Important ones: * @@ -626,14 +608,6 @@ inline MatchData Regex::matchView(StringData input) const { return matchView(input, MatchOptions{}, 0); } -inline bool Regex::fullMatch(StringData input) const { - return !matchView(input, ANCHORED | ENDANCHORED).error(); -} - -inline bool Regex::partialMatch(StringData input) const { - return !matchView(input).error(); -} - } // namespace mongo::pcre namespace std { diff --git a/src/mongo/util/pcre_test.cpp b/src/mongo/util/pcre_test.cpp index 75e2a575d65..d6d0c4c6b84 100644 --- a/src/mongo/util/pcre_test.cpp +++ b/src/mongo/util/pcre_test.cpp @@ -170,24 +170,6 @@ TEST(PcreTest, StartPos) { ASSERT_EQ(hiRe.matchView(ohi, {}, i).startPos(), i) << " i="_format(i); } -TEST(PcreTest, FullMatch) { - Regex re{"hi"}; - ASSERT_FALSE(re.fullMatch("hello")); - ASSERT_TRUE(re.fullMatch("hi")); - ASSERT_FALSE(re.fullMatch("hii")); - ASSERT_FALSE(re.fullMatch("hhi")); -} - -TEST(PcreTest, PartialMatch) { - Regex re{"abc"}; - ASSERT_FALSE(re.partialMatch("")); - ASSERT_FALSE(re.partialMatch("a")); - ASSERT_FALSE(re.partialMatch("bc")); - ASSERT_TRUE(re.partialMatch("abc")); - ASSERT_TRUE(re.partialMatch("zabc")); - ASSERT_TRUE(re.partialMatch("abcz")); -} - TEST(PcreTest, CompileOptions) { std::string pattern = "a.b"; std::array subjects{"a\nb"s, "A_b"s, "A\nb"s}; @@ -203,7 +185,7 @@ TEST(PcreTest, CompileOptions) { }) { Regex re{pattern, opt}; for (size_t i = 0; i < subjects.size(); ++i) - ASSERT_EQ(re.fullMatch(subjects[i]), outMatch[i]) + ASSERT_EQ(!!re.matchView(subjects[i], pcre::ANCHORED | pcre::ENDANCHORED), outMatch[i]) << "opt={}, subject={}"_format(uint32_t(opt), subjects[i]); } } diff --git a/src/mongo/util/pcre_util.cpp b/src/mongo/util/pcre_util.cpp index 4f4dd3c63a8..bd5d51bf4b0 100644 --- a/src/mongo/util/pcre_util.cpp +++ b/src/mongo/util/pcre_util.cpp @@ -38,7 +38,7 @@ namespace mongo::pcre_util { using namespace fmt::literals; -pcre::CompileOptions parseOptions(StringData optionFlags, StringData opName) { +pcre::CompileOptions flagsToOptions(StringData optionFlags, StringData opName) { pcre::CompileOptions opt = pcre::UTF; for (char flag : optionFlags) { switch (flag) { @@ -48,21 +48,34 @@ pcre::CompileOptions parseOptions(StringData optionFlags, StringData opName) { case 'm': // newlines match ^ and $ opt |= pcre::MULTILINE; continue; - case 'x': // extended mode - opt |= pcre::EXTENDED; - continue; case 's': // allows dot to include newline chars opt |= pcre::DOTALL; continue; case 'u': continue; + case 'x': // extended mode + opt |= pcre::EXTENDED; + continue; default: - uasserted(6527600, "{} invalid flag in regex options: {}"_format(opName, flag)); + uasserted(51108, "{} invalid flag in regex options: {}"_format(opName, flag)); } } return opt; } +std::string optionsToFlags(pcre::CompileOptions opt) { + std::string optionFlags = ""; + if (opt & pcre::CASELESS) + optionFlags += 'i'; + if (opt & pcre::MULTILINE) + optionFlags += 'm'; + if (opt & pcre::DOTALL) + optionFlags += 's'; + if (opt & pcre::EXTENDED) + optionFlags += 'x'; + return optionFlags; +} + std::string quoteMeta(StringData str) { std::string result; for (char c : str) { diff --git a/src/mongo/util/pcre_util.h b/src/mongo/util/pcre_util.h index deb7129e9bc..1cdd7891890 100644 --- a/src/mongo/util/pcre_util.h +++ b/src/mongo/util/pcre_util.h @@ -49,7 +49,14 @@ namespace mongo::pcre_util { * 'u': UTF (redundant, but accepted) * 'x': EXTENDED */ -pcre::CompileOptions parseOptions(StringData optionFlags, StringData opName = ""); +pcre::CompileOptions flagsToOptions(StringData optionFlags, StringData opName = ""); + +/** + * Builds an std::string of flag characters from the input 'pcre::CompileOptions'. + * These flags are the same as those documented in flagsToOptions. They are returned in alphabetical + * order. Since 'u' is redundant, it will never be output by this function. + */ +std::string optionsToFlags(pcre::CompileOptions opt); /** * Escapes all potentially meaningful regex characters in the provided string. diff --git a/src/mongo/util/pcre_util_test.cpp b/src/mongo/util/pcre_util_test.cpp index 720ff5c31ad..66029835c54 100644 --- a/src/mongo/util/pcre_util_test.cpp +++ b/src/mongo/util/pcre_util_test.cpp @@ -42,9 +42,9 @@ namespace { using namespace fmt::literals; // Test compares `CompileOptions` as integers. -TEST(PcreUtilTest, ParseOptions) { +TEST(PcreUtilTest, FlagsToOptions) { using namespace pcre::options; - auto parse = [](StringData flags) { return static_cast<uint32_t>(parseOptions(flags)); }; + auto parse = [](StringData flags) { return static_cast<uint32_t>(flagsToOptions(flags)); }; auto expect = [](pcre::CompileOptions o) { return static_cast<uint32_t>(o); }; ASSERT_EQ(parse(""), expect(UTF)) << " UTF is on by default"; ASSERT_EQ(parse("i"), expect(UTF | CASELESS)); @@ -60,6 +60,25 @@ TEST(PcreUtilTest, ParseOptions) { ASSERT_THROWS_WITH_CHECK(parse("iz"), DBException, isBadFlagException); } +// Test compares `CompileOptions` as strings of option flags. +TEST(PcreUtilTest, OptionsToFlags) { + using namespace pcre::options; + auto parse = [](pcre::CompileOptions flags) { + return static_cast<std::string>(optionsToFlags(flags)); + }; + auto expect = [](std::string o) { return (o); }; + ASSERT_EQ(parse(UTF | CASELESS), expect("i")); + ASSERT_EQ(parse(UTF | MULTILINE), expect("m")); + ASSERT_EQ(parse(UTF | DOTALL), expect("s")); + ASSERT_EQ(parse(UTF), expect("")) << " UTF is on by default"; + ASSERT_EQ(parse(UTF | EXTENDED), expect("x")); + ASSERT_EQ(parse(UTF | CASELESS | MULTILINE | DOTALL | EXTENDED), expect("imsx")); + ASSERT_EQ(parse(UTF | CASELESS | MULTILINE | DOTALL), expect("ims")); + ASSERT_EQ(parse(UTF | CASELESS | MULTILINE | EXTENDED), expect("imx")); + ASSERT_EQ(parse(UTF | CASELESS | DOTALL | EXTENDED), expect("isx")); + ASSERT_EQ(parse(UTF | MULTILINE | DOTALL | EXTENDED), expect("msx")); +} + TEST(PcreUtilTest, QuoteMeta) { ASSERT_EQ(quoteMeta(""), ""); ASSERT_EQ(quoteMeta("abc_def_123"_sd), "abc_def_123"); diff --git a/src/mongo/util/processinfo_linux.cpp b/src/mongo/util/processinfo_linux.cpp index d5df28c2b49..a8e2f8f42ad 100644 --- a/src/mongo/util/processinfo_linux.cpp +++ b/src/mongo/util/processinfo_linux.cpp @@ -35,14 +35,15 @@ #include <fstream> #include <iostream> #include <malloc.h> -#include <pcrecpp.h> #include <sched.h> #include <stdio.h> +#include <string> #include <sys/mman.h> #include <sys/resource.h> #include <sys/time.h> #include <sys/utsname.h> #include <unistd.h> + #ifdef __BIONIC__ #include <android/api-level.h> #elif __UCLIBC__ @@ -55,12 +56,12 @@ #include <boost/none.hpp> #include <boost/optional.hpp> #include <fmt/format.h> -#include <pcrecpp.h> #include "mongo/base/parse_number.h" #include "mongo/logv2/log.h" #include "mongo/util/ctype.h" #include "mongo/util/file.h" +#include "mongo/util/pcre.h" #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kControl @@ -257,21 +258,34 @@ namespace { // (1)(2)(3:4)(5) (6) (7) (8) (9) (10) (11) struct MountRecord { bool parseLine(const std::string& line) { - static const pcrecpp::RE kRe{ - // (1) (2) (3) (4) (5) (6) (7) (8) (9) (10) (11) - R"re((\d+) (\d+) (\d+):(\d+) (\S+) (\S+) (\S+) ((?:\S+:\S+ ?)*) - (\S+) (\S+) (\S+))re"}; - return kRe.FullMatch(line, - &mountId, - &parentId, - &major, - &minor, - &root, - &mountPoint, - &options, - &fields, - &type, - &source, - &superOpt); + static const pcre::Regex kRe{ + // (1) (2) (3) (4) (5) (6) (7) (8) (9) (10) (11) + R"re(^(\d+) (\d+) (\d+):(\d+) (\S+) (\S+) (\S+) ((?:\S+:\S+ ?)*) - (\S+) (\S+) (\S+)$)re"}; + auto m = kRe.matchView(line); + if (!m) + return false; + size_t i = 1; + auto load = [&](auto& var) { + using T = std::decay_t<decltype(var)>; + std::string nextString{m[i++]}; + if constexpr (std::is_same_v<T, int>) { + var = std::stoi(nextString); + } else { + var = std::move(nextString); + } + }; + load(mountId); + load(parentId); + load(major); + load(minor); + load(root); + load(mountPoint); + load(options); + load(fields); + load(type); + load(source); + load(superOpt); + return true; } void appendBSON(BSONObjBuilder& bob) const { @@ -319,7 +333,9 @@ void appendMountInfo(BSONObjBuilder& bob) { class CpuInfoParser { public: struct LineProcessor { - pcrecpp::RE regex; + LineProcessor(std::string pattern, std::function<void(const std::string&)> f) + : regex{std::make_shared<pcre::Regex>(std::move(pattern))}, f{std::move(f)} {} + std::shared_ptr<pcre::Regex> regex; std::function<void(const std::string&)> f; }; std::vector<LineProcessor> lineProcessors; @@ -331,17 +347,18 @@ public: bool readSuccess; bool unprocessed = false; - static StaticImmortal<pcrecpp::RE> lineRegex(R"re((.*?)\s*:\s*(.*))re"); + static StaticImmortal<pcre::Regex> lineRegex(R"re(^(.*?)\s*:\s*(.*)$)re"); do { std::string fstr; readSuccess = f && std::getline(f, fstr); if (readSuccess && !fstr.empty()) { - std::string key; - std::string value; - if (!lineRegex->FullMatch(fstr, &key, &value)) + auto m = lineRegex->matchView(fstr); + if (!m) continue; + std::string key{m[1]}; + std::string value{m[2]}; for (auto&& [lpr, lpf] : lineProcessors) { - if (lpr.FullMatch(key)) + if (lpr->matchView(key, pcre::ANCHORED | pcre::ENDANCHORED)) lpf(value); } unprocessed = true; diff --git a/src/mongo/util/procparser.cpp b/src/mongo/util/procparser.cpp index bbe212325ad..67611e1e5ae 100644 --- a/src/mongo/util/procparser.cpp +++ b/src/mongo/util/procparser.cpp @@ -38,7 +38,6 @@ #include <boost/algorithm/string/split.hpp> #include <boost/filesystem.hpp> #include <fcntl.h> -#include <pcrecpp.h> #include <string> #include <sys/stat.h> #include <sys/types.h> @@ -51,6 +50,7 @@ #include "mongo/base/string_data.h" #include "mongo/bson/bsonobjbuilder.h" #include "mongo/logv2/log.h" +#include "mongo/util/pcre.h" #include "mongo/util/scopeguard.h" #include "mongo/util/str.h" #include "mongo/util/text.h" @@ -655,9 +655,9 @@ Status parseProcSelfMountStatsImpl( // 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue // | | | | | | | | | | // (1)(2)(3:4)(5) (6) (7) (8) (9) (10) (11) - static const pcrecpp::RE kRe(R"re(\d+ \d+ \d+:\d+ \S+ (\S+))re"); - std::string mountPoint; - if (kRe.PartialMatch(line, &mountPoint)) { + static const pcre::Regex kRe(R"re(\d+ \d+ \d+:\d+ \S+ (\S+))re"); + if (auto m = kRe.matchView(line)) { + std::string mountPoint{m[1]}; boost::filesystem::path p(mountPoint); boost::system::error_code ec; boost::filesystem::space_info spaceInfo = getSpace(p, ec); diff --git a/src/mongo/util/regex_util.cpp b/src/mongo/util/regex_util.cpp index 77f875c6e13..7d0242e3083 100644 --- a/src/mongo/util/regex_util.cpp +++ b/src/mongo/util/regex_util.cpp @@ -58,7 +58,7 @@ pcrecpp::RE_Options flagsToPcreOptions(StringData optionFlags, StringData opName // must accept this flag without an error as some drivers send it by default. continue; default: - uasserted(51108, + uasserted(6716200, str::stream() << opName << " invalid flag in regex options: " << flag); } } diff --git a/src/mongo/util/stacktrace_test.cpp b/src/mongo/util/stacktrace_test.cpp index f747f7a12bf..9d93f652257 100644 --- a/src/mongo/util/stacktrace_test.cpp +++ b/src/mongo/util/stacktrace_test.cpp @@ -38,13 +38,13 @@ #include <fmt/printf.h> #include <functional> #include <map> -#include <pcrecpp.h> #include <random> #include <signal.h> #include <sstream> #include <utility> #include <vector> +#include "mongo/base/parse_number.h" #include "mongo/bson/bsonobjbuilder.h" #include "mongo/bson/json.h" #include "mongo/config.h" @@ -55,6 +55,7 @@ #include "mongo/unittest/unittest.h" #include "mongo/util/debug_util.h" #include "mongo/util/hex.h" +#include "mongo/util/pcre.h" #include "mongo/util/stacktrace.h" /** `sigaltstack` was introduced in glibc-2.12 in 2010. */ @@ -153,6 +154,15 @@ uintptr_t fromHex(const std::string& s) { return static_cast<uintptr_t>(std::stoull(s, nullptr, 16)); } +bool consume(const pcre::Regex& re, StringData* in, std::string* out) { + auto m = re.matchView(*in); + if (!m) + return false; + *in = in->substr(m[0].size()); + *out = std::string{m[1]}; + return true; +} + // Break down a printStackTrace output for a contrived call tree and sanity-check it. TEST(StackTrace, PosixFormat) { if (kIsWindows) { @@ -174,18 +184,18 @@ TEST(StackTrace, PosixFormat) { // Each "Frame:" line holds a full json object, but we only examine its "a" field here. std::string jsonLine; std::vector<uintptr_t> humanAddrs; - pcrecpp::StringPiece in{trace}; - static const pcrecpp::RE jsonLineRE(R"re(BACKTRACE: (\{.*\})\n?)re"); - ASSERT_TRUE(jsonLineRE.Consume(&in, &jsonLine)) << "\"" << in.as_string() << "\""; + StringData in{trace}; + static const pcre::Regex jsonLineRE(R"re(^BACKTRACE: (\{.*\})\n?)re"); + ASSERT_TRUE(consume(jsonLineRE, &in, &jsonLine)) << "\"" << in << "\""; while (true) { std::string frameLine; - static const pcrecpp::RE frameRE(R"re( Frame: (\{.*\})\n?)re"); - if (!frameRE.Consume(&in, &frameLine)) + static const pcre::Regex frameRE(R"re(^ Frame: (\{.*\})\n?)re"); + if (!consume(frameRE, &in, &frameLine)) break; BSONObj frameObj = fromjson(frameLine); // throwy humanAddrs.push_back(fromHex(frameObj["a"].String())); } - ASSERT_TRUE(in.empty()) << "must be consumed fully: \"" << in.as_string() << "\""; + ASSERT_TRUE(in.empty()) << "must be consumed fully: \"" << in << "\""; BSONObj jsonObj = fromjson(jsonLine); // throwy ASSERT_TRUE(jsonObj.hasField("backtrace")); @@ -255,14 +265,18 @@ TEST(StackTrace, WindowsFormat) { std::vector<std::string> lines = splitLines(trace); - std::string jsonLine; - ASSERT_TRUE(pcrecpp::RE(R"re(BACKTRACE: (\{.*\}))re").FullMatch(lines[0], &jsonLine)); + auto re = pcre::Regex(R"re(^BACKTRACE: (\{.*\})$)re"); + auto m = re.matchView(lines[0]); + ASSERT_TRUE(!!m); + std::string jsonLine{m[1]}; std::vector<uintptr_t> humanAddrs; for (size_t i = 1; i < lines.size(); ++i) { - static const pcrecpp::RE re(R"re( Frame: (?:\{"a":"(.*?)",.*\}))re"); + static const pcre::Regex re(R"re(^ Frame: (?:\{"a":"(.*?)",.*\})$)re"); uintptr_t addr; - ASSERT_TRUE(re.FullMatch(lines[i], pcrecpp::Hex(&addr))) << lines[i]; + auto m = re.matchView(lines[i]); + ASSERT_TRUE(!!m) << lines[i]; + ASSERT_OK(NumberParser{}.base(16)(m[1], &addr)); humanAddrs.push_back(addr); } |