summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Jennifer Peshansky <jennifer.peshansky@mongodb.com> 2022-06-27 13:30:46 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-06-27 14:52:18 +0000
commit 468f41278b6b30aa602e81010cf7ef7973d97e4d (patch)
tree 82c1c168aa27fce91b4e39759ced055401f05de2 /src
parent 16924398f1f7ebc78b94a42371f156de4a3b10ae (diff)
download mongo-468f41278b6b30aa602e81010cf7ef7973d97e4d.tar.gz
SERVER-67162 Integrate new PCRE2 wrapper
Diffstat (limited to 'src')
-rw-r--r-- src/mongo/SConscript 1
-rw-r--r-- src/mongo/db/catalog/SConscript 1
-rw-r--r-- src/mongo/db/catalog/database_test.cpp 9
-rw-r--r-- src/mongo/db/exec/sbe/SConscript 2
-rw-r--r-- src/mongo/db/exec/sbe/expressions/sbe_regex_test.cpp 4
-rw-r--r-- src/mongo/db/exec/sbe/values/slot.cpp 2
-rw-r--r-- src/mongo/db/exec/sbe/values/value.cpp 47
-rw-r--r-- src/mongo/db/exec/sbe/values/value.h 70
-rw-r--r-- src/mongo/db/exec/sbe/values/value_printer.cpp 4
-rw-r--r-- src/mongo/db/exec/sbe/vm/vm.cpp 198
-rw-r--r-- src/mongo/db/matcher/SConscript 3
-rw-r--r-- src/mongo/db/matcher/doc_validation_error.cpp 6
-rw-r--r-- src/mongo/db/matcher/expression_leaf.cpp 25
-rw-r--r-- src/mongo/db/matcher/expression_leaf.h 9
-rw-r--r-- src/mongo/db/matcher/expression_parser.cpp 6
-rw-r--r-- src/mongo/db/matcher/expression_with_placeholder.cpp 7
-rw-r--r-- src/mongo/db/matcher/schema/expression_internal_schema_allowed_properties.cpp 9
-rw-r--r-- src/mongo/db/matcher/schema/expression_internal_schema_allowed_properties.h 8
-rw-r--r-- src/mongo/db/matcher/schema/json_schema_parser.cpp 3
-rw-r--r-- src/mongo/db/pipeline/SConscript 2
-rw-r--r-- src/mongo/db/pipeline/expression.cpp 149
-rw-r--r-- src/mongo/db/pipeline/expression.h 12
-rw-r--r-- src/mongo/db/query/SConscript 2
-rw-r--r-- src/mongo/db/query/plan_cache_size_parameter.cpp 15
-rw-r--r-- src/mongo/db/s/SConscript 1
-rw-r--r-- src/mongo/db/s/balancer/balancer.cpp 11
-rw-r--r-- src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp 11
-rw-r--r-- src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp 1
-rw-r--r-- src/mongo/rpc/SConscript 2
-rw-r--r-- src/mongo/rpc/rewrite_state_change_errors.cpp 17
-rw-r--r-- src/mongo/s/catalog/SConscript 1
-rw-r--r-- src/mongo/s/catalog/sharding_catalog_client_impl.cpp 10
-rw-r--r-- src/mongo/s/catalog/sharding_catalog_client_test.cpp 2
-rw-r--r-- src/mongo/shell/SConscript 6
-rw-r--r-- src/mongo/shell/bench.cpp 62
-rw-r--r-- src/mongo/shell/bench.h 13
-rw-r--r-- src/mongo/shell/mongo_main.cpp 12
-rw-r--r-- src/mongo/unittest/SConscript 3
-rw-r--r-- src/mongo/unittest/death_test.cpp 4
-rw-r--r-- src/mongo/unittest/golden_test.cpp 6
-rw-r--r-- src/mongo/unittest/matcher.cpp 12
-rw-r--r-- src/mongo/unittest/unittest.cpp 12
-rw-r--r-- src/mongo/unittest/unittest.h 1
-rw-r--r-- src/mongo/util/SConscript 17
-rw-r--r-- src/mongo/util/pcre.cpp 6
-rw-r--r-- src/mongo/util/pcre.h 26
-rw-r--r-- src/mongo/util/pcre_test.cpp 20
-rw-r--r-- src/mongo/util/pcre_util.cpp 23
-rw-r--r-- src/mongo/util/pcre_util.h 9
-rw-r--r-- src/mongo/util/pcre_util_test.cpp 23
-rw-r--r-- src/mongo/util/processinfo_linux.cpp 63
-rw-r--r-- src/mongo/util/procparser.cpp 8
-rw-r--r-- src/mongo/util/regex_util.cpp 2
-rw-r--r-- src/mongo/util/stacktrace_test.cpp 36
54 files changed, 367 insertions, 647 deletions
diff --git a/src/mongo/SConscript b/src/mongo/SConscript
index 6cb7d85b326..9ef161c8bd6 100644
--- a/src/mongo/SConscript
+++ b/src/mongo/SConscript
@@ -233,7 +233,6 @@ baseEnv.Library(
],
LIBDEPS_PRIVATE=[
'$BUILD_DIR/third_party/shim_intel_decimal128',
- '$BUILD_DIR/third_party/shim_pcrecpp',
'$BUILD_DIR/third_party/shim_unwind' if use_libunwind else [],
'stdx/stdx',
'util/boost_assert_shim',
diff --git a/src/mongo/db/catalog/SConscript b/src/mongo/db/catalog/SConscript
index d16f0c9a8a8..53461bbf1e9 100644
--- a/src/mongo/db/catalog/SConscript
+++ b/src/mongo/db/catalog/SConscript
@@ -665,6 +665,7 @@ if wiredtiger:
'$BUILD_DIR/mongo/unittest/unittest',
'$BUILD_DIR/mongo/util/clock_source_mock',
'$BUILD_DIR/mongo/util/fail_point',
+ '$BUILD_DIR/mongo/util/pcre_wrapper',
'catalog_control',
'catalog_helpers',
'catalog_test_fixture',
diff --git a/src/mongo/db/catalog/database_test.cpp b/src/mongo/db/catalog/database_test.cpp
index 8346d0f1a29..1df4bf7fa1b 100644
--- a/src/mongo/db/catalog/database_test.cpp
+++ b/src/mongo/db/catalog/database_test.cpp
@@ -31,7 +31,6 @@
#include <boost/optional/optional_io.hpp>
#include <memory>
-#include <pcrecpp.h>
#include "mongo/bson/util/builder.h"
#include "mongo/db/catalog/collection_catalog.h"
@@ -57,6 +56,7 @@
#include "mongo/db/repl/storage_interface_mock.h"
#include "mongo/db/service_context_d_test_fixture.h"
#include "mongo/unittest/unittest.h"
+#include "mongo/util/pcre.h"
#include "mongo/util/scopeguard.h"
namespace mongo {
@@ -350,10 +350,11 @@ TEST_F(DatabaseTest, MakeUniqueCollectionNamespaceReplacesPercentSignsWithRandom
ASSERT_TRUE(db);
auto model = "tmp%%%%"_sd;
- pcrecpp::RE re(_nss.db() + "\\.tmp[0-9A-Za-z][0-9A-Za-z][0-9A-Za-z][0-9A-Za-z]");
+ pcre::Regex re(_nss.db() + "\\.tmp[0-9A-Za-z][0-9A-Za-z][0-9A-Za-z][0-9A-Za-z]",
+ pcre::ANCHORED | pcre::ENDANCHORED);
auto nss1 = unittest::assertGet(db->makeUniqueCollectionNamespace(_opCtx.get(), model));
- if (!re.FullMatch(nss1.ns())) {
+ if (!re.matchView(nss1.ns())) {
FAIL((StringBuilder() << "First generated namespace \"" << nss1.ns()
<< "\" does not match regular expression \"" << re.pattern()
<< "\"")
@@ -370,7 +371,7 @@ TEST_F(DatabaseTest, MakeUniqueCollectionNamespaceReplacesPercentSignsWithRandom
}
auto nss2 = unittest::assertGet(db->makeUniqueCollectionNamespace(_opCtx.get(), model));
- if (!re.FullMatch(nss2.ns())) {
+ if (!re.matchView(nss2.ns())) {
FAIL((StringBuilder() << "Second generated namespace \"" << nss2.ns()
<< "\" does not match regular expression \"" << re.pattern()
<< "\"")
diff --git a/src/mongo/db/exec/sbe/SConscript b/src/mongo/db/exec/sbe/SConscript
index 6ee97450f2b..485ab55fa82 100644
--- a/src/mongo/db/exec/sbe/SConscript
+++ b/src/mongo/db/exec/sbe/SConscript
@@ -28,7 +28,7 @@ env.Library(
'$BUILD_DIR/mongo/db/query/datetime/date_time_support',
'$BUILD_DIR/mongo/db/query/query_index_bounds',
'$BUILD_DIR/mongo/db/storage/key_string',
- '$BUILD_DIR/mongo/util/regex_util',
+ '$BUILD_DIR/mongo/util/pcre_util',
],
)
diff --git a/src/mongo/db/exec/sbe/expressions/sbe_regex_test.cpp b/src/mongo/db/exec/sbe/expressions/sbe_regex_test.cpp
index 8486a7c65f8..e81dcd5627e 100644
--- a/src/mongo/db/exec/sbe/expressions/sbe_regex_test.cpp
+++ b/src/mongo/db/exec/sbe/expressions/sbe_regex_test.cpp
@@ -28,6 +28,7 @@
*/
#include "mongo/db/exec/sbe/expression_test_base.h"
+#include "mongo/util/pcre_util.h"
namespace mongo::sbe {
class SBERegexTest : public EExpressionTestFixture {
@@ -39,7 +40,8 @@ protected:
ASSERT_EQUALS(value::TypeTags::pcreRegex, tag);
auto regex = value::getPcreRegexView(val);
- std::string res = str::stream() << "/" << regex->pattern() << "/" << regex->options();
+ std::string res = str::stream()
+ << "/" << regex->pattern() << "/" << pcre_util::optionsToFlags(regex->options());
ASSERT_EQUALS(res, regexString);
}
diff --git a/src/mongo/db/exec/sbe/values/slot.cpp b/src/mongo/db/exec/sbe/values/slot.cpp
index 45cbc977980..0274df1b6e7 100644
--- a/src/mongo/db/exec/sbe/values/slot.cpp
+++ b/src/mongo/db/exec/sbe/values/slot.cpp
@@ -683,7 +683,7 @@ int getApproximateSize(TypeTags tag, Value val) {
result += ConstDataView(getRawPointerView(val)).read<LittleEndian<uint32_t>>();
break;
case TypeTags::pcreRegex:
- result += getPcreRegexView(val)->getApproximateSize();
+ result += getPcreRegexView(val)->codeSize();
break;
case TypeTags::timeZoneDB:
// This type points to a block of memory that it doesn't own, so we don't acccount
diff --git a/src/mongo/db/exec/sbe/values/value.cpp b/src/mongo/db/exec/sbe/values/value.cpp
index 5bbdc40170e..bff73fc7046 100644
--- a/src/mongo/db/exec/sbe/values/value.cpp
+++ b/src/mongo/db/exec/sbe/values/value.cpp
@@ -40,7 +40,8 @@
#include "mongo/db/query/collation/collator_interface.h"
#include "mongo/db/query/datetime/date_time_support.h"
#include "mongo/db/storage/key_string.h"
-#include "mongo/util/regex_util.h"
+#include "mongo/util/errno_util.h"
+#include "mongo/util/pcre_util.h"
namespace mongo {
namespace sbe {
@@ -134,45 +135,15 @@ std::pair<TypeTags, Value> makeCopyKeyString(const KeyString::Value& inKey) {
}
std::pair<TypeTags, Value> makeNewPcreRegex(StringData pattern, StringData options) {
- auto regex = std::make_unique<PcreRegex>(pattern, options);
- return {TypeTags::pcreRegex, bitcastFrom<PcreRegex*>(regex.release())};
+ auto regex =
+ std::make_unique<pcre::Regex>(std::string{pattern}, pcre_util::flagsToOptions(options));
+ uassert(5073402, str::stream() << "Invalid Regex: " << errorMessage(regex->error()), *regex);
+ return {TypeTags::pcreRegex, bitcastFrom<pcre::Regex*>(regex.release())};
}
-std::pair<TypeTags, Value> makeCopyPcreRegex(const PcreRegex& regex) {
- auto regexCopy = std::make_unique<PcreRegex>(regex);
- return {TypeTags::pcreRegex, bitcastFrom<PcreRegex*>(regexCopy.release())};
-}
-
-void PcreRegex::_compile() {
- const auto pcreOptions = regex_util::flagsToPcreOptions(_options.c_str()).all_options();
- const char* compile_error;
- int eoffset;
- _pcrePtr = pcre_compile(_pattern.c_str(), pcreOptions, &compile_error, &eoffset, nullptr);
- uassert(5073402, str::stream() << "Invalid Regex: " << compile_error, _pcrePtr != nullptr);
-}
-
-int PcreRegex::execute(StringData stringView, int startPos, std::vector<int>& buf) {
- return pcre_exec(_pcrePtr,
- nullptr,
- stringView.rawData(),
- stringView.size(),
- startPos,
- 0,
- &(buf.front()),
- buf.size());
-}
-
-size_t PcreRegex::getNumberCaptures() const {
- int numCaptures;
- pcre_fullinfo(_pcrePtr, nullptr, PCRE_INFO_CAPTURECOUNT, &numCaptures);
- invariant(numCaptures >= 0);
- return static_cast<size_t>(numCaptures);
-}
-
-size_t PcreRegex::getApproximateSize() const {
- size_t pcreSize;
- pcre_fullinfo(_pcrePtr, nullptr, PCRE_INFO_SIZE, &pcreSize);
- return sizeof(PcreRegex) + _pattern.size() + 1 + _options.size() + 1 + pcreSize;
+std::pair<TypeTags, Value> makeCopyPcreRegex(const pcre::Regex& regex) {
+ auto regexCopy = std::make_unique<pcre::Regex>(regex);
+ return {TypeTags::pcreRegex, bitcastFrom<pcre::Regex*>(regexCopy.release())};
}
KeyString::Value SortSpec::generateSortKey(const BSONObj& obj, const CollatorInterface* collator) {
diff --git a/src/mongo/db/exec/sbe/values/value.h b/src/mongo/db/exec/sbe/values/value.h
index d0202b0f1c0..6ec0652cdf5 100644
--- a/src/mongo/db/exec/sbe/values/value.h
+++ b/src/mongo/db/exec/sbe/values/value.h
@@ -36,7 +36,6 @@
#include <boost/predef/hardware/simd.h>
#include <cstdint>
#include <ostream>
-#include <pcre.h>
#include <string>
#include <utility>
#include <vector>
@@ -52,6 +51,7 @@
#include "mongo/platform/decimal128.h"
#include "mongo/platform/endian.h"
#include "mongo/util/assert_util.h"
+#include "mongo/util/pcre.h"
#include "mongo/util/represent_as.h"
namespace mongo {
@@ -874,68 +874,6 @@ private:
bool operator==(const ArraySet& lhs, const ArraySet& rhs);
bool operator!=(const ArraySet& lhs, const ArraySet& rhs);
-/**
- * Implements a wrapper of PCRE regular expression.
- * Storing the pattern and the options allows for copying of the sbe::value::PcreRegex expression,
- * which includes recompilation.
- * The compiled expression pcre* allows for direct usage of the pcre C library functionality.
- */
-class PcreRegex {
-public:
- PcreRegex(StringData pattern, StringData options) : _pattern(pattern), _options(options) {
- _compile();
- }
-
- PcreRegex(const PcreRegex& other) : PcreRegex(other._pattern, other._options) {}
-
- PcreRegex& operator=(const PcreRegex& other) {
- if (this != &other) {
- (*pcre_free)(_pcrePtr);
- _pattern = other._pattern;
- _options = other._options;
- _compile();
- }
- return *this;
- }
-
- ~PcreRegex() {
- (*pcre_free)(_pcrePtr);
- }
-
- const std::string& pattern() const {
- return _pattern;
- }
-
- const std::string& options() const {
- return _options;
- }
-
- /**
- * Wrapper function for pcre_exec().
- * - input: The input string.
- * - startPos: The position from where the search should start.
- * - buf: Array populated with the found matched string and capture groups.
- * Returns the number of matches or an error code:
- * < -1 error
- * = -1 no match
- * = 0 there was a match, but not enough space in the buffer
- * > 0 the number of matches
- */
- int execute(StringData input, int startPos, std::vector<int>& buf);
-
- size_t getNumberCaptures() const;
-
- size_t getApproximateSize() const;
-
-private:
- void _compile();
-
- std::string _pattern;
- std::string _options;
-
- pcre* _pcrePtr = nullptr;
-};
-
constexpr size_t kSmallStringMaxLength = 7;
using ObjectIdType = std::array<uint8_t, 12>;
static_assert(sizeof(ObjectIdType) == 12);
@@ -1218,10 +1156,10 @@ inline KeyString::Value* getKeyStringView(Value val) noexcept {
std::pair<TypeTags, Value> makeNewPcreRegex(StringData pattern, StringData options);
-std::pair<TypeTags, Value> makeCopyPcreRegex(const PcreRegex& regex);
+std::pair<TypeTags, Value> makeCopyPcreRegex(const pcre::Regex& regex);
-inline PcreRegex* getPcreRegexView(Value val) noexcept {
- return reinterpret_cast<PcreRegex*>(val);
+inline pcre::Regex* getPcreRegexView(Value val) noexcept {
+ return reinterpret_cast<pcre::Regex*>(val);
}
inline JsFunction* getJsFunctionView(Value val) noexcept {
diff --git a/src/mongo/db/exec/sbe/values/value_printer.cpp b/src/mongo/db/exec/sbe/values/value_printer.cpp
index 90a43442329..a04d1407930 100644
--- a/src/mongo/db/exec/sbe/values/value_printer.cpp
+++ b/src/mongo/db/exec/sbe/values/value_printer.cpp
@@ -30,6 +30,7 @@
#include "mongo/db/exec/sbe/values/sort_spec.h"
#include "mongo/db/exec/sbe/values/value.h"
#include "mongo/platform/basic.h"
+#include "mongo/util/pcre_util.h"
namespace mongo::sbe::value {
@@ -405,7 +406,8 @@ void ValuePrinter<T>::writeValueToStream(TypeTags tag, Value val, size_t depth)
}
case TypeTags::pcreRegex: {
auto regex = getPcreRegexView(val);
- stream << "PcreRegex(/" << regex->pattern() << "/" << regex->options() << ")";
+ stream << "PcreRegex(/" << regex->pattern() << "/"
+ << pcre_util::optionsToFlags(regex->options()) << ")";
break;
}
case TypeTags::timeZoneDB: {
diff --git a/src/mongo/db/exec/sbe/vm/vm.cpp b/src/mongo/db/exec/sbe/vm/vm.cpp
index 4f9329e7ed6..6b207f89d85 100644
--- a/src/mongo/db/exec/sbe/vm/vm.cpp
+++ b/src/mongo/db/exec/sbe/vm/vm.cpp
@@ -33,7 +33,6 @@
#include "mongo/db/exec/sbe/vm/vm.h"
#include <boost/algorithm/string.hpp>
-#include <pcre.h>
#include "mongo/bson/oid.h"
#include "mongo/db/client.h"
@@ -52,6 +51,7 @@
#include "mongo/db/storage/key_string.h"
#include "mongo/logv2/log.h"
#include "mongo/util/fail_point.h"
+#include "mongo/util/pcre.h"
#include "mongo/util/str.h"
#include "mongo/util/summation.h"
@@ -1160,7 +1160,8 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::getArraySize(value::Ty
}
break;
}
- default: { return {false, value::TypeTags::Nothing, 0}; }
+ default:
+ return {false, value::TypeTags::Nothing, 0};
}
return {false, value::TypeTags::NumberInt64, value::bitcastFrom<int64_t>(result)};
@@ -3670,81 +3671,57 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinSetEquals(Arity
namespace {
/**
- * A helper function to create the result object {"match" : .., "idx" : ..., "captures" :
- * ...} from the result of pcre_exec().
+ * A helper function to extract the next match in the subject string using the compiled regex
+ * pattern.
+ * - pcre: The wrapper object containing the compiled pcre expression
+ * - inputString: The subject string.
+ * - startBytePos: The position from where the search should start given in bytes.
+ * - codePointPos: The same position in terms of code points.
+ * - isMatch: Boolean flag to mark if the caller function is $regexMatch, in which case the result
+ * returned is true/false.
*/
-std::tuple<bool, value::TypeTags, value::Value> buildRegexMatchResultObject(
- StringData inputString,
- const std::vector<int>& capturesBuffer,
- size_t numCaptures,
- uint32_t& startBytePos,
- uint32_t& codePointPos) {
-
- auto verifyBounds = [&inputString](auto startPos, auto limitPos, auto isCapture) {
- // If a capture group was not matched, then the 'startPos' and 'limitPos' will both be -1.
- // These bounds cannot occur for a match on the full string.
- if (startPos == -1 && limitPos == -1 && isCapture) {
- return true;
- }
- if (startPos == -1 || limitPos == -1) {
- LOGV2_ERROR(5073412,
- "Unexpected error occurred while executing regexFind.",
- "startPos"_attr = startPos,
- "limitPos"_attr = limitPos);
- return false;
- }
- if (startPos < 0 || static_cast<size_t>(startPos) > inputString.size() || limitPos < 0 ||
- static_cast<size_t>(limitPos) > inputString.size() || startPos > limitPos) {
- LOGV2_ERROR(5073413,
- "Unexpected error occurred while executing regexFind.",
- "startPos"_attr = startPos,
- "limitPos"_attr = limitPos);
- return false;
- }
- return true;
- };
-
- // Extract the matched string: its start and (end+1) indices are in the first two elements of
- // capturesBuffer.
- if (!verifyBounds(capturesBuffer[0], capturesBuffer[1], false)) {
+std::tuple<bool, value::TypeTags, value::Value> pcreNextMatch(pcre::Regex* pcre,
+ StringData inputString,
+ uint32_t& startBytePos,
+ uint32_t& codePointPos,
+ bool isMatch) {
+ pcre::MatchData m = pcre->matchView(inputString, {}, startBytePos);
+ if (!m && m.error() != pcre::Errc::ERROR_NOMATCH) {
+ LOGV2_ERROR(5073414,
+ "Error occurred while executing regular expression.",
+ "execResult"_attr = errorMessage(m.error()));
return {false, value::TypeTags::Nothing, 0};
}
- auto matchStartIdx = capturesBuffer[0];
- auto matchedString = inputString.substr(matchStartIdx, capturesBuffer[1] - matchStartIdx);
- auto [matchedTag, matchedVal] = value::makeNewString(matchedString);
- value::ValueGuard matchedGuard{matchedTag, matchedVal};
- // We iterate through the input string's contents preceding the match index, in order to convert
- // the byte offset to a code point offset.
- for (auto byteIdx = startBytePos; byteIdx < static_cast<uint32_t>(matchStartIdx);
- ++codePointPos) {
- byteIdx += str::getCodePointLength(inputString[byteIdx]);
+ if (isMatch) {
+ // $regexMatch returns true or false.
+ return {false, value::TypeTags::Boolean, value::bitcastFrom<bool>(!!m)};
+ }
+ // $regexFind and $regexFindAll build result object or return null.
+ if (!m) {
+ return {false, value::TypeTags::Null, 0};
}
- startBytePos = matchStartIdx;
+
+ // Create the result object {"match" : .., "idx" : ..., "captures" : ...}
+ // from the pcre::MatchData.
+ auto [matchedTag, matchedVal] = value::makeNewString(m[0]);
+ value::ValueGuard matchedGuard{matchedTag, matchedVal};
+
+ StringData precedesMatch(m.input().begin() + m.startPos(), m[0].begin());
+ codePointPos += str::lengthInUTF8CodePoints(precedesMatch);
+ startBytePos += precedesMatch.size();
auto [arrTag, arrVal] = value::makeNewArray();
value::ValueGuard arrGuard{arrTag, arrVal};
auto arrayView = value::getArrayView(arrVal);
- // The next '2 * numCaptures' entries (after the first two entries) of 'capturesBuffer'
- // hold the (start, limit) pairs of indexes, for each of the capture groups. We skip the first
- // two elements and start iteration from 3rd element so that we only construct the strings for
- // capture groups.
- if (numCaptures) {
- arrayView->reserve(numCaptures);
- for (size_t i = 0; i < numCaptures; ++i) {
- const auto start = capturesBuffer[2 * (i + 1)];
- const auto limit = capturesBuffer[2 * (i + 1) + 1];
- if (!verifyBounds(start, limit, true)) {
- return {false, value::TypeTags::Nothing, 0};
- }
-
- if (start == -1 && limit == -1) {
- arrayView->push_back(value::TypeTags::Null, 0);
- } else {
- auto captureString = inputString.substr(start, limit - start);
- auto [tag, val] = value::makeNewString(captureString);
- arrayView->push_back(tag, val);
- }
+ arrayView->reserve(m.captureCount());
+ for (size_t i = 0; i < m.captureCount(); ++i) {
+ StringData cap = m[i + 1];
+ if (!cap.rawData()) {
+ arrayView->push_back(value::TypeTags::Null, 0);
+ } else {
+ auto [tag, val] = value::makeNewString(cap);
+ arrayView->push_back(tag, val);
}
}
@@ -3763,75 +3740,6 @@ std::tuple<bool, value::TypeTags, value::Value> buildRegexMatchResultObject(
}
/**
- * A helper function to extract the next match in the subject string using the compiled regex
- * pattern.
- * - pcre: The wrapper object containing the compiled pcre expression
- * - inputString: The subject string.
- * - capturesBuffer: Array to be populated with the found matched string and capture groups.
- * - startBytePos: The position from where the search should start given in bytes.
- * - codePointPos: The same position in terms of code points.
- * - isMatch: Boolean flag to mark if the caller function is $regexMatch, in which case the result
- * returned is true/false.
- */
-std::tuple<bool, value::TypeTags, value::Value> pcreNextMatch(value::PcreRegex* pcre,
- StringData inputString,
- std::vector<int>& capturesBuffer,
- uint32_t& startBytePos,
- uint32_t& codePointPos,
- bool isMatch = false) {
- auto execResult = pcre->execute(inputString, startBytePos, capturesBuffer);
-
- auto numCaptures = pcre->getNumberCaptures();
- if (execResult < -1 || execResult > static_cast<int>(numCaptures) + 1) {
- LOGV2_ERROR(5073414,
- "Error occurred while executing regular expression.",
- "execResult"_attr = execResult);
- return {false, value::TypeTags::Nothing, 0};
- }
-
- if (isMatch) {
- // $regexMatch returns true or false.
- bool match = (execResult != PCRE_ERROR_NOMATCH);
- return {false, value::TypeTags::Boolean, value::bitcastFrom<bool>(match)};
- } else {
- // $regexFind and $regexFindAll build result object or return null.
- if (execResult == PCRE_ERROR_NOMATCH) {
- return {false, value::TypeTags::Null, 0};
- }
- return buildRegexMatchResultObject(
- inputString, capturesBuffer, numCaptures, startBytePos, codePointPos);
- }
-}
-
-/**
- * A helper function to extract the first match in the subject string using the compiled regex
- * pattern. See 'pcreNextMatch' function for parameters description.
- */
-std::tuple<bool, value::TypeTags, value::Value> pcreFirstMatch(
- value::PcreRegex* pcre,
- StringData inputString,
- bool isMatch = false,
- std::vector<int>* capturesBuffer = nullptr,
- uint32_t* startBytePos = nullptr,
- uint32_t* codePointPos = nullptr) {
- std::vector<int> tmpCapturesBuffer;
- uint32_t tmpStartBytePos = 0;
- uint32_t tmpCodePointPos = 0;
-
- capturesBuffer = capturesBuffer ? capturesBuffer : &tmpCapturesBuffer;
- startBytePos = startBytePos ? startBytePos : &tmpStartBytePos;
- codePointPos = codePointPos ? codePointPos : &tmpCodePointPos;
-
- // The first two-thirds of the capturesBuffer is used to pass back captured substrings' start
- // and (end+1) indexes. The remaining third of the vector is used as workspace by pcre_exec()
- // while matching capturing subpatterns, and is not available for passing back information.
- auto numCaptures = pcre->getNumberCaptures();
- capturesBuffer->resize((1 + numCaptures) * 3);
-
- return pcreNextMatch(pcre, inputString, *capturesBuffer, *startBytePos, *codePointPos, isMatch);
-}
-
-/**
* A helper function with common logic for $regexMatch and $regexFind functions. Both extract only
* the first match to a regular expression, but return different result objects.
*/
@@ -3848,7 +3756,9 @@ std::tuple<bool, value::TypeTags, value::Value> genericPcreRegexSingleMatch(
auto inputString = value::getStringOrSymbolView(typeTagInputStr, valueInputStr);
auto pcreRegex = value::getPcreRegexView(valuePcreRegex);
- return pcreFirstMatch(pcreRegex, inputString, isMatch);
+ uint32_t startBytePos = 0;
+ uint32_t codePointPos = 0;
+ return pcreNextMatch(pcreRegex, inputString, startBytePos, codePointPos, isMatch);
}
std::pair<value::TypeTags, value::Value> collComparisonKey(value::TypeTags tag,
@@ -3934,10 +3844,8 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinRegexFindAll(Ar
auto inputString = value::getStringView(typeTagInputStr, valueInputStr);
auto pcre = value::getPcreRegexView(valuePcreRegex);
- std::vector<int> capturesBuffer;
uint32_t startBytePos = 0;
uint32_t codePointPos = 0;
- bool isFirstMatch = true;
// Prepare the result array of matching objects.
auto [arrTag, arrVal] = value::makeNewArray();
@@ -3946,14 +3854,8 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinRegexFindAll(Ar
int resultSize = 0;
do {
- auto [_, matchTag, matchVal] = [&]() {
- if (isFirstMatch) {
- isFirstMatch = false;
- return pcreFirstMatch(
- pcre, inputString, false, &capturesBuffer, &startBytePos, &codePointPos);
- }
- return pcreNextMatch(pcre, inputString, capturesBuffer, startBytePos, codePointPos);
- }();
+ auto [_, matchTag, matchVal] =
+ pcreNextMatch(pcre, inputString, startBytePos, codePointPos, false);
value::ValueGuard matchGuard{matchTag, matchVal};
if (matchTag == value::TypeTags::Null) {
diff --git a/src/mongo/db/matcher/SConscript b/src/mongo/db/matcher/SConscript
index a53845a04f4..115a323091f 100644
--- a/src/mongo/db/matcher/SConscript
+++ b/src/mongo/db/matcher/SConscript
@@ -75,8 +75,7 @@ env.Library(
'$BUILD_DIR/mongo/db/query/query_knobs',
'$BUILD_DIR/mongo/db/stats/counters',
'$BUILD_DIR/mongo/idl/idl_parser',
- '$BUILD_DIR/mongo/util/regex_util',
- '$BUILD_DIR/third_party/shim_pcrecpp',
+ '$BUILD_DIR/mongo/util/pcre_wrapper',
'path',
],
)
diff --git a/src/mongo/db/matcher/doc_validation_error.cpp b/src/mongo/db/matcher/doc_validation_error.cpp
index 3810b190132..85b4943f538 100644
--- a/src/mongo/db/matcher/doc_validation_error.cpp
+++ b/src/mongo/db/matcher/doc_validation_error.cpp
@@ -530,7 +530,8 @@ BSONArray findAdditionalProperties(const BSONObj& doc,
if (!properties.contains(fieldName)) {
bool additional = true;
for (auto&& pattern : patternProperties) {
- if (pattern.first.regex->PartialMatch(fieldName.toString())) {
+ auto&& re = pattern.first.regex;
+ if (re && re->matchView(fieldName)) {
additional = false;
break;
}
@@ -583,7 +584,8 @@ BSONElement findFailingProperty(const InternalSchemaAllowedPropertiesMatchExpres
auto filter = patternSchema.second->getFilter();
for (auto&& elem : ctx->getCurrentDocument()) {
auto field = elem.fieldNameStringData();
- if (pattern.regex->PartialMatch(field.toString()) && !filter->matchesBSONElement(elem)) {
+ auto&& re = pattern.regex;
+ if (re && *re && re->matchView(field) && !filter->matchesBSONElement(elem)) {
return elem;
}
}
diff --git a/src/mongo/db/matcher/expression_leaf.cpp b/src/mongo/db/matcher/expression_leaf.cpp
index 31157666e92..b5a6c0b1d8e 100644
--- a/src/mongo/db/matcher/expression_leaf.cpp
+++ b/src/mongo/db/matcher/expression_leaf.cpp
@@ -33,7 +33,6 @@
#include <cmath>
#include <memory>
-#include <pcrecpp.h>
#include "mongo/bson/bsonelement_comparator.h"
#include "mongo/bson/bsonmisc.h"
@@ -44,7 +43,9 @@
#include "mongo/db/matcher/expression_parser.h"
#include "mongo/db/matcher/path.h"
#include "mongo/db/query/collation/collator_interface.h"
-#include "mongo/util/regex_util.h"
+#include "mongo/util/errno_util.h"
+#include "mongo/util/pcre.h"
+#include "mongo/util/pcre_util.h"
#include "mongo/util/represent_as.h"
#include "mongo/util/str.h"
@@ -226,9 +227,9 @@ constexpr StringData GTEMatchExpression::kName;
const std::set<char> RegexMatchExpression::kValidRegexFlags = {'i', 'm', 's', 'x'};
-std::unique_ptr<pcrecpp::RE> RegexMatchExpression::makeRegex(const std::string& regex,
+std::unique_ptr<pcre::Regex> RegexMatchExpression::makeRegex(const std::string& regex,
const std::string& flags) {
- return std::make_unique<pcrecpp::RE>(regex.c_str(), regex_util::flagsToPcreOptions(flags));
+ return std::make_unique<pcre::Regex>(regex, pcre_util::flagsToOptions(flags));
}
RegexMatchExpression::RegexMatchExpression(StringData path,
@@ -238,15 +239,15 @@ RegexMatchExpression::RegexMatchExpression(StringData path,
: LeafMatchExpression(REGEX, path, std::move(annotation)),
_regex(regex.toString()),
_flags(options.toString()),
- _re(new pcrecpp::RE(_regex.c_str(), regex_util::flagsToPcreOptions(_flags))) {
+ _re(makeRegex(_regex, _flags)) {
uassert(ErrorCodes::BadValue,
"Regular expression cannot contain an embedded null byte",
_regex.find('\0') == std::string::npos);
uassert(51091,
- str::stream() << "Regular expression is invalid: " << _re->error(),
- _re->error().empty());
+ str::stream() << "Regular expression is invalid: " << errorMessage(_re->error()),
+ *_re);
}
RegexMatchExpression::~RegexMatchExpression() {}
@@ -263,14 +264,8 @@ bool RegexMatchExpression::equivalent(const MatchExpression* other) const {
bool RegexMatchExpression::matchesSingleElement(const BSONElement& e, MatchDetails* details) const {
switch (e.type()) {
case String:
- case Symbol: {
- // String values stored in documents can contain embedded NUL bytes. We construct a
- // pcrecpp::StringPiece instance using the full length of the string to avoid truncating
- // 'data' early.
- auto stringData = e.valueStringData();
- pcrecpp::StringPiece data{stringData.rawData(), static_cast<int>(stringData.size())};
- return _re->PartialMatch(data);
- }
+ case Symbol:
+ return !!_re->matchView(e.valueStringData());
case RegEx:
return _regex == e.regex() && _flags == e.regexFlags();
default:
diff --git a/src/mongo/db/matcher/expression_leaf.h b/src/mongo/db/matcher/expression_leaf.h
index 46a80aa5e91..14fceae1f5f 100644
--- a/src/mongo/db/matcher/expression_leaf.h
+++ b/src/mongo/db/matcher/expression_leaf.h
@@ -42,10 +42,7 @@
#include "mongo/db/query/util/make_data_structure.h"
#include "mongo/stdx/unordered_map.h"
#include "mongo/util/assert_util.h"
-
-namespace pcrecpp {
-class RE;
-} // namespace pcrecpp
+#include "mongo/util/pcre.h"
namespace mongo {
@@ -469,7 +466,7 @@ class RegexMatchExpression : public LeafMatchExpression {
public:
static const std::set<char> kValidRegexFlags;
- static std::unique_ptr<pcrecpp::RE> makeRegex(const std::string& regex,
+ static std::unique_ptr<pcre::Regex> makeRegex(const std::string& regex,
const std::string& flags);
RegexMatchExpression(StringData path, Value e, clonable_ptr<ErrorAnnotation> annotation)
@@ -554,7 +551,7 @@ private:
std::string _regex;
std::string _flags;
- std::unique_ptr<pcrecpp::RE> _re;
+ std::unique_ptr<pcre::Regex> _re;
boost::optional<InputParamId> _sourceRegexInputParamId;
boost::optional<InputParamId> _compiledRegexInputParamId;
diff --git a/src/mongo/db/matcher/expression_parser.cpp b/src/mongo/db/matcher/expression_parser.cpp
index e6529908910..caef2981ef1 100644
--- a/src/mongo/db/matcher/expression_parser.cpp
+++ b/src/mongo/db/matcher/expression_parser.cpp
@@ -32,7 +32,6 @@
#include "mongo/db/matcher/expression_parser.h"
#include <memory>
-#include <pcrecpp.h>
#include "mongo/base/init.h"
#include "mongo/bson/bsonmisc.h"
@@ -74,10 +73,9 @@
#include "mongo/util/str.h"
#include "mongo/util/string_map.h"
+namespace mongo {
namespace {
-using namespace mongo;
-
/**
* Returns true if subtree contains MatchExpression 'type'.
*/
@@ -125,8 +123,6 @@ void addExpressionToRoot(const boost::intrusive_ptr<ExpressionContext>& expCtx,
}
} // namespace
-namespace mongo {
-
using ErrorAnnotation = MatchExpression::ErrorAnnotation;
using AnnotationMode = ErrorAnnotation::Mode;
diff --git a/src/mongo/db/matcher/expression_with_placeholder.cpp b/src/mongo/db/matcher/expression_with_placeholder.cpp
index ca0adfba08b..c4bc9d05dca 100644
--- a/src/mongo/db/matcher/expression_with_placeholder.cpp
+++ b/src/mongo/db/matcher/expression_with_placeholder.cpp
@@ -31,10 +31,9 @@
#include "mongo/db/matcher/expression_with_placeholder.h"
-#include <pcrecpp.h>
-
#include "mongo/base/string_data.h"
#include "mongo/db/matcher/expression_parser.h"
+#include "mongo/util/pcre.h"
#include "mongo/util/static_immortal.h"
namespace mongo {
@@ -43,8 +42,8 @@ namespace {
bool matchesPlaceholderPattern(StringData placeholder) {
// The placeholder must begin with a lowercase letter and contain no special characters.
- static StaticImmortal<pcrecpp::RE> kRe("[[:lower:]][[:alnum:]]*");
- return kRe->FullMatch(pcrecpp::StringPiece(placeholder.rawData(), placeholder.size()));
+ static StaticImmortal<pcre::Regex> kRe("^[[:lower:]][[:alnum:]]*$");
+ return !!kRe->matchView(placeholder);
}
/**
diff --git a/src/mongo/db/matcher/schema/expression_internal_schema_allowed_properties.cpp b/src/mongo/db/matcher/schema/expression_internal_schema_allowed_properties.cpp
index 64b34aafc3a..861bdc2d989 100644
--- a/src/mongo/db/matcher/schema/expression_internal_schema_allowed_properties.cpp
+++ b/src/mongo/db/matcher/schema/expression_internal_schema_allowed_properties.cpp
@@ -30,6 +30,7 @@
#include "mongo/platform/basic.h"
#include "mongo/db/matcher/schema/expression_internal_schema_allowed_properties.h"
+#include "mongo/util/errno_util.h"
namespace mongo {
constexpr StringData InternalSchemaAllowedPropertiesMatchExpression::kName;
@@ -47,10 +48,10 @@ InternalSchemaAllowedPropertiesMatchExpression::InternalSchemaAllowedPropertiesM
_otherwise(std::move(otherwise)) {
for (auto&& constraint : _patternProperties) {
- const auto& errorStr = constraint.first.regex->error();
+ const auto& re = constraint.first.regex;
uassert(ErrorCodes::BadValue,
- str::stream() << "Invalid regular expression: " << errorStr,
- errorStr.empty());
+ str::stream() << "Invalid regular expression: " << errorMessage(re->error()),
+ *re);
}
}
@@ -107,7 +108,7 @@ bool InternalSchemaAllowedPropertiesMatchExpression::_matchesBSONObj(const BSONO
for (auto&& property : obj) {
bool checkOtherwise = true;
for (auto&& constraint : _patternProperties) {
- if (constraint.first.regex->PartialMatch(property.fieldName())) {
+ if (constraint.first.regex->matchView(property.fieldName())) {
checkOtherwise = false;
if (!constraint.second->matchesBSONElement(property)) {
return false;
diff --git a/src/mongo/db/matcher/schema/expression_internal_schema_allowed_properties.h b/src/mongo/db/matcher/schema/expression_internal_schema_allowed_properties.h
index e95d0582d15..08e1a1affc6 100644
--- a/src/mongo/db/matcher/schema/expression_internal_schema_allowed_properties.h
+++ b/src/mongo/db/matcher/schema/expression_internal_schema_allowed_properties.h
@@ -31,12 +31,12 @@
#include <boost/optional.hpp>
#include <memory>
-#include <pcrecpp.h>
#include <utility>
#include <vector>
#include "mongo/db/matcher/expression.h"
#include "mongo/db/matcher/expression_with_placeholder.h"
+#include "mongo/util/pcre.h"
namespace mongo {
@@ -90,15 +90,15 @@ namespace mongo {
class InternalSchemaAllowedPropertiesMatchExpression final : public MatchExpression {
public:
/**
- * A container for regular expression data. Holds a pcrecpp::RE object, as well as the original
+ * A container for regular expression data. Holds a regex object, as well as the original
* string pattern, which is used for comparisons and serialization.
*/
struct Pattern {
explicit Pattern(StringData pattern)
- : rawRegex(pattern), regex(std::make_unique<pcrecpp::RE>(pattern.toString())) {}
+ : rawRegex(pattern), regex(std::make_unique<pcre::Regex>(std::string{rawRegex})) {}
StringData rawRegex;
- std::unique_ptr<pcrecpp::RE> regex;
+ std::unique_ptr<pcre::Regex> regex;
};
/**
diff --git a/src/mongo/db/matcher/schema/json_schema_parser.cpp b/src/mongo/db/matcher/schema/json_schema_parser.cpp
index dab65bab837..7b664daac63 100644
--- a/src/mongo/db/matcher/schema/json_schema_parser.cpp
+++ b/src/mongo/db/matcher/schema/json_schema_parser.cpp
@@ -739,8 +739,7 @@ StatusWithMatchExpression parseAllowedProperties(
// that can't match documents.
if (requiredMissingID) {
for (const auto& pattern : patternPropertiesVec) {
- // for (int i = 0; i < patternPropertiesVec.size(); ++i) {
- if (pattern.first.regex->FullMatch("_id")) {
+ if (pattern.first.regex->matchView("_id", pcre::ANCHORED | pcre::ENDANCHORED)) {
requiredMissingID = false;
break;
}
diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript
index 96c7d59a025..68fd5936a8d 100644
--- a/src/mongo/db/pipeline/SConscript
+++ b/src/mongo/db/pipeline/SConscript
@@ -121,7 +121,7 @@ env.Library(
'$BUILD_DIR/mongo/scripting/scripting',
'$BUILD_DIR/mongo/scripting/scripting_common',
'$BUILD_DIR/mongo/util/intrusive_counter',
- '$BUILD_DIR/mongo/util/regex_util',
+ '$BUILD_DIR/mongo/util/pcre_util',
'$BUILD_DIR/mongo/util/summation',
'aggregation_request_helper',
'dependencies',
diff --git a/src/mongo/db/pipeline/expression.cpp b/src/mongo/db/pipeline/expression.cpp
index 464d2ad6953..774a337f740 100644
--- a/src/mongo/db/pipeline/expression.cpp
+++ b/src/mongo/db/pipeline/expression.cpp
@@ -35,7 +35,6 @@
#include <algorithm>
#include <boost/algorithm/string.hpp>
#include <cstdio>
-#include <pcrecpp.h>
#include <utility>
#include <vector>
@@ -57,7 +56,9 @@
#include "mongo/db/stats/counters.h"
#include "mongo/platform/bits.h"
#include "mongo/platform/decimal128.h"
-#include "mongo/util/regex_util.h"
+#include "mongo/util/errno_util.h"
+#include "mongo/util/pcre.h"
+#include "mongo/util/pcre_util.h"
#include "mongo/util/str.h"
#include "mongo/util/string_map.h"
#include "mongo/util/summation.h"
@@ -7038,105 +7039,46 @@ ExpressionRegex::RegexExecutionState ExpressionRegex::buildInitialState(
return executionState;
}
-int ExpressionRegex::execute(RegexExecutionState* regexState) const {
+pcre::MatchData ExpressionRegex::execute(RegexExecutionState* regexState) const {
invariant(regexState);
invariant(!regexState->nullish());
invariant(regexState->pcrePtr);
- int execResult = pcre_exec(regexState->pcrePtr.get(),
- nullptr,
- regexState->input->c_str(),
- regexState->input->size(),
- regexState->startBytePos,
- 0, // No need to overwrite the options set during pcre_compile.
- &(regexState->capturesBuffer.front()),
- regexState->capturesBuffer.size());
- // The 'execResult' will be -1 if there is no match, 0 < execResult <= (numCaptures + 1)
- // depending on how many capture groups match, negative (other than -1) if there is an error
- // during execution, and zero if capturesBuffer's capacity is not sufficient to hold all the
- // results. The latter scenario should never occur.
+ StringData in = *regexState->input;
+ auto m = regexState->pcrePtr->matchView(in, {}, regexState->startBytePos);
uassert(51156,
str::stream() << "Error occurred while executing the regular expression in " << _opName
- << ". Result code: " << execResult,
- execResult == -1 || (execResult > 0 && execResult <= (regexState->numCaptures + 1)));
- return execResult;
+ << ". Result code: " << errorMessage(m.error()),
+ m || m.error() == pcre::Errc::ERROR_NOMATCH);
+ return m;
}
Value ExpressionRegex::nextMatch(RegexExecutionState* regexState) const {
- int execResult = execute(regexState);
-
- // No match.
- if (execResult < 0) {
+ auto m = execute(regexState);
+ if (!m)
+ // No match.
return Value(BSONNULL);
- }
-
- // Use 'input' as StringData throughout the function to avoid copying the string on 'substr'
- // calls.
- StringData input = *(regexState->input);
-
- auto verifyBounds = [&input, this](auto startPos, auto limitPos, auto isCapture) {
- // If a capture group was not matched, then the 'startPos' and 'limitPos' will both be -1.
- // These bounds cannot occur for a match on the full string.
- if (startPos == -1 || limitPos == -1) {
- massert(31304,
- str::stream() << "Unexpected error occurred while executing " << _opName
- << ". startPos: " << startPos << ", limitPos: " << limitPos,
- isCapture && startPos == -1 && limitPos == -1);
- return;
- }
- massert(31305,
- str::stream() << "Unexpected error occurred while executing " << _opName
- << ". startPos: " << startPos,
- (startPos >= 0 && static_cast<size_t>(startPos) <= input.size()));
- massert(31306,
- str::stream() << "Unexpected error occurred while executing " << _opName
- << ". limitPos: " << limitPos,
- (limitPos >= 0 && static_cast<size_t>(limitPos) <= input.size()));
- massert(31307,
- str::stream() << "Unexpected error occurred while executing " << _opName
- << ". startPos: " << startPos << ", limitPos: " << limitPos,
- startPos <= limitPos);
- };
-
- // The first and second entries of the 'capturesBuffer' will have the start and (end+1) indices
- // of the matched string, as byte offsets. '(limit - startIndex)' would be the length of the
- // captured string.
- verifyBounds(regexState->capturesBuffer[0], regexState->capturesBuffer[1], false);
- const int matchStartByteIndex = regexState->capturesBuffer[0];
- StringData matchedStr =
- input.substr(matchStartByteIndex, regexState->capturesBuffer[1] - matchStartByteIndex);
-
- // We iterate through the input string's contents preceding the match index, in order to convert
- // the byte offset to a code point offset.
- for (int byteIx = regexState->startBytePos; byteIx < matchStartByteIndex;
- ++(regexState->startCodePointPos)) {
- byteIx += str::getCodePointLength(input[byteIx]);
- }
+ StringData beforeMatch(m.input().begin() + m.startPos(), m[0].begin());
+ regexState->startCodePointPos += str::lengthInUTF8CodePoints(beforeMatch);
// Set the start index for match to the new one.
- regexState->startBytePos = matchStartByteIndex;
+ regexState->startBytePos = m[0].begin() - m.input().begin();
std::vector<Value> captures;
- captures.reserve(regexState->numCaptures);
+ captures.reserve(m.captureCount());
- // The next '2 * numCaptures' entries (after the first two entries) of 'capturesBuffer' will
- // hold the start index and limit pairs, for each of the capture groups. We skip the first two
- // elements and start iteration from 3rd element so that we only construct the strings for
- // capture groups.
- for (int i = 0; i < regexState->numCaptures; ++i) {
- const int start = regexState->capturesBuffer[2 * (i + 1)];
- const int limit = regexState->capturesBuffer[2 * (i + 1) + 1];
- verifyBounds(start, limit, true);
-
- // The 'start' and 'limit' will be set to -1, if the 'input' didn't match the current
- // capture group. In this case we put a 'null' placeholder in place of the capture group.
- captures.push_back(start == -1 && limit == -1 ? Value(BSONNULL)
- : Value(input.substr(start, limit - start)));
+ for (size_t i = 1; i < m.captureCount() + 1; ++i) {
+ if (StringData cap = m[i]; !cap.rawData()) {
+ // Use BSONNULL placeholder for unmatched capture groups.
+ captures.push_back(Value(BSONNULL));
+ } else {
+ captures.push_back(Value(cap));
+ }
}
MutableDocument match;
- match.addField("match", Value(matchedStr));
+ match.addField("match", Value(m[0]));
match.addField("idx", Value(regexState->startCodePointPos));
match.addField("captures", Value(captures));
return match.freezeToValue();
@@ -7161,41 +7103,20 @@ boost::intrusive_ptr<Expression> ExpressionRegex::optimize() {
}
void ExpressionRegex::_compile(RegexExecutionState* executionState) const {
-
- const auto pcreOptions =
- regex_util::flagsToPcreOptions(executionState->options.value_or(""), _opName).all_options();
-
if (!executionState->pattern) {
return;
}
- const char* compile_error;
- int eoffset;
-
- // The C++ interface pcreccp.h doesn't have a way to capture the matched string (or the index of
- // the match). So we are using the C interface. First we compile all the regex options to
- // generate pcre object, which will later be used to match against the input string.
- executionState->pcrePtr = std::shared_ptr<pcre>(
- pcre_compile(
- executionState->pattern->c_str(), pcreOptions, &compile_error, &eoffset, nullptr),
- pcre_free);
+ auto re = std::make_shared<pcre::Regex>(
+ *executionState->pattern,
+ pcre_util::flagsToOptions(executionState->options.value_or(""), _opName));
uassert(51111,
- str::stream() << "Invalid Regex in " << _opName << ": " << compile_error,
- executionState->pcrePtr);
+ str::stream() << "Invalid Regex in " << _opName << ": " << errorMessage(re->error()),
+ *re);
+ executionState->pcrePtr = std::move(re);
// Calculate the number of capture groups present in 'pattern' and store in 'numCaptures'.
- const int pcre_retval = pcre_fullinfo(executionState->pcrePtr.get(),
- nullptr,
- PCRE_INFO_CAPTURECOUNT,
- &executionState->numCaptures);
- invariant(pcre_retval == 0);
-
- // The first two-thirds of the vector is used to pass back captured substrings' start and
- // (end+1) indexes. The remaining third of the vector is used as workspace by pcre_exec() while
- // matching capturing subpatterns, and is not available for passing back information.
- // pcre_compile will error if there are too many capture groups in the pattern. As long as this
- // memory is allocated after compile, the amount of memory allocated will not be too high.
- executionState->capturesBuffer.resize((1 + executionState->numCaptures) * 3);
+ executionState->numCaptures = executionState->pcrePtr->captureCount();
}
Value ExpressionRegex::serialize(bool explain) const {
@@ -7420,9 +7341,11 @@ boost::intrusive_ptr<Expression> ExpressionRegexMatch::parse(ExpressionContext*
}
Value ExpressionRegexMatch::evaluate(const Document& root, Variables* variables) const {
- auto executionState = buildInitialState(root, variables);
- // Return output of execute only if regex is not nullish.
- return executionState.nullish() ? Value(false) : Value(execute(&executionState) > 0);
+ auto state = buildInitialState(root, variables);
+ if (state.nullish())
+ return Value(false);
+ pcre::MatchData m = execute(&state);
+ return Value(!!m);
}
/* -------------------------- ExpressionRandom ------------------------------ */
diff --git a/src/mongo/db/pipeline/expression.h b/src/mongo/db/pipeline/expression.h
index 4b5745bb2b6..837513770fb 100644
--- a/src/mongo/db/pipeline/expression.h
+++ b/src/mongo/db/pipeline/expression.h
@@ -36,7 +36,6 @@
#include <boost/intrusive_ptr.hpp>
#include <functional>
#include <map>
-#include <pcre.h>
#include <string>
#include <utility>
#include <vector>
@@ -57,6 +56,7 @@
#include "mongo/db/server_options.h"
#include "mongo/db/update/pattern_cmp.h"
#include "mongo/util/intrusive_counter.h"
+#include "mongo/util/pcre.h"
#include "mongo/util/str.h"
namespace mongo {
@@ -3719,7 +3719,7 @@ public:
* and '_initialExecStateForConstantRegex'. If not, then the active RegexExecutionState is
* the sole owner.
*/
- std::shared_ptr<pcre> pcrePtr;
+ std::shared_ptr<pcre::Regex> pcrePtr;
/**
* The input text and starting position for the current execution context.
@@ -3744,11 +3744,11 @@ public:
RegexExecutionState buildInitialState(const Document& root, Variables* variables) const;
/**
- * Checks if there is a match for the given input and pattern that are part of 'executionState'.
- * The method will return a positive number if there is a match and '-1' if there is no match.
- * Throws 'uassert()' for any errors.
+ * Checks if there is a match for the input, options, and pattern of 'executionState'.
+ * Returns the pcre::MatchData yielded by that match operation.
+ * Will uassert for any errors other than `pcre::Errc::ERROR_NOMATCH`.
*/
- int execute(RegexExecutionState* executionState) const;
+ pcre::MatchData execute(RegexExecutionState* executionState) const;
/**
* Finds the next possible match for the given input and pattern that are part of
diff --git a/src/mongo/db/query/SConscript b/src/mongo/db/query/SConscript
index ed8d4605af1..2f3a46adb83 100644
--- a/src/mongo/db/query/SConscript
+++ b/src/mongo/db/query/SConscript
@@ -268,7 +268,7 @@ env.Library(
"$BUILD_DIR/mongo/db/service_context",
'$BUILD_DIR/mongo/idl/feature_flag',
'$BUILD_DIR/mongo/idl/server_parameter',
- '$BUILD_DIR/third_party/shim_pcrecpp',
+ '$BUILD_DIR/mongo/util/pcre_wrapper',
],
)
diff --git a/src/mongo/db/query/plan_cache_size_parameter.cpp b/src/mongo/db/query/plan_cache_size_parameter.cpp
index 46e42efafbf..5f1f66bcaf8 100644
--- a/src/mongo/db/query/plan_cache_size_parameter.cpp
+++ b/src/mongo/db/query/plan_cache_size_parameter.cpp
@@ -29,9 +29,8 @@
#include "mongo/db/query/plan_cache_size_parameter.h"
-#include <pcrecpp.h>
-
#include "mongo/db/query/query_knobs_gen.h"
+#include "mongo/util/pcre.h"
namespace mongo::plan_cache_util {
@@ -52,16 +51,14 @@ StatusWith<PlanCacheSizeUnits> parseUnitString(const std::string& strUnit) {
}
StatusWith<PlanCacheSizeParameter> PlanCacheSizeParameter::parse(const std::string& str) {
- pcrecpp::RE_Options opt;
- opt.set_caseless(true);
// Looks for a floating point number with followed by a unit suffix (MB, GB, %).
- pcrecpp::RE re("\\s*(\\d+\\.?\\d*)\\s*(MB|GB|%)\\s*", opt);
-
- double size{};
- std::string strUnit{};
- if (!re.FullMatch(str, &size, &strUnit)) {
+ static auto& re = *new pcre::Regex(R"re((?i)^\s*(\d+\.?\d*)\s*(MB|GB|%)\s*$)re");
+ auto m = re.matchView(str);
+ if (!m) {
return {ErrorCodes::Error{6007012}, "Unable to parse plan cache size string"};
}
+ double size = std::stod(std::string{m[1]});
+ std::string strUnit{m[2]};
auto statusWithUnit = parseUnitString(strUnit);
if (!statusWithUnit.isOK()) {
diff --git a/src/mongo/db/s/SConscript b/src/mongo/db/s/SConscript
index 679586aab68..0e9631f111b 100644
--- a/src/mongo/db/s/SConscript
+++ b/src/mongo/db/s/SConscript
@@ -317,6 +317,7 @@ env.Library(
'$BUILD_DIR/mongo/s/coreshard',
'$BUILD_DIR/mongo/s/query/cluster_aggregate',
'$BUILD_DIR/mongo/util/log_and_backoff',
+ '$BUILD_DIR/mongo/util/pcre_wrapper',
'forwardable_operation_metadata',
'sharding_logging',
'user_writes_recoverable_critical_section',
diff --git a/src/mongo/db/s/balancer/balancer.cpp b/src/mongo/db/s/balancer/balancer.cpp
index fc2c42a59c1..22d0d7faa45 100644
--- a/src/mongo/db/s/balancer/balancer.cpp
+++ b/src/mongo/db/s/balancer/balancer.cpp
@@ -32,7 +32,6 @@
#include <algorithm>
#include <memory>
-#include <pcrecpp.h>
#include <string>
#include "mongo/base/status_with.h"
@@ -64,6 +63,7 @@
#include "mongo/util/concurrency/idle_thread_block.h"
#include "mongo/util/exit.h"
#include "mongo/util/fail_point.h"
+#include "mongo/util/pcre.h"
#include "mongo/util/timer.h"
#include "mongo/util/version.h"
@@ -143,13 +143,12 @@ private:
* in the cluster.
*/
void warnOnMultiVersion(const vector<ClusterStatistics::ShardStatistics>& clusterStats) {
- static const auto& majorMinorRE = *new pcrecpp::RE(R"re(^(\d+)\.(\d+)\.)re");
+ static const auto& majorMinorRE = *new pcre::Regex(R"re(^(\d+)\.(\d+)\.)re");
auto&& vii = VersionInfoInterface::instance();
auto hasMyVersion = [&](auto&& stat) {
- int major;
- int minor;
- return majorMinorRE.PartialMatch(pcrecpp::StringPiece(stat.mongoVersion), &major, &minor) &&
- major == vii.majorVersion() && minor == vii.minorVersion();
+ auto m = majorMinorRE.match(stat.mongoVersion);
+ return m && std::stoi(std::string{m[1]}) == vii.majorVersion() &&
+ std::stoi(std::string{m[2]}) == vii.minorVersion();
};
// If we're all the same version, don't message
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp
index 83462451f76..2af9f43bc2d 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp
@@ -30,7 +30,7 @@
#include "mongo/db/s/config/sharding_catalog_manager.h"
-#include <pcrecpp.h>
+#include <fmt/format.h>
#include "mongo/bson/util/bson_extract.h"
#include "mongo/db/dbdirectclient.h"
@@ -48,6 +48,8 @@
#include "mongo/s/grid.h"
#include "mongo/s/shard_util.h"
#include "mongo/s/sharding_feature_flags_gen.h"
+#include "mongo/util/pcre.h"
+#include "mongo/util/pcre_util.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
@@ -55,6 +57,8 @@
namespace mongo {
namespace {
+using namespace fmt::literals;
+
/**
* Selects an optimal shard on which to place a newly created database from the set of available
* shards. Will return ShardNotFound if shard could not be found.
@@ -150,9 +154,8 @@ DatabaseType ShardingCatalogManager::createDatabase(
// Check if a database already exists with the same name (case sensitive), and if so, return the
// existing entry.
BSONObjBuilder queryBuilder;
- queryBuilder.appendRegex(DatabaseType::kNameFieldName,
- (std::string) "^" + pcrecpp::RE::QuoteMeta(dbName.toString()) + "$",
- "i");
+ queryBuilder.appendRegex(
+ DatabaseType::kNameFieldName, "^{}$"_format(pcre_util::quoteMeta(dbName)), "i");
auto dbDoc = client.findOne(NamespaceString::kConfigDatabasesNamespace, queryBuilder.obj());
auto const [primaryShardPtr, database] = [&] {
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp
index 947ec9fb3c2..27bbbdb74b5 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp
@@ -33,7 +33,6 @@
#include "mongo/db/s/config/sharding_catalog_manager.h"
#include <iomanip>
-#include <pcrecpp.h>
#include <set>
#include "mongo/base/status_with.h"
diff --git a/src/mongo/rpc/SConscript b/src/mongo/rpc/SConscript
index 1184b58d111..a2c8fc5473f 100644
--- a/src/mongo/rpc/SConscript
+++ b/src/mongo/rpc/SConscript
@@ -91,7 +91,7 @@ env.Library(
'$BUILD_DIR/mongo/bson/mutable/mutable_bson',
'$BUILD_DIR/mongo/db/service_context',
'$BUILD_DIR/mongo/s/is_mongos',
- '$BUILD_DIR/third_party/shim_pcrecpp',
+ '$BUILD_DIR/mongo/util/pcre_wrapper',
'message',
],
)
diff --git a/src/mongo/rpc/rewrite_state_change_errors.cpp b/src/mongo/rpc/rewrite_state_change_errors.cpp
index 7ee8621307c..7f09b36806e 100644
--- a/src/mongo/rpc/rewrite_state_change_errors.cpp
+++ b/src/mongo/rpc/rewrite_state_change_errors.cpp
@@ -37,7 +37,6 @@
#include <boost/optional.hpp>
#include <fmt/format.h>
-#include <pcrecpp.h>
#include "mongo/bson/mutable/document.h"
#include "mongo/bson/mutable/element.h"
@@ -51,6 +50,7 @@
#include "mongo/rpc/rewrite_state_change_errors_server_parameter_gen.h"
#include "mongo/s/is_mongos.h"
#include "mongo/util/assert_util.h"
+#include "mongo/util/pcre.h"
#include "mongo/util/static_immortal.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kNetwork
@@ -80,12 +80,13 @@ auto enabledForOperation = OperationContext::declareDecoration<RewriteEnabled>()
*/
boost::optional<std::string> scrubErrmsg(StringData val) {
struct Scrub {
- pcrecpp::RE pat;
+ Scrub(std::string pat, std::string sub) : pat(std::move(pat)), sub(std::move(sub)) {}
+ pcre::Regex pat;
std::string sub;
};
static const StaticImmortal scrubs = std::array{
- Scrub{pcrecpp::RE("not master"), "(NOT_PRIMARY)"},
- Scrub{pcrecpp::RE("node is recovering"), "(NODE_IS_RECOVERING)"},
+ Scrub{"not master", "(NOT_PRIMARY)"},
+ Scrub{"node is recovering", "(NODE_IS_RECOVERING)"},
};
// Fast scan for the common case that no key phrase is present.
static const StaticImmortal fastScan = [] {
@@ -96,16 +97,14 @@ boost::optional<std::string> scrubErrmsg(StringData val) {
out = format_to(out, FMT_STRING("{}({})"), sep, scrub.pat.pattern());
sep = "|"_sd;
}
- return pcrecpp::RE(pat);
+ return pcre::Regex(pat);
}();
- pcrecpp::StringPiece pcreVal(val.rawData(), val.size());
-
- if (fastScan->PartialMatch(pcreVal)) {
+ if (fastScan->matchView(val)) {
std::string s{val};
bool didSub = false;
for (auto&& scrub : *scrubs) {
- bool subOk = scrub.pat.GlobalReplace(scrub.sub, &s);
+ bool subOk = scrub.pat.substitute(scrub.sub, &s, pcre::SUBSTITUTE_GLOBAL);
didSub = (didSub || subOk);
}
if (didSub)
diff --git a/src/mongo/s/catalog/SConscript b/src/mongo/s/catalog/SConscript
index 6b6d5a43151..53ce04524d4 100644
--- a/src/mongo/s/catalog/SConscript
+++ b/src/mongo/s/catalog/SConscript
@@ -30,6 +30,7 @@ env.Library(
],
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/db/logical_session_id_helpers',
+ '$BUILD_DIR/mongo/util/pcre_wrapper',
],
)
diff --git a/src/mongo/s/catalog/sharding_catalog_client_impl.cpp b/src/mongo/s/catalog/sharding_catalog_client_impl.cpp
index 55ae5007f48..5941d8b3499 100644
--- a/src/mongo/s/catalog/sharding_catalog_client_impl.cpp
+++ b/src/mongo/s/catalog/sharding_catalog_client_impl.cpp
@@ -32,8 +32,8 @@
#include "mongo/s/catalog/sharding_catalog_client_impl.h"
+#include <fmt/format.h>
#include <iomanip>
-#include <pcrecpp.h>
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/bson/util/bson_extract.h"
@@ -72,6 +72,8 @@
#include "mongo/s/write_ops/batched_command_response.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/net/hostandport.h"
+#include "mongo/util/pcre.h"
+#include "mongo/util/pcre_util.h"
#include "mongo/util/str.h"
#include "mongo/util/time_support.h"
@@ -90,6 +92,8 @@ using str::stream;
namespace {
+using namespace fmt::literals;
+
const ReadPreferenceSetting kConfigReadSelector(ReadPreference::Nearest, TagSet{});
const ReadPreferenceSetting kConfigPrimaryPreferredSelector(ReadPreference::PrimaryPreferred,
TagSet{});
@@ -455,9 +459,7 @@ std::vector<CollectionType> ShardingCatalogClientImpl::getCollections(
OperationContext* opCtx, StringData dbName, repl::ReadConcernLevel readConcernLevel) {
BSONObjBuilder b;
if (!dbName.empty())
- b.appendRegex(CollectionType::kNssFieldName,
- std::string(str::stream()
- << "^" << pcrecpp::RE::QuoteMeta(dbName.toString()) << "\\."));
+ b.appendRegex(CollectionType::kNssFieldName, "^{}\\."_format(pcre_util::quoteMeta(dbName)));
auto collDocs = uassertStatusOK(_exhaustiveFindOnConfig(opCtx,
kConfigReadSelector,
diff --git a/src/mongo/s/catalog/sharding_catalog_client_test.cpp b/src/mongo/s/catalog/sharding_catalog_client_test.cpp
index 92f2ce78a8d..be537a1341a 100644
--- a/src/mongo/s/catalog/sharding_catalog_client_test.cpp
+++ b/src/mongo/s/catalog/sharding_catalog_client_test.cpp
@@ -27,7 +27,7 @@
* it in the license file.
*/
-#include <pcrecpp.h>
+#include "mongo/platform/basic.h"
#include "mongo/bson/json.h"
#include "mongo/client/remote_command_targeter_mock.h"
diff --git a/src/mongo/shell/SConscript b/src/mongo/shell/SConscript
index 650f8f72f1f..9a7ea68c5e9 100644
--- a/src/mongo/shell/SConscript
+++ b/src/mongo/shell/SConscript
@@ -23,7 +23,8 @@ env.Library(
'$BUILD_DIR/mongo/scripting/bson_template_evaluator',
],
LIBDEPS_PRIVATE=[
- '$BUILD_DIR/third_party/shim_pcrecpp',
+ '$BUILD_DIR/mongo/util/pcre_util',
+ '$BUILD_DIR/mongo/util/pcre_wrapper',
],
)
@@ -277,7 +278,6 @@ if not has_option('noshell') and jsEngine:
"$BUILD_DIR/mongo/util/processinfo",
"$BUILD_DIR/mongo/util/signal_handlers",
"$BUILD_DIR/mongo/util/version_impl",
- "$BUILD_DIR/third_party/shim_pcrecpp",
"benchrun",
"encrypted_dbclient" if get_option('ssl') == 'on' else '',
"kms_shell" if get_option('ssl') == 'on' else '',
@@ -305,8 +305,8 @@ if not has_option('noshell') and jsEngine:
"$BUILD_DIR/mongo/s/write_ops/batch_write_types",
"$BUILD_DIR/mongo/transport/transport_layer",
"$BUILD_DIR/mongo/util/net/ssl_manager",
+ "$BUILD_DIR/mongo/util/pcre_wrapper",
"$BUILD_DIR/mongo/util/signal_handlers",
- "$BUILD_DIR/third_party/shim_pcrecpp",
"linenoise",
"mongo_initializers",
"shell_utils",
diff --git a/src/mongo/shell/bench.cpp b/src/mongo/shell/bench.cpp
index d0c02886304..0c7b257e5de 100644
--- a/src/mongo/shell/bench.cpp
+++ b/src/mongo/shell/bench.cpp
@@ -32,7 +32,6 @@
#include "mongo/shell/bench.h"
-#include <pcrecpp.h>
#include <string>
#include "mongo/base/shim.h"
@@ -45,6 +44,8 @@
#include "mongo/scripting/bson_template_evaluator.h"
#include "mongo/stdx/thread.h"
#include "mongo/util/md5.h"
+#include "mongo/util/pcre.h"
+#include "mongo/util/pcre_util.h"
#include "mongo/util/time_support.h"
#include "mongo/util/timer.h"
#include "mongo/util/version.h"
@@ -95,21 +96,6 @@ private:
BenchRunState& _brState;
};
-pcrecpp::RE_Options flags2options(const char* flags) {
- pcrecpp::RE_Options options;
- options.set_utf8(true);
- while (flags && *flags) {
- if (*flags == 'i')
- options.set_caseless(true);
- else if (*flags == 'm')
- options.set_multiline(true);
- else if (*flags == 'x')
- options.set_extended(true);
- flags++;
- }
- return options;
-}
-
bool hasSpecial(const BSONObj& obj) {
BSONObjIterator i(obj);
while (i.more()) {
@@ -673,6 +659,11 @@ BenchRunOp opFromBson(const BSONObj& op) {
void BenchRunConfig::initializeFromBson(const BSONObj& args) {
initializeToDefaults();
+ auto argToRegex = [](auto&& arg) {
+ return std::make_shared<pcre::Regex>(arg.regex(),
+ pcre_util::flagsToOptions(arg.regexFlags()));
+ };
+
for (auto arg : args) {
auto name = arg.fieldNameStringData();
if (name == "host") {
@@ -750,25 +741,13 @@ void BenchRunConfig::initializeFromBson(const BSONObj& args) {
} else if (name == "breakOnTrap") {
breakOnTrap = arg.trueValue();
} else if (name == "trapPattern") {
- const char* regex = arg.regex();
- const char* flags = arg.regexFlags();
- trapPattern =
- std::shared_ptr<pcrecpp::RE>(new pcrecpp::RE(regex, flags2options(flags)));
+ trapPattern = argToRegex(arg);
} else if (name == "noTrapPattern") {
- const char* regex = arg.regex();
- const char* flags = arg.regexFlags();
- noTrapPattern =
- std::shared_ptr<pcrecpp::RE>(new pcrecpp::RE(regex, flags2options(flags)));
+ noTrapPattern = argToRegex(arg);
} else if (name == "watchPattern") {
- const char* regex = arg.regex();
- const char* flags = arg.regexFlags();
- watchPattern =
- std::shared_ptr<pcrecpp::RE>(new pcrecpp::RE(regex, flags2options(flags)));
+ watchPattern = argToRegex(arg);
} else if (name == "noWatchPattern") {
- const char* regex = arg.regex();
- const char* flags = arg.regexFlags();
- noWatchPattern =
- std::shared_ptr<pcrecpp::RE>(new pcrecpp::RE(regex, flags2options(flags)));
+ noWatchPattern = argToRegex(arg);
} else if (name == "ops") {
// iterate through the objects in ops
// create an BenchRunOp per
@@ -946,10 +925,12 @@ void BenchRunWorker::generateLoadOnConnection(DBClientBase* conn) {
op.executeOnce(conn, lsid, *_config, &opState);
} catch (const DBException& ex) {
if (!_config->hideErrors || op.showError) {
- bool yesWatch =
- (_config->watchPattern && _config->watchPattern->FullMatch(ex.what()));
- bool noWatch =
- (_config->noWatchPattern && _config->noWatchPattern->FullMatch(ex.what()));
+ bool yesWatch = (_config->watchPattern &&
+ _config->watchPattern->matchView(
+ ex.what(), pcre::ANCHORED | pcre::ENDANCHORED));
+ bool noWatch = (_config->noWatchPattern &&
+ _config->noWatchPattern->matchView(
+ ex.what(), pcre::ANCHORED | pcre::ENDANCHORED));
if ((!_config->watchPattern && _config->noWatchPattern &&
!noWatch) || // If we're just ignoring things
@@ -962,9 +943,12 @@ void BenchRunWorker::generateLoadOnConnection(DBClientBase* conn) {
"error"_attr = causedBy(ex));
}
- bool yesTrap = (_config->trapPattern && _config->trapPattern->FullMatch(ex.what()));
- bool noTrap =
- (_config->noTrapPattern && _config->noTrapPattern->FullMatch(ex.what()));
+ bool yesTrap = (_config->trapPattern &&
+ _config->trapPattern->matchView(
+ ex.what(), pcre::ANCHORED | pcre::ENDANCHORED));
+ bool noTrap = (_config->noTrapPattern &&
+ _config->noTrapPattern->matchView(
+ ex.what(), pcre::ANCHORED | pcre::ENDANCHORED));
if ((!_config->trapPattern && _config->noTrapPattern && !noTrap) ||
(!_config->noTrapPattern && _config->trapPattern && yesTrap) ||
diff --git a/src/mongo/shell/bench.h b/src/mongo/shell/bench.h
index a7d1f70c0ed..90831a8ab53 100644
--- a/src/mongo/shell/bench.h
+++ b/src/mongo/shell/bench.h
@@ -40,12 +40,9 @@
#include "mongo/platform/mutex.h"
#include "mongo/stdx/condition_variable.h"
#include "mongo/stdx/thread.h"
+#include "mongo/util/pcre.h"
#include "mongo/util/timer.h"
-namespace pcrecpp {
-class RE;
-} // namespace pcrecpp
-
namespace mongo {
enum class OpType {
@@ -238,10 +235,10 @@ public:
bool handleErrors;
bool hideErrors;
- std::shared_ptr<pcrecpp::RE> trapPattern;
- std::shared_ptr<pcrecpp::RE> noTrapPattern;
- std::shared_ptr<pcrecpp::RE> watchPattern;
- std::shared_ptr<pcrecpp::RE> noWatchPattern;
+ std::shared_ptr<pcre::Regex> trapPattern;
+ std::shared_ptr<pcre::Regex> noTrapPattern;
+ std::shared_ptr<pcre::Regex> watchPattern;
+ std::shared_ptr<pcre::Regex> noWatchPattern;
/**
* Operation description. A list of BenchRunOps, each describing a single
diff --git a/src/mongo/shell/mongo_main.cpp b/src/mongo/shell/mongo_main.cpp
index dd9231caa27..51ccb760ee7 100644
--- a/src/mongo/shell/mongo_main.cpp
+++ b/src/mongo/shell/mongo_main.cpp
@@ -39,7 +39,6 @@
#include <boost/log/sinks.hpp>
#include <fstream>
#include <iostream>
-#include <pcrecpp.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
@@ -79,6 +78,7 @@
#include "mongo/util/net/ocsp/ocsp_manager.h"
#include "mongo/util/net/ssl_options.h"
#include "mongo/util/password.h"
+#include "mongo/util/pcre.h"
#include "mongo/util/quick_exit.h"
#include "mongo/util/scopeguard.h"
#include "mongo/util/signal_handlers.h"
@@ -278,16 +278,16 @@ void shellHistoryAdd(const char* line) {
// be able to add things like `.author`, so be smart about how this is
// detected by using regular expresions. This is so we can avoid storing passwords
// in the history file in plaintext.
- static pcrecpp::RE hiddenHelpers(
+ static pcre::Regex hiddenHelpers(
"\\.\\s*(auth|createUser|updateUser|changeUserPassword)\\s*\\(");
// Also don't want the raw user management commands to show in the shell when run directly
// via runCommand.
- static pcrecpp::RE hiddenCommands(
+ static pcre::Regex hiddenCommands(
"(run|admin)Command\\s*\\(\\s*{\\s*(createUser|updateUser)\\s*:");
- static pcrecpp::RE hiddenFLEConstructor(".*Mongo\\(([\\s\\S]*)secretAccessKey([\\s\\S]*)");
- if (!hiddenHelpers.PartialMatch(line) && !hiddenCommands.PartialMatch(line) &&
- !hiddenFLEConstructor.PartialMatch(line)) {
+ static pcre::Regex hiddenFLEConstructor(".*Mongo\\(([\\s\\S]*)secretAccessKey([\\s\\S]*)");
+ if (!hiddenHelpers.matchView(line) && !hiddenCommands.matchView(line) &&
+ !hiddenFLEConstructor.matchView(line)) {
linenoiseHistoryAdd(line);
}
}
diff --git a/src/mongo/unittest/SConscript b/src/mongo/unittest/SConscript
index 7abf76854de..b625fcd23ab 100644
--- a/src/mongo/unittest/SConscript
+++ b/src/mongo/unittest/SConscript
@@ -24,7 +24,8 @@ utEnv.Library(
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/db/server_options_core',
'$BUILD_DIR/mongo/util/options_parser/options_parser',
- '$BUILD_DIR/third_party/shim_pcrecpp',
+ '$BUILD_DIR/mongo/util/pcre_util',
+ '$BUILD_DIR/mongo/util/pcre_wrapper',
'$BUILD_DIR/third_party/shim_yaml',
],
AIB_COMPONENT='unittests',
diff --git a/src/mongo/unittest/death_test.cpp b/src/mongo/unittest/death_test.cpp
index e7a778c97ea..40dc410aa22 100644
--- a/src/mongo/unittest/death_test.cpp
+++ b/src/mongo/unittest/death_test.cpp
@@ -30,7 +30,6 @@
#include "mongo/platform/basic.h"
#include <fmt/format.h>
-#include <pcrecpp.h>
#include <stdio.h>
#include "mongo/bson/json.h"
@@ -58,6 +57,7 @@
#include "mongo/logv2/log.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/debugger.h"
+#include "mongo/util/pcre_util.h"
#include "mongo/util/quick_exit.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
@@ -233,7 +233,7 @@ void DeathTestBase::Subprocess::execChild(std::string tempPath) {
stripOption(av, "tempPath");
const TestInfo* info = UnitTest::getInstance()->currentTestInfo();
av.push_back("--suite={}"_format(info->suiteName()));
- av.push_back("--filter=^{}$"_format(pcrecpp::RE::QuoteMeta(std::string{info->testName()})));
+ av.push_back("--filter=^{}$"_format(pcre_util::quoteMeta(info->testName())));
av.push_back("--tempPath={}"_format(tempPath));
// The presence of this flag is how the test body in the child process knows it's in the
// child process, and therefore to not exec again. Its value is ignored.
diff --git a/src/mongo/unittest/golden_test.cpp b/src/mongo/unittest/golden_test.cpp
index d2174e8f067..890be64d01b 100644
--- a/src/mongo/unittest/golden_test.cpp
+++ b/src/mongo/unittest/golden_test.cpp
@@ -38,7 +38,6 @@
#include <boost/program_options.hpp>
#include <fmt/format.h>
#include <fmt/ostream.h>
-#include <pcrecpp.h>
#include <yaml-cpp/yaml.h>
#include "mongo/base/init.h"
@@ -46,6 +45,7 @@
#include "mongo/logv2/log.h"
#include "mongo/unittest/golden_test.h"
#include "mongo/util/ctype.h"
+#include "mongo/util/pcre.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
@@ -56,7 +56,7 @@ namespace po = ::boost::program_options;
using namespace fmt::literals;
-static const pcrecpp::RE validNameRegex(R"([[:alnum:]_\-]*)");
+static const pcre::Regex validNameRegex(R"(^[[:alnum:]_\-]*$)");
std::string readFile(const fs::path& path) {
ASSERT_FALSE(is_directory(path));
@@ -115,7 +115,7 @@ std::string GoldenTestContext::toSnakeCase(const std::string& str) {
}
std::string GoldenTestContext::sanitizeName(const std::string& str) {
- if (!validNameRegex.FullMatch(str)) {
+ if (!validNameRegex.matchView(str)) {
FAIL("Unsupported characters in name '{}'"_format(str));
}
diff --git a/src/mongo/unittest/matcher.cpp b/src/mongo/unittest/matcher.cpp
index da0bbcb5a6c..db1a2d62139 100644
--- a/src/mongo/unittest/matcher.cpp
+++ b/src/mongo/unittest/matcher.cpp
@@ -29,19 +29,19 @@
#include "mongo/unittest/matcher.h"
+#include <fmt/format.h>
#include <memory>
#include <utility>
-#include <fmt/format.h>
-#include <pcrecpp.h>
+#include "mongo/util/pcre.h"
namespace mongo::unittest::match {
using namespace fmt::literals;
struct ContainsRegex::Impl {
- explicit Impl(pcrecpp::RE pat) : re(std::move(pat)) {}
- pcrecpp::RE re;
+ explicit Impl(std::string pat) : re(std::move(pat)) {}
+ pcre::Regex re;
};
ContainsRegex::ContainsRegex(std::string pattern)
@@ -50,9 +50,7 @@ ContainsRegex::ContainsRegex(std::string pattern)
ContainsRegex::~ContainsRegex() = default;
MatchResult ContainsRegex::match(StringData x) const {
- bool res =
- _impl->re.PartialMatch(pcrecpp::StringPiece{x.rawData(), static_cast<int>(x.size())});
- if (res)
+ if (_impl->re.matchView(x))
return {};
return MatchResult(false, "");
}
diff --git a/src/mongo/unittest/unittest.cpp b/src/mongo/unittest/unittest.cpp
index 01bfb7c756f..073f09ca7c5 100644
--- a/src/mongo/unittest/unittest.cpp
+++ b/src/mongo/unittest/unittest.cpp
@@ -39,7 +39,6 @@
#include <iostream>
#include <map>
#include <memory>
-#include <pcrecpp.h>
#include "mongo/base/checked_cast.h"
#include "mongo/base/init.h"
@@ -55,6 +54,7 @@
#include "mongo/logv2/plain_formatter.h"
#include "mongo/platform/mutex.h"
#include "mongo/util/assert_util.h"
+#include "mongo/util/pcre.h"
#include "mongo/util/signal_handlers_synchronous.h"
#include "mongo/util/stacktrace.h"
#include "mongo/util/timer.h"
@@ -79,7 +79,7 @@ auto& suitesMap() {
} // namespace
bool searchRegex(const std::string& pattern, const std::string& string) {
- return pcrecpp::RE(pattern).PartialMatch(string);
+ return !!pcre::Regex(pattern).matchView(string);
}
class Result {
@@ -396,20 +396,20 @@ std::unique_ptr<Result> Suite::run(const std::string& filter,
Timer timer;
auto r = std::make_unique<Result>(_name);
- boost::optional<pcrecpp::RE> filterRe;
- boost::optional<pcrecpp::RE> fileNameFilterRe;
+ boost::optional<pcre::Regex> filterRe;
+ boost::optional<pcre::Regex> fileNameFilterRe;
if (!filter.empty())
filterRe.emplace(filter);
if (!fileNameFilter.empty())
fileNameFilterRe.emplace(fileNameFilter);
for (const auto& tc : _tests) {
- if (filterRe && !filterRe->PartialMatch(tc.name)) {
+ if (filterRe && !filterRe->matchView(tc.name)) {
LOGV2_DEBUG(23057, 1, "skipped due to filter", "test"_attr = tc.name);
continue;
}
- if (fileNameFilterRe && !fileNameFilterRe->PartialMatch(tc.fileName)) {
+ if (fileNameFilterRe && !fileNameFilterRe->matchView(tc.fileName)) {
LOGV2_DEBUG(23058, 1, "skipped due to fileNameFilter", "testFile"_attr = tc.fileName);
continue;
}
diff --git a/src/mongo/unittest/unittest.h b/src/mongo/unittest/unittest.h
index 514a8e7d8d9..3b37a565261 100644
--- a/src/mongo/unittest/unittest.h
+++ b/src/mongo/unittest/unittest.h
@@ -38,7 +38,6 @@
#include <cmath>
#include <fmt/format.h>
#include <functional>
-#include <pcrecpp.h>
#include <sstream>
#include <string>
#include <tuple>
diff --git a/src/mongo/util/SConscript b/src/mongo/util/SConscript
index 308e3777a44..8f3c500a76e 100644
--- a/src/mongo/util/SConscript
+++ b/src/mongo/util/SConscript
@@ -131,17 +131,6 @@ env.Library(
)
env.Library(
- target='regex_util',
- source=[
- 'regex_util.cpp',
- ],
- LIBDEPS=[
- '$BUILD_DIR/mongo/base',
- '$BUILD_DIR/third_party/shim_pcrecpp',
- ],
-)
-
-env.Library(
target='summation',
source=[
'summation.cpp',
@@ -201,7 +190,7 @@ env.Library(
"$BUILD_DIR/mongo/base",
],
LIBDEPS_PRIVATE=[
- '$BUILD_DIR/third_party/shim_pcrecpp',
+ 'pcre_wrapper',
],
)
@@ -615,7 +604,7 @@ if env.TargetOSIs('linux'):
'$BUILD_DIR/mongo/base',
],
LIBDEPS_PRIVATE=[
- '$BUILD_DIR/third_party/shim_pcrecpp',
+ 'pcre_wrapper',
],
)
@@ -838,7 +827,7 @@ if use_libunwind:
)
stacktrace_test_LIBDEPS = stacktraceEnv.get('LIBDEPS', []).copy()
-insort_wrapper(stacktrace_test_LIBDEPS, '$BUILD_DIR/third_party/shim_pcrecpp')
+insort_wrapper(stacktrace_test_LIBDEPS, 'pcre_wrapper')
stacktraceEnv.CppUnitTest(
target='stacktrace_test',
diff --git a/src/mongo/util/pcre.cpp b/src/mongo/util/pcre.cpp
index fe0f71fa640..00ced908410 100644
--- a/src/mongo/util/pcre.cpp
+++ b/src/mongo/util/pcre.cpp
@@ -123,10 +123,8 @@ namespace detail {
class MatchDataImpl;
-// Global. Value is historical carryover from pcre1 and pcrecpp.
-// It's user-facing, so record and enforce its value even if the
-// engine can now support longer patterns.
-inline constexpr size_t kMaxPatternLength = 32761;
+// Global.
+inline constexpr size_t kMaxPatternLength = 16384;
/** Wrapper around a pcre2_compile_context. */
class CompileContext {
diff --git a/src/mongo/util/pcre.h b/src/mongo/util/pcre.h
index ff9f705f4fc..5f5652918c9 100644
--- a/src/mongo/util/pcre.h
+++ b/src/mongo/util/pcre.h
@@ -488,24 +488,6 @@ public:
MatchData matchView(StringData input) const;
/**
- * True if all of `input` matches.
- * If possible, add '^' and '$' to the `Regex` pattern instead, as this
- * optimizes better than match-supplied options.
- *
- * Legacy: prefer `Regex::matchView` with `ANCHOR|ENDANCHOR` options.
- */
- bool fullMatch(StringData input) const;
-
- /**
- * True if a substring of `input` matches.
- * Note that PCRE2 documentation uses the term "partial match" to mean
- * something very different.
- *
- * Legacy: prefer `Regex::matchView`.
- */
- bool partialMatch(StringData input) const;
-
- /**
* Replaces occurrences in `str` of this pattern with `replacement`.
* Additional substitute `options` can change behavior. Important ones:
*
@@ -626,14 +608,6 @@ inline MatchData Regex::matchView(StringData input) const {
return matchView(input, MatchOptions{}, 0);
}
-inline bool Regex::fullMatch(StringData input) const {
- return !matchView(input, ANCHORED | ENDANCHORED).error();
-}
-
-inline bool Regex::partialMatch(StringData input) const {
- return !matchView(input).error();
-}
-
} // namespace mongo::pcre
namespace std {
diff --git a/src/mongo/util/pcre_test.cpp b/src/mongo/util/pcre_test.cpp
index 75e2a575d65..d6d0c4c6b84 100644
--- a/src/mongo/util/pcre_test.cpp
+++ b/src/mongo/util/pcre_test.cpp
@@ -170,24 +170,6 @@ TEST(PcreTest, StartPos) {
ASSERT_EQ(hiRe.matchView(ohi, {}, i).startPos(), i) << " i="_format(i);
}
-TEST(PcreTest, FullMatch) {
- Regex re{"hi"};
- ASSERT_FALSE(re.fullMatch("hello"));
- ASSERT_TRUE(re.fullMatch("hi"));
- ASSERT_FALSE(re.fullMatch("hii"));
- ASSERT_FALSE(re.fullMatch("hhi"));
-}
-
-TEST(PcreTest, PartialMatch) {
- Regex re{"abc"};
- ASSERT_FALSE(re.partialMatch(""));
- ASSERT_FALSE(re.partialMatch("a"));
- ASSERT_FALSE(re.partialMatch("bc"));
- ASSERT_TRUE(re.partialMatch("abc"));
- ASSERT_TRUE(re.partialMatch("zabc"));
- ASSERT_TRUE(re.partialMatch("abcz"));
-}
-
TEST(PcreTest, CompileOptions) {
std::string pattern = "a.b";
std::array subjects{"a\nb"s, "A_b"s, "A\nb"s};
@@ -203,7 +185,7 @@ TEST(PcreTest, CompileOptions) {
}) {
Regex re{pattern, opt};
for (size_t i = 0; i < subjects.size(); ++i)
- ASSERT_EQ(re.fullMatch(subjects[i]), outMatch[i])
+ ASSERT_EQ(!!re.matchView(subjects[i], pcre::ANCHORED | pcre::ENDANCHORED), outMatch[i])
<< "opt={}, subject={}"_format(uint32_t(opt), subjects[i]);
}
}
diff --git a/src/mongo/util/pcre_util.cpp b/src/mongo/util/pcre_util.cpp
index 4f4dd3c63a8..bd5d51bf4b0 100644
--- a/src/mongo/util/pcre_util.cpp
+++ b/src/mongo/util/pcre_util.cpp
@@ -38,7 +38,7 @@ namespace mongo::pcre_util {
using namespace fmt::literals;
-pcre::CompileOptions parseOptions(StringData optionFlags, StringData opName) {
+pcre::CompileOptions flagsToOptions(StringData optionFlags, StringData opName) {
pcre::CompileOptions opt = pcre::UTF;
for (char flag : optionFlags) {
switch (flag) {
@@ -48,21 +48,34 @@ pcre::CompileOptions parseOptions(StringData optionFlags, StringData opName) {
case 'm': // newlines match ^ and $
opt |= pcre::MULTILINE;
continue;
- case 'x': // extended mode
- opt |= pcre::EXTENDED;
- continue;
case 's': // allows dot to include newline chars
opt |= pcre::DOTALL;
continue;
case 'u':
continue;
+ case 'x': // extended mode
+ opt |= pcre::EXTENDED;
+ continue;
default:
- uasserted(6527600, "{} invalid flag in regex options: {}"_format(opName, flag));
+ uasserted(51108, "{} invalid flag in regex options: {}"_format(opName, flag));
}
}
return opt;
}
+std::string optionsToFlags(pcre::CompileOptions opt) {
+ std::string optionFlags = "";
+ if (opt & pcre::CASELESS)
+ optionFlags += 'i';
+ if (opt & pcre::MULTILINE)
+ optionFlags += 'm';
+ if (opt & pcre::DOTALL)
+ optionFlags += 's';
+ if (opt & pcre::EXTENDED)
+ optionFlags += 'x';
+ return optionFlags;
+}
+
std::string quoteMeta(StringData str) {
std::string result;
for (char c : str) {
diff --git a/src/mongo/util/pcre_util.h b/src/mongo/util/pcre_util.h
index deb7129e9bc..1cdd7891890 100644
--- a/src/mongo/util/pcre_util.h
+++ b/src/mongo/util/pcre_util.h
@@ -49,7 +49,14 @@ namespace mongo::pcre_util {
* 'u': UTF (redundant, but accepted)
* 'x': EXTENDED
*/
-pcre::CompileOptions parseOptions(StringData optionFlags, StringData opName = "");
+pcre::CompileOptions flagsToOptions(StringData optionFlags, StringData opName = "");
+
+/**
+ * Builds an std::string of flag characters from the input 'pcre::CompileOptions'.
+ * These flags are the same as those documented in flagsToOptions. They are returned in alphabetical
+ * order. Since 'u' is redundant, it will never be output by this function.
+ */
+std::string optionsToFlags(pcre::CompileOptions opt);
/**
* Escapes all potentially meaningful regex characters in the provided string.
diff --git a/src/mongo/util/pcre_util_test.cpp b/src/mongo/util/pcre_util_test.cpp
index 720ff5c31ad..66029835c54 100644
--- a/src/mongo/util/pcre_util_test.cpp
+++ b/src/mongo/util/pcre_util_test.cpp
@@ -42,9 +42,9 @@ namespace {
using namespace fmt::literals;
// Test compares `CompileOptions` as integers.
-TEST(PcreUtilTest, ParseOptions) {
+TEST(PcreUtilTest, FlagsToOptions) {
using namespace pcre::options;
- auto parse = [](StringData flags) { return static_cast<uint32_t>(parseOptions(flags)); };
+ auto parse = [](StringData flags) { return static_cast<uint32_t>(flagsToOptions(flags)); };
auto expect = [](pcre::CompileOptions o) { return static_cast<uint32_t>(o); };
ASSERT_EQ(parse(""), expect(UTF)) << " UTF is on by default";
ASSERT_EQ(parse("i"), expect(UTF | CASELESS));
@@ -60,6 +60,25 @@ TEST(PcreUtilTest, ParseOptions) {
ASSERT_THROWS_WITH_CHECK(parse("iz"), DBException, isBadFlagException);
}
+// Test compares `CompileOptions` as strings of option flags.
+TEST(PcreUtilTest, OptionsToFlags) {
+ using namespace pcre::options;
+ auto parse = [](pcre::CompileOptions flags) {
+ return static_cast<std::string>(optionsToFlags(flags));
+ };
+ auto expect = [](std::string o) { return (o); };
+ ASSERT_EQ(parse(UTF | CASELESS), expect("i"));
+ ASSERT_EQ(parse(UTF | MULTILINE), expect("m"));
+ ASSERT_EQ(parse(UTF | DOTALL), expect("s"));
+ ASSERT_EQ(parse(UTF), expect("")) << " UTF is on by default";
+ ASSERT_EQ(parse(UTF | EXTENDED), expect("x"));
+ ASSERT_EQ(parse(UTF | CASELESS | MULTILINE | DOTALL | EXTENDED), expect("imsx"));
+ ASSERT_EQ(parse(UTF | CASELESS | MULTILINE | DOTALL), expect("ims"));
+ ASSERT_EQ(parse(UTF | CASELESS | MULTILINE | EXTENDED), expect("imx"));
+ ASSERT_EQ(parse(UTF | CASELESS | DOTALL | EXTENDED), expect("isx"));
+ ASSERT_EQ(parse(UTF | MULTILINE | DOTALL | EXTENDED), expect("msx"));
+}
+
TEST(PcreUtilTest, QuoteMeta) {
ASSERT_EQ(quoteMeta(""), "");
ASSERT_EQ(quoteMeta("abc_def_123"_sd), "abc_def_123");
diff --git a/src/mongo/util/processinfo_linux.cpp b/src/mongo/util/processinfo_linux.cpp
index d5df28c2b49..a8e2f8f42ad 100644
--- a/src/mongo/util/processinfo_linux.cpp
+++ b/src/mongo/util/processinfo_linux.cpp
@@ -35,14 +35,15 @@
#include <fstream>
#include <iostream>
#include <malloc.h>
-#include <pcrecpp.h>
#include <sched.h>
#include <stdio.h>
+#include <string>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <sys/utsname.h>
#include <unistd.h>
+
#ifdef __BIONIC__
#include <android/api-level.h>
#elif __UCLIBC__
@@ -55,12 +56,12 @@
#include <boost/none.hpp>
#include <boost/optional.hpp>
#include <fmt/format.h>
-#include <pcrecpp.h>
#include "mongo/base/parse_number.h"
#include "mongo/logv2/log.h"
#include "mongo/util/ctype.h"
#include "mongo/util/file.h"
+#include "mongo/util/pcre.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kControl
@@ -257,21 +258,34 @@ namespace {
// (1)(2)(3:4)(5) (6) (7) (8) (9) (10) (11)
struct MountRecord {
bool parseLine(const std::string& line) {
- static const pcrecpp::RE kRe{
- // (1) (2) (3) (4) (5) (6) (7) (8) (9) (10) (11)
- R"re((\d+) (\d+) (\d+):(\d+) (\S+) (\S+) (\S+) ((?:\S+:\S+ ?)*) - (\S+) (\S+) (\S+))re"};
- return kRe.FullMatch(line,
- &mountId,
- &parentId,
- &major,
- &minor,
- &root,
- &mountPoint,
- &options,
- &fields,
- &type,
- &source,
- &superOpt);
+ static const pcre::Regex kRe{
+ // (1) (2) (3) (4) (5) (6) (7) (8) (9) (10) (11)
+ R"re(^(\d+) (\d+) (\d+):(\d+) (\S+) (\S+) (\S+) ((?:\S+:\S+ ?)*) - (\S+) (\S+) (\S+)$)re"};
+ auto m = kRe.matchView(line);
+ if (!m)
+ return false;
+ size_t i = 1;
+ auto load = [&](auto& var) {
+ using T = std::decay_t<decltype(var)>;
+ std::string nextString{m[i++]};
+ if constexpr (std::is_same_v<T, int>) {
+ var = std::stoi(nextString);
+ } else {
+ var = std::move(nextString);
+ }
+ };
+ load(mountId);
+ load(parentId);
+ load(major);
+ load(minor);
+ load(root);
+ load(mountPoint);
+ load(options);
+ load(fields);
+ load(type);
+ load(source);
+ load(superOpt);
+ return true;
}
void appendBSON(BSONObjBuilder& bob) const {
@@ -319,7 +333,9 @@ void appendMountInfo(BSONObjBuilder& bob) {
class CpuInfoParser {
public:
struct LineProcessor {
- pcrecpp::RE regex;
+ LineProcessor(std::string pattern, std::function<void(const std::string&)> f)
+ : regex{std::make_shared<pcre::Regex>(std::move(pattern))}, f{std::move(f)} {}
+ std::shared_ptr<pcre::Regex> regex;
std::function<void(const std::string&)> f;
};
std::vector<LineProcessor> lineProcessors;
@@ -331,17 +347,18 @@ public:
bool readSuccess;
bool unprocessed = false;
- static StaticImmortal<pcrecpp::RE> lineRegex(R"re((.*?)\s*:\s*(.*))re");
+ static StaticImmortal<pcre::Regex> lineRegex(R"re(^(.*?)\s*:\s*(.*)$)re");
do {
std::string fstr;
readSuccess = f && std::getline(f, fstr);
if (readSuccess && !fstr.empty()) {
- std::string key;
- std::string value;
- if (!lineRegex->FullMatch(fstr, &key, &value))
+ auto m = lineRegex->matchView(fstr);
+ if (!m)
continue;
+ std::string key{m[1]};
+ std::string value{m[2]};
for (auto&& [lpr, lpf] : lineProcessors) {
- if (lpr.FullMatch(key))
+ if (lpr->matchView(key, pcre::ANCHORED | pcre::ENDANCHORED))
lpf(value);
}
unprocessed = true;
diff --git a/src/mongo/util/procparser.cpp b/src/mongo/util/procparser.cpp
index bbe212325ad..67611e1e5ae 100644
--- a/src/mongo/util/procparser.cpp
+++ b/src/mongo/util/procparser.cpp
@@ -38,7 +38,6 @@
#include <boost/algorithm/string/split.hpp>
#include <boost/filesystem.hpp>
#include <fcntl.h>
-#include <pcrecpp.h>
#include <string>
#include <sys/stat.h>
#include <sys/types.h>
@@ -51,6 +50,7 @@
#include "mongo/base/string_data.h"
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/logv2/log.h"
+#include "mongo/util/pcre.h"
#include "mongo/util/scopeguard.h"
#include "mongo/util/str.h"
#include "mongo/util/text.h"
@@ -655,9 +655,9 @@ Status parseProcSelfMountStatsImpl(
// 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
// | | | | | | | | | |
// (1)(2)(3:4)(5) (6) (7) (8) (9) (10) (11)
- static const pcrecpp::RE kRe(R"re(\d+ \d+ \d+:\d+ \S+ (\S+))re");
- std::string mountPoint;
- if (kRe.PartialMatch(line, &mountPoint)) {
+ static const pcre::Regex kRe(R"re(\d+ \d+ \d+:\d+ \S+ (\S+))re");
+ if (auto m = kRe.matchView(line)) {
+ std::string mountPoint{m[1]};
boost::filesystem::path p(mountPoint);
boost::system::error_code ec;
boost::filesystem::space_info spaceInfo = getSpace(p, ec);
diff --git a/src/mongo/util/regex_util.cpp b/src/mongo/util/regex_util.cpp
index 77f875c6e13..7d0242e3083 100644
--- a/src/mongo/util/regex_util.cpp
+++ b/src/mongo/util/regex_util.cpp
@@ -58,7 +58,7 @@ pcrecpp::RE_Options flagsToPcreOptions(StringData optionFlags, StringData opName
// must accept this flag without an error as some drivers send it by default.
continue;
default:
- uasserted(51108,
+ uasserted(6716200,
str::stream() << opName << " invalid flag in regex options: " << flag);
}
}
diff --git a/src/mongo/util/stacktrace_test.cpp b/src/mongo/util/stacktrace_test.cpp
index f747f7a12bf..9d93f652257 100644
--- a/src/mongo/util/stacktrace_test.cpp
+++ b/src/mongo/util/stacktrace_test.cpp
@@ -38,13 +38,13 @@
#include <fmt/printf.h>
#include <functional>
#include <map>
-#include <pcrecpp.h>
#include <random>
#include <signal.h>
#include <sstream>
#include <utility>
#include <vector>
+#include "mongo/base/parse_number.h"
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/bson/json.h"
#include "mongo/config.h"
@@ -55,6 +55,7 @@
#include "mongo/unittest/unittest.h"
#include "mongo/util/debug_util.h"
#include "mongo/util/hex.h"
+#include "mongo/util/pcre.h"
#include "mongo/util/stacktrace.h"
/** `sigaltstack` was introduced in glibc-2.12 in 2010. */
@@ -153,6 +154,15 @@ uintptr_t fromHex(const std::string& s) {
return static_cast<uintptr_t>(std::stoull(s, nullptr, 16));
}
+bool consume(const pcre::Regex& re, StringData* in, std::string* out) {
+ auto m = re.matchView(*in);
+ if (!m)
+ return false;
+ *in = in->substr(m[0].size());
+ *out = std::string{m[1]};
+ return true;
+}
+
// Break down a printStackTrace output for a contrived call tree and sanity-check it.
TEST(StackTrace, PosixFormat) {
if (kIsWindows) {
@@ -174,18 +184,18 @@ TEST(StackTrace, PosixFormat) {
// Each "Frame:" line holds a full json object, but we only examine its "a" field here.
std::string jsonLine;
std::vector<uintptr_t> humanAddrs;
- pcrecpp::StringPiece in{trace};
- static const pcrecpp::RE jsonLineRE(R"re(BACKTRACE: (\{.*\})\n?)re");
- ASSERT_TRUE(jsonLineRE.Consume(&in, &jsonLine)) << "\"" << in.as_string() << "\"";
+ StringData in{trace};
+ static const pcre::Regex jsonLineRE(R"re(^BACKTRACE: (\{.*\})\n?)re");
+ ASSERT_TRUE(consume(jsonLineRE, &in, &jsonLine)) << "\"" << in << "\"";
while (true) {
std::string frameLine;
- static const pcrecpp::RE frameRE(R"re( Frame: (\{.*\})\n?)re");
- if (!frameRE.Consume(&in, &frameLine))
+ static const pcre::Regex frameRE(R"re(^ Frame: (\{.*\})\n?)re");
+ if (!consume(frameRE, &in, &frameLine))
break;
BSONObj frameObj = fromjson(frameLine); // throwy
humanAddrs.push_back(fromHex(frameObj["a"].String()));
}
- ASSERT_TRUE(in.empty()) << "must be consumed fully: \"" << in.as_string() << "\"";
+ ASSERT_TRUE(in.empty()) << "must be consumed fully: \"" << in << "\"";
BSONObj jsonObj = fromjson(jsonLine); // throwy
ASSERT_TRUE(jsonObj.hasField("backtrace"));
@@ -255,14 +265,18 @@ TEST(StackTrace, WindowsFormat) {
std::vector<std::string> lines = splitLines(trace);
- std::string jsonLine;
- ASSERT_TRUE(pcrecpp::RE(R"re(BACKTRACE: (\{.*\}))re").FullMatch(lines[0], &jsonLine));
+ auto re = pcre::Regex(R"re(^BACKTRACE: (\{.*\})$)re");
+ auto m = re.matchView(lines[0]);
+ ASSERT_TRUE(!!m);
+ std::string jsonLine{m[1]};
std::vector<uintptr_t> humanAddrs;
for (size_t i = 1; i < lines.size(); ++i) {
- static const pcrecpp::RE re(R"re( Frame: (?:\{"a":"(.*?)",.*\}))re");
+ static const pcre::Regex re(R"re(^ Frame: (?:\{"a":"(.*?)",.*\})$)re");
uintptr_t addr;
- ASSERT_TRUE(re.FullMatch(lines[i], pcrecpp::Hex(&addr))) << lines[i];
+ auto m = re.matchView(lines[i]);
+ ASSERT_TRUE(!!m) << lines[i];
+ ASSERT_OK(NumberParser{}.base(16)(m[1], &addr));
humanAddrs.push_back(addr);
}