diff options
author | Yuhong Zhang <yuhong.zhang@mongodb.com> | 2022-08-29 13:18:10 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-08-29 14:02:28 +0000 |
commit | c24c3b8d5ac159f255ef22433cd12e5211952c27 (patch) | |
tree | 6b415fac1e83dd1290381d8ff268b7399ff71825 | |
parent | 3a848e239082625f1f1e6c677f149412654f53b6 (diff) | |
download | mongo-c24c3b8d5ac159f255ef22433cd12e5211952c27.tar.gz |
SERVER-69187 Use a no-op buffer to avoid allocating memory when checking for invalid UTF-8 characters
-rw-r--r-- | src/mongo/bson/bson_validate.cpp | 17 | ||||
-rw-r--r-- | src/mongo/util/str_escape.cpp | 27 | ||||
-rw-r--r-- | src/mongo/util/str_escape.h | 8 |
3 files changed, 29 insertions, 23 deletions
diff --git a/src/mongo/bson/bson_validate.cpp b/src/mongo/bson/bson_validate.cpp
index f0bb0d038b5..c1a4a21e34e 100644
--- a/src/mongo/bson/bson_validate.cpp
+++ b/src/mongo/bson/bson_validate.cpp
@@ -223,8 +223,6 @@ public:
     void checkNonConformantElem(const char* ptr, uint32_t offsetToValue, uint8_t type) {
         registerFieldName(ptr + 1);
         ExtendedValidator::checkNonConformantElem(ptr, offsetToValue, type);
-        // Check the field name is UTF-8 encoded.
-        checkUTF8Char(ptr + 1);
         switch (type) {
             case BSONType::Array: {
                 objFrames.push_back({std::vector<std::string>(), false});
@@ -285,19 +283,18 @@ private:
     std::vector<std::pair<std::vector<std::string>, bool>> objFrames = {
         {std::vector<std::string>(), true}};
 
-    void registerFieldName(std::string str) {
+    void registerFieldName(const char* ptr) {
+        // Check the field name is UTF-8 encoded.
+        checkUTF8Char(ptr);
         if (objFrames.back().second) {
-            objFrames.back().first.emplace_back(str);
+            objFrames.back().first.emplace_back(ptr);
         };
     }
 
-private:
     void checkUTF8Char(const char* ptr) {
-        try {
-            str::checkInvalidUTF8(ptr);
-        } catch (const ExceptionFor<ErrorCodes::BadValue>&) {
-            uasserted(NonConformantBSON, "Found string that doesn't follow UTF-8 encoding.");
-        }
+        uassert(NonConformantBSON,
+                "Found string that doesn't follow UTF-8 encoding.",
+                str::validUTF8(ptr));
     }
 };
 
diff --git a/src/mongo/util/str_escape.cpp b/src/mongo/util/str_escape.cpp
index 9dbb479af67..e96922a017e 100644
--- a/src/mongo/util/str_escape.cpp
+++ b/src/mongo/util/str_escape.cpp
@@ -39,6 +39,10 @@ namespace mongo::str {
 namespace {
 constexpr char kHexChar[] = "0123456789abcdef";
 
+struct NoopBuffer {
+    void append(const char* begin, const char* end) {}
+};
+
 // Appends the bytes in the range [begin, end) to the output buffer,
 // which can either be a fmt::memory_buffer, or a std::string.
 template <typename Buffer, typename Iterator>
@@ -499,9 +503,9 @@ std::string escapeForJSON(StringData str, size_t maxLength, size_t* wouldWrite)
     return buffer;
 }
 
-void checkInvalidUTF8(StringData str, size_t maxLength, size_t* wouldWrite) {
+bool validUTF8(StringData str) {
     // No-op buffer and handlers, defined to re-use escape method logic.
-    std::string buffer;
+    NoopBuffer buffer;
     auto singleByteHandler = [](const auto& writer, uint8_t unescaped) {};
     auto twoByteEscaper = [](const auto& writer, uint8_t first, uint8_t second) {};
 
@@ -510,12 +514,17 @@ void checkInvalidUTF8(StringData str, size_t maxLength, size_t* wouldWrite) {
         uasserted(ErrorCodes::BadValue, "Invalid UTF-8 Character");
     };
 
-    escape(buffer,
-           str,
-           std::move(singleByteHandler),
-           std::move(invalidByteHandler),
-           std::move(twoByteEscaper),
-           maxLength,
-           wouldWrite);
+    try {
+        escape(buffer,
+               str,
+               std::move(singleByteHandler),
+               std::move(invalidByteHandler),
+               std::move(twoByteEscaper),
+               std::string::npos,
+               nullptr);
+        return true;
+    } catch (const ExceptionFor<ErrorCodes::BadValue>&) {
+        return false;
+    }
 }
 } // namespace mongo::str
diff --git a/src/mongo/util/str_escape.h b/src/mongo/util/str_escape.h
index 14b89128a30..11386656220 100644
--- a/src/mongo/util/str_escape.h
+++ b/src/mongo/util/str_escape.h
@@ -107,8 +107,8 @@ std::string escapeForJSON(StringData str,
                           size_t maxLength = std::string::npos,
                           size_t* wouldWrite = nullptr);
 
-
-void checkInvalidUTF8(StringData str,
-                      size_t maxLength = std::string::npos,
-                      size_t* wouldWrite = nullptr);
+/**
+ * Returns whether a string consists with valid UTF-8 encoded characters.
+ */
+bool validUTF8(StringData str);
 } // namespace mongo::str