summary refs log tree commit diff
diff options
context:
space:
mode:
author Yuhong Zhang <yuhong.zhang@mongodb.com> 2022-08-29 13:18:10 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-08-29 14:02:28 +0000
commit c24c3b8d5ac159f255ef22433cd12e5211952c27 (patch)
tree 6b415fac1e83dd1290381d8ff268b7399ff71825
parent 3a848e239082625f1f1e6c677f149412654f53b6 (diff)
download mongo-c24c3b8d5ac159f255ef22433cd12e5211952c27.tar.gz
SERVER-69187 Use a no-op buffer to avoid allocating memory when checking for invalid UTF-8 characters
-rw-r--r-- src/mongo/bson/bson_validate.cpp 17
-rw-r--r-- src/mongo/util/str_escape.cpp 27
-rw-r--r-- src/mongo/util/str_escape.h 8
3 files changed, 29 insertions, 23 deletions
diff --git a/src/mongo/bson/bson_validate.cpp b/src/mongo/bson/bson_validate.cpp
index f0bb0d038b5..c1a4a21e34e 100644
--- a/src/mongo/bson/bson_validate.cpp
+++ b/src/mongo/bson/bson_validate.cpp
@@ -223,8 +223,6 @@ public:
void checkNonConformantElem(const char* ptr, uint32_t offsetToValue, uint8_t type) {
registerFieldName(ptr + 1);
ExtendedValidator::checkNonConformantElem(ptr, offsetToValue, type);
- // Check the field name is UTF-8 encoded.
- checkUTF8Char(ptr + 1);
switch (type) {
case BSONType::Array: {
objFrames.push_back({std::vector<std::string>(), false});
@@ -285,19 +283,18 @@ private:
std::vector<std::pair<std::vector<std::string>, bool>> objFrames = {
{std::vector<std::string>(), true}};
- void registerFieldName(std::string str) {
+ void registerFieldName(const char* ptr) {
+ // Check the field name is UTF-8 encoded.
+ checkUTF8Char(ptr);
if (objFrames.back().second) {
- objFrames.back().first.emplace_back(str);
+ objFrames.back().first.emplace_back(ptr);
};
}
-private:
void checkUTF8Char(const char* ptr) {
- try {
- str::checkInvalidUTF8(ptr);
- } catch (const ExceptionFor<ErrorCodes::BadValue>&) {
- uasserted(NonConformantBSON, "Found string that doesn't follow UTF-8 encoding.");
- }
+ uassert(NonConformantBSON,
+ "Found string that doesn't follow UTF-8 encoding.",
+ str::validUTF8(ptr));
}
};
diff --git a/src/mongo/util/str_escape.cpp b/src/mongo/util/str_escape.cpp
index 9dbb479af67..e96922a017e 100644
--- a/src/mongo/util/str_escape.cpp
+++ b/src/mongo/util/str_escape.cpp
@@ -39,6 +39,10 @@ namespace mongo::str {
namespace {
constexpr char kHexChar[] = "0123456789abcdef";
+struct NoopBuffer {
+ void append(const char* begin, const char* end) {}
+};
+
// Appends the bytes in the range [begin, end) to the output buffer,
// which can either be a fmt::memory_buffer, or a std::string.
template <typename Buffer, typename Iterator>
@@ -499,9 +503,9 @@ std::string escapeForJSON(StringData str, size_t maxLength, size_t* wouldWrite)
return buffer;
}
-void checkInvalidUTF8(StringData str, size_t maxLength, size_t* wouldWrite) {
+bool validUTF8(StringData str) {
// No-op buffer and handlers, defined to re-use escape method logic.
- std::string buffer;
+ NoopBuffer buffer;
auto singleByteHandler = [](const auto& writer, uint8_t unescaped) {};
auto twoByteEscaper = [](const auto& writer, uint8_t first, uint8_t second) {};
@@ -510,12 +514,17 @@ void checkInvalidUTF8(StringData str, size_t maxLength, size_t* wouldWrite) {
uasserted(ErrorCodes::BadValue, "Invalid UTF-8 Character");
};
- escape(buffer,
- str,
- std::move(singleByteHandler),
- std::move(invalidByteHandler),
- std::move(twoByteEscaper),
- maxLength,
- wouldWrite);
+ try {
+ escape(buffer,
+ str,
+ std::move(singleByteHandler),
+ std::move(invalidByteHandler),
+ std::move(twoByteEscaper),
+ std::string::npos,
+ nullptr);
+ return true;
+ } catch (const ExceptionFor<ErrorCodes::BadValue>&) {
+ return false;
+ }
}
} // namespace mongo::str
diff --git a/src/mongo/util/str_escape.h b/src/mongo/util/str_escape.h
index 14b89128a30..11386656220 100644
--- a/src/mongo/util/str_escape.h
+++ b/src/mongo/util/str_escape.h
@@ -107,8 +107,8 @@ std::string escapeForJSON(StringData str,
size_t maxLength = std::string::npos,
size_t* wouldWrite = nullptr);
-
-void checkInvalidUTF8(StringData str,
- size_t maxLength = std::string::npos,
- size_t* wouldWrite = nullptr);
+/**
+ * Returns whether a string consists of valid UTF-8 encoded characters.
+ */
+bool validUTF8(StringData str);
} // namespace mongo::str