summary | refs | log | tree | commit | diff
path: root/src/mongo/logv2
diff options
context:
space:
mode:
author: Erwin Pe <erwin.pe@mongodb.com> 2022-08-01 14:17:53 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-08-01 15:01:29 +0000
commit: b795776ac3c1599528a5772825f16f88ecb5b1c9 (patch)
tree: b5759670113e3655f80a2f3b1673aabd6087ff94 /src/mongo/logv2
parent: 9a775c3090716d55f3a5aa0d5027fad7af32acd0 (diff)
download: mongo-b795776ac3c1599528a5772825f16f88ecb5b1c9.tar.gz
SERVER-66841 Fix LOGV2 invalid JSON when truncation happens at a backslash character
Diffstat (limited to 'src/mongo/logv2')
-rw-r--r-- src/mongo/logv2/json_formatter.cpp | 29
-rw-r--r-- src/mongo/logv2/logv2_test.cpp | 49
2 files changed, 69 insertions, 9 deletions
diff --git a/src/mongo/logv2/json_formatter.cpp b/src/mongo/logv2/json_formatter.cpp
index cbcfc85f121..58111121423 100644
--- a/src/mongo/logv2/json_formatter.cpp
+++ b/src/mongo/logv2/json_formatter.cpp
@@ -168,16 +168,27 @@ private:
void storeQuoted(StringData name, const T& value) {
format_to(std::back_inserter(_buffer), FMT_COMPILE(R"({}"{}":")"), _separator, name);
std::size_t before = _buffer.size();
- str::escapeForJSON(_buffer, value);
- if (_attributeMaxSize != 0) {
+ std::size_t wouldWrite = 0;
+ std::size_t written = 0;
+ str::escapeForJSON(
+ _buffer, value, _attributeMaxSize ? _attributeMaxSize : std::string::npos, &wouldWrite);
+ written = _buffer.size() - before;
+
+ if (wouldWrite > written) {
+ // The bounded escape may have reached the limit and
+ // stopped writing while in the middle of a UTF-8 sequence,
+ // in which case the incomplete UTF-8 octets at the tail of the
+ // buffer have to be trimmed.
+ // Push a dummy byte so that the UTF-8 safe truncation
+ // will truncate back down to the correct size.
+ _buffer.push_back('x');
auto truncatedEnd =
- str::UTF8SafeTruncation(_buffer.begin() + before, _buffer.end(), _attributeMaxSize);
- if (truncatedEnd != _buffer.end()) {
- BSONObjBuilder truncationInfo = _truncated.subobjStart(name);
- truncationInfo.append("type"_sd, typeName(BSONType::String));
- truncationInfo.append("size"_sd, static_cast<int64_t>(_buffer.size() - before));
- truncationInfo.done();
- }
+ str::UTF8SafeTruncation(_buffer.begin() + before, _buffer.end(), written);
+
+ BSONObjBuilder truncationInfo = _truncated.subobjStart(name);
+ truncationInfo.append("type"_sd, typeName(BSONType::String));
+ truncationInfo.append("size"_sd, static_cast<int64_t>(wouldWrite));
+ truncationInfo.done();
_buffer.resize(truncatedEnd - _buffer.begin());
}
diff --git a/src/mongo/logv2/logv2_test.cpp b/src/mongo/logv2/logv2_test.cpp
index 91d9327f06a..fde66599224 100644
--- a/src/mongo/logv2/logv2_test.cpp
+++ b/src/mongo/logv2/logv2_test.cpp
@@ -64,6 +64,7 @@
#include "mongo/unittest/temp_dir.h"
#include "mongo/unittest/unittest.h"
#include "mongo/util/exit_code.h"
+#include "mongo/util/str_escape.h"
#include "mongo/util/string_map.h"
#include "mongo/util/uuid.h"
@@ -1568,6 +1569,54 @@ TEST_F(LogV2Test, JsonTruncation) {
validateArrayTruncation(mongo::fromjson(lines.back()));
}
+TEST_F(LogV2Test, StringTruncation) {
+ const AtomicWord<int32_t> maxAttributeSizeKB(1);
+ auto lines = makeLineCapture(JSONFormatter(&maxAttributeSizeKB));
+
+ std::size_t maxLength = maxAttributeSizeKB.load() << 10;
+ std::string prefix(maxLength - 3, 'a');
+
+ struct TestCase {
+ std::string input;
+ std::string suffix;
+ std::string note;
+ };
+
+ TestCase tests[] = {
+ {prefix + "LMNOPQ", "LMN", "unescaped 1-byte octet"},
+ // "\n\"NOPQ" expands to "\\n\\\"NOPQ" after escape, and the limit
+ // is reached at the 2nd '\\' octet, but since it splits the "\\\""
+ // sequence, the actual truncation happens after the 'n' octet.
+ {prefix + "\n\"NOPQ", "\n", "2-byte escape sequence"},
+ // "L\vNOPQ" expands to "L\\u000bNOPQ" after escape, and the limit
+ // is reached at the 'u' octet, so the entire sequence is truncated.
+ {prefix + "L\vNOPQ", "L", "multi-byte escape sequence"},
+ {prefix + "LM\xC3\xB1PQ", "LM", "2-byte UTF-8 sequence"},
+ {prefix + "L\xE1\x9B\x8FPQ", "L", "3-byte UTF-8 sequence"},
+ {prefix + "L\xF0\x90\x8C\xBCQ", "L", "4-byte UTF-8 sequence"},
+ {prefix + "\xE1\x9B\x8E\xE1\x9B\x8F", "\xE1\x9B\x8E", "UTF-8 codepoint boundary"},
+ // The invalid UTF-8 codepoint 0xC3 is replaced with "\\ufffd", and truncated entirely
+ {prefix + "L\xC3NOPQ", "L", "escaped invalid codepoint"},
+ {std::string(maxLength, '\\'), "\\", "escaped backslash"},
+ };
+
+ for (const auto& [input, suffix, note] : tests) {
+ LOGV2(6694001, "name", "name"_attr = input);
+ BSONObj obj = fromjson(lines.back());
+
+ auto str = obj[constants::kAttributesFieldName]["name"].checkAndGetStringData();
+ std::string context = "Failed test: " + note;
+
+ ASSERT_LTE(str.size(), maxLength) << context;
+ ASSERT(str.endsWith(suffix))
+ << context << " - string " << str << " does not end with " << suffix;
+
+ auto trunc = obj[constants::kTruncatedFieldName]["name"];
+ ASSERT_EQUALS(trunc["type"].String(), typeName(BSONType::String)) << context;
+ ASSERT_EQUALS(trunc["size"].numberLong(), str::escapeForJSON(input).size()) << context;
+ }
+}
+
TEST_F(LogV2Test, Threads) {
auto linesPlain = makeLineCapture(PlainFormatter());
auto linesText = makeLineCapture(TextFormatter());