diff options
author | Erwin Pe <erwin.pe@mongodb.com> | 2022-08-01 14:17:53 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-08-01 15:01:29 +0000 |
commit | b795776ac3c1599528a5772825f16f88ecb5b1c9 (patch) | |
tree | b5759670113e3655f80a2f3b1673aabd6087ff94 /src/mongo/logv2 | |
parent | 9a775c3090716d55f3a5aa0d5027fad7af32acd0 (diff) | |
download | mongo-b795776ac3c1599528a5772825f16f88ecb5b1c9.tar.gz |
SERVER-66841 Fix LOGV2 invalid JSON when truncation happens at a backslash character
Diffstat (limited to 'src/mongo/logv2')
-rw-r--r-- | src/mongo/logv2/json_formatter.cpp | 29 |
-rw-r--r-- | src/mongo/logv2/logv2_test.cpp | 49 |
2 files changed, 69 insertions, 9 deletions
diff --git a/src/mongo/logv2/json_formatter.cpp b/src/mongo/logv2/json_formatter.cpp index cbcfc85f121..58111121423 100644 --- a/src/mongo/logv2/json_formatter.cpp +++ b/src/mongo/logv2/json_formatter.cpp @@ -168,16 +168,27 @@ private: void storeQuoted(StringData name, const T& value) { format_to(std::back_inserter(_buffer), FMT_COMPILE(R"({}"{}":")"), _separator, name); std::size_t before = _buffer.size(); - str::escapeForJSON(_buffer, value); - if (_attributeMaxSize != 0) { + std::size_t wouldWrite = 0; + std::size_t written = 0; + str::escapeForJSON( + _buffer, value, _attributeMaxSize ? _attributeMaxSize : std::string::npos, &wouldWrite); + written = _buffer.size() - before; + + if (wouldWrite > written) { + // The bounded escape may have reached the limit and + // stopped writing while in the middle of a UTF-8 sequence, + // in which case the incomplete UTF-8 octets at the tail of the + // buffer have to be trimmed. + // Push a dummy byte so that the UTF-8 safe truncation + // will truncate back down to the correct size. 
+ _buffer.push_back('x'); auto truncatedEnd = - str::UTF8SafeTruncation(_buffer.begin() + before, _buffer.end(), _attributeMaxSize); - if (truncatedEnd != _buffer.end()) { - BSONObjBuilder truncationInfo = _truncated.subobjStart(name); - truncationInfo.append("type"_sd, typeName(BSONType::String)); - truncationInfo.append("size"_sd, static_cast<int64_t>(_buffer.size() - before)); - truncationInfo.done(); - } + str::UTF8SafeTruncation(_buffer.begin() + before, _buffer.end(), written); + + BSONObjBuilder truncationInfo = _truncated.subobjStart(name); + truncationInfo.append("type"_sd, typeName(BSONType::String)); + truncationInfo.append("size"_sd, static_cast<int64_t>(wouldWrite)); + truncationInfo.done(); _buffer.resize(truncatedEnd - _buffer.begin()); } diff --git a/src/mongo/logv2/logv2_test.cpp b/src/mongo/logv2/logv2_test.cpp index 91d9327f06a..fde66599224 100644 --- a/src/mongo/logv2/logv2_test.cpp +++ b/src/mongo/logv2/logv2_test.cpp @@ -64,6 +64,7 @@ #include "mongo/unittest/temp_dir.h" #include "mongo/unittest/unittest.h" #include "mongo/util/exit_code.h" +#include "mongo/util/str_escape.h" #include "mongo/util/string_map.h" #include "mongo/util/uuid.h" @@ -1568,6 +1569,54 @@ TEST_F(LogV2Test, JsonTruncation) { validateArrayTruncation(mongo::fromjson(lines.back())); } +TEST_F(LogV2Test, StringTruncation) { + const AtomicWord<int32_t> maxAttributeSizeKB(1); + auto lines = makeLineCapture(JSONFormatter(&maxAttributeSizeKB)); + + std::size_t maxLength = maxAttributeSizeKB.load() << 10; + std::string prefix(maxLength - 3, 'a'); + + struct TestCase { + std::string input; + std::string suffix; + std::string note; + }; + + TestCase tests[] = { + {prefix + "LMNOPQ", "LMN", "unescaped 1-byte octet"}, + // "\n\"NOPQ" expands to "\\n\\\"NOPQ" after escape, and the limit + // is reached at the 2nd '\\' octet, but since it splits the "\\\"" + // sequence, the actual truncation happens after the 'n' octet. 
+ {prefix + "\n\"NOPQ", "\n", "2-byte escape sequence"}, + // "L\vNOPQ" expands to "L\\u000bNOPQ" after escape, and the limit + // is reached at the 'u' octet, so the entire sequence is truncated. + {prefix + "L\vNOPQ", "L", "multi-byte escape sequence"}, + {prefix + "LM\xC3\xB1PQ", "LM", "2-byte UTF-8 sequence"}, + {prefix + "L\xE1\x9B\x8FPQ", "L", "3-byte UTF-8 sequence"}, + {prefix + "L\xF0\x90\x8C\xBCQ", "L", "4-byte UTF-8 sequence"}, + {prefix + "\xE1\x9B\x8E\xE1\x9B\x8F", "\xE1\x9B\x8E", "UTF-8 codepoint boundary"}, + // The invalid UTF-8 codepoint 0xC3 is replaced with "\\ufffd", and truncated entirely + {prefix + "L\xC3NOPQ", "L", "escaped invalid codepoint"}, + {std::string(maxLength, '\\'), "\\", "escaped backslash"}, + }; + + for (const auto& [input, suffix, note] : tests) { + LOGV2(6694001, "name", "name"_attr = input); + BSONObj obj = fromjson(lines.back()); + + auto str = obj[constants::kAttributesFieldName]["name"].checkAndGetStringData(); + std::string context = "Failed test: " + note; + + ASSERT_LTE(str.size(), maxLength) << context; + ASSERT(str.endsWith(suffix)) + << context << " - string " << str << " does not end with " << suffix; + + auto trunc = obj[constants::kTruncatedFieldName]["name"]; + ASSERT_EQUALS(trunc["type"].String(), typeName(BSONType::String)) << context; + ASSERT_EQUALS(trunc["size"].numberLong(), str::escapeForJSON(input).size()) << context; + } +} + TEST_F(LogV2Test, Threads) { auto linesPlain = makeLineCapture(PlainFormatter()); auto linesText = makeLineCapture(TextFormatter()); |