From b795776ac3c1599528a5772825f16f88ecb5b1c9 Mon Sep 17 00:00:00 2001
From: Erwin Pe
Date: Mon, 1 Aug 2022 14:17:53 +0000
Subject: SERVER-66841 Fix LOGV2 invalid JSON when truncation happens at a backslash character

---
 src/mongo/logv2/logv2_test.cpp | 53 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

(limited to 'src/mongo/logv2/logv2_test.cpp')

diff --git a/src/mongo/logv2/logv2_test.cpp b/src/mongo/logv2/logv2_test.cpp
index 91d9327f06a..fde66599224 100644
--- a/src/mongo/logv2/logv2_test.cpp
+++ b/src/mongo/logv2/logv2_test.cpp
@@ -64,6 +64,7 @@
 #include "mongo/unittest/temp_dir.h"
 #include "mongo/unittest/unittest.h"
 #include "mongo/util/exit_code.h"
+#include "mongo/util/str_escape.h"
 #include "mongo/util/string_map.h"
 #include "mongo/util/uuid.h"
 
@@ -1568,6 +1569,58 @@ TEST_F(LogV2Test, JsonTruncation) {
     validateArrayTruncation(mongo::fromjson(lines.back()));
 }
 
+// Checks that a string attribute whose escaped form exceeds the size limit is truncated without
+// splitting a JSON escape sequence or a multi-byte UTF-8 sequence.
+TEST_F(LogV2Test, StringTruncation) {
+    const AtomicWord<int32_t> maxAttributeSizeKB(1);
+    auto lines = makeLineCapture(JSONFormatter(&maxAttributeSizeKB));
+
+    std::size_t maxLength = maxAttributeSizeKB.load() << 10;
+    // The filler leaves exactly three octets of room before the limit is reached.
+    std::string prefix(maxLength - 3, 'a');
+
+    struct TestCase {
+        std::string input;   // raw attribute value handed to the logger
+        std::string suffix;  // expected tail of the truncated attribute after parsing
+        std::string note;    // label included in assertion failure messages
+    };
+
+    TestCase tests[] = {
+        {prefix + "LMNOPQ", "LMN", "unescaped 1-byte octet"},
+        // "\n\"NOPQ" expands to "\\n\\\"NOPQ" after escape, and the limit
+        // is reached at the 2nd '\\' octet, but since it splits the "\\\""
+        // sequence, the actual truncation happens after the 'n' octet.
+        {prefix + "\n\"NOPQ", "\n", "2-byte escape sequence"},
+        // "L\vNOPQ" expands to "L\\u000bNOPQ" after escape, and the limit
+        // is reached at the 'u' octet, so the entire sequence is truncated.
+        {prefix + "L\vNOPQ", "L", "multi-byte escape sequence"},
+        {prefix + "LM\xC3\xB1PQ", "LM", "2-byte UTF-8 sequence"},
+        {prefix + "L\xE1\x9B\x8FPQ", "L", "3-byte UTF-8 sequence"},
+        {prefix + "L\xF0\x90\x8C\xBCQ", "L", "4-byte UTF-8 sequence"},
+        {prefix + "\xE1\x9B\x8E\xE1\x9B\x8F", "\xE1\x9B\x8E", "UTF-8 codepoint boundary"},
+        // The invalid UTF-8 codepoint 0xC3 is replaced with "\\ufffd", and truncated entirely
+        {prefix + "L\xC3NOPQ", "L", "escaped invalid codepoint"},
+        {std::string(maxLength, '\\'), "\\", "escaped backslash"},
+    };
+
+    for (const auto& [input, suffix, note] : tests) {
+        LOGV2(6694001, "name", "name"_attr = input);
+        BSONObj obj = fromjson(lines.back());
+
+        auto str = obj[constants::kAttributesFieldName]["name"].checkAndGetStringData();
+        std::string context = "Failed test: " + note;
+
+        ASSERT_LTE(str.size(), maxLength) << context;
+        ASSERT(str.endsWith(suffix))
+            << context << " - string " << str << " does not end with " << suffix;
+
+        auto trunc = obj[constants::kTruncatedFieldName]["name"];
+        ASSERT_EQUALS(trunc["type"].String(), typeName(BSONType::String)) << context;
+        // The truncation record reports the size of the complete escaped input.
+        ASSERT_EQUALS(trunc["size"].numberLong(), str::escapeForJSON(input).size()) << context;
+    }
+}
+
 TEST_F(LogV2Test, Threads) {
     auto linesPlain = makeLineCapture(PlainFormatter());
     auto linesText = makeLineCapture(TextFormatter());
-- 
cgit v1.2.1