summaryrefslogtreecommitdiff
path: root/src/mongo
diff options
context:
space:
mode:
authorHenrik Edin <henrik.edin@mongodb.com>2020-02-04 08:20:25 -0500
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-02-11 22:09:53 +0000
commit0245f85a0d296c7084ac564ff88dfa5935803c38 (patch)
tree907a018e6bdb80788648d9355b83fa246fb0ca57 /src/mongo
parentef15d9830681a1c4fe70cc5670b084a3530c27bc (diff)
downloadmongo-0245f85a0d296c7084ac564ff88dfa5935803c38.tar.gz
SERVER-46017 Add truncation support for JSON formatter
Truncated objects will report truncation status in 'truncated' and 'size' sub objects.
Diffstat (limited to 'src/mongo')
-rw-r--r--src/mongo/bson/bsonelement.cpp133
-rw-r--r--src/mongo/bson/bsonelement.h53
-rw-r--r--src/mongo/bson/bsonobj.cpp99
-rw-r--r--src/mongo/bson/bsonobj.h47
-rw-r--r--src/mongo/db/initialize_server_global_state.cpp21
-rw-r--r--src/mongo/db/initialize_server_global_state.idl13
-rw-r--r--src/mongo/logv2/bson_formatter.h6
-rw-r--r--src/mongo/logv2/constants.h5
-rw-r--r--src/mongo/logv2/json_formatter.cpp92
-rw-r--r--src/mongo/logv2/json_formatter.h10
-rw-r--r--src/mongo/logv2/log_domain_global.cpp22
-rw-r--r--src/mongo/logv2/log_domain_global.h18
-rw-r--r--src/mongo/logv2/log_test_v2.cpp95
-rw-r--r--src/mongo/logv2/plain_formatter.cpp13
-rw-r--r--src/mongo/logv2/plain_formatter.h10
-rw-r--r--src/mongo/logv2/text_formatter.h5
-rw-r--r--src/mongo/util/str.h41
-rw-r--r--src/mongo/util/str_test.cpp27
18 files changed, 525 insertions, 185 deletions
diff --git a/src/mongo/bson/bsonelement.cpp b/src/mongo/bson/bsonelement.cpp
index b2746f1ddf1..667b5eeb079 100644
--- a/src/mongo/bson/bsonelement.cpp
+++ b/src/mongo/bson/bsonelement.cpp
@@ -67,39 +67,55 @@ const double BSONElement::kLongLongMaxPlusOneAsDouble =
scalbn(1, std::numeric_limits<long long>::digits);
std::string BSONElement::jsonString(JsonStringFormat format,
+ bool includeSeparator,
bool includeFieldNames,
- int pretty) const {
+ int pretty,
+ size_t writeLimit,
+ BSONObj* outTruncationResult) const {
fmt::memory_buffer buffer;
- jsonStringBuffer(format, includeFieldNames, pretty, buffer);
+ BSONObj truncation =
+ jsonStringBuffer(format, includeSeparator, includeFieldNames, pretty, buffer, writeLimit);
+ if (outTruncationResult) {
+ *outTruncationResult = truncation;
+ }
return fmt::to_string(buffer);
}
-void BSONElement::jsonStringBuffer(JsonStringFormat format,
- bool includeFieldNames,
- int pretty,
- fmt::memory_buffer& buffer) const {
+BSONObj BSONElement::jsonStringBuffer(JsonStringFormat format,
+ bool includeSeparator,
+ bool includeFieldNames,
+ int pretty,
+ fmt::memory_buffer& buffer,
+ size_t writeLimit) const {
auto withGenerator = [&](auto&& gen) {
- jsonStringGenerator(gen, includeFieldNames, pretty, buffer);
+ return jsonStringGenerator(
+ gen, includeSeparator, includeFieldNames, pretty, buffer, writeLimit);
};
if (format == ExtendedCanonicalV2_0_0)
- withGenerator(ExtendedCanonicalV200Generator());
+ return withGenerator(ExtendedCanonicalV200Generator());
else if (format == ExtendedRelaxedV2_0_0)
- withGenerator(ExtendedRelaxedV200Generator());
+ return withGenerator(ExtendedRelaxedV200Generator());
else if (format == LegacyStrict) {
- withGenerator(LegacyStrictGenerator());
+ return withGenerator(LegacyStrictGenerator());
} else {
MONGO_UNREACHABLE;
}
}
template <typename Generator>
-void BSONElement::_jsonStringGenerator(const Generator& g,
- bool includeFieldNames,
- int pretty,
- fmt::memory_buffer& buffer) const {
+BSONObj BSONElement::_jsonStringGenerator(const Generator& g,
+ bool includeSeparator,
+ bool includeFieldNames,
+ int pretty,
+ fmt::memory_buffer& buffer,
+ size_t writeLimit) const {
+ size_t before = buffer.size();
+ if (includeSeparator)
+ buffer.push_back(',');
+
if (includeFieldNames) {
g.writePadding(buffer);
- g.writeString(buffer, fieldName());
+ g.writeString(buffer, fieldNameStringData());
g.writePadding(buffer);
buffer.push_back(':');
}
@@ -134,12 +150,28 @@ void BSONElement::_jsonStringGenerator(const Generator& g,
case Undefined:
g.writeUndefined(buffer);
break;
- case Object:
- embeddedObject().jsonStringGenerator(g, pretty ? pretty + 1 : 0, false, buffer);
- break;
- case mongo::Array:
- embeddedObject().jsonStringGenerator(g, pretty ? pretty + 1 : 0, true, buffer);
- break;
+ case Object: {
+ BSONObj truncated = embeddedObject().jsonStringGenerator(
+ g, pretty ? pretty + 1 : 0, false, buffer, writeLimit);
+ if (!truncated.isEmpty()) {
+ BSONObjBuilder builder;
+ builder.append(fieldNameStringData(), truncated);
+ return builder.obj();
+ }
+ // Return here to skip the write-limit check below: this element is not a leaf.
+ return truncated;
+ }
+ case mongo::Array: {
+ BSONObj truncated = embeddedObject().jsonStringGenerator(
+ g, pretty ? pretty + 1 : 0, true, buffer, writeLimit);
+ if (!truncated.isEmpty()) {
+ BSONObjBuilder builder;
+ builder.append(fieldNameStringData(), truncated);
+ return builder.obj();
+ }
+ // Return here to skip the write-limit check below: this element is not a leaf.
+ return truncated;
+ }
case DBRef:
// valuestrsize() returns the size including the null terminator
g.writeDBRef(buffer,
@@ -187,25 +219,46 @@ void BSONElement::_jsonStringGenerator(const Generator& g,
default:
MONGO_UNREACHABLE;
}
-}
-
-void BSONElement::jsonStringGenerator(ExtendedCanonicalV200Generator const& generator,
- bool includeFieldNames,
- int pretty,
- fmt::memory_buffer& buffer) const {
- _jsonStringGenerator(generator, includeFieldNames, pretty, buffer);
-}
-void BSONElement::jsonStringGenerator(ExtendedRelaxedV200Generator const& generator,
- bool includeFieldNames,
- int pretty,
- fmt::memory_buffer& buffer) const {
- _jsonStringGenerator(generator, includeFieldNames, pretty, buffer);
-}
-void BSONElement::jsonStringGenerator(LegacyStrictGenerator const& generator,
- bool includeFieldNames,
- int pretty,
- fmt::memory_buffer& buffer) const {
- _jsonStringGenerator(generator, includeFieldNames, pretty, buffer);
+ // If a write limit is enabled and we went over it, record truncation info and roll back buffer.
+ if (writeLimit > 0 && buffer.size() > writeLimit) {
+ buffer.resize(before);
+
+ BSONObjBuilder builder;
+ BSONObjBuilder truncationInfo = builder.subobjStart(fieldNameStringData());
+ truncationInfo.append("type"_sd, typeName(type()));
+ truncationInfo.append("size"_sd, valuesize());
+ truncationInfo.done();
+ return builder.obj();
+ }
+ return BSONObj();
+}
+
+BSONObj BSONElement::jsonStringGenerator(ExtendedCanonicalV200Generator const& generator,
+ bool includeSeparator,
+ bool includeFieldNames,
+ int pretty,
+ fmt::memory_buffer& buffer,
+ size_t writeLimit) const {
+ return _jsonStringGenerator(
+ generator, includeSeparator, includeFieldNames, pretty, buffer, writeLimit);
+}
+BSONObj BSONElement::jsonStringGenerator(ExtendedRelaxedV200Generator const& generator,
+ bool includeSeparator,
+ bool includeFieldNames,
+ int pretty,
+ fmt::memory_buffer& buffer,
+ size_t writeLimit) const {
+ return _jsonStringGenerator(
+ generator, includeSeparator, includeFieldNames, pretty, buffer, writeLimit);
+}
+BSONObj BSONElement::jsonStringGenerator(LegacyStrictGenerator const& generator,
+ bool includeSeparator,
+ bool includeFieldNames,
+ int pretty,
+ fmt::memory_buffer& buffer,
+ size_t writeLimit) const {
+ return _jsonStringGenerator(
+ generator, includeSeparator, includeFieldNames, pretty, buffer, writeLimit);
}
namespace {
diff --git a/src/mongo/bson/bsonelement.h b/src/mongo/bson/bsonelement.h
index a012ee3c169..e3a0e37a2bc 100644
--- a/src/mongo/bson/bsonelement.h
+++ b/src/mongo/bson/bsonelement.h
@@ -212,26 +212,37 @@ public:
int depth = 0) const;
std::string jsonString(JsonStringFormat format,
+ bool includeSeparator,
bool includeFieldNames = true,
- int pretty = 0) const;
+ int pretty = 0,
+ size_t writeLimit = 0,
+ BSONObj* outTruncationResult = nullptr) const;
- void jsonStringBuffer(JsonStringFormat format,
- bool includeFieldNames,
- int pretty,
- fmt::memory_buffer& buffer) const;
-
- void jsonStringGenerator(ExtendedCanonicalV200Generator const& generator,
- bool includeFieldNames,
- int pretty,
- fmt::memory_buffer& buffer) const;
- void jsonStringGenerator(ExtendedRelaxedV200Generator const& generator,
- bool includeFieldNames,
- int pretty,
- fmt::memory_buffer& buffer) const;
- void jsonStringGenerator(LegacyStrictGenerator const& generator,
+ BSONObj jsonStringBuffer(JsonStringFormat format,
+ bool includeSeparator,
bool includeFieldNames,
int pretty,
- fmt::memory_buffer& buffer) const;
+ fmt::memory_buffer& buffer,
+ size_t writeLimit = 0) const;
+
+ BSONObj jsonStringGenerator(ExtendedCanonicalV200Generator const& generator,
+ bool includeSeparator,
+ bool includeFieldNames,
+ int pretty,
+ fmt::memory_buffer& buffer,
+ size_t writeLimit = 0) const;
+ BSONObj jsonStringGenerator(ExtendedRelaxedV200Generator const& generator,
+ bool includeSeparator,
+ bool includeFieldNames,
+ int pretty,
+ fmt::memory_buffer& buffer,
+ size_t writeLimit = 0) const;
+ BSONObj jsonStringGenerator(LegacyStrictGenerator const& generator,
+ bool includeSeparator,
+ bool includeFieldNames,
+ int pretty,
+ fmt::memory_buffer& buffer,
+ size_t writeLimit = 0) const;
operator std::string() const {
return toString();
@@ -772,10 +783,12 @@ public:
private:
template <typename Generator>
- void _jsonStringGenerator(const Generator& g,
- bool includeFieldNames,
- int pretty,
- fmt::memory_buffer& buffer) const;
+ BSONObj _jsonStringGenerator(const Generator& g,
+ bool includeSeparator,
+ bool includeFieldNames,
+ int pretty,
+ fmt::memory_buffer& buffer,
+ size_t writeLimit) const;
const char* data;
int fieldNameSize_; // internal size includes null terminator
diff --git a/src/mongo/bson/bsonobj.cpp b/src/mongo/bson/bsonobj.cpp
index 6fab09e94ab..5ed88c978aa 100644
--- a/src/mongo/bson/bsonobj.cpp
+++ b/src/mongo/bson/bsonobj.cpp
@@ -144,72 +144,91 @@ BSONObj BSONObj::getOwned(const BSONObj& obj) {
}
template <typename Generator>
-void BSONObj::_jsonStringGenerator(const Generator& g,
- int pretty,
- bool isArray,
- fmt::memory_buffer& buffer) const {
+BSONObj BSONObj::_jsonStringGenerator(const Generator& g,
+ int pretty,
+ bool isArray,
+ fmt::memory_buffer& buffer,
+ size_t writeLimit) const {
if (isEmpty()) {
fmt::format_to(buffer, "{}", isArray ? "[]" : "{}");
- return;
+ return BSONObj();
}
buffer.push_back(isArray ? '[' : '{');
BSONObjIterator i(*this);
BSONElement e = i.next();
- if (!e.eoo())
+ BSONObj truncation;
+ if (!e.eoo()) {
+ bool writeSeparator = false;
while (1) {
- e.jsonStringGenerator(g, !isArray, pretty, buffer);
+ truncation =
+ e.jsonStringGenerator(g, writeSeparator, !isArray, pretty, buffer, writeLimit);
e = i.next();
- if (e.eoo()) {
+ if (!truncation.isEmpty() || e.eoo()) {
g.writePadding(buffer);
break;
}
- buffer.push_back(',');
+ writeSeparator = true;
if (pretty) {
fmt::format_to(buffer, "{: <{}}", '\n', pretty * 2);
}
}
+ }
buffer.push_back(isArray ? ']' : '}');
-}
-
-void BSONObj::jsonStringGenerator(ExtendedCanonicalV200Generator const& generator,
- int pretty,
- bool isArray,
- fmt::memory_buffer& buffer) const {
- _jsonStringGenerator(generator, pretty, isArray, buffer);
-}
-void BSONObj::jsonStringGenerator(ExtendedRelaxedV200Generator const& generator,
- int pretty,
- bool isArray,
- fmt::memory_buffer& buffer) const {
- _jsonStringGenerator(generator, pretty, isArray, buffer);
-}
-void BSONObj::jsonStringGenerator(LegacyStrictGenerator const& generator,
- int pretty,
- bool isArray,
- fmt::memory_buffer& buffer) const {
- _jsonStringGenerator(generator, pretty, isArray, buffer);
-}
-
-std::string BSONObj::jsonString(JsonStringFormat format, int pretty, bool isArray) const {
+ return truncation;
+}
+
+BSONObj BSONObj::jsonStringGenerator(ExtendedCanonicalV200Generator const& generator,
+ int pretty,
+ bool isArray,
+ fmt::memory_buffer& buffer,
+ size_t writeLimit) const {
+ return _jsonStringGenerator(generator, pretty, isArray, buffer, writeLimit);
+}
+BSONObj BSONObj::jsonStringGenerator(ExtendedRelaxedV200Generator const& generator,
+ int pretty,
+ bool isArray,
+ fmt::memory_buffer& buffer,
+ size_t writeLimit) const {
+ return _jsonStringGenerator(generator, pretty, isArray, buffer, writeLimit);
+}
+BSONObj BSONObj::jsonStringGenerator(LegacyStrictGenerator const& generator,
+ int pretty,
+ bool isArray,
+ fmt::memory_buffer& buffer,
+ size_t writeLimit) const {
+ return _jsonStringGenerator(generator, pretty, isArray, buffer, writeLimit);
+}
+
+std::string BSONObj::jsonString(JsonStringFormat format,
+ int pretty,
+ bool isArray,
+ size_t writeLimit,
+ BSONObj* outTruncationResult) const {
fmt::memory_buffer buffer;
- jsonStringBuffer(format, pretty, isArray, buffer);
+ BSONObj truncation = jsonStringBuffer(format, pretty, isArray, buffer);
+ if (outTruncationResult) {
+ *outTruncationResult = truncation;
+ }
return fmt::to_string(buffer);
}
-void BSONObj::jsonStringBuffer(JsonStringFormat format,
- int pretty,
- bool isArray,
- fmt::memory_buffer& buffer) const {
- auto withGenerator = [&](auto&& gen) { jsonStringGenerator(gen, pretty, isArray, buffer); };
+BSONObj BSONObj::jsonStringBuffer(JsonStringFormat format,
+ int pretty,
+ bool isArray,
+ fmt::memory_buffer& buffer,
+ size_t writeLimit) const {
+ auto withGenerator = [&](auto&& gen) {
+ return jsonStringGenerator(gen, pretty, isArray, buffer, writeLimit);
+ };
if (format == ExtendedCanonicalV2_0_0) {
- withGenerator(ExtendedCanonicalV200Generator());
+ return withGenerator(ExtendedCanonicalV200Generator());
} else if (format == ExtendedRelaxedV2_0_0) {
- withGenerator(ExtendedRelaxedV200Generator());
+ return withGenerator(ExtendedRelaxedV200Generator());
} else if (format == LegacyStrict) {
- withGenerator(LegacyStrictGenerator());
+ return withGenerator(LegacyStrictGenerator());
} else {
MONGO_UNREACHABLE;
}
diff --git a/src/mongo/bson/bsonobj.h b/src/mongo/bson/bsonobj.h
index 94af60877e8..6df5e45ce57 100644
--- a/src/mongo/bson/bsonobj.h
+++ b/src/mongo/bson/bsonobj.h
@@ -266,25 +266,31 @@ public:
*/
std::string jsonString(JsonStringFormat format = ExtendedCanonicalV2_0_0,
int pretty = 0,
- bool isArray = false) const;
+ bool isArray = false,
+ size_t writeLimit = 0,
+ BSONObj* outTruncationResult = nullptr) const;
- void jsonStringBuffer(JsonStringFormat format,
- int pretty,
- bool isArray,
- fmt::memory_buffer& buffer) const;
-
- void jsonStringGenerator(ExtendedCanonicalV200Generator const& generator,
- int pretty,
- bool isArray,
- fmt::memory_buffer& buffer) const;
- void jsonStringGenerator(ExtendedRelaxedV200Generator const& generator,
- int pretty,
- bool isArray,
- fmt::memory_buffer& buffer) const;
- void jsonStringGenerator(LegacyStrictGenerator const& generator,
+ BSONObj jsonStringBuffer(JsonStringFormat format,
int pretty,
bool isArray,
- fmt::memory_buffer& buffer) const;
+ fmt::memory_buffer& buffer,
+ size_t writeLimit = 0) const;
+
+ BSONObj jsonStringGenerator(ExtendedCanonicalV200Generator const& generator,
+ int pretty,
+ bool isArray,
+ fmt::memory_buffer& buffer,
+ size_t writeLimit = 0) const;
+ BSONObj jsonStringGenerator(ExtendedRelaxedV200Generator const& generator,
+ int pretty,
+ bool isArray,
+ fmt::memory_buffer& buffer,
+ size_t writeLimit = 0) const;
+ BSONObj jsonStringGenerator(LegacyStrictGenerator const& generator,
+ int pretty,
+ bool isArray,
+ fmt::memory_buffer& buffer,
+ size_t writeLimit = 0) const;
/** note: addFields always adds _id even if not specified */
int addFields(BSONObj& from, std::set<std::string>& fields); /* returns n added */
@@ -603,10 +609,11 @@ public:
private:
template <typename Generator>
- void _jsonStringGenerator(const Generator& g,
- int pretty,
- bool isArray,
- fmt::memory_buffer& buffer) const;
+ BSONObj _jsonStringGenerator(const Generator& g,
+ int pretty,
+ bool isArray,
+ fmt::memory_buffer& buffer,
+ size_t writeLimit) const;
void _assertInvalid(int maxSize) const;
diff --git a/src/mongo/db/initialize_server_global_state.cpp b/src/mongo/db/initialize_server_global_state.cpp
index f0105085c30..2d262167793 100644
--- a/src/mongo/db/initialize_server_global_state.cpp
+++ b/src/mongo/db/initialize_server_global_state.cpp
@@ -220,10 +220,11 @@ MONGO_INITIALIZER_GENERAL(ServerLogRedirection,
using logger::StatusWithRotatableFileWriter;
// Hook up this global into our logging encoder
- MessageEventDetailsEncoder::setMaxLogSizeKBSource(gMaxLogSizeKB);
LogManager* manager = logger::globalLogManager();
auto& lv2Manager = logv2::LogManager::global();
logv2::LogDomainGlobal::ConfigurationOptions lv2Config;
+ MessageEventDetailsEncoder::setMaxLogSizeKBSource(gMaxLogAttributeSizeKB);
+ lv2Config.maxAttributeSizeKB = &gMaxLogAttributeSizeKB;
if (serverGlobalParams.logWithSyslog) {
#ifdef _WIN32
@@ -239,9 +240,9 @@ MONGO_INITIALIZER_GENERAL(ServerLogRedirection,
javascriptAppender = std::make_unique<logger::LogV2Appender<MessageEventEphemeral>>(
&(lv2Manager.getGlobalDomain()), true);
- lv2Config._consoleEnabled = false;
- lv2Config._syslogEnabled = true;
- lv2Config._syslogFacility = serverGlobalParams.syslogFacility;
+ lv2Config.consoleEnabled = false;
+ lv2Config.syslogEnabled = true;
+ lv2Config.syslogFacility = serverGlobalParams.syslogFacility;
} else {
using logger::SyslogAppender;
StringBuilder sb;
@@ -309,13 +310,13 @@ MONGO_INITIALIZER_GENERAL(ServerLogRedirection,
javascriptAppender = std::make_unique<logger::LogV2Appender<MessageEventEphemeral>>(
&(lv2Manager.getGlobalDomain()), true);
- lv2Config._consoleEnabled = false;
- lv2Config._fileEnabled = true;
- lv2Config._filePath = absoluteLogpath;
- lv2Config._fileRotationMode = serverGlobalParams.logRenameOnRotate
+ lv2Config.consoleEnabled = false;
+ lv2Config.fileEnabled = true;
+ lv2Config.filePath = absoluteLogpath;
+ lv2Config.fileRotationMode = serverGlobalParams.logRenameOnRotate
? logv2::LogDomainGlobal::ConfigurationOptions::RotationMode::kRename
: logv2::LogDomainGlobal::ConfigurationOptions::RotationMode::kReopen;
- lv2Config._fileOpenMode = serverGlobalParams.logAppend
+ lv2Config.fileOpenMode = serverGlobalParams.logAppend
? logv2::LogDomainGlobal::ConfigurationOptions::OpenMode::kAppend
: logv2::LogDomainGlobal::ConfigurationOptions::OpenMode::kTruncate;
@@ -372,7 +373,7 @@ MONGO_INITIALIZER_GENERAL(ServerLogRedirection,
if (logV2Enabled()) {
- lv2Config._format = serverGlobalParams.logFormat;
+ lv2Config.format = serverGlobalParams.logFormat;
return lv2Manager.getGlobalDomainInternal().configure(lv2Config);
}
diff --git a/src/mongo/db/initialize_server_global_state.idl b/src/mongo/db/initialize_server_global_state.idl
index c25d78fa07f..7dd9cafd850 100644
--- a/src/mongo/db/initialize_server_global_state.idl
+++ b/src/mongo/db/initialize_server_global_state.idl
@@ -29,16 +29,19 @@ global:
cpp_namespace: mongo
cpp_includes:
- mongo/logger/message_event_utf8_encoder.h
+ - mongo/logv2/constants.h
server_parameters:
maxLogSizeKB:
- cpp_varname: gMaxLogSizeKB
- cpp_vartype: AtomicWord<int>
+ cpp_varname: gMaxLogAttributeSizeKB
+ cpp_vartype: AtomicWord<int32_t>
default:
- expr: logger::LogContext::kDefaultMaxLogSizeKB
- description: 'Max log size in kilobytes'
+ expr: logv2::constants::kDefaultMaxAttributeOutputSizeKB
+ validator:
+ gte: 0
+ description: 'Max log attribute size in kilobytes'
set_at: [ startup, runtime ]
-
+
honorSystemUmask:
description: 'Use the system provided umask, rather than overriding with processUmask config value'
set_at: startup
diff --git a/src/mongo/logv2/bson_formatter.h b/src/mongo/logv2/bson_formatter.h
index 27717f633fb..0e9479d65b8 100644
--- a/src/mongo/logv2/bson_formatter.h
+++ b/src/mongo/logv2/bson_formatter.h
@@ -32,14 +32,14 @@
#include <boost/log/core/record_view.hpp>
#include <boost/log/utility/formatting_ostream_fwd.hpp>
+#include "mongo/logv2/constants.h"
+
namespace mongo {
namespace logv2 {
class BSONFormatter {
public:
- static bool binary() {
- return true;
- };
+ BSONFormatter(const AtomicWord<int32_t>* maxAttributeSizeKB = nullptr) {}
void operator()(boost::log::record_view const& rec, boost::log::formatting_ostream& strm) const;
};
diff --git a/src/mongo/logv2/constants.h b/src/mongo/logv2/constants.h
index 6b16cccbc8a..81bf4c21618 100644
--- a/src/mongo/logv2/constants.h
+++ b/src/mongo/logv2/constants.h
@@ -29,6 +29,7 @@
#pragma once
+#include "mongo/base/string_data.h"
#include "mongo/logv2/log_truncation.h"
namespace mongo::logv2::constants {
@@ -45,12 +46,14 @@ constexpr StringData kContextFieldName = "ctx"_sd;
constexpr StringData kIdFieldName = "id"_sd;
constexpr StringData kMessageFieldName = "msg"_sd;
constexpr StringData kAttributesFieldName = "attr"_sd;
+constexpr StringData kTruncatedFieldName = "truncated"_sd;
+constexpr StringData kTruncatedSizeFieldName = "size"_sd;
constexpr StringData kTagsFieldName = "tags"_sd;
// String to be used when logging empty boost::optional with the text formatter
constexpr StringData kNullOptionalString = "(nothing)"_sd;
constexpr LogTruncation kDefaultTruncation = LogTruncation::Enabled;
-constexpr size_t kDefaultMaxAttributeOutputSize = 10 * 1024;
+constexpr int32_t kDefaultMaxAttributeOutputSizeKB = 10;
} // namespace mongo::logv2::constants
diff --git a/src/mongo/logv2/json_formatter.cpp b/src/mongo/logv2/json_formatter.cpp
index 22583dd45eb..2f579f66809 100644
--- a/src/mongo/logv2/json_formatter.cpp
+++ b/src/mongo/logv2/json_formatter.cpp
@@ -42,6 +42,7 @@
#include "mongo/logv2/log_component.h"
#include "mongo/logv2/log_severity.h"
#include "mongo/logv2/log_tag.h"
+#include "mongo/logv2/log_truncation.h"
#include "mongo/logv2/name_extractor.h"
#include "mongo/util/str_escape.h"
#include "mongo/util/time_support.h"
@@ -51,7 +52,8 @@
namespace mongo::logv2 {
namespace {
struct JSONValueExtractor {
- JSONValueExtractor(fmt::memory_buffer& buffer) : _buffer(buffer) {}
+ JSONValueExtractor(fmt::memory_buffer& buffer, size_t attributeMaxSize)
+ : _buffer(buffer), _attributeMaxSize(attributeMaxSize) {}
void operator()(StringData name, CustomAttributeValue const& val) {
// Try to format as BSON first if available. Prefer BSONAppend if available as we might only
@@ -61,20 +63,32 @@ struct JSONValueExtractor {
val.BSONAppend(builder, name);
// This is a JSON subobject, no quotes needed
storeUnquoted(name);
- builder.done().getField(name).jsonStringBuffer(
- JsonStringFormat::ExtendedRelaxedV2_0_0, false, 0, _buffer);
+ BSONElement element = builder.done().getField(name);
+ BSONObj truncated = element.jsonStringBuffer(JsonStringFormat::ExtendedRelaxedV2_0_0,
+ false,
+ false,
+ 0,
+ _buffer,
+ _attributeMaxSize);
+ addTruncationReport(name, truncated, element.size());
} else if (val.BSONSerialize) {
// This is a JSON subobject, no quotes needed
storeUnquoted(name);
BSONObjBuilder builder;
val.BSONSerialize(builder);
- builder.done().jsonStringBuffer(
- JsonStringFormat::ExtendedRelaxedV2_0_0, 0, false, _buffer);
+ BSONObj obj = builder.done();
+ BSONObj truncated = obj.jsonStringBuffer(
+ JsonStringFormat::ExtendedRelaxedV2_0_0, 0, false, _buffer, _attributeMaxSize);
+ addTruncationReport(name, truncated, builder.done().objsize());
+
} else if (val.toBSONArray) {
// This is a JSON subarray, no quotes needed
storeUnquoted(name);
- val.toBSONArray().jsonStringBuffer(
- JsonStringFormat::ExtendedRelaxedV2_0_0, 0, true, _buffer);
+ BSONArray arr = val.toBSONArray();
+ BSONObj truncated = arr.jsonStringBuffer(
+ JsonStringFormat::ExtendedRelaxedV2_0_0, 0, true, _buffer, _attributeMaxSize);
+ addTruncationReport(name, truncated, arr.objsize());
+
} else if (val.stringSerialize) {
fmt::memory_buffer intermediate;
val.stringSerialize(intermediate);
@@ -88,13 +102,17 @@ struct JSONValueExtractor {
void operator()(StringData name, const BSONObj* val) {
// This is a JSON subobject, no quotes needed
storeUnquoted(name);
- val->jsonStringBuffer(JsonStringFormat::ExtendedRelaxedV2_0_0, 0, false, _buffer);
+ BSONObj truncated = val->jsonStringBuffer(
+ JsonStringFormat::ExtendedRelaxedV2_0_0, 0, false, _buffer, _attributeMaxSize);
+ addTruncationReport(name, truncated, val->objsize());
}
void operator()(StringData name, const BSONArray* val) {
// This is a JSON subobject, no quotes needed
storeUnquoted(name);
- val->jsonStringBuffer(JsonStringFormat::ExtendedRelaxedV2_0_0, 0, true, _buffer);
+ BSONObj truncated = val->jsonStringBuffer(
+ JsonStringFormat::ExtendedRelaxedV2_0_0, 0, true, _buffer, _attributeMaxSize);
+ addTruncationReport(name, truncated, val->objsize());
}
void operator()(StringData name, StringData value) {
@@ -113,6 +131,13 @@ struct JSONValueExtractor {
storeUnquotedValue(name, value);
}
+ BSONObj truncated() {
+ return _truncated.done();
+ }
+
+ BSONObj truncatedSizes() {
+ return _truncatedSizes.done();
+ }
private:
void storeUnquoted(StringData name) {
@@ -129,13 +154,37 @@ private:
template <typename T>
void storeQuoted(StringData name, const T& value) {
fmt::format_to(_buffer, R"({}"{}":")", _separator, name);
+ std::size_t before = _buffer.size();
str::escapeForJSON(_buffer, value);
+ if (_attributeMaxSize != 0) {
+ auto truncatedEnd =
+ str::UTF8SafeTruncation(_buffer.begin() + before, _buffer.end(), _attributeMaxSize);
+ if (truncatedEnd != _buffer.end()) {
+ BSONObjBuilder truncationInfo = _truncated.subobjStart(name);
+ truncationInfo.append("type"_sd, typeName(BSONType::String));
+ truncationInfo.append("size"_sd, static_cast<int64_t>(_buffer.size() - before));
+ truncationInfo.done();
+ }
+
+ _buffer.resize(truncatedEnd - _buffer.begin());
+ }
+
_buffer.push_back('"');
_separator = ","_sd;
}
+ void addTruncationReport(StringData name, const BSONObj& truncated, int64_t objsize) {
+ if (!truncated.isEmpty()) {
+ _truncated.append(name, truncated);
+ _truncatedSizes.append(name, objsize);
+ }
+ }
+
fmt::memory_buffer& _buffer;
+ BSONObjBuilder _truncated;
+ BSONObjBuilder _truncatedSizes;
StringData _separator = ""_sd;
+ size_t _attributeMaxSize;
};
} // namespace
@@ -196,17 +245,34 @@ void JSONFormatter::operator()(boost::log::record_view const& rec,
if (!attrs.empty()) {
fmt::format_to(buffer, R"(,"{}":{{)", constants::kAttributesFieldName);
// comma separated list of attributes (no opening/closing brace are added here)
- JSONValueExtractor extractor(buffer);
+ size_t attributeMaxSize = 0;
+ if (extract<LogTruncation>(attributes::truncation(), rec).get() == LogTruncation::Enabled) {
+ if (_maxAttributeSizeKB)
+ attributeMaxSize = _maxAttributeSizeKB->loadRelaxed() * 1024;
+ else
+ attributeMaxSize = constants::kDefaultMaxAttributeOutputSizeKB * 1024;
+ }
+ JSONValueExtractor extractor(buffer, attributeMaxSize);
attrs.apply(extractor);
+ buffer.push_back('}');
+
+ if (BSONObj truncated = extractor.truncated(); !truncated.isEmpty()) {
+ fmt::format_to(buffer, R"(,"{}":)", constants::kTruncatedFieldName);
+ truncated.jsonStringBuffer(
+ JsonStringFormat::ExtendedRelaxedV2_0_0, 0, false, buffer, 0);
+ }
+
+ if (BSONObj truncatedSizes = extractor.truncatedSizes(); !truncatedSizes.isEmpty()) {
+ fmt::format_to(buffer, R"(,"{}":)", constants::kTruncatedSizeFieldName);
+ truncatedSizes.jsonStringBuffer(
+ JsonStringFormat::ExtendedRelaxedV2_0_0, 0, false, buffer, 0);
+ }
}
// Add remaining fields
fmt::format_to(buffer,
- R"({})" // optional attribute closing
R"({})" // optional tags
R"(}})",
- // closing brace
- attrs.empty() ? "" : "}",
// tags
tag);
diff --git a/src/mongo/logv2/json_formatter.h b/src/mongo/logv2/json_formatter.h
index ba90441b4a2..fb33dcde399 100644
--- a/src/mongo/logv2/json_formatter.h
+++ b/src/mongo/logv2/json_formatter.h
@@ -32,15 +32,19 @@
#include <boost/log/core/record_view.hpp>
#include <boost/log/utility/formatting_ostream_fwd.hpp>
+#include "mongo/logv2/constants.h"
+
namespace mongo::logv2 {
class JSONFormatter {
public:
- static bool binary() {
- return false;
- };
+ JSONFormatter(const AtomicWord<int32_t>* maxAttributeSizeKB = nullptr)
+ : _maxAttributeSizeKB(maxAttributeSizeKB) {}
void operator()(boost::log::record_view const& rec, boost::log::formatting_ostream& strm) const;
+
+private:
+ const AtomicWord<int32_t>* _maxAttributeSizeKB;
};
} // namespace mongo::logv2
diff --git a/src/mongo/logv2/log_domain_global.cpp b/src/mongo/logv2/log_domain_global.cpp
index d80be9f65d7..50b378f4c0e 100644
--- a/src/mongo/logv2/log_domain_global.cpp
+++ b/src/mongo/logv2/log_domain_global.cpp
@@ -49,7 +49,7 @@ namespace mongo {
namespace logv2 {
void LogDomainGlobal::ConfigurationOptions::makeDisabled() {
- _consoleEnabled = false;
+ consoleEnabled = false;
}
struct LogDomainGlobal::Impl {
@@ -96,12 +96,12 @@ LogDomainGlobal::Impl::Impl(LogDomainGlobal& parent) : _parent(parent) {
Status LogDomainGlobal::Impl::configure(LogDomainGlobal::ConfigurationOptions const& options) {
#ifndef _WIN32
- if (options._syslogEnabled) {
+ if (options.syslogEnabled) {
// Create a backend
auto backend = boost::make_shared<SyslogBackend>(
boost::make_shared<boost::log::sinks::syslog_backend>(
boost::log::keywords::facility =
- boost::log::sinks::syslog::make_facility(options._syslogFacility),
+ boost::log::sinks::syslog::make_facility(options.syslogFacility),
boost::log::keywords::use_impl = boost::log::sinks::syslog::native),
boost::make_shared<RamLogSink>(RamLog::get("global")),
boost::make_shared<RamLogSink>(RamLog::get("startupWarnings")));
@@ -133,22 +133,22 @@ Status LogDomainGlobal::Impl::configure(LogDomainGlobal::ConfigurationOptions co
}
#endif
- if (options._consoleEnabled && _consoleSink.use_count() == 1) {
+ if (options.consoleEnabled && _consoleSink.use_count() == 1) {
boost::log::core::get()->add_sink(_consoleSink);
}
- if (!options._consoleEnabled && _consoleSink.use_count() > 1) {
+ if (!options.consoleEnabled && _consoleSink.use_count() > 1) {
boost::log::core::get()->remove_sink(_consoleSink);
}
- if (options._fileEnabled) {
+ if (options.fileEnabled) {
auto backend = boost::make_shared<RotatableFileBackend>(
boost::make_shared<FileRotateSink>(),
boost::make_shared<RamLogSink>(RamLog::get("global")),
boost::make_shared<RamLogSink>(RamLog::get("startupWarnings")));
Status ret = backend->lockedBackend<0>()->addFile(
- options._filePath,
- options._fileOpenMode == ConfigurationOptions::OpenMode::kAppend ? true : false);
+ options.filePath,
+ options.fileOpenMode == ConfigurationOptions::OpenMode::kAppend ? true : false);
if (!ret.isOK())
return ret;
backend->lockedBackend<0>()->auto_flush(true);
@@ -173,13 +173,13 @@ Status LogDomainGlobal::Impl::configure(LogDomainGlobal::ConfigurationOptions co
#endif
};
- switch (options._format) {
+ switch (options.format) {
case LogFormat::kDefault:
case LogFormat::kText:
- setFormatters([] { return TextFormatter(); });
+ setFormatters([&] { return TextFormatter(options.maxAttributeSizeKB); });
break;
case LogFormat::kJson:
- setFormatters([] { return JSONFormatter(); });
+ setFormatters([&] { return JSONFormatter(options.maxAttributeSizeKB); });
break;
}
diff --git a/src/mongo/logv2/log_domain_global.h b/src/mongo/logv2/log_domain_global.h
index 03a09562c07..a956806b621 100644
--- a/src/mongo/logv2/log_domain_global.h
+++ b/src/mongo/logv2/log_domain_global.h
@@ -29,6 +29,7 @@
#pragma once
+#include "mongo/logv2/constants.h"
#include "mongo/logv2/log_domain_internal.h"
#include "mongo/logv2/log_format.h"
@@ -40,14 +41,15 @@ public:
enum class RotationMode { kRename, kReopen };
enum class OpenMode { kTruncate, kAppend };
- bool _consoleEnabled{true};
- bool _fileEnabled{false};
- std::string _filePath;
- RotationMode _fileRotationMode{RotationMode::kRename};
- OpenMode _fileOpenMode{OpenMode::kTruncate};
- bool _syslogEnabled{false};
- int _syslogFacility{-1}; // invalid facility by default, must be set
- LogFormat _format{LogFormat::kDefault};
+ bool consoleEnabled{true};
+ bool fileEnabled{false};
+ std::string filePath;
+ RotationMode fileRotationMode{RotationMode::kRename};
+ OpenMode fileOpenMode{OpenMode::kTruncate};
+ bool syslogEnabled{false};
+ int syslogFacility{-1}; // invalid facility by default, must be set
+ LogFormat format{LogFormat::kDefault};
+ const AtomicWord<int32_t>* maxAttributeSizeKB = nullptr;
void makeDisabled();
};
diff --git a/src/mongo/logv2/log_test_v2.cpp b/src/mongo/logv2/log_test_v2.cpp
index 47bfae441d5..4b852b6c816 100644
--- a/src/mongo/logv2/log_test_v2.cpp
+++ b/src/mongo/logv2/log_test_v2.cpp
@@ -1127,6 +1127,101 @@ TEST_F(LogTestV2, Unicode) {
}
}
+TEST_F(LogTestV2, JsonTruncation) {
+ using namespace constants;
+
+ std::vector<std::string> lines;
+ auto sink = LogCaptureBackend::create(lines);
+ sink->set_filter(ComponentSettingsFilter(LogManager::global().getGlobalDomain(),
+ LogManager::global().getGlobalSettings()));
+ sink->set_formatter(JSONFormatter());
+ attach(sink);
+
+ std::size_t maxAttributeOutputSize = constants::kDefaultMaxAttributeOutputSizeKB * 1024;
+
+ BSONObjBuilder builder;
+ BSONObjBuilder subobj = builder.subobjStart("sub"_sd);
+ subobj.append("small1", 1);
+ subobj.append("small2", "small string");
+ subobj.append("large", std::string(maxAttributeOutputSize * 2, 'a'));
+ subobj.append("small3", "small string after large object");
+ subobj.done();
+
+ LOGV2(20085, "{name}{attr2}", "name"_attr = builder.done(), "attr2"_attr = true);
+ auto validateTruncation = [&](const BSONObj& obj) {
+ // Check that all fields up until the large one are written
+ BSONObj sub = obj.getField(constants::kAttributesFieldName)
+ .Obj()
+ .getField("name"_sd)
+ .Obj()
+ .getField("sub"_sd)
+ .Obj();
+ ASSERT(sub.hasField("small1"_sd));
+ ASSERT(sub.hasField("small2"_sd));
+ ASSERT(!sub.hasField("large"_sd));
+ ASSERT(!sub.hasField("small3"_sd));
+
+ // The truncated field should be written in the truncated and size sub objects
+ BSONObj truncated = obj.getField(constants::kTruncatedFieldName).Obj();
+ BSONObj truncatedInfo =
+ truncated.getField("name"_sd).Obj().getField("sub"_sd).Obj().getField("large"_sd).Obj();
+ ASSERT_EQUALS(truncatedInfo.getField("type"_sd).String(), typeName(BSONType::String));
+ ASSERT(truncatedInfo.getField("size"_sd).isNumber());
+
+ ASSERT_EQUALS(
+ obj.getField(constants::kTruncatedSizeFieldName).Obj().getField("name"_sd).Int(),
+ builder.done().objsize());
+
+ // Attributes coming after the truncated one should be written
+ ASSERT(obj.getField(constants::kAttributesFieldName).Obj().getField("attr2").Bool());
+ };
+ validateTruncation(mongo::fromjson(lines.back()));
+
+ LOGV2_OPTIONS(20086, {LogTruncation::Disabled}, "{name}", "name"_attr = builder.done());
+ auto validateTruncationDisabled = [&](const BSONObj& obj) {
+ BSONObj sub = obj.getField(constants::kAttributesFieldName)
+ .Obj()
+ .getField("name"_sd)
+ .Obj()
+ .getField("sub"_sd)
+ .Obj();
+ // No truncation should occur
+ ASSERT(sub.hasField("small1"_sd));
+ ASSERT(sub.hasField("small2"_sd));
+ ASSERT(sub.hasField("large"_sd));
+ ASSERT(sub.hasField("small3"_sd));
+
+ ASSERT(!obj.hasField(constants::kTruncatedFieldName));
+ ASSERT(!obj.hasField(constants::kTruncatedSizeFieldName));
+ };
+ validateTruncationDisabled(mongo::fromjson(lines.back()));
+
+ BSONArrayBuilder arrBuilder;
+ // Fields will use more than one byte each so this will truncate at some point
+ for (size_t i = 0; i < maxAttributeOutputSize; ++i) {
+ arrBuilder.append("str");
+ }
+
+ BSONArray arrToLog = arrBuilder.arr();
+ LOGV2(20087, "{name}", "name"_attr = arrToLog);
+ auto validateArrayTruncation = [&](const BSONObj& obj) {
+ auto arr = obj.getField(constants::kAttributesFieldName).Obj().getField("name"_sd).Array();
+ ASSERT_LESS_THAN(arr.size(), maxAttributeOutputSize);
+
+ std::string truncatedFieldName = std::to_string(arr.size());
+ BSONObj truncated = obj.getField(constants::kTruncatedFieldName).Obj();
+ BSONObj truncatedInfo =
+ truncated.getField("name"_sd).Obj().getField(truncatedFieldName).Obj();
+ ASSERT_EQUALS(truncatedInfo.getField("type"_sd).String(), typeName(BSONType::String));
+ ASSERT(truncatedInfo.getField("size"_sd).isNumber());
+
+ ASSERT_EQUALS(
+ obj.getField(constants::kTruncatedSizeFieldName).Obj().getField("name"_sd).Int(),
+ arrToLog.objsize());
+ };
+ validateArrayTruncation(mongo::fromjson(lines.back()));
+}
+
TEST_F(LogTestV2, Threads) {
std::vector<std::string> linesPlain;
auto plainSink = LogCaptureBackend::create(linesPlain);
diff --git a/src/mongo/logv2/plain_formatter.cpp b/src/mongo/logv2/plain_formatter.cpp
index e156a1148ac..c0d3ccdf797 100644
--- a/src/mongo/logv2/plain_formatter.cpp
+++ b/src/mongo/logv2/plain_formatter.cpp
@@ -150,11 +150,14 @@ void PlainFormatter::operator()(boost::log::record_view const& rec,
to_string_view(message),
fmt::basic_format_args<fmt::format_context>(extractor.args.data(), extractor.args.size()));
- LogTruncation truncation = extract<LogTruncation>(attributes::truncation(), rec).get();
- strm.write(buffer.data(),
- truncation == LogTruncation::Enabled
- ? std::min(constants::kDefaultMaxAttributeOutputSize, buffer.size())
- : buffer.size());
+ size_t attributeMaxSize = buffer.size();
+ if (extract<LogTruncation>(attributes::truncation(), rec).get() == LogTruncation::Enabled) {
+ if (_maxAttributeSizeKB)
+ attributeMaxSize = _maxAttributeSizeKB->loadRelaxed() * 1024;
+ else
+ attributeMaxSize = constants::kDefaultMaxAttributeOutputSizeKB * 1024;
+ }
+ strm.write(buffer.data(), std::min(attributeMaxSize, buffer.size()));
}
} // namespace mongo::logv2
diff --git a/src/mongo/logv2/plain_formatter.h b/src/mongo/logv2/plain_formatter.h
index 7b869703058..e61cd2a9af2 100644
--- a/src/mongo/logv2/plain_formatter.h
+++ b/src/mongo/logv2/plain_formatter.h
@@ -32,16 +32,20 @@
#include <boost/log/core/record_view.hpp>
#include <boost/log/utility/formatting_ostream_fwd.hpp>
+#include "mongo/logv2/constants.h"
+
namespace mongo::logv2 {
// Text formatter without metadata. Just contains the formatted message.
class PlainFormatter {
public:
- static bool binary() {
- return false;
- };
+ PlainFormatter(const AtomicWord<int32_t>* maxAttributeSizeKB = nullptr)
+ : _maxAttributeSizeKB(maxAttributeSizeKB) {}
void operator()(boost::log::record_view const& rec, boost::log::formatting_ostream& strm) const;
+
+private:
+ const AtomicWord<int32_t>* _maxAttributeSizeKB;
};
} // namespace mongo::logv2
diff --git a/src/mongo/logv2/text_formatter.h b/src/mongo/logv2/text_formatter.h
index ccc61ffb0e0..6276b413e96 100644
--- a/src/mongo/logv2/text_formatter.h
+++ b/src/mongo/logv2/text_formatter.h
@@ -35,9 +35,8 @@ namespace mongo::logv2 {
class TextFormatter : protected PlainFormatter {
public:
- static bool binary() {
- return false;
- };
+ TextFormatter(const AtomicWord<int32_t>* maxAttributeSizeKB = nullptr)
+ : PlainFormatter(maxAttributeSizeKB) {}
void operator()(boost::log::record_view const& rec, boost::log::formatting_ostream& strm) const;
};
diff --git a/src/mongo/util/str.h b/src/mongo/util/str.h
index f36aa3b0f6d..ab0c71bb4bf 100644
--- a/src/mongo/util/str.h
+++ b/src/mongo/util/str.h
@@ -282,6 +282,47 @@ inline size_t lengthInUTF8CodePoints(mongo::StringData str) {
return strLen;
}
+// Truncates at the closest UTF-8 codepoint boundary so that the end result is valid UTF-8.
+// The input must be valid UTF-8 and 'maximum' must be non-zero. Random-access iterators required.
+template <typename Iterator>
+Iterator UTF8SafeTruncation(Iterator begin, Iterator end, std::size_t maximum) {
+ // If we are requesting more bytes than exist in the range, then there's nothing to do
+ if (static_cast<size_t>(end - begin) <= maximum)
+ return end;
+
+ const auto rbegin = std::make_reverse_iterator(begin + maximum);
+ const auto rend = std::make_reverse_iterator(begin);
+ auto it = rbegin;
+
+ // Look back until we find the beginning of a unicode codepoint, extract its expected number of
+ // bytes
+ int codepoint_bytes = 0;
+ for (; it != rend; ++it) {
+ if ((*it & 0b1000'0000) == 0) {
+ codepoint_bytes = 1;
+ break;
+ } else if ((*it & 0b1100'0000) == 0b1100'0000) {
+ codepoint_bytes = 2;
+ uint8_t byte = static_cast<uint8_t>(*it) << 1;
+ while ((codepoint_bytes < 4) && ((byte <<= 1) & 0b1000'0000))
+ ++codepoint_bytes;
+ break;
+ }
+ }
+
+ // Check we had the expected number of continuation bytes. If not, skip this codepoint.
+ int offset = codepoint_bytes - 1;
+ if (std::distance(rbegin, it) != offset)
+ offset = -1; // This was a broken codepoint, go back one extra step to skip it
+
+ return it.base() + offset;
+}
+
+inline StringData UTF8SafeTruncation(StringData input, std::size_t maximum) {
+ auto truncatedEnd = UTF8SafeTruncation(input.begin(), input.end(), maximum);
+ return StringData(input.rawData(), truncatedEnd - input.begin());
+}
+
inline int caseInsensitiveCompare(const char* s1, const char* s2) {
#if defined(_WIN32)
return _stricmp(s1, s2);
diff --git a/src/mongo/util/str_test.cpp b/src/mongo/util/str_test.cpp
index 66ee0f77196..92259e342a4 100644
--- a/src/mongo/util/str_test.cpp
+++ b/src/mongo/util/str_test.cpp
@@ -280,4 +280,31 @@ TEST(StringUtilsTest, ConvertDoubleToStringWithProperPrecision) {
ASSERT_EQUALS(std::string("0.1000000006"), convertDoubleToString(0.1 + 6E-10, 10));
ASSERT_EQUALS(std::string("0.1"), convertDoubleToString(0.1 + 6E-8, 6));
}
+
+TEST(StringUtilsTest, UTF8SafeTruncation) {
+ // Empty strings and ASCII work like normal truncation
+ ASSERT_EQUALS(UTF8SafeTruncation(""_sd, 10), ""_sd);
+ ASSERT_EQUALS(UTF8SafeTruncation("abcdefg"_sd, 5), "abcde"_sd);
+
+ // Valid 2 Octet sequences, LATIN SMALL LETTER N WITH TILDE
+ ASSERT_EQUALS(UTF8SafeTruncation("\u00f1\u00f1\u00f1"_sd, 1), ""_sd);
+ ASSERT_EQUALS(UTF8SafeTruncation("\u00f1\u00f1\u00f1"_sd, 4), "\u00f1\u00f1"_sd);
+ ASSERT_EQUALS(UTF8SafeTruncation("\u00f1\u00f1\u00f1"_sd, 5), "\u00f1\u00f1"_sd);
+ ASSERT_EQUALS(UTF8SafeTruncation("\u00f1\u00f1\u00f1"_sd, 6), "\u00f1\u00f1\u00f1"_sd);
+
+ // Valid 3 Octet sequences, RUNIC LETTER TIWAZ TIR TYR T
+ ASSERT_EQUALS(UTF8SafeTruncation("\u16cf\u16cf"_sd, 2), ""_sd);
+ ASSERT_EQUALS(UTF8SafeTruncation("\u16cf\u16cf"_sd, 3), "\u16cf"_sd);
+ ASSERT_EQUALS(UTF8SafeTruncation("\u16cf\u16cf"_sd, 4), "\u16cf"_sd);
+ ASSERT_EQUALS(UTF8SafeTruncation("\u16cf\u16cf"_sd, 5), "\u16cf"_sd);
+ ASSERT_EQUALS(UTF8SafeTruncation("\u16cf\u16cf"_sd, 6), "\u16cf\u16cf"_sd);
+
+ // Valid 4 Octet sequences, GOTHIC LETTER MANNA
+ ASSERT_EQUALS(UTF8SafeTruncation("\U0001033c\U0001033c"_sd, 4), "\U0001033c"_sd);
+ ASSERT_EQUALS(UTF8SafeTruncation("\U0001033c\U0001033c"_sd, 5), "\U0001033c"_sd);
+ ASSERT_EQUALS(UTF8SafeTruncation("\U0001033c\U0001033c"_sd, 6), "\U0001033c"_sd);
+ ASSERT_EQUALS(UTF8SafeTruncation("\U0001033c\U0001033c"_sd, 7), "\U0001033c"_sd);
+ ASSERT_EQUALS(UTF8SafeTruncation("\U0001033c\U0001033c"_sd, 8), "\U0001033c\U0001033c"_sd);
+}
+
} // namespace mongo::str