diff options
30 files changed, 1644 insertions, 938 deletions
diff --git a/src/mongo/SConscript b/src/mongo/SConscript index fd7123f49e4..5f2237f4533 100644 --- a/src/mongo/SConscript +++ b/src/mongo/SConscript @@ -124,7 +124,6 @@ baseEnv.Library( 'logv2/log_tag.cpp', 'logv2/plain_formatter.cpp', 'logv2/ramlog.cpp', - 'logv2/string_escape.cpp', 'logv2/text_formatter.cpp', 'platform/decimal128.cpp', 'platform/mutex.cpp', @@ -144,6 +143,7 @@ baseEnv.Library( 'util/concurrency/idle_thread_block.cpp', 'util/concurrency/thread_name.cpp', 'util/duration.cpp', + 'util/str_escape.cpp', 'util/errno_util.cpp', 'util/exception_filter_win32.cpp', 'util/exit.cpp', diff --git a/src/mongo/bson/bsonelement.cpp b/src/mongo/bson/bsonelement.cpp index 67ea860f45d..b2746f1ddf1 100644 --- a/src/mongo/bson/bsonelement.cpp +++ b/src/mongo/bson/bsonelement.cpp @@ -39,6 +39,9 @@ #include "mongo/base/data_cursor.h" #include "mongo/base/parse_number.h" #include "mongo/base/simple_string_data_comparator.h" +#include "mongo/bson/generator_extended_canonical_2_0_0.h" +#include "mongo/bson/generator_extended_relaxed_2_0_0.h" +#include "mongo/bson/generator_legacy_strict.h" #include "mongo/db/jsobj.h" #include "mongo/platform/strnlen.h" #include "mongo/util/base64.h" @@ -63,282 +66,148 @@ using std::string; const double BSONElement::kLongLongMaxPlusOneAsDouble = scalbn(1, std::numeric_limits<long long>::digits); -string BSONElement::jsonString(JsonStringFormat format, bool includeFieldNames, int pretty) const { - std::stringstream s; - BSONElement::jsonStringStream(format, includeFieldNames, pretty, s); - return s.str(); +std::string BSONElement::jsonString(JsonStringFormat format, + bool includeFieldNames, + int pretty) const { + fmt::memory_buffer buffer; + jsonStringBuffer(format, includeFieldNames, pretty, buffer); + return fmt::to_string(buffer); } -void BSONElement::jsonStringStream(JsonStringFormat format, +void BSONElement::jsonStringBuffer(JsonStringFormat format, bool includeFieldNames, int pretty, - std::stringstream& s) const { - if 
(includeFieldNames) - s << '"' << str::escape(fieldName()) << "\" : "; + fmt::memory_buffer& buffer) const { + auto withGenerator = [&](auto&& gen) { + jsonStringGenerator(gen, includeFieldNames, pretty, buffer); + }; + if (format == ExtendedCanonicalV2_0_0) + withGenerator(ExtendedCanonicalV200Generator()); + else if (format == ExtendedRelaxedV2_0_0) + withGenerator(ExtendedRelaxedV200Generator()); + else if (format == LegacyStrict) { + withGenerator(LegacyStrictGenerator()); + } else { + MONGO_UNREACHABLE; + } +} + +template <typename Generator> +void BSONElement::_jsonStringGenerator(const Generator& g, + bool includeFieldNames, + int pretty, + fmt::memory_buffer& buffer) const { + if (includeFieldNames) { + g.writePadding(buffer); + g.writeString(buffer, fieldName()); + g.writePadding(buffer); + buffer.push_back(':'); + } + + g.writePadding(buffer); + switch (type()) { case mongo::String: + g.writeString(buffer, StringData(valuestr(), valuestrsize() - 1)); + break; case Symbol: - s << '"' << str::escape(string(valuestr(), valuestrsize() - 1)) << '"'; + g.writeSymbol(buffer, StringData(valuestr(), valuestrsize() - 1)); break; case NumberLong: - if (format == TenGen) { - s << "NumberLong(" << _numberLong() << ")"; - } else { - s << "{ \"$numberLong\" : \"" << _numberLong() << "\" }"; - } + g.writeInt64(buffer, _numberLong()); break; case NumberInt: - if (format == TenGen) { - s << "NumberInt(" << _numberInt() << ")"; - break; - } + g.writeInt32(buffer, _numberInt()); + break; case NumberDouble: - if (number() >= -std::numeric_limits<double>::max() && - number() <= std::numeric_limits<double>::max()) { - auto origPrecision = s.precision(); - auto guard = makeGuard([&s, origPrecision]() { s.precision(origPrecision); }); - s.precision(16); - s << number(); - } - // This is not valid JSON, but according to RFC-4627, "Numeric values that cannot be - // represented as sequences of digits (such as Infinity and NaN) are not permitted." 
so - // we are accepting the fact that if we have such values we cannot output valid JSON. - else if (std::isnan(number())) { - s << "NaN"; - } else if (std::isinf(number())) { - s << (number() > 0 ? "Infinity" : "-Infinity"); - } else { - StringBuilder ss; - ss << "Number " << number() << " cannot be represented in JSON"; - string message = ss.str(); - massert(10311, message.c_str(), false); - } + g.writeDouble(buffer, number()); break; case NumberDecimal: - if (format == TenGen) - s << "NumberDecimal(\""; - else - s << "{ \"$numberDecimal\" : \""; - // Recognize again that this is not valid JSON according to RFC-4627. - // Also, treat -NaN and +NaN as the same thing for MongoDB. - if (numberDecimal().isNaN()) { - s << "NaN"; - } else if (numberDecimal().isInfinite()) { - s << (numberDecimal().isNegative() ? "-Infinity" : "Infinity"); - } else { - s << numberDecimal().toString(); - } - if (format == TenGen) - s << "\")"; - else - s << "\" }"; + g.writeDecimal128(buffer, numberDecimal()); break; case mongo::Bool: - s << (boolean() ? "true" : "false"); + g.writeBool(buffer, boolean()); break; case jstNULL: - s << "null"; + g.writeNull(buffer); break; case Undefined: - if (format == Strict) { - s << "{ \"$undefined\" : true }"; - } else { - s << "undefined"; - } + g.writeUndefined(buffer); break; case Object: - embeddedObject().jsonStringStream(format, pretty, false, s); + embeddedObject().jsonStringGenerator(g, pretty ? 
pretty + 1 : 0, false, buffer); break; - case mongo::Array: { - if (embeddedObject().isEmpty()) { - s << "[]"; - break; - } - s << "[ "; - BSONObjIterator i(embeddedObject()); - BSONElement e = i.next(); - if (!e.eoo()) { - int count = 0; - while (1) { - if (pretty) { - s << '\n'; - for (int x = 0; x < pretty; x++) - s << " "; - } - - long index; - if (NumberParser::strToAny(10)(e.fieldName(), &index).isOK() && index > count) { - s << "undefined"; - } else { - // print the element if its index is being printed or if the index it - // belongs to could not be parsed - e.jsonStringStream(format, false, pretty ? pretty + 1 : 0, s); - e = i.next(); - } - count++; - if (e.eoo()) - break; - s << ", "; - } - } - s << " ]"; + case mongo::Array: + embeddedObject().jsonStringGenerator(g, pretty ? pretty + 1 : 0, true, buffer); break; - } - case DBRef: { - if (format == TenGen) - s << "Dbref( "; - else - s << "{ \"$ref\" : "; - s << '"' << valuestr() << "\", "; - if (format != TenGen) - s << "\"$id\" : "; - s << '"' << mongo::OID::from(valuestr() + valuestrsize()) << "\" "; - if (format == TenGen) - s << ')'; - else - s << '}'; + case DBRef: + // valuestrsize() returns the size including the null terminator + g.writeDBRef(buffer, + StringData(valuestr(), valuestrsize() - 1), + OID::from(valuestr() + valuestrsize())); break; - } case jstOID: - if (format == TenGen) { - s << "ObjectId( "; - } else { - s << "{ \"$oid\" : "; - } - s << '"' << __oid() << '"'; - if (format == TenGen) { - s << " )"; - } else { - s << " }"; - } + g.writeOID(buffer, __oid()); break; case BinData: { ConstDataCursor reader(value()); const int len = reader.readAndAdvance<LittleEndian<int>>(); BinDataType type = static_cast<BinDataType>(reader.readAndAdvance<uint8_t>()); - - s << "{ \"$binary\" : \""; - base64::encode(s, StringData(reader.view(), len)); - - auto origFill = s.fill(); - auto origFmtF = s.flags(); - auto origWidth = s.width(); - auto guard = makeGuard([&s, origFill, origFmtF, origWidth] { - 
s.fill(origFill); - s.setf(origFmtF); - s.width(origWidth); - }); - - s.setf(std::ios_base::hex, std::ios_base::basefield); - - s << "\", \"$type\" : \""; - s.width(2); - s.fill('0'); - s << type; - s << "\" }"; - break; + g.writeBinData(buffer, StringData(reader.view(), len), type); } + + break; case mongo::Date: - if (format == Strict) { - Date_t d = date(); - s << "{ \"$date\" : "; - // The two cases in which we cannot convert Date_t::millis to an ISO Date string are - // when the date is too large to format (SERVER-13760), and when the date is before - // the epoch (SERVER-11273). Since Date_t internally stores millis as an unsigned - // long long, despite the fact that it is logically signed (SERVER-8573), this check - // handles both the case where Date_t::millis is too large, and the case where - // Date_t::millis is negative (before the epoch). - if (d.isFormattable()) { - s << "\"" << dateToISOStringLocal(date()) << "\""; - } else { - s << "{ \"$numberLong\" : \"" << d.toMillisSinceEpoch() << "\" }"; - } - s << " }"; - } else { - s << "Date( "; - if (pretty) { - Date_t d = date(); - // The two cases in which we cannot convert Date_t::millis to an ISO Date string - // are when the date is too large to format (SERVER-13760), and when the date is - // before the epoch (SERVER-11273). Since Date_t internally stores millis as an - // unsigned long long, despite the fact that it is logically signed - // (SERVER-8573), this check handles both the case where Date_t::millis is too - // large, and the case where Date_t::millis is negative (before the epoch). 
- if (d.isFormattable()) { - s << "\"" << dateToISOStringLocal(date()) << "\""; - } else { - // FIXME: This is not parseable by the shell, since it may not fit in a - // float - s << d.toMillisSinceEpoch(); - } - } else { - s << date().asInt64(); - } - s << " )"; - } - break; - case RegEx: - if (format == Strict) { - s << "{ \"$regex\" : \"" << str::escape(regex()); - s << "\", \"$options\" : \"" << regexFlags() << "\" }"; - } else { - s << "/" << str::escape(regex(), true) << "/"; - // FIXME Worry about alpha order? - for (const char* f = regexFlags(); *f; ++f) { - switch (*f) { - case 'g': - case 'i': - case 'm': - case 's': - s << *f; - default: - break; - } - } - } + g.writeDate(buffer, date()); break; - + case RegEx: { + StringData pattern(regex()); + g.writeRegex(buffer, pattern, StringData(pattern.rawData() + pattern.size() + 1)); + } break; case CodeWScope: { BSONObj scope = codeWScopeObject(); if (!scope.isEmpty()) { - s << "{ \"$code\" : \"" << str::escape(_asCode()) << "\" , " - << "\"$scope\" : " << scope.jsonString() << " }"; + g.writeCodeWithScope(buffer, _asCode(), scope); break; } + // fall through if scope is empty } - case Code: - s << "\"" << str::escape(_asCode()) << "\""; + g.writeCode(buffer, _asCode()); break; - case bsonTimestamp: - if (format == TenGen) { - s << "Timestamp( " << durationCount<Seconds>(timestampTime().toDurationSinceEpoch()) - << ", " << timestampInc() << " )"; - } else { - s << "{ \"$timestamp\" : { \"t\" : " - << durationCount<Seconds>(timestampTime().toDurationSinceEpoch()) - << ", \"i\" : " << timestampInc() << " } }"; - } + g.writeTimestamp(buffer, timestamp()); break; - case MinKey: - s << "{ \"$minKey\" : 1 }"; + g.writeMinKey(buffer); break; - case MaxKey: - s << "{ \"$maxKey\" : 1 }"; + g.writeMaxKey(buffer); break; - default: - StringBuilder ss; - ss << "Cannot create a properly formatted JSON string with " - << "element: " << toString() << " of type: " << type(); - string message = ss.str(); - massert(10312, 
message.c_str(), false); + MONGO_UNREACHABLE; } } +void BSONElement::jsonStringGenerator(ExtendedCanonicalV200Generator const& generator, + bool includeFieldNames, + int pretty, + fmt::memory_buffer& buffer) const { + _jsonStringGenerator(generator, includeFieldNames, pretty, buffer); +} +void BSONElement::jsonStringGenerator(ExtendedRelaxedV200Generator const& generator, + bool includeFieldNames, + int pretty, + fmt::memory_buffer& buffer) const { + _jsonStringGenerator(generator, includeFieldNames, pretty, buffer); +} +void BSONElement::jsonStringGenerator(LegacyStrictGenerator const& generator, + bool includeFieldNames, + int pretty, + fmt::memory_buffer& buffer) const { + _jsonStringGenerator(generator, includeFieldNames, pretty, buffer); +} + namespace { // Compares two string elements using a simple binary compare. diff --git a/src/mongo/bson/bsonelement.h b/src/mongo/bson/bsonelement.h index a688d40eb60..11be12ba114 100644 --- a/src/mongo/bson/bsonelement.h +++ b/src/mongo/bson/bsonelement.h @@ -31,6 +31,7 @@ #include <cmath> #include <cstdint> +#include <fmt/format.h> #include <string.h> // strlen #include <string> #include <vector> @@ -52,6 +53,9 @@ class BSONObj; class BSONElement; class BSONObjBuilder; class Timestamp; +class ExtendedCanonicalV200Generator; +class ExtendedRelaxedV200Generator; +class LegacyStrictGenerator; typedef BSONElement be; typedef BSONObj bo; @@ -211,10 +215,23 @@ public: bool includeFieldNames = true, int pretty = 0) const; - void jsonStringStream(JsonStringFormat format, + void jsonStringBuffer(JsonStringFormat format, bool includeFieldNames, int pretty, - std::stringstream& s) const; + fmt::memory_buffer& buffer) const; + + void jsonStringGenerator(ExtendedCanonicalV200Generator const& generator, + bool includeFieldNames, + int pretty, + fmt::memory_buffer& buffer) const; + void jsonStringGenerator(ExtendedRelaxedV200Generator const& generator, + bool includeFieldNames, + int pretty, + fmt::memory_buffer& buffer) const; + void 
jsonStringGenerator(LegacyStrictGenerator const& generator, + bool includeFieldNames, + int pretty, + fmt::memory_buffer& buffer) const; operator std::string() const { return toString(); @@ -741,6 +758,12 @@ public: static const double kLongLongMaxPlusOneAsDouble; private: + template <typename Generator> + void _jsonStringGenerator(const Generator& g, + bool includeFieldNames, + int pretty, + fmt::memory_buffer& buffer) const; + const char* data; int fieldNameSize_; // internal size includes null terminator int totalSize; diff --git a/src/mongo/bson/bsonobj.cpp b/src/mongo/bson/bsonobj.cpp index 15e08aefca6..6fab09e94ab 100644 --- a/src/mongo/bson/bsonobj.cpp +++ b/src/mongo/bson/bsonobj.cpp @@ -34,6 +34,9 @@ #include "mongo/base/data_range.h" #include "mongo/bson/bson_validate.h" #include "mongo/bson/bsonelement_comparator_interface.h" +#include "mongo/bson/generator_extended_canonical_2_0_0.h" +#include "mongo/bson/generator_extended_relaxed_2_0_0.h" +#include "mongo/bson/generator_legacy_strict.h" #include "mongo/db/json.h" #include "mongo/util/allocator.h" #include "mongo/util/hex.h" @@ -140,39 +143,76 @@ BSONObj BSONObj::getOwned(const BSONObj& obj) { return obj.getOwned(); } -std::string BSONObj::jsonString(JsonStringFormat format, int pretty, bool isArray) const { - std::stringstream s; - BSONObj::jsonStringStream(format, pretty, isArray, s); - return s.str(); -} - -void BSONObj::jsonStringStream(JsonStringFormat format, - int pretty, - bool isArray, - std::stringstream& s) const { +template <typename Generator> +void BSONObj::_jsonStringGenerator(const Generator& g, + int pretty, + bool isArray, + fmt::memory_buffer& buffer) const { if (isEmpty()) { - s << (isArray ? "[]" : "{}"); + fmt::format_to(buffer, "{}", isArray ? "[]" : "{}"); return; } - s << (isArray ? "[ " : "{ "); + buffer.push_back(isArray ? '[' : '{'); + BSONObjIterator i(*this); BSONElement e = i.next(); if (!e.eoo()) while (1) { - e.jsonStringStream(format, !isArray, pretty ? 
pretty + 1 : 0, s); + e.jsonStringGenerator(g, !isArray, pretty, buffer); e = i.next(); - if (e.eoo()) + if (e.eoo()) { + g.writePadding(buffer); break; - s << ","; + } + buffer.push_back(','); if (pretty) { - s << '\n'; - for (int x = 0; x < pretty; x++) - s << " "; - } else { - s << " "; + fmt::format_to(buffer, "{: <{}}", '\n', pretty * 2); } } - s << (isArray ? " ]" : " }"); + + buffer.push_back(isArray ? ']' : '}'); +} + +void BSONObj::jsonStringGenerator(ExtendedCanonicalV200Generator const& generator, + int pretty, + bool isArray, + fmt::memory_buffer& buffer) const { + _jsonStringGenerator(generator, pretty, isArray, buffer); +} +void BSONObj::jsonStringGenerator(ExtendedRelaxedV200Generator const& generator, + int pretty, + bool isArray, + fmt::memory_buffer& buffer) const { + _jsonStringGenerator(generator, pretty, isArray, buffer); +} +void BSONObj::jsonStringGenerator(LegacyStrictGenerator const& generator, + int pretty, + bool isArray, + fmt::memory_buffer& buffer) const { + _jsonStringGenerator(generator, pretty, isArray, buffer); +} + +std::string BSONObj::jsonString(JsonStringFormat format, int pretty, bool isArray) const { + fmt::memory_buffer buffer; + jsonStringBuffer(format, pretty, isArray, buffer); + return fmt::to_string(buffer); +} + +void BSONObj::jsonStringBuffer(JsonStringFormat format, + int pretty, + bool isArray, + fmt::memory_buffer& buffer) const { + auto withGenerator = [&](auto&& gen) { jsonStringGenerator(gen, pretty, isArray, buffer); }; + + if (format == ExtendedCanonicalV2_0_0) { + withGenerator(ExtendedCanonicalV200Generator()); + } else if (format == ExtendedRelaxedV2_0_0) { + withGenerator(ExtendedRelaxedV200Generator()); + } else if (format == LegacyStrict) { + withGenerator(LegacyStrictGenerator()); + } else { + MONGO_UNREACHABLE; + } } bool BSONObj::valid(BSONVersion version) const { diff --git a/src/mongo/bson/bsonobj.h b/src/mongo/bson/bsonobj.h index bf8559f7dbd..94af60877e8 100644 --- a/src/mongo/bson/bsonobj.h +++ 
b/src/mongo/bson/bsonobj.h @@ -53,6 +53,9 @@ namespace mongo { class BSONObjStlIterator; +class ExtendedCanonicalV200Generator; +class ExtendedRelaxedV200Generator; +class LegacyStrictGenerator; /** C++ representation of a "BSON" object -- that is, an extended JSON-style @@ -261,14 +264,27 @@ public: /** Properly formatted JSON string. @param pretty if true we try to add some lf's and indentation */ - std::string jsonString(JsonStringFormat format = Strict, + std::string jsonString(JsonStringFormat format = ExtendedCanonicalV2_0_0, int pretty = 0, bool isArray = false) const; - void jsonStringStream(JsonStringFormat format, + void jsonStringBuffer(JsonStringFormat format, int pretty, bool isArray, - std::stringstream& s) const; + fmt::memory_buffer& buffer) const; + + void jsonStringGenerator(ExtendedCanonicalV200Generator const& generator, + int pretty, + bool isArray, + fmt::memory_buffer& buffer) const; + void jsonStringGenerator(ExtendedRelaxedV200Generator const& generator, + int pretty, + bool isArray, + fmt::memory_buffer& buffer) const; + void jsonStringGenerator(LegacyStrictGenerator const& generator, + int pretty, + bool isArray, + fmt::memory_buffer& buffer) const; /** note: addFields always adds _id even if not specified */ int addFields(BSONObj& from, std::set<std::string>& fields); /* returns n added */ @@ -586,6 +602,12 @@ public: } private: + template <typename Generator> + void _jsonStringGenerator(const Generator& g, + int pretty, + bool isArray, + fmt::memory_buffer& buffer) const; + void _assertInvalid(int maxSize) const; template <typename Traits = DefaultSizeTrait> diff --git a/src/mongo/bson/generator_extended_canonical_2_0_0.h b/src/mongo/bson/generator_extended_canonical_2_0_0.h new file mode 100644 index 00000000000..34a99f8b015 --- /dev/null +++ b/src/mongo/bson/generator_extended_canonical_2_0_0.h @@ -0,0 +1,170 @@ +/** + * Copyright (C) 2019-present MongoDB, Inc. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#pragma once + +#include "mongo/bson/bsonobj.h" +#include "mongo/platform/decimal128.h" +#include "mongo/util/base64.h" +#include "mongo/util/str_escape.h" + +#include <fmt/format.h> + +namespace mongo { +class ExtendedCanonicalV200Generator { +public: + void writeNull(fmt::memory_buffer& buffer) const { + appendTo(buffer, "null"_sd); + } + void writeUndefined(fmt::memory_buffer& buffer) const { + appendTo(buffer, R"({"$undefined":true})"_sd); + } + + void writeString(fmt::memory_buffer& buffer, StringData str) const { + buffer.push_back('"'); + str::escapeForJSON(buffer, str); + buffer.push_back('"'); + } + + void writeBool(fmt::memory_buffer& buffer, bool val) const { + if (val) + appendTo(buffer, "true"_sd); + else + appendTo(buffer, "false"_sd); + } + + void writeInt32(fmt::memory_buffer& buffer, int32_t val) const { + fmt::format_to(buffer, R"({{"$numberInt":"{}"}})", val); + } + + void writeInt64(fmt::memory_buffer& buffer, int64_t val) const { + fmt::format_to(buffer, R"({{"$numberLong":"{}"}})", val); + } + + void writeDouble(fmt::memory_buffer& buffer, double val) const { + if (val >= std::numeric_limits<double>::lowest() && + val <= std::numeric_limits<double>::max()) + fmt::format_to(buffer, R"({{"$numberDouble":"{}"}})", val); + else if (std::isnan(val)) + appendTo(buffer, R"({"$numberDouble":"NaN"})"_sd); + else if (std::isinf(val)) { + if (val > 0) + appendTo(buffer, R"({"$numberDouble":"Infinity"})"_sd); + else + appendTo(buffer, R"({"$numberDouble":"-Infinity"})"_sd); + } else { + StringBuilder ss; + ss << "Number " << val << " cannot be represented in JSON"; + uassert(51757, ss.str(), false); + } + } + + void writeDecimal128(fmt::memory_buffer& buffer, Decimal128 val) const { + if (val.isNaN()) + appendTo(buffer, R"({"$numberDecimal":"NaN"})"_sd); + else if (val.isInfinite()) + fmt::format_to(buffer, + R"({{"$numberDecimal":"{}"}})", + val.isNegative() ? 
"-Infinity"_sd : "Infinity"_sd); + else { + fmt::format_to(buffer, R"({{"$numberDecimal":"{}"}})", val.toString()); + } + } + + void writeDate(fmt::memory_buffer& buffer, Date_t val) const { + fmt::format_to(buffer, R"({{"$date":{{"$numberLong":"{}"}}}})", val.toMillisSinceEpoch()); + } + + void writeDBRef(fmt::memory_buffer& buffer, StringData ref, OID id) const { + // Collection names can unfortunately contain control characters that need to be escaped + appendTo(buffer, R"({"$ref":")"_sd); + str::escapeForJSON(buffer, ref); + + // OID is a hex string and does not need to be escaped + fmt::format_to(buffer, R"(","$id":"{}"}})", id.toString()); + } + + void writeOID(fmt::memory_buffer& buffer, OID val) const { + // OID is a hex string and does not need to be escaped + fmt::format_to(buffer, R"({{"$oid":"{}"}})", val.toString()); + } + + void writeTimestamp(fmt::memory_buffer& buffer, Timestamp val) const { + fmt::format_to( + buffer, R"({{"$timestamp":{{"t":{},"i":{}}}}})", val.getSecs(), val.getInc()); + } + + void writeBinData(fmt::memory_buffer& buffer, StringData data, BinDataType type) const { + appendTo(buffer, R"({"$binary":{"base64":")"_sd); + base64::encode(buffer, data); + fmt::format_to(buffer, R"(","subType":"{:x}"}}}})", type); + } + + void writeRegex(fmt::memory_buffer& buffer, StringData pattern, StringData options) const { + appendTo(buffer, R"({"$regularExpression":{"pattern":")"_sd); + str::escapeForJSON(buffer, pattern); + appendTo(buffer, R"(","options":")"_sd); + str::escapeForJSON(buffer, options); + appendTo(buffer, R"("}})"_sd); + } + + void writeSymbol(fmt::memory_buffer& buffer, StringData symbol) const { + appendTo(buffer, R"({"$symbol":")"_sd); + str::escapeForJSON(buffer, symbol); + appendTo(buffer, R"("})"_sd); + } + + void writeCode(fmt::memory_buffer& buffer, StringData code) const { + appendTo(buffer, R"({"$code":")"_sd); + str::escapeForJSON(buffer, code); + appendTo(buffer, R"("})"_sd); + } + void 
writeCodeWithScope(fmt::memory_buffer& buffer, + StringData code, + BSONObj const& scope) const { + appendTo(buffer, R"({"$code":")"_sd); + str::escapeForJSON(buffer, code); + appendTo(buffer, R"(","$scope":)"_sd); + scope.jsonStringGenerator(*this, 0, false, buffer); + appendTo(buffer, R"(})"_sd); + } + void writeMinKey(fmt::memory_buffer& buffer) const { + appendTo(buffer, R"({"$minKey":1})"_sd); + } + void writeMaxKey(fmt::memory_buffer& buffer) const { + appendTo(buffer, R"({"$maxKey":1})"_sd); + } + void writePadding(fmt::memory_buffer& buffer) const {} + +protected: + static void appendTo(fmt::memory_buffer& buffer, StringData data) { + buffer.append(data.begin(), data.end()); + } +}; +} // namespace mongo diff --git a/src/mongo/bson/generator_extended_relaxed_2_0_0.h b/src/mongo/bson/generator_extended_relaxed_2_0_0.h new file mode 100644 index 00000000000..20484798e9f --- /dev/null +++ b/src/mongo/bson/generator_extended_relaxed_2_0_0.h @@ -0,0 +1,85 @@ +/** + * Copyright (C) 2019-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. 
You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/bson/generator_extended_canonical_2_0_0.h" + +namespace mongo { +class ExtendedRelaxedV200Generator : private ExtendedCanonicalV200Generator { +public: + using ExtendedCanonicalV200Generator::writeBinData; + using ExtendedCanonicalV200Generator::writeBool; + using ExtendedCanonicalV200Generator::writeCode; + using ExtendedCanonicalV200Generator::writeCodeWithScope; + using ExtendedCanonicalV200Generator::writeDBRef; + using ExtendedCanonicalV200Generator::writeDecimal128; + using ExtendedCanonicalV200Generator::writeMaxKey; + using ExtendedCanonicalV200Generator::writeMinKey; + using ExtendedCanonicalV200Generator::writeNull; + using ExtendedCanonicalV200Generator::writeOID; + using ExtendedCanonicalV200Generator::writePadding; + using ExtendedCanonicalV200Generator::writeRegex; + using ExtendedCanonicalV200Generator::writeString; + using ExtendedCanonicalV200Generator::writeSymbol; + using ExtendedCanonicalV200Generator::writeTimestamp; + using ExtendedCanonicalV200Generator::writeUndefined; + + void writeInt32(fmt::memory_buffer& buffer, int32_t val) const { + fmt::format_to(buffer, R"({})", val); + } + + void writeInt64(fmt::memory_buffer& buffer, int64_t val) const { + fmt::format_to(buffer, R"({})", val); + } + + void writeDouble(fmt::memory_buffer& buffer, double val) const { + if (val >= std::numeric_limits<double>::lowest() && + val <= std::numeric_limits<double>::max()) + fmt::format_to(buffer, R"({})", val); + else { + 
ExtendedCanonicalV200Generator::writeDouble(buffer, val); + } + } + + void writeDate(fmt::memory_buffer& buffer, Date_t val) const { + // The two cases in which we cannot convert Date_t::millis to an ISO Date string are + // when the date is too large to format (SERVER-13760), and when the date is before + // the epoch (SERVER-11273). Since Date_t internally stores millis as an unsigned + // long long, despite the fact that it is logically signed (SERVER-8573), this check + // handles both the case where Date_t::millis is too large, and the case where + // Date_t::millis is negative (before the epoch). + if (val.isFormattable()) { + fmt::format_to(buffer, R"({{"$date":"{}"}})", dateToISOStringLocal(val)); + } else { + ExtendedCanonicalV200Generator::writeDate(buffer, val); + } + } +}; +} // namespace mongo diff --git a/src/mongo/bson/generator_legacy_strict.h b/src/mongo/bson/generator_legacy_strict.h new file mode 100644 index 00000000000..bfe027328d1 --- /dev/null +++ b/src/mongo/bson/generator_legacy_strict.h @@ -0,0 +1,159 @@ +/** + * Copyright (C) 2019-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. 
+ * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/bson/generator_extended_canonical_2_0_0.h" +#include "mongo/bson/util/builder.h" +#include "mongo/util/str.h" + +#include <fmt/format.h> + +namespace mongo { +class LegacyStrictGenerator : private ExtendedCanonicalV200Generator { +public: + using ExtendedCanonicalV200Generator::writeBool; + using ExtendedCanonicalV200Generator::writeNull; + + void writeUndefined(fmt::memory_buffer& buffer) const { + appendTo(buffer, R"({ "$undefined" : true })"_sd); + } + + void writeString(fmt::memory_buffer& buffer, StringData str) const { + fmt::format_to(buffer, R"("{}")", str::escape(str)); + } + + void writeSymbol(fmt::memory_buffer& buffer, StringData symbol) const { + writeString(buffer, symbol); + } + + void writeInt32(fmt::memory_buffer& buffer, int32_t val) const { + writeDouble(buffer, val); + } + + void writeInt64(fmt::memory_buffer& buffer, int64_t val) const { + fmt::format_to(buffer, R"({{ "$numberLong" : "{}" }})", val); + } + + void writeDouble(fmt::memory_buffer& buffer, double val) const { + if (val >= std::numeric_limits<double>::lowest() && + val <= std::numeric_limits<double>::max()) + fmt::format_to(buffer, R"({:.16g})", val); + else if 
(std::isnan(val)) + appendTo(buffer, "NaN"_sd); + else if (std::isinf(val)) { + if (val > 0) + appendTo(buffer, "Infinity"_sd); + else + appendTo(buffer, "-Infinity"_sd); + } else { + StringBuilder ss; + ss << "Number " << val << " cannot be represented in JSON"; + uassert(10311, ss.str(), false); + } + } + + void writeDecimal128(fmt::memory_buffer& buffer, Decimal128 val) const { + if (val.isNaN()) + appendTo(buffer, R"({ "$numberDecimal" : "NaN" })"_sd); + else if (val.isInfinite()) + fmt::format_to(buffer, + R"({{ "$numberDecimal" : "{}" }})", + val.isNegative() ? "-Infinity"_sd : "Infinity"_sd); + else { + fmt::format_to(buffer, R"({{ "$numberDecimal" : "{}" }})", val.toString()); + } + } + + void writeDate(fmt::memory_buffer& buffer, Date_t val) const { + // The two cases in which we cannot convert Date_t::millis to an ISO Date string are + // when the date is too large to format (SERVER-13760), and when the date is before + // the epoch (SERVER-11273). Since Date_t internally stores millis as an unsigned + // long long, despite the fact that it is logically signed (SERVER-8573), this check + // handles both the case where Date_t::millis is too large, and the case where + // Date_t::millis is negative (before the epoch). 
+ if (val.isFormattable()) { + fmt::format_to(buffer, R"({{ "$date" : "{}" }})", dateToISOStringLocal(val)); + } else { + fmt::format_to( + buffer, R"({{ "$date" : {{ "$numberLong" : "{}" }} }})", val.toMillisSinceEpoch()); + } + } + + void writeDBRef(fmt::memory_buffer& buffer, StringData ref, OID id) const { + fmt::format_to(buffer, R"({{ "$ref" : "{}", "$id" : "{}" }})", ref, id.toString()); + } + + void writeOID(fmt::memory_buffer& buffer, OID val) const { + fmt::format_to(buffer, R"({{ "$oid" : "{}" }})", val.toString()); + } + + void writeBinData(fmt::memory_buffer& buffer, StringData data, BinDataType type) const { + appendTo(buffer, R"({ "$binary" : ")"); + base64::encode(buffer, data); + fmt::format_to(buffer, R"(", "$type" : "{:02x}" }})", type); + } + + void writeRegex(fmt::memory_buffer& buffer, StringData pattern, StringData options) const { + fmt::format_to( + buffer, R"({{ "$regex" : "{}", "$options" : "{}" }})", str::escape(pattern), options); + } + + void writeCode(fmt::memory_buffer& buffer, StringData code) const { + fmt::format_to(buffer, R"({{ "$code" : "{}" }})", str::escape(code)); + } + + void writeCodeWithScope(fmt::memory_buffer& buffer, + StringData code, + BSONObj const& scope) const { + fmt::format_to(buffer, R"({{ "$code" : "{}", "$scope" : )", str::escape(code)); + scope.jsonStringGenerator(*this, 0, false, buffer); + appendTo(buffer, R"( })"); + } + + void writeTimestamp(fmt::memory_buffer& buffer, Timestamp val) const { + fmt::format_to(buffer, + R"({{ "$timestamp" : {{ "t" : {}, "i" : {} }} }})", + val.getSecs(), + val.getInc()); + } + + void writeMinKey(fmt::memory_buffer& buffer) const { + appendTo(buffer, R"({ "$minKey" : 1 })"_sd); + } + + void writeMaxKey(fmt::memory_buffer& buffer) const { + appendTo(buffer, R"({ "$maxKey" : 1 })"_sd); + } + + void writePadding(fmt::memory_buffer& buffer) const { + buffer.push_back(' '); + } +}; +} // namespace mongo diff --git a/src/mongo/bson/json.cpp b/src/mongo/bson/json.cpp index 
b71cd7d3d4e..b6fbdf94541 100644 --- a/src/mongo/bson/json.cpp +++ b/src/mongo/bson/json.cpp @@ -67,7 +67,7 @@ using namespace fmt::literals; // Size hints given to char vectors enum { - ID_RESERVE_SIZE = 64, + ID_RESERVE_SIZE = 24, PAT_RESERVE_SIZE = 4096, OPT_RESERVE_SIZE = 64, FIELD_RESERVE_SIZE = 4096, @@ -76,7 +76,9 @@ enum { BINDATATYPE_RESERVE_SIZE = 4096, NS_RESERVE_SIZE = 64, DB_RESERVE_SIZE = 64, - NUMBERLONG_RESERVE_SIZE = 64, + NUMBERINT_RESERVE_SIZE = 16, + NUMBERLONG_RESERVE_SIZE = 20, + NUMBERDOUBLE_RESERVE_SIZE = 64, NUMBERDECIMAL_RESERVE_SIZE = 64, DATE_RESERVE_SIZE = 64 }; @@ -253,6 +255,14 @@ Status JParse::object(StringData fieldName, BSONObjBuilder& builder, bool subObj if (ret != Status::OK()) { return ret; } + } else if (firstField == "$regularExpression") { + if (!subObject) { + return parseError("Reserved field name in base object: $regularExpression"); + } + Status ret = regexObjectCanonical(fieldName, builder); + if (ret != Status::OK()) { + return ret; + } } else if (firstField == "$ref") { if (!subObject) { return parseError("Reserved field name in base object: $ref"); @@ -269,6 +279,14 @@ Status JParse::object(StringData fieldName, BSONObjBuilder& builder, bool subObj if (ret != Status::OK()) { return ret; } + } else if (firstField == "$numberInt") { + if (!subObject) { + return parseError("Reserved field name in base object: $numberInt"); + } + Status ret = numberIntObject(fieldName, builder); + if (ret != Status::OK()) { + return ret; + } } else if (firstField == "$numberLong") { if (!subObject) { return parseError("Reserved field name in base object: $numberLong"); @@ -277,6 +295,15 @@ Status JParse::object(StringData fieldName, BSONObjBuilder& builder, bool subObj if (ret != Status::OK()) { return ret; } + + } else if (firstField == "$numberDouble") { + if (!subObject) { + return parseError("Reserved field name in base object: $numberDouble"); + } + Status ret = numberDoubleObject(fieldName, builder); + if (ret != Status::OK()) { + 
return ret; + } } else if (firstField == "$numberDecimal") { if (!subObject) { return parseError("Reserved field name in base object: $numberDecimal"); @@ -367,37 +394,73 @@ Status JParse::binaryObject(StringData fieldName, BSONObjBuilder& builder) { } std::string binDataString; binDataString.reserve(BINDATA_RESERVE_SIZE); - Status dataRet = quotedString(&binDataString); - if (dataRet != Status::OK()) { - return dataRet; + + std::string binDataType; + binDataType.reserve(BINDATATYPE_RESERVE_SIZE); + + if (peekToken(LBRACE)) { + readToken(LBRACE); + + if (!readField("base64")) { + return parseError("Expected field name: \"base64\", in \"$binary\" object"); + } + if (!readToken(COLON)) { + return parseError("Expecting ':'"); + } + + Status dataRet = quotedString(&binDataString); + if (dataRet != Status::OK()) { + return dataRet; + } + if (!readToken(COMMA)) { + return parseError("Expected ','"); + } + if (!readField("subType")) { + return parseError("Expected field name: \"subType\", in \"$binary\" object"); + } + if (!readToken(COLON)) { + return parseError("Expected ':'"); + } + Status typeRet = quotedString(&binDataType); + if (typeRet != Status::OK()) { + return typeRet; + } + if (binDataType.size() == 1) + binDataType = "0" + binDataType; + readToken(RBRACE); + } else { + Status dataRet = quotedString(&binDataString); + if (dataRet != Status::OK()) { + return dataRet; + } + if (!readToken(COMMA)) { + return parseError("Expected ','"); + } + if (!readField("$type")) { + return parseError("Expected second field name: \"$type\", in \"$binary\" object"); + } + if (!readToken(COLON)) { + return parseError("Expected ':'"); + } + + Status typeRet = quotedString(&binDataType); + if (typeRet != Status::OK()) { + return typeRet; + } } + if (binDataString.size() % 4 != 0) { return parseError("Invalid length base64 encoded string"); } if (!isBase64String(binDataString)) { return parseError("Invalid character in base64 encoded string"); } - const std::string& binData = 
base64::decode(binDataString); - if (!readToken(COMMA)) { - return parseError("Expected ','"); - } + std::string binData = base64::decode(binDataString); - if (!readField("$type")) { - return parseError("Expected second field name: \"$type\", in \"$binary\" object"); - } - if (!readToken(COLON)) { - return parseError("Expected ':'"); - } - std::string binDataType; - binDataType.reserve(BINDATATYPE_RESERVE_SIZE); - Status typeRet = quotedString(&binDataType); - if (typeRet != Status::OK()) { - return typeRet; - } if ((binDataType.size() != 2) || !isHexString(binDataType)) { return parseError( - "Argument of $type in $bindata object must be a hex string representation of a single " - "byte"); + "Argument of $type in $bindata object must be a hex string representation of a " + "single byte"); } // The fromHex function returns a signed char, but the highest @@ -459,6 +522,8 @@ Status JParse::dateObject(StringData fieldName, BSONObjBuilder& builder) { if (!ret.isOK()) { return ret; } + + readToken(RBRACE); date = Date_t::fromMillisSinceEpoch(numberLong); } else { StatusWith<Date_t> parsedDate = parseDate(); @@ -564,6 +629,47 @@ Status JParse::regexObject(StringData fieldName, BSONObjBuilder& builder) { return Status::OK(); } +Status JParse::regexObjectCanonical(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(COLON)) { + return parseError("Expecting ':'"); + } + readToken(LBRACE); + if (!readField("pattern")) { + return parseError("Expected field name: \"pattern\", in \"$regularExpression\" object"); + } + if (!readToken(COLON)) { + return parseError("Expecting ':'"); + } + std::string pat; + pat.reserve(PAT_RESERVE_SIZE); + Status patRet = quotedString(&pat); + if (patRet != Status::OK()) { + return patRet; + } + if (!readToken(COMMA)) { + return parseError("Expected ','"); + } + if (!readField("options")) { + return parseError("Expected field name: \"pattern\", in \"$regularExpression\" object"); + } + if (!readToken(COLON)) { + return 
parseError("Expecting ':'"); + } + std::string opt; + opt.reserve(OPT_RESERVE_SIZE); + Status optRet = quotedString(&opt); + if (optRet != Status::OK()) { + return optRet; + } + Status optCheckRet = regexOptCheck(opt); + if (optCheckRet != Status::OK()) { + return optCheckRet; + } + readToken(RBRACE); + builder.appendRegex(fieldName, pat, opt); + return Status::OK(); +} + Status JParse::dbRefObject(StringData fieldName, BSONObjBuilder& builder) { BSONObjBuilder subBuilder(builder.subobjStart(fieldName)); @@ -648,6 +754,53 @@ Status JParse::numberLongObject(StringData fieldName, BSONObjBuilder& builder) { return Status::OK(); } +Status JParse::numberIntObject(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(COLON)) { + return parseError("Expecting ':'"); + } + + // The number must be a quoted string, since large long numbers could overflow a double and + // thus may not be valid JSON + std::string numberIntString; + numberIntString.reserve(NUMBERINT_RESERVE_SIZE); + Status ret = quotedString(&numberIntString); + if (!ret.isOK()) { + return ret; + } + + int numberInt; + ret = NumberParser{}(numberIntString, &numberInt); + if (!ret.isOK()) { + return ret; + } + + builder.append(fieldName, numberInt); + return Status::OK(); +} + +Status JParse::numberDoubleObject(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(COLON)) { + return parseError("Expecting ':'"); + } + // The number must be a quoted string, since large double numbers could overflow other types + // and thus may not be valid JSON + std::string numberDoubleString; + numberDoubleString.reserve(NUMBERDOUBLE_RESERVE_SIZE); + Status ret = quotedString(&numberDoubleString); + if (!ret.isOK()) { + return ret; + } + + double numberDouble; + ret = NumberParser{}(numberDoubleString, &numberDouble); + if (!ret.isOK()) { + return ret; + } + + builder.append(fieldName, numberDouble); + return Status::OK(); +} + Status JParse::numberDecimalObject(StringData fieldName, BSONObjBuilder& 
builder) { if (!readToken(COLON)) { return parseError("Expecting ':'"); diff --git a/src/mongo/bson/json.h b/src/mongo/bson/json.h index 9b74bce5fbb..76ece5feb7e 100644 --- a/src/mongo/bson/json.h +++ b/src/mongo/bson/json.h @@ -79,7 +79,9 @@ bool isArray(StringData str); * @param format The JSON format (TenGen, Strict). * @param pretty Enables pretty output. */ -std::string tojson(const BSONArray& arr, JsonStringFormat format = Strict, bool pretty = false); +std::string tojson(const BSONArray& arr, + JsonStringFormat format = ExtendedCanonicalV2_0_0, + bool pretty = false); /** * Convert a BSONObj to a JSON string. @@ -88,7 +90,9 @@ std::string tojson(const BSONArray& arr, JsonStringFormat format = Strict, bool * @param format The JSON format (JS, TenGen, Strict). * @param pretty Enables pretty output. */ -std::string tojson(const BSONObj& obj, JsonStringFormat format = Strict, bool pretty = false); +std::string tojson(const BSONObj& obj, + JsonStringFormat format = ExtendedCanonicalV2_0_0, + bool pretty = false); /** * Parser class. A BSONObj is constructed incrementally by passing a @@ -209,6 +213,16 @@ private: Status regexObject(StringData fieldName, BSONObjBuilder&); /* + * NOTE: the rules for the body of the regex are different here, + * since it is quoted instead of surrounded by slashes. 
+ * REGEXOBJECT : + * { FIELD("$regularExpression") : { + * FIELD("pattern") : <string representing body of regex>, + * FIELD("options") : <string representing regex options> } } + */ + Status regexObjectCanonical(StringData fieldName, BSONObjBuilder&); + + /* * REFOBJECT : * { FIELD("$ref") : <string representing collection name>, * FIELD("$id") : <24 character hex std::string> } @@ -224,12 +238,24 @@ private: Status undefinedObject(StringData fieldName, BSONObjBuilder&); /* + * NUMBERINTOBJECT : + * { FIELD("$numberInt") : "<number>" } + */ + Status numberIntObject(StringData fieldName, BSONObjBuilder&); + + /* * NUMBERLONGOBJECT : * { FIELD("$numberLong") : "<number>" } */ Status numberLongObject(StringData fieldName, BSONObjBuilder&); /* + * NUMBERDOUBLEOBJECT : + * { FIELD("$numberDouble") : "<number>" } + */ + Status numberDoubleObject(StringData fieldName, BSONObjBuilder&); + + /* * NUMBERDECIMALOBJECT : * { FIELD("$numberDecimal") : "<number>" } */ diff --git a/src/mongo/bson/oid.h b/src/mongo/bson/oid.h index d5df0c90ee9..7a5818945f6 100644 --- a/src/mongo/bson/oid.h +++ b/src/mongo/bson/oid.h @@ -255,13 +255,7 @@ inline StringBuilder& operator<<(StringBuilder& s, const OID& o) { See <http://dochub.mongodb.org/core/mongodbextendedjson> for details. */ -enum JsonStringFormat { - /** strict RFC format */ - Strict, - /** 10gen format, which is close to JS format. This form is understandable by - javascript running inside the Mongo server via $where, mr, etc... 
*/ - TenGen, -}; +enum JsonStringFormat { ExtendedCanonicalV2_0_0, ExtendedRelaxedV2_0_0, LegacyStrict }; inline bool operator==(const OID& lhs, const OID& rhs) { return lhs.compare(rhs) == 0; diff --git a/src/mongo/db/matcher/schema/json_schema_parser.cpp b/src/mongo/db/matcher/schema/json_schema_parser.cpp index 3d345aebf12..a8ad976b5da 100644 --- a/src/mongo/db/matcher/schema/json_schema_parser.cpp +++ b/src/mongo/db/matcher/schema/json_schema_parser.cpp @@ -1610,7 +1610,7 @@ StatusWithMatchExpression JSONSchemaParser::parse( const boost::intrusive_ptr<ExpressionContext>& expCtx, BSONObj schema, bool ignoreUnknownKeywords) { - LOG(5) << "Parsing JSON Schema: " << schema.jsonString(); + LOG(5) << "Parsing JSON Schema: " << schema.jsonString(JsonStringFormat::LegacyStrict); try { auto translation = _parse(expCtx, ""_sd, schema, ignoreUnknownKeywords); if (shouldLog(logger::LogSeverity::Debug(5)) && translation.isOK()) { diff --git a/src/mongo/db/query/plan_ranker.cpp b/src/mongo/db/query/plan_ranker.cpp index 1cac7f7424c..0de63fc137c 100644 --- a/src/mongo/db/query/plan_ranker.cpp +++ b/src/mongo/db/query/plan_ranker.cpp @@ -95,7 +95,8 @@ StatusWith<std::unique_ptr<PlanRankingDecision>> PlanRanker::pickBestPlan( if (!candidates[i].failed) { LOG(5) << "Scoring plan " << i << ":" << endl << redact(candidates[i].solution->toString()) << "Stats:\n" - << redact(Explain::statsToBSON(*statTrees[i]).jsonString(Strict, true)); + << redact(Explain::statsToBSON(*statTrees[i]) + .jsonString(ExtendedRelaxedV2_0_0, true)); LOG(2) << "Scoring query plan: " << Explain::getPlanSummary(candidates[i].root) << " planHitEOF=" << statTrees[i]->common.isEOF; diff --git a/src/mongo/db/repl/rollback_impl_test.cpp b/src/mongo/db/repl/rollback_impl_test.cpp index c1f1aed7317..39073633620 100644 --- a/src/mongo/db/repl/rollback_impl_test.cpp +++ b/src/mongo/db/repl/rollback_impl_test.cpp @@ -116,7 +116,7 @@ protected: log() << "Simulating writing a rollback file for namespace " << nss.ns() 
<< " with uuid " << uuid; for (auto&& id : idSet) { - log() << "Looking up " << id.jsonString(); + log() << "Looking up " << id.jsonString(JsonStringFormat::LegacyStrict); auto document = _findDocumentById(opCtx, uuid, nss, id.firstElement()); if (document) { _uuidToObjsMap[uuid].push_back(*document); @@ -1562,12 +1562,12 @@ public: auto search = uuidToIdMap.find(uuid); ASSERT(search != uuidToIdMap.end()) << "map is unexpectedly missing an entry for uuid " << uuid.toString() - << " containing object " << bson.jsonString(); + << " containing object " << bson.jsonString(JsonStringFormat::LegacyStrict); const auto& idObjSet = search->second; const auto iter = idObjSet.find(bson); - ASSERT(iter != idObjSet.end()) - << "_id object set is unexpectedly missing object " << bson.jsonString() - << " in namespace with uuid " << uuid.toString(); + ASSERT(iter != idObjSet.end()) << "_id object set is unexpectedly missing object " + << bson.jsonString(JsonStringFormat::LegacyStrict) + << " in namespace with uuid " << uuid.toString(); } diff --git a/src/mongo/db/update/update_serialization_test.cpp b/src/mongo/db/update/update_serialization_test.cpp index 89ae2ac03c4..734d8c19e6a 100644 --- a/src/mongo/db/update/update_serialization_test.cpp +++ b/src/mongo/db/update/update_serialization_test.cpp @@ -56,7 +56,8 @@ auto updateRoundTrip(const char* json, const std::vector<std::string> filterName for (const auto& name : filterNames) filters[name] = nullptr; driver.parse(bson, filters); - return mongo::tojson(driver.serialize().getDocument().toBson()); + return mongo::tojson(driver.serialize().getDocument().toBson(), + mongo::JsonStringFormat::LegacyStrict); } TEST(UpdateSerialization, DocumentReplacementSerializesExactly) { diff --git a/src/mongo/dbtests/jsontests.cpp b/src/mongo/dbtests/jsontests.cpp index 69476f19b77..f1b670e6bdd 100644 --- a/src/mongo/dbtests/jsontests.cpp +++ b/src/mongo/dbtests/jsontests.cpp @@ -35,9 +35,12 @@ #include "mongo/platform/basic.h" +#include 
<boost/property_tree/json_parser.hpp> +#include <boost/property_tree/ptree.hpp> #include <fmt/format.h> #include <fmt/printf.h> #include <limits> +#include <sstream> #include "mongo/db/jsobj.h" #include "mongo/db/json.h" @@ -46,7 +49,21 @@ #include "mongo/unittest/unittest.h" #include "mongo/util/log.h" + namespace { +std::string makeJsonEquvalent(const std::string& json) { + boost::property_tree::ptree tree; + + std::istringstream in(json); + boost::property_tree::read_json(in, tree); + + std::ostringstream out; + boost::property_tree::write_json(out, tree); + + return out.str(); +} + +#define ASSERT_JSON_EQUALS(a, b) ASSERT_EQUALS(makeJsonEquvalent(a), makeJsonEquvalent(b)) using B = BSONObjBuilder; using Arr = BSONArrayBuilder; @@ -54,13 +71,15 @@ using Arr = BSONArrayBuilder; // Tests of the BSONObj::jsonString member function. namespace JsonStringTests { -void checkJsonString(const BSONObj& bson, const std::string& json) { - ASSERT_EQUALS(bson.jsonString(Strict), json); -} - void checkJsonStringEach(const std::vector<std::pair<BSONObj, std::string>>& pairs) { for (const auto& pair : pairs) { - checkJsonString(pair.first, pair.second); + ASSERT_JSON_EQUALS(pair.first.jsonString(ExtendedCanonicalV2_0_0), pair.second); + ASSERT_JSON_EQUALS(pair.first.jsonString(ExtendedRelaxedV2_0_0), pair.second); + + // Use ASSERT_EQUALS instead of ASSERT_JSON_EQUALS for LegacyStrict. + // LegacyStrict that not produce valid JSON in all cases (which makes boost::property_tree + // throw) and we have other tests elsewhere that checks for exact strings. + ASSERT_EQUALS(pair.first.jsonString(LegacyStrict), pair.second); } } @@ -73,129 +92,168 @@ TEST(JsonStringTest, BasicTest) { // per http://www.ietf.org/rfc/rfc4627.txt, control characters are // (U+0000 through U+001F). U+007F is not mentioned as a control character. 
{B().append("a", "\x1 \x1f").obj(), - R"({ "a" : "\u0001 \u001f" })"}, // AdditionalControlCharacters - {B().append("a", "\x80").obj(), "{ \"a\" : \"\x80\" }"}, // ExtendedAscii - {B().append("\t", "b").obj(), R"({ "\t" : "b" })"}, // EscapeFieldName - {B().append("a", 1).obj(), R"({ "a" : 1 })"}, // SingleIntMember + R"({ "a" : "\u0001 \u001f" })"}, // AdditionalControlCharacters + {B().append("\t", "b").obj(), R"({ "\t" : "b" })"}, // EscapeFieldName }); } +TEST(JsonStringTest, UnicodeTest) { + // Extended Canonical/Relaxed escapes invalid UTF-8 while LegacyStricts treats it as Extended + // Ascii + ASSERT_JSON_EQUALS(B().append("a", "\x80").obj().jsonString(ExtendedCanonicalV2_0_0), + R"({ "a" : "\u0080" })"); + ASSERT_JSON_EQUALS(B().append("a", "\x80").obj().jsonString(ExtendedRelaxedV2_0_0), + R"({ "a" : "\u0080" })"); + // Can't use ASSERT_JSON_EQUALS because property_tree does not allow invalid unicode + ASSERT_EQUALS(B().append("a", "\x80").obj().jsonString(LegacyStrict), "{ \"a\" : \"\x80\" }"); +} + + TEST(JsonStringTest, NumbersTest) { const double qNaN = std::numeric_limits<double>::quiet_NaN(); const double sNaN = std::numeric_limits<double>::signaling_NaN(); // Note there is no NaN in the JSON RFC but what would be the alternative? 
- ASSERT(str::contains(B().append("a", qNaN).obj().jsonString(Strict), "NaN")); - ASSERT(str::contains(B().append("a", sNaN).obj().jsonString(Strict), "NaN")); + ASSERT(str::contains(B().append("a", qNaN).obj().jsonString(ExtendedCanonicalV2_0_0), "NaN")); + ASSERT(str::contains(B().append("a", sNaN).obj().jsonString(ExtendedCanonicalV2_0_0), "NaN")); - checkJsonStringEach({ - {B().append("a", 1.5).obj(), R"({ "a" : 1.5 })"}, // SingleNumberMember - {B().append("a", 123456789).obj(), R"({ "a" : 123456789 })"}, // NumberPrecision - {B().append("a", -1).obj(), R"({ "a" : -1 })"}, // NegativeNumber - }); + ASSERT_JSON_EQUALS(B().append("a", 1).obj().jsonString(ExtendedCanonicalV2_0_0), + R"({ "a" : {"$numberInt": 1 }})"); + ASSERT_JSON_EQUALS(B().append("a", 1).obj().jsonString(ExtendedRelaxedV2_0_0), + R"({ "a" : 1 })"); + ASSERT_EQUALS(B().append("a", 1).obj().jsonString(LegacyStrict), R"({ "a" : 1 })"); + + ASSERT_JSON_EQUALS(B().append("a", -1).obj().jsonString(ExtendedCanonicalV2_0_0), + R"({ "a" : {"$numberInt": -1 }})"); + ASSERT_JSON_EQUALS(B().append("a", -1).obj().jsonString(ExtendedRelaxedV2_0_0), + R"({ "a" : -1 })"); + ASSERT_EQUALS(B().append("a", -1).obj().jsonString(LegacyStrict), R"({ "a" : -1 })"); + + ASSERT_JSON_EQUALS(B().append("a", 1.5).obj().jsonString(ExtendedCanonicalV2_0_0), + R"({ "a" : {"$numberDouble": 1.5 }})"); + ASSERT_JSON_EQUALS(B().append("a", 1.5).obj().jsonString(ExtendedRelaxedV2_0_0), + R"({ "a" : 1.5 })"); + ASSERT_EQUALS(B().append("a", 1.5).obj().jsonString(LegacyStrict), R"({ "a" : 1.5 })"); } TEST(JsonStringTest, NumberLongStrictZero) { BSONObjBuilder b; b.append("a", 0LL); - ASSERT_EQUALS("{ \"a\" : { \"$numberLong\" : \"0\" } }", b.done().jsonString(Strict)); + ASSERT_JSON_EQUALS("{ \"a\" : { \"$numberLong\" : \"0\" } }", + b.done().jsonString(ExtendedCanonicalV2_0_0)); + ASSERT_JSON_EQUALS("{ \"a\" : 0 }", b.done().jsonString(ExtendedRelaxedV2_0_0)); + ASSERT_EQUALS("{ \"a\" : { \"$numberLong\" : \"0\" } }", 
b.done().jsonString(LegacyStrict)); } TEST(JsonStringTest, NumberLongStrict) { BSONObjBuilder b; b.append("a", 20000LL); - ASSERT_EQUALS("{ \"a\" : { \"$numberLong\" : \"20000\" } }", b.done().jsonString(Strict)); + ASSERT_JSON_EQUALS("{ \"a\" : { \"$numberLong\" : \"20000\" } }", + b.done().jsonString(ExtendedCanonicalV2_0_0)); + ASSERT_JSON_EQUALS("{ \"a\" : 20000 }", b.done().jsonString(ExtendedRelaxedV2_0_0)); + ASSERT_EQUALS("{ \"a\" : { \"$numberLong\" : \"20000\" } }", b.done().jsonString(LegacyStrict)); } // Test a NumberLong that is too big to fit into a 32 bit integer TEST(JsonStringTest, NumberLongStrictLarge) { BSONObjBuilder b; b.append("a", 9223372036854775807LL); + ASSERT_JSON_EQUALS("{ \"a\" : { \"$numberLong\" : \"9223372036854775807\" } }", + b.done().jsonString(ExtendedCanonicalV2_0_0)); + ASSERT_JSON_EQUALS("{ \"a\" : 9223372036854775807 }", + b.done().jsonString(ExtendedRelaxedV2_0_0)); ASSERT_EQUALS("{ \"a\" : { \"$numberLong\" : \"9223372036854775807\" } }", - b.done().jsonString(Strict)); + b.done().jsonString(LegacyStrict)); } TEST(JsonStringTest, NumberLongStrictNegative) { BSONObjBuilder b; b.append("a", -20000LL); - ASSERT_EQUALS("{ \"a\" : { \"$numberLong\" : \"-20000\" } }", b.done().jsonString(Strict)); + ASSERT_JSON_EQUALS("{ \"a\" : { \"$numberLong\" : \"-20000\" } }", + b.done().jsonString(ExtendedCanonicalV2_0_0)); + ASSERT_JSON_EQUALS("{ \"a\" : -20000 }", b.done().jsonString(ExtendedRelaxedV2_0_0)); + ASSERT_EQUALS("{ \"a\" : { \"$numberLong\" : \"-20000\" } }", + b.done().jsonString(LegacyStrict)); } TEST(JsonStringTest, NumberDecimal) { - BSONObjBuilder b; - b.append("a", mongo::Decimal128("123456789.12345")); - ASSERT_EQUALS("{ \"a\" : NumberDecimal(\"123456789.12345\") }", b.done().jsonString(TenGen)); -} - -TEST(JsonStringTest, NumberDecimalStrict) { - BSONObjBuilder b; - b.append("a", mongo::Decimal128("123456789.12345")); - ASSERT_EQUALS("{ \"a\" : { \"$numberDecimal\" : \"123456789.12345\" } }", - 
b.done().jsonString(Strict)); + checkJsonStringEach({{B().append("a", mongo::Decimal128("123456789.12345")).obj(), + "{ \"a\" : { \"$numberDecimal\" : \"123456789.12345\" } }"}}); } TEST(JsonStringTest, NumberDoubleNaN) { BSONObjBuilder b; b.append("a", std::numeric_limits<double>::quiet_NaN()); - ASSERT_EQUALS("{ \"a\" : NaN }", b.done().jsonString(Strict)); + ASSERT_JSON_EQUALS(R"({ "a" : { "$numberDouble": "NaN" }})", + b.done().jsonString(ExtendedCanonicalV2_0_0)); + ASSERT_JSON_EQUALS(R"({ "a" : { "$numberDouble": "NaN" }})", + b.done().jsonString(ExtendedRelaxedV2_0_0)); + ASSERT_EQUALS(R"({ "a" : NaN })", b.done().jsonString(LegacyStrict)); } TEST(JsonStringTest, NumberDoubleInfinity) { BSONObjBuilder b; b.append("a", std::numeric_limits<double>::infinity()); - ASSERT_EQUALS("{ \"a\" : Infinity }", b.done().jsonString(Strict)); + ASSERT_JSON_EQUALS(R"({ "a" : { "$numberDouble": "Infinity" }})", + b.done().jsonString(ExtendedCanonicalV2_0_0)); + ASSERT_JSON_EQUALS(R"({ "a" : { "$numberDouble": "Infinity" }})", + b.done().jsonString(ExtendedRelaxedV2_0_0)); + ASSERT_EQUALS(R"({ "a" : Infinity })", b.done().jsonString(LegacyStrict)); } TEST(JsonStringTest, NumberDoubleNegativeInfinity) { BSONObjBuilder b; b.append("a", -std::numeric_limits<double>::infinity()); - ASSERT_EQUALS("{ \"a\" : -Infinity }", b.done().jsonString(Strict)); + ASSERT_JSON_EQUALS(R"({ "a" : { "$numberDouble": "-Infinity" }})", + b.done().jsonString(ExtendedCanonicalV2_0_0)); + ASSERT_JSON_EQUALS(R"({ "a" : { "$numberDouble": "-Infinity" }})", + b.done().jsonString(ExtendedRelaxedV2_0_0)); + ASSERT_EQUALS(R"({ "a" : -Infinity })", b.done().jsonString(LegacyStrict)); } TEST(JsonStringTest, SingleBoolMember) { - ASSERT_EQUALS(R"({ "a" : true })", B().appendBool("a", true).obj().jsonString(Strict)); - ASSERT_EQUALS(R"({ "a" : false })", B().appendBool("a", false).obj().jsonString(Strict)); + checkJsonStringEach({{B().appendBool("a", true).obj(), R"({ "a" : true })"}, + {B().appendBool("a", 
false).obj(), R"({ "a" : false })"}}); } TEST(JsonStringTest, SingleNullMember) { - BSONObjBuilder b; - b.appendNull("a"); - ASSERT_EQUALS("{ \"a\" : null }", b.done().jsonString(Strict)); + checkJsonStringEach({{B().appendNull("a").obj(), R"({ "a" : null })"}}); } TEST(JsonStringTest, SingleUndefinedMember) { - BSONObjBuilder b; - b.appendUndefined("a"); - ASSERT_EQUALS("{ \"a\" : { \"$undefined\" : true } }", b.done().jsonString(Strict)); - ASSERT_EQUALS("{ \"a\" : undefined }", b.done().jsonString(TenGen)); + checkJsonStringEach({{B().appendUndefined("a").obj(), R"({ "a" : { "$undefined" : true } })"}}); } TEST(JsonStringTest, SingleObjectMember) { - BSONObjBuilder b, c; - b.append("a", c.done()); - ASSERT_EQUALS("{ \"a\" : {} }", b.done().jsonString(Strict)); + BSONObjBuilder c; + checkJsonStringEach({{B().append("a", c.done()).obj(), R"({ "a" : {} })"}}); } TEST(JsonStringTest, TwoMembers) { BSONObjBuilder b; b.append("a", 1); b.append("b", 2); - ASSERT_EQUALS("{ \"a\" : 1, \"b\" : 2 }", b.done().jsonString(Strict)); + ASSERT_JSON_EQUALS(R"({ "a" : {"$numberInt" : 1}, "b" : {"$numberInt" : 2} })", + b.done().jsonString(ExtendedCanonicalV2_0_0)); + ASSERT_JSON_EQUALS(R"({ "a" : 1, "b" : 2 })", b.done().jsonString(ExtendedRelaxedV2_0_0)); + ASSERT_EQUALS(R"({ "a" : 1, "b" : 2 })", b.done().jsonString(LegacyStrict)); } TEST(JsonStringTest, EmptyArray) { std::vector<int> arr; BSONObjBuilder b; b.append("a", arr); - ASSERT_EQUALS("{ \"a\" : [] }", b.done().jsonString(Strict)); + + checkJsonStringEach({{b.done(), R"({ "a" : [] })"}}); } TEST(JsonStringTest, Array) { - std::vector<int> arr; - arr.push_back(1); - arr.push_back(2); + std::vector<std::string> arr; + arr.push_back("1"); + arr.push_back("2"); BSONObjBuilder b; b.append("a", arr); - ASSERT_EQUALS("{ \"a\" : [ 1, 2 ] }", b.done().jsonString(Strict)); + + checkJsonStringEach({{b.done(), R"({ "a" : [ "1", "2" ] })"}}); } TEST(JsonStringTest, DBRef) { @@ -204,12 +262,9 @@ TEST(JsonStringTest, DBRef) { OID oid 
= OID::from(OIDbytes); BSONObjBuilder b; b.appendDBRef("a", "namespace", oid); - BSONObj built = b.done(); - ASSERT_EQUALS( - "{ \"a\" : { \"$ref\" : \"namespace\", \"$id\" : \"ffffffffffffffffffffffff\" } }", - built.jsonString(Strict)); - ASSERT_EQUALS("{ \"a\" : Dbref( \"namespace\", \"ffffffffffffffffffffffff\" ) }", - built.jsonString(TenGen)); + + checkJsonStringEach( + {{b.done(), R"({ "a" : { "$ref" : "namespace", "$id" : "ffffffffffffffffffffffff" } })"}}); } TEST(JsonStringTest, DBRefZero) { @@ -218,9 +273,9 @@ TEST(JsonStringTest, DBRefZero) { OID oid = OID::from(OIDbytes); BSONObjBuilder b; b.appendDBRef("a", "namespace", oid); - ASSERT_EQUALS( - "{ \"a\" : { \"$ref\" : \"namespace\", \"$id\" : \"000000000000000000000000\" } }", - b.done().jsonString(Strict)); + + checkJsonStringEach( + {{b.done(), R"({ "a" : { "$ref" : "namespace", "$id" : "000000000000000000000000" } })"}}); } TEST(JsonStringTest, ObjectId) { @@ -230,9 +285,8 @@ TEST(JsonStringTest, ObjectId) { BSONObjBuilder b; b.appendOID("a", &oid); BSONObj built = b.done(); - ASSERT_EQUALS("{ \"a\" : { \"$oid\" : \"ffffffffffffffffffffffff\" } }", - built.jsonString(Strict)); - ASSERT_EQUALS("{ \"a\" : ObjectId( \"ffffffffffffffffffffffff\" ) }", built.jsonString(TenGen)); + + checkJsonStringEach({{b.done(), R"({ "a" : { "$oid" : "ffffffffffffffffffffffff" } })"}}); } TEST(JsonStringTest, BinData) { @@ -243,25 +297,40 @@ TEST(JsonStringTest, BinData) { BSONObjBuilder b; b.appendBinData("a", 3, BinDataGeneral, z); - std::string o = b.done().jsonString(Strict); - - ASSERT_EQUALS("{ \"a\" : { \"$binary\" : \"YWJj\", \"$type\" : \"00\" } }", o); + ASSERT_JSON_EQUALS(R"({ "a" : { "$binary" : { "base64": "YWJj", "subType" : "0" } } })", + b.done().jsonString(ExtendedCanonicalV2_0_0)); + ASSERT_JSON_EQUALS(R"({ "a" : { "$binary" : { "base64": "YWJj", "subType" : "0" } } })", + b.done().jsonString(ExtendedRelaxedV2_0_0)); + ASSERT_EQUALS(R"({ "a" : { "$binary" : "YWJj", "$type" : "00" } })", + 
b.done().jsonString(LegacyStrict)); BSONObjBuilder c; c.appendBinData("a", 2, BinDataGeneral, z); - ASSERT_EQUALS("{ \"a\" : { \"$binary\" : \"YWI=\", \"$type\" : \"00\" } }", - c.done().jsonString(Strict)); + ASSERT_JSON_EQUALS(R"({ "a" : { "$binary" : { "base64": "YWI=", "subType" : "0" } } })", + c.done().jsonString(ExtendedCanonicalV2_0_0)); + ASSERT_JSON_EQUALS(R"({ "a" : { "$binary" : { "base64": "YWI=", "subType" : "0" } } })", + c.done().jsonString(ExtendedRelaxedV2_0_0)); + ASSERT_EQUALS(R"({ "a" : { "$binary" : "YWI=", "$type" : "00" } })", + c.done().jsonString(LegacyStrict)); BSONObjBuilder d; d.appendBinData("a", 1, BinDataGeneral, z); - ASSERT_EQUALS("{ \"a\" : { \"$binary\" : \"YQ==\", \"$type\" : \"00\" } }", - d.done().jsonString(Strict)); + ASSERT_JSON_EQUALS(R"({ "a" : { "$binary" : { "base64": "YQ==", "subType" : "0" } } })", + d.done().jsonString(ExtendedCanonicalV2_0_0)); + ASSERT_JSON_EQUALS(R"({ "a" : { "$binary" : { "base64": "YQ==", "subType" : "0" } } })", + d.done().jsonString(ExtendedRelaxedV2_0_0)); + ASSERT_EQUALS(R"({ "a" : { "$binary" : "YQ==", "$type" : "00" } })", + d.done().jsonString(LegacyStrict)); } TEST(JsonStringTest, Symbol) { BSONObjBuilder b; b.appendSymbol("a", "b"); - ASSERT_EQUALS("{ \"a\" : \"b\" }", b.done().jsonString(Strict)); + ASSERT_JSON_EQUALS(R"({ "a" : { "$symbol": "b" } })", + b.done().jsonString(ExtendedCanonicalV2_0_0)); + ASSERT_JSON_EQUALS(R"({ "a" : { "$symbol": "b" } })", + b.done().jsonString(ExtendedRelaxedV2_0_0)); + ASSERT_EQUALS(R"({ "a" : "b" })", b.done().jsonString(LegacyStrict)); } #ifdef _WIN32 @@ -321,94 +390,115 @@ TEST(JsonStringTest, Date) { BSONObjBuilder b; b.appendDate("a", Date_t()); BSONObj built = b.done(); - ASSERT_EQUALS("{ \"a\" : { \"$date\" : \"1969-12-31T19:00:00.000-0500\" } }", - built.jsonString(Strict)); - ASSERT_EQUALS("{ \"a\" : Date( 0 ) }", built.jsonString(TenGen)); + ASSERT_JSON_EQUALS(R"({ "a" : { "$date" : { "$numberLong" : "0" } } })", + 
built.jsonString(ExtendedCanonicalV2_0_0)); + ASSERT_JSON_EQUALS(R"({ "a" : { "$date" : "1969-12-31T19:00:00.000-0500" } })", + built.jsonString(ExtendedRelaxedV2_0_0)); + ASSERT_EQUALS(R"({ "a" : { "$date" : "1969-12-31T19:00:00.000-0500" } })", + built.jsonString(LegacyStrict)); // Test dates above our maximum formattable date. See SERVER-13760. BSONObjBuilder b2; b2.appendDate("a", Date_t::fromMillisSinceEpoch(32535262800000LL)); - BSONObj built2 = b2.done(); - ASSERT_EQUALS("{ \"a\" : { \"$date\" : { \"$numberLong\" : \"32535262800000\" } } }", - built2.jsonString(Strict)); + + checkJsonStringEach( + {{b2.done(), R"({ "a" : { "$date" : { "$numberLong" : "32535262800000" } } })"}}); } TEST(JsonStringTest, DateNegative) { BSONObjBuilder b; b.appendDate("a", Date_t::fromMillisSinceEpoch(-1)); - BSONObj built = b.done(); - ASSERT_EQUALS("{ \"a\" : { \"$date\" : { \"$numberLong\" : \"-1\" } } }", - built.jsonString(Strict)); - ASSERT_EQUALS("{ \"a\" : Date( -1 ) }", built.jsonString(TenGen)); + + checkJsonStringEach({{b.done(), R"({ "a" : { "$date" : { "$numberLong" : "-1" } } })"}}); } TEST(JsonStringTest, Regex) { BSONObj built = B().appendRegex("a", "abc", "i").obj(); - ASSERT_EQUALS(R"({ "a" : { "$regex" : "abc", "$options" : "i" } })", built.jsonString(Strict)); - ASSERT_EQUALS(R"({ "a" : /abc/i })", built.jsonString(TenGen)); + ASSERT_JSON_EQUALS( + R"({ "a" : { "$regularExpression" : { "pattern" : "abc", "options" : "i" } } })", + built.jsonString(ExtendedCanonicalV2_0_0)); + ASSERT_JSON_EQUALS( + R"({ "a" : { "$regularExpression" : { "pattern" : "abc", "options" : "i" } } })", + built.jsonString(ExtendedRelaxedV2_0_0)); + ASSERT_EQUALS(R"({ "a" : { "$regex" : "abc", "$options" : "i" } })", + built.jsonString(LegacyStrict)); } TEST(JsonStringTest, RegexEscape) { BSONObjBuilder b; b.appendRegex("a", "/\"", "i"); BSONObj built = b.done(); - ASSERT_EQUALS("{ \"a\" : { \"$regex\" : \"/\\\"\", \"$options\" : \"i\" } }", - built.jsonString(Strict)); - 
ASSERT_EQUALS("{ \"a\" : /\\/\\\"/i }", built.jsonString(TenGen)); + + // These raw string literals break the Visual Studio preprocessor + const char* expected = + R"({ "a" : { "$regularExpression" : { "pattern" : "/\"", "options" : "i" } } })"; + const char* expectedLegacy = R"({ "a" : { "$regex" : "/\"", "$options" : "i" } })"; + ASSERT_JSON_EQUALS(expected, built.jsonString(ExtendedCanonicalV2_0_0)); + ASSERT_JSON_EQUALS(expected, built.jsonString(ExtendedRelaxedV2_0_0)); + ASSERT_EQUALS(expectedLegacy, built.jsonString(LegacyStrict)); } TEST(JsonStringTest, RegexManyOptions) { BSONObjBuilder b; b.appendRegex("a", "z", "abcgimx"); BSONObj built = b.done(); - ASSERT_EQUALS("{ \"a\" : { \"$regex\" : \"z\", \"$options\" : \"abcgimx\" } }", - built.jsonString(Strict)); - ASSERT_EQUALS("{ \"a\" : /z/gim }", built.jsonString(TenGen)); + ASSERT_JSON_EQUALS( + R"({ "a" : { "$regularExpression" : { "pattern" : "z", "options" : "abcgimx" } } })", + built.jsonString(ExtendedCanonicalV2_0_0)); + ASSERT_JSON_EQUALS( + R"({ "a" : { "$regularExpression" : { "pattern" : "z", "options" : "abcgimx" } } })", + built.jsonString(ExtendedRelaxedV2_0_0)); + ASSERT_EQUALS(R"({ "a" : { "$regex" : "z", "$options" : "abcgimx" } })", + built.jsonString(LegacyStrict)); } TEST(JsonStringTest, RegexValidOption) { BSONObj built = B().appendRegex("a", "sometext", "ms").obj(); + ASSERT_JSON_EQUALS( + R"({ "a" : { "$regularExpression" : { "pattern" : "sometext", "options" : "ms" } } })", + built.jsonString(ExtendedCanonicalV2_0_0)); + ASSERT_JSON_EQUALS( + R"({ "a" : { "$regularExpression" : { "pattern" : "sometext", "options" : "ms" } } })", + built.jsonString(ExtendedRelaxedV2_0_0)); ASSERT_EQUALS(R"({ "a" : { "$regex" : "sometext", "$options" : "ms" } })", - built.jsonString(Strict)); - ASSERT_EQUALS(R"({ "a" : /sometext/ms })", built.jsonString(TenGen)); + built.jsonString(LegacyStrict)); } TEST(JsonStringTest, CodeTests) { BSONObjBuilder b; b.appendCode("x", "function(arg){ var string =
\"\\n\"; return 1; }"); - BSONObj o = b.obj(); - ASSERT_EQUALS( - "{ \"x\" : \"function(arg){ var string = \\\"\\\\n\\\"; " - "return 1; }\" }", - o.jsonString()); + + checkJsonStringEach({{b.done(), + "{ \"x\" : { \"$code\" : \"function(arg){ var string = \\\"\\\\n\\\"; " + "return 1; }\" } }"}}); } TEST(JsonStringTest, CodeWScopeTests) { BSONObjBuilder b; - b.appendCodeWScope("x", "function(arg){ var string = \"\\n\"; return x; }", BSON("x" << 1)); - BSONObj o = b.obj(); - ASSERT_EQUALS( - "{ \"x\" : " - "{ \"$code\" : " - "\"function(arg){ var string = \\\"\\\\n\\\"; return x; }\" , " - "\"$scope\" : { \"x\" : 1 } } }", - o.jsonString()); + b.appendCodeWScope("x", + "function(arg){ var string = \"\\n\"; return x; }", + BSON("x" + << "1")); + + checkJsonStringEach({{b.done(), + "{ \"x\" : " + "{ \"$code\" : " + "\"function(arg){ var string = \\\"\\\\n\\\"; return x; }\", " + "\"$scope\" : { \"x\" : \"1\" } } }"}}); } TEST(JsonStringTest, TimestampTests) { BSONObjBuilder b; b.append("x", Timestamp(4, 10)); - BSONObj o = b.obj(); - ASSERT_EQUALS("{ \"x\" : { \"$timestamp\" : { \"t\" : 4, \"i\" : 10 } } }", - o.jsonString(Strict)); - ASSERT_EQUALS("{ \"x\" : Timestamp( 4, 10 ) }", o.jsonString(TenGen)); + + checkJsonStringEach({{b.done(), R"({ "x" : { "$timestamp" : { "t" : 4, "i" : 10 } } })"}}); } TEST(JsonStringTest, NullString) { BSONObjBuilder b; b.append("x", "a\0b", 4); - BSONObj o = b.obj(); - ASSERT_EQUALS("{ \"x\" : \"a\\u0000b\" }", o.jsonString()); + + checkJsonStringEach({{b.done(), "{ \"x\" : \"a\\u0000b\" }"}}); } TEST(JsonStringTest, AllTypesTest) { @@ -439,7 +529,9 @@ TEST(JsonStringTest, AllTypesTest) { b.appendMaxKey("v"); BSONObj o = b.obj(); - o.jsonString(); + o.jsonString(ExtendedCanonicalV2_0_0); + o.jsonString(ExtendedRelaxedV2_0_0); + o.jsonString(LegacyStrict); } } // namespace JsonStringTests @@ -468,8 +560,9 @@ void checkEquivalence(const std::string& json, const BSONObj& bson) { ASSERT(fromjson(json).valid(BSONVersion::kLatest)); 
assertEquals(json, bson, fromjson(json), "mode: json-to-bson"); assertEquals(json, bson, fromjson(tojson(bson)), "mode: <default>"); - assertEquals(json, bson, fromjson(tojson(bson, Strict)), "mode: strict"); - assertEquals(json, bson, fromjson(tojson(bson, TenGen)), "mode: tengen"); + assertEquals(json, bson, fromjson(tojson(bson, LegacyStrict)), "mode: strict"); + assertEquals(json, bson, fromjson(tojson(bson, ExtendedCanonicalV2_0_0)), "mode: canonical"); + assertEquals(json, bson, fromjson(tojson(bson, ExtendedRelaxedV2_0_0)), "mode: relaxed"); } void checkRejection(const std::string& json) { diff --git a/src/mongo/embedded/stitch_support/stitch_support_test.cpp b/src/mongo/embedded/stitch_support/stitch_support_test.cpp index d7e3621bd59..ed9001c20e9 100644 --- a/src/mongo/embedded/stitch_support/stitch_support_test.cpp +++ b/src/mongo/embedded/stitch_support/stitch_support_test.cpp @@ -86,7 +86,8 @@ protected: */ auto fromBSONForAPI(const uint8_t* bson) { return mongo::tojson( - mongo::BSONObj(static_cast<const char*>(static_cast<const void*>(bson)))); + mongo::BSONObj(static_cast<const char*>(static_cast<const void*>(bson))), + mongo::JsonStringFormat::LegacyStrict); } auto checkMatch(const char* filterJSON, diff --git a/src/mongo/logv2/bson_formatter.cpp b/src/mongo/logv2/bson_formatter.cpp index 028ecf10cce..2994951b9c9 100644 --- a/src/mongo/logv2/bson_formatter.cpp +++ b/src/mongo/logv2/bson_formatter.cpp @@ -29,7 +29,6 @@ #include "mongo/logv2/bson_formatter.h" -#include <boost/container/small_vector.hpp> #include <boost/log/attributes/value_extraction.hpp> #include <boost/log/utility/formatting_ostream.hpp> @@ -41,6 +40,7 @@ #include "mongo/logv2/log_component.h" #include "mongo/logv2/log_severity.h" #include "mongo/logv2/log_tag.h" +#include "mongo/logv2/name_extractor.h" #include "mongo/logv2/named_arg_formatter.h" #include "mongo/util/time_support.h" @@ -50,17 +50,6 @@ namespace mongo { namespace logv2 { namespace { -struct NameExtractor { - 
template <typename T> - void operator()(StringData name, const T& value) { - name_args.push_back(fmt::internal::make_arg<fmt::format_context>(name)); - } - - boost::container::small_vector<fmt::basic_format_arg<fmt::format_context>, - constants::kNumStaticAttrs> - name_args; -}; - struct BSONValueExtractor { BSONValueExtractor(BSONObjBuilder& builder) : _builder(builder.subobjStart(constants::kAttributesFieldName)) {} @@ -126,7 +115,7 @@ void BSONFormatter::operator()(boost::log::record_view const& rec, builder.append(constants::kStableIdFieldName, stable_id); } - NameExtractor nameExtractor; + detail::NameExtractor nameExtractor; attrs.apply(nameExtractor); // Insert the attribute names back into the message string using a special formatter @@ -134,8 +123,8 @@ void BSONFormatter::operator()(boost::log::record_view const& rec, fmt::vformat_to<detail::NamedArgFormatter, char>( buffer, extract<StringData>(attributes::message(), rec).get().toString(), - fmt::basic_format_args<fmt::format_context>(nameExtractor.name_args.data(), - nameExtractor.name_args.size())); + fmt::basic_format_args<fmt::format_context>(nameExtractor.nameArgs.data(), + nameExtractor.nameArgs.size())); builder.append(constants::kMessageFieldName, fmt::to_string(buffer)); if (!attrs.empty()) { diff --git a/src/mongo/logv2/constants.h b/src/mongo/logv2/constants.h index df63a757c7a..3cebb6fba04 100644 --- a/src/mongo/logv2/constants.h +++ b/src/mongo/logv2/constants.h @@ -35,9 +35,6 @@ namespace mongo::logv2::constants { // memory. 
constexpr size_t kNumStaticAttrs = 16; -// Allocate extra space to fit some escape sequences -constexpr size_t kReservedSpaceForEscaping = 16; - // Field names used in the JSON and BSON formatter constexpr StringData kTimestampFieldName = "t"_sd; constexpr StringData kSeverityFieldName = "s"_sd; diff --git a/src/mongo/logv2/json_formatter.cpp b/src/mongo/logv2/json_formatter.cpp index b2d6bcdccf4..d45d7ece16d 100644 --- a/src/mongo/logv2/json_formatter.cpp +++ b/src/mongo/logv2/json_formatter.cpp @@ -42,6 +42,7 @@ #include "mongo/logv2/log_component.h" #include "mongo/logv2/log_severity.h" #include "mongo/logv2/log_tag.h" +#include "mongo/logv2/name_extractor.h" #include "mongo/logv2/named_arg_formatter.h" #include "mongo/util/time_support.h" @@ -50,16 +51,21 @@ namespace mongo::logv2 { namespace { struct JSONValueExtractor { + JSONValueExtractor(fmt::memory_buffer& buffer) : _buffer(buffer) {} + void operator()(StringData name, CustomAttributeValue const& val) { if (val.BSONAppend) { BSONObjBuilder builder; val.BSONAppend(builder, name); // This is a JSON subobject, no quotes needed - storeUnquoted( - name, builder.done().getField(name).jsonString(JsonStringFormat::Strict, false)); + storeUnquoted(name); + builder.done().getField(name).jsonStringBuffer( + JsonStringFormat::ExtendedRelaxedV2_0_0, false, 0, _buffer); } else if (val.toBSON) { // This is a JSON subobject, no quotes needed - storeUnquoted(name, val.toBSON().jsonString()); + storeUnquoted(name); + val.toBSON().jsonStringBuffer( + JsonStringFormat::ExtendedRelaxedV2_0_0, 0, false, _buffer); } else { // This is a string, surround value with quotes storeQuoted(name, val.toString()); @@ -68,7 +74,8 @@ struct JSONValueExtractor { void operator()(StringData name, const BSONObj* val) { // This is a JSON subobject, no quotes needed - storeUnquoted(name, val->jsonString()); + storeUnquoted(name); + val->jsonStringBuffer(JsonStringFormat::ExtendedRelaxedV2_0_0, 0, false, _buffer); } void operator()(StringData 
name, StringData value) { @@ -77,35 +84,29 @@ struct JSONValueExtractor { template <typename T> void operator()(StringData name, const T& value) { - storeUnquoted(name, value); + storeUnquotedValue(name, value); } - fmt::memory_buffer buffer; - boost::container::small_vector<fmt::basic_format_arg<fmt::format_context>, - constants::kNumStaticAttrs> - nameArgs; private: - template <typename T> - void storeUnquoted(StringData name, const T& value) { - // The first {} is for the member separator, added by storeImpl() - storeImpl(R"({}"{}":{})", name, value); + void storeUnquoted(StringData name) { + fmt::format_to(_buffer, R"({}"{}":)", _separator, name); + _separator = ","_sd; } - template <typename T> - void storeQuoted(StringData name, const T& value) { - // The first {} is for the member separator, added by storeImpl() - storeImpl(R"({}"{}":"{}")", name, value); + void storeUnquotedValue(StringData name, const T& value) { + fmt::format_to(_buffer, R"({}"{}":{})", _separator, name, value); + _separator = ","_sd; } template <typename T> - void storeImpl(const char* fmt_str, StringData name, const T& value) { - nameArgs.push_back(fmt::internal::make_arg<fmt::format_context>(name)); - fmt::format_to(buffer, fmt_str, _separator, name, value); + void storeQuoted(StringData name, const T& value) { + fmt::format_to(_buffer, R"({}"{}":"{}")", _separator, name, value); _separator = ","_sd; } + fmt::memory_buffer& _buffer; StringData _separator = ""_sd; }; } // namespace @@ -117,24 +118,12 @@ void JSONFormatter::operator()(boost::log::record_view const& rec, // Build a JSON object for the user attributes. 
const auto& attrs = extract<TypeErasedAttributeStorage>(attributes::attributes(), rec).get(); - JSONValueExtractor extractor; - attrs.apply(extractor); - std::string id; auto stable_id = extract<StringData>(attributes::stableId(), rec).get(); if (!stable_id.empty()) { id = fmt::format("\"{}\":\"{}\",", constants::kStableIdFieldName, stable_id); } - std::string message; - fmt::memory_buffer buffer; - fmt::vformat_to<detail::NamedArgFormatter, char>( - buffer, - extract<StringData>(attributes::message(), rec).get().toString(), - fmt::basic_format_args<fmt::format_context>(extractor.nameArgs.data(), - extractor.nameArgs.size())); - message = fmt::to_string(buffer); - StringData severity = extract<LogSeverity>(attributes::severity(), rec).get().toStringDataCompact(); StringData component = @@ -142,56 +131,81 @@ void JSONFormatter::operator()(boost::log::record_view const& rec, std::string tag; LogTag tags = extract<LogTag>(attributes::tags(), rec).get(); if (tags != LogTag::kNone) { - tag = fmt::format(",\"{}\":{}", - constants::kTagsFieldName, - tags.toBSON().jsonString(JsonStringFormat::Strict, 0, true)); + tag = + fmt::format(",\"{}\":{}", + constants::kTagsFieldName, + tags.toBSON().jsonString(JsonStringFormat::ExtendedRelaxedV2_0_0, 0, true)); } - strm << fmt::format( - R"({{)" - R"("{}":{{"$date":"{}"}},)" // timestamp - R"("{}":"{}"{: <{}})" // severity with padding for the comma - R"("{}":"{}"{: <{}})" // component with padding for the comma - R"("{}":"{}",)" // context - R"({})" // optional stable id - R"("{}":"{}")" // message - R"({})", // optional attribute key - // timestamp - constants::kTimestampFieldName, - dateToISOStringUTC(extract<Date_t>(attributes::timeStamp(), rec).get()), - // severity, left align the comma and add padding to create fixed column width - constants::kSeverityFieldName, - severity, - ",", - 3 - severity.size(), - // component, left align the comma and add padding to create fixed column width - constants::kComponentFieldName, - 
component, - ",", - 9 - component.size(), - // context - constants::kContextFieldName, - extract<StringData>(attributes::threadName(), rec).get(), - // stable id - id, - // message - constants::kMessageFieldName, - message, - // attribute field name and opening brace - attrs.empty() ? "" : fmt::format(R"(,"{}":{{)", constants::kAttributesFieldName)); + fmt::memory_buffer buffer; + + // Put all fields up until the message value + fmt::format_to(buffer, + R"({{)" + R"("{}":{{"$date":"{}"}},)" // timestamp + R"("{}":"{}"{: <{}})" // severity with padding for the comma + R"("{}":"{}"{: <{}})" // component with padding for the comma + R"("{}":"{}",)" // context + R"({})" // optional stable id + R"("{}":")", // message + // timestamp + constants::kTimestampFieldName, + dateToISOStringUTC(extract<Date_t>(attributes::timeStamp(), rec).get()), + // severity, left align the comma and add padding to create fixed column width + constants::kSeverityFieldName, + severity, + ",", + 3 - severity.size(), + // component, left align the comma and add padding to create fixed column width + constants::kComponentFieldName, + component, + ",", + 9 - component.size(), + // context + constants::kContextFieldName, + extract<StringData>(attributes::threadName(), rec).get(), + // stable id + id, + // message + constants::kMessageFieldName); + + // Insert the attribute names back into the message string using a special formatter and format + // into buffer + detail::NameExtractor nameExtractor; + attrs.apply(nameExtractor); + + fmt::vformat_to<detail::NamedArgFormatter, char>( + buffer, + extract<StringData>(attributes::message(), rec).get().toString(), + fmt::basic_format_args<fmt::format_context>(nameExtractor.nameArgs.data(), + nameExtractor.nameArgs.size())); + + if (attrs.empty()) { + // If no attributes we can just close the message string + buffer.push_back('"'); + } else { + // otherwise, add attribute field name and opening brace + fmt::format_to(buffer, R"(","{}":{{)", 
constants::kAttributesFieldName); + } if (!attrs.empty()) { - // comma separated list of attributes - strm << fmt::to_string(extractor.buffer); + // comma separated list of attributes (no opening/closing brace are added here) + JSONValueExtractor extractor(buffer); + attrs.apply(extractor); } - strm << fmt::format(R"({})" // optional attribute closing - R"({})" // optional tags - R"(}})", - // closing brace - attrs.empty() ? "" : "}", - // tags - tag); + // Add remaining fields + fmt::format_to(buffer, + R"({})" // optional attribute closing + R"({})" // optional tags + R"(}})", + // closing brace + attrs.empty() ? "" : "}", + // tags + tag); + + // Write final JSON object to output stream + strm.write(buffer.data(), buffer.size()); } } // namespace mongo::logv2 diff --git a/src/mongo/logv2/log_test_v2.cpp b/src/mongo/logv2/log_test_v2.cpp index 0dd15cdd98c..2b8550e137b 100644 --- a/src/mongo/logv2/log_test_v2.cpp +++ b/src/mongo/logv2/log_test_v2.cpp @@ -300,7 +300,8 @@ TEST_F(LogTestV2, Types) { builder.append("str"_sd, str_data); BSONObj bsonObj = builder.obj(); LOGV2("bson {}", "name"_attr = bsonObj); - ASSERT(text.back() == std::string("bson ") + bsonObj.jsonString()); + ASSERT(text.back() == + std::string("bson ") + bsonObj.jsonString(JsonStringFormat::ExtendedRelaxedV2_0_0)); ASSERT(mongo::fromjson(json.back()) .getField(kAttributesFieldName) .Obj() @@ -448,7 +449,8 @@ TEST_F(LogTestV2, TextFormat) { ASSERT(lines.back().rfind(t.toString() + " custom formatting") != std::string::npos); LOGV2("{} bson", "name"_attr = t.toBSON()); - ASSERT(lines.back().rfind(t.toBSON().jsonString() + " bson") != std::string::npos); + ASSERT(lines.back().rfind(t.toBSON().jsonString(JsonStringFormat::ExtendedRelaxedV2_0_0) + + " bson") != std::string::npos); TypeWithoutBSON t2(1.0, 2.0); LOGV2("{} custom formatting, no bson", "name"_attr = t2); diff --git a/src/mongo/logv2/name_extractor.h b/src/mongo/logv2/name_extractor.h new file mode 100644 index 00000000000..a4d75395934 --- 
/dev/null +++ b/src/mongo/logv2/name_extractor.h @@ -0,0 +1,52 @@ +/** + * Copyright (C) 2019-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#pragma once + +#include "mongo/base/string_data.h" +#include "mongo/logv2/constants.h" + +#include <boost/container/small_vector.hpp> +#include <boost/optional.hpp> +#include <fmt/format.h> + +namespace mongo::logv2::detail { + +struct NameExtractor { + template <typename T> + void operator()(StringData name, const T& value) { + nameArgs.push_back(fmt::internal::make_arg<fmt::format_context>(name)); + } + + boost::container::small_vector<fmt::basic_format_arg<fmt::format_context>, + constants::kNumStaticAttrs> + nameArgs; +}; + +} // namespace mongo::logv2::detail diff --git a/src/mongo/logv2/plain_formatter.cpp b/src/mongo/logv2/plain_formatter.cpp index 82caf3ecf14..edc52c2e4d3 100644 --- a/src/mongo/logv2/plain_formatter.cpp +++ b/src/mongo/logv2/plain_formatter.cpp @@ -33,7 +33,7 @@ #include "mongo/logv2/attribute_storage.h" #include "mongo/logv2/attributes.h" #include "mongo/logv2/constants.h" -#include "mongo/logv2/string_escape.h" +#include "mongo/util/str_escape.h" #include <boost/container/small_vector.hpp> #include <boost/log/attributes/value_extraction.hpp> @@ -48,17 +48,17 @@ namespace { struct TextValueExtractor { void operator()(StringData name, CustomAttributeValue const& val) { - _storage.push_back(escapeForText(val.toString())); + _storage.push_back(str::escapeForText(val.toString())); operator()(name, _storage.back()); } void operator()(StringData name, const BSONObj* val) { - _storage.push_back(val->jsonString()); + _storage.push_back(val->jsonString(JsonStringFormat::ExtendedRelaxedV2_0_0)); operator()(name, _storage.back()); } void operator()(StringData name, StringData val) { - _storage.push_back(escapeForText(val)); + _storage.push_back(str::escapeForText(val)); operator()(name, _storage.back()); } diff --git a/src/mongo/logv2/string_escape.cpp b/src/mongo/logv2/string_escape.cpp deleted file mode 100644 index c270c031668..00000000000 --- a/src/mongo/logv2/string_escape.cpp +++ /dev/null @@ -1,427 +0,0 @@ -/** - * Copyright (C) 
2019-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * <http://www.mongodb.com/licensing/server-side-public-license>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include "mongo/logv2/string_escape.h" - -#include "mongo/logv2/constants.h" - -#include <algorithm> -#include <array> -#include <iterator> - -namespace mongo::logv2 { -namespace { -constexpr char kHexChar[] = "0123456789abcdef"; - -// 'singleHandler' Function to write a valid single byte UTF-8 sequence with desired escaping. -// 'singleEscaper' Function to write a byte of invalid UTF-8 encoding -// 'twoEscaper' Function to write a valid two byte UTF-8 sequence with desired escaping, for C1 -// control codes. 
-// All these functions take a function object as their first parameter to perform the -// writing of any escaped data. This function expects the number of handled bytes as its first -// parameter and the corresponding escaped string as the second. They are templates to they can be -// inlined. -template <typename SingleByteHandler, typename SingleByteEscaper, typename TwoByteEscaper> -std::string escape(StringData str, - SingleByteHandler singleHandler, - SingleByteEscaper singleEscaper, - TwoByteEscaper twoEscaper) { - std::string escaped; - // If input string is over the SSO size and we're going to need to allocate memory, add some - // extra to fit a couple of eventual escape sequences. - if (str.size() > escaped.capacity()) - escaped.reserve(str.size() + constants::kReservedSpaceForEscaping); - - // The range [begin, it) contains input that does not need to be escaped and that has not been - // written to output yet. - // The range [it end) contains remaining input to scan 'begin' is pointing to the beginning of - // the input that has not yet been written to 'escaped'. - // 'it' is pointing to the beginning of the unicode code point we're currently processing in the - // while-loop below. 'end' is the end of the input sequence. - auto begin = str.begin(); - auto it = str.begin(); - auto end = str.end(); - - // Writes an escaped sequence to output after flushing pending input that does not need to be - // escaped. 'it' is assumed to be at the beginning of the input sequence represented by the - // escaped data. 
- // 'numHandled' the number of bytes of unescaped data being written escaped in 'escapeSequence' - auto flushAndWrite = [&](size_t numHandled, StringData escapeSequence) { - // Flush range of unmodified input - escaped.append(begin, it); - begin = it + numHandled; - - // Write escaped data - escaped.append(escapeSequence.rawData(), escapeSequence.size()); - }; - - auto isValidCodePoint = [&](auto pos, int len) { - return std::distance(pos, end) >= len && - std::all_of(pos + 1, pos + len, [](uint8_t c) { return (c >> 6) == 0b10; }); - }; - - // Helper function to write a valid one byte UTF-8 sequence from the input stream - auto writeValid1Byte = [&]() { singleHandler(flushAndWrite, *it); }; - - // Helper function to write a valid two byte UTF-8 sequence from the input stream - auto writeValid2Byte = [&]() { - uint8_t first = *it; - uint8_t second = *(it + 1); - - if (MONGO_unlikely(first == 0xc2 && second >= 0x80 && second < 0xa0)) { - twoEscaper(flushAndWrite, first, second); - } - }; - - // Helper function to write an invalid UTF-8 sequence from the input stream - // Will try and write up to num bytes but bail if we reach the end of the input. - // Updates the position of 'it'. 
- auto writeInvalid = [&](uint8_t c) { singleEscaper(flushAndWrite, c); }; - - - while (it != end) { - uint8_t c = *it; - bool bit7 = (c >> 7) & 1; - if (MONGO_likely(!bit7)) { - writeValid1Byte(); - ++it; - continue; - } - - bool bit6 = (c >> 6) & 1; - if (MONGO_unlikely(!bit6)) { - writeInvalid(c); - ++it; - continue; - } - - bool bit5 = (c >> 5) & 1; - if (!bit5) { - // 2 byte sequence - if (MONGO_likely(isValidCodePoint(it, 2))) { - writeValid2Byte(); - it += 2; - } else { - writeInvalid(c); - ++it; - } - - continue; - } - - bool bit4 = (c >> 4) & 1; - if (!bit4) { - // 3 byte sequence - if (MONGO_likely(isValidCodePoint(it, 3))) { - it += 3; - } else { - writeInvalid(c); - ++it; - } - continue; - } - - bool bit3 = (c >> 3) & 1; - if (bit3) { - writeInvalid(c); - ++it; - continue; - } - - // 4 byte sequence - if (MONGO_likely(isValidCodePoint(it, 4))) { - it += 4; - } else { - writeInvalid(c); - ++it; - } - } - // Write last block - escaped.append(begin, it); - return escaped; -} -} // namespace -std::string escapeForText(StringData str) { - return escape(str, - [](const auto& writer, uint8_t unescaped) { - switch (unescaped) { - case '\0': - writer(1, "\\0"_sd); - break; - case 0x01: - writer(1, "\\x01"_sd); - break; - case 0x02: - writer(1, "\\x02"_sd); - break; - case 0x03: - writer(1, "\\x03"_sd); - break; - case 0x04: - writer(1, "\\x04"_sd); - break; - case 0x05: - writer(1, "\\x05"_sd); - break; - case 0x06: - writer(1, "\\x06"_sd); - break; - case 0x07: - writer(1, "\\a"_sd); - break; - case 0x08: - writer(1, "\\b"_sd); - break; - case 0x09: - writer(1, "\\t"_sd); - break; - case 0x0a: - writer(1, "\\n"_sd); - break; - case 0x0b: - writer(1, "\\v"_sd); - break; - case 0x0c: - writer(1, "\\f"_sd); - break; - case 0x0d: - writer(1, "\\r"_sd); - break; - case 0x0e: - writer(1, "\\x0e"_sd); - break; - case 0x0f: - writer(1, "\\x0f"_sd); - break; - case 0x10: - writer(1, "\\x10"_sd); - break; - case 0x11: - writer(1, "\\x11"_sd); - break; - case 0x12: - 
writer(1, "\\x12"_sd); - break; - case 0x13: - writer(1, "\\x13"_sd); - break; - case 0x14: - writer(1, "\\x14"_sd); - break; - case 0x15: - writer(1, "\\x15"_sd); - break; - case 0x16: - writer(1, "\\x16"_sd); - break; - case 0x17: - writer(1, "\\x17"_sd); - break; - case 0x18: - writer(1, "\\x18"_sd); - break; - case 0x19: - writer(1, "\\x19"_sd); - break; - case 0x1a: - writer(1, "\\x1a"_sd); - break; - case 0x1b: - writer(1, "\\e"_sd); - break; - case 0x1c: - writer(1, "\\x1c"_sd); - break; - case 0x1d: - writer(1, "\\x1d"_sd); - break; - case 0x1e: - writer(1, "\\x1e"_sd); - break; - case 0x1f: - writer(1, "\\x1f"_sd); - break; - case '\\': - writer(1, "\\\\"_sd); - break; - case 0x7f: - writer(1, "\\x7f"_sd); - break; - default: - break; - } - }, - [](const auto& writer, uint8_t unescaped) { - std::array<char, 4> buffer = { - '\\', 'x', kHexChar[unescaped >> 4], kHexChar[unescaped & 0xf]}; - writer(1, StringData(buffer.data(), buffer.size())); - }, - [](const auto& writer, uint8_t first, uint8_t second) { - std::array<char, 8> buffer = {'\\', - 'x', - kHexChar[first >> 4], - kHexChar[first & 0xf], - '\\', - 'x', - kHexChar[second >> 4], - kHexChar[second & 0xf]}; - writer(2, StringData(buffer.data(), buffer.size())); - } - - ); -} -std::string escapeForJSON(StringData str) { - return escape(str, - [](const auto& writer, uint8_t unescaped) { - switch (unescaped) { - case '\0': - writer(1, "\\u0000"_sd); - break; - case 0x01: - writer(1, "\\u0001"_sd); - break; - case 0x02: - writer(1, "\\u0002"_sd); - break; - case 0x03: - writer(1, "\\u0003"_sd); - break; - case 0x04: - writer(1, "\\u0004"_sd); - break; - case 0x05: - writer(1, "\\u0005"_sd); - break; - case 0x06: - writer(1, "\\u0006"_sd); - break; - case 0x07: - writer(1, "\\u0007"_sd); - break; - case 0x08: - writer(1, "\\b"_sd); - break; - case 0x09: - writer(1, "\\t"_sd); - break; - case 0x0a: - writer(1, "\\n"_sd); - break; - case 0x0b: - writer(1, "\\u000b"_sd); - break; - case 0x0c: - writer(1, 
"\\f"_sd); - break; - case 0x0d: - writer(1, "\\r"_sd); - break; - case 0x0e: - writer(1, "\\u000e"_sd); - break; - case 0x0f: - writer(1, "\\u000f"_sd); - break; - case 0x10: - writer(1, "\\u0010"_sd); - break; - case 0x11: - writer(1, "\\u0011"_sd); - break; - case 0x12: - writer(1, "\\u0012"_sd); - break; - case 0x13: - writer(1, "\\u0013"_sd); - break; - case 0x14: - writer(1, "\\u0014"_sd); - break; - case 0x15: - writer(1, "\\u0015"_sd); - break; - case 0x16: - writer(1, "\\u0016"_sd); - break; - case 0x17: - writer(1, "\\u0017"_sd); - break; - case 0x18: - writer(1, "\\u0018"_sd); - break; - case 0x19: - writer(1, "\\u0019"_sd); - break; - case 0x1a: - writer(1, "\\u001a"_sd); - break; - case 0x1b: - writer(1, "\\u001b"_sd); - break; - case 0x1c: - writer(1, "\\u000c"_sd); - break; - case 0x1d: - writer(1, "\\u001d"_sd); - break; - case 0x1e: - writer(1, "\\u001e"_sd); - break; - case 0x1f: - writer(1, "\\u001f"_sd); - break; - case '\\': - writer(1, "\\\\"_sd); - break; - case '\"': - writer(1, "\\\""_sd); - break; - case 0x7f: - writer(1, "\\u007f"_sd); - break; - default: - break; - } - }, - [](const auto& writer, uint8_t unescaped) { - std::array<char, 6> buffer = { - '\\', 'u', '0', '0', kHexChar[unescaped >> 4], kHexChar[unescaped & 0xf]}; - writer(1, StringData(buffer.data(), buffer.size())); - }, - [](const auto& writer, uint8_t first, uint8_t second) { - std::array<char, 6> buffer = {'\\', - 'u', - kHexChar[first >> 4], - kHexChar[first & 0xf], - kHexChar[second >> 4], - kHexChar[second & 0xf]}; - writer(2, StringData(buffer.data(), buffer.size())); - }); -} -} // namespace mongo::logv2 diff --git a/src/mongo/scripting/utils.cpp b/src/mongo/scripting/utils.cpp index 3c626583aa6..51b084c44f0 100644 --- a/src/mongo/scripting/utils.cpp +++ b/src/mongo/scripting/utils.cpp @@ -62,7 +62,7 @@ static BSONObj native_tostrictjson(const mongo::BSONObj& args, void* data) { if (args.nFields() == 2) { prettyPrint = args["1"].boolean(); } - return BSON("" << 
tojson(args.firstElement().embeddedObject(), Strict, prettyPrint)); + return BSON("" << tojson(args.firstElement().embeddedObject(), LegacyStrict, prettyPrint)); } // --------------------------------- diff --git a/src/mongo/util/latch_analyzer.cpp b/src/mongo/util/latch_analyzer.cpp index 318090b8bc8..9eef3fa88c4 100644 --- a/src/mongo/util/latch_analyzer.cpp +++ b/src/mongo/util/latch_analyzer.cpp @@ -345,7 +345,8 @@ void LatchAnalyzer::dump() { auto obj = bob.done(); log().setIsTruncatable(false) << "=====LATCHES=====\n" - << obj.jsonString() << "\n===END LATCHES==="; + << obj.jsonString(JsonStringFormat::LegacyStrict) + << "\n===END LATCHES==="; } LatchAnalyzerDisabledBlock::LatchAnalyzerDisabledBlock() { diff --git a/src/mongo/util/stacktrace_test.cpp b/src/mongo/util/stacktrace_test.cpp index ac18b8041a3..74f7d5cc646 100644 --- a/src/mongo/util/stacktrace_test.cpp +++ b/src/mongo/util/stacktrace_test.cpp @@ -145,7 +145,7 @@ public: private: void doPrint(std::ostream& os) const override { - os << tojson(obj, Strict, /*pretty=*/true); + os << tojson(obj, ExtendedRelaxedV2_0_0, /*pretty=*/true); } const BSONObj& obj; }; diff --git a/src/mongo/util/str_escape.cpp b/src/mongo/util/str_escape.cpp new file mode 100644 index 00000000000..2a18a943f1a --- /dev/null +++ b/src/mongo/util/str_escape.cpp @@ -0,0 +1,437 @@ +/** + * Copyright (C) 2019-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. 
+ * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/util/str_escape.h" + +#include <algorithm> +#include <array> +#include <iterator> + +namespace mongo::str { +namespace { +constexpr char kHexChar[] = "0123456789abcdef"; + +// 'singleHandler' Function to write a valid single byte UTF-8 sequence with desired escaping. +// 'singleEscaper' Function to write a byte of invalid UTF-8 encoding +// 'twoEscaper' Function to write a valid two byte UTF-8 sequence with desired escaping, for C1 +// control codes. +// All these functions take a function object as their first parameter to perform the +// writing of any escaped data. This function expects the number of handled bytes as its first +// parameter and the corresponding escaped string as the second. They are templates so they can be +// inlined. +template <typename SingleByteHandler, typename SingleByteEscaper, typename TwoByteEscaper> +void escape(fmt::memory_buffer& buffer, + StringData str, + SingleByteHandler singleHandler, + SingleByteEscaper singleEscaper, + TwoByteEscaper twoEscaper) { + // The range [begin, it) contains input that does not need to be escaped and that has not been + // written to output yet. 
+ // The range [it, end) contains remaining input to scan. 'begin' is pointing to the beginning of + // the input that has not yet been written to 'buffer'. + // 'it' is pointing to the beginning of the unicode code point we're currently processing in the + // while-loop below. 'end' is the end of the input sequence. + auto begin = str.begin(); + auto it = str.begin(); + auto end = str.end(); + + // Writes an escaped sequence to output after flushing pending input that does not need to be + // escaped. 'it' is assumed to be at the beginning of the input sequence represented by the + // escaped data. + // 'numHandled' the number of bytes of unescaped data being written escaped in 'escapeSequence' + auto flushAndWrite = [&](size_t numHandled, StringData escapeSequence) { + // Flush range of unmodified input + buffer.append(begin, it); + begin = it + numHandled; + + // Write escaped data + buffer.append(escapeSequence.rawData(), escapeSequence.rawData() + escapeSequence.size()); + }; + + // True when at least 'len' bytes remain starting at 'pos' and the 'len' - 1 bytes following + // the lead byte are all UTF-8 continuation bytes (10xxxxxx). + auto isValidCodePoint = [&](auto pos, int len) { + return std::distance(pos, end) >= len && + std::all_of(pos + 1, pos + len, [](uint8_t c) { return (c >> 6) == 0b10; }); + }; + + // Helper function to write a valid one byte UTF-8 sequence from the input stream + auto writeValid1Byte = [&]() { singleHandler(flushAndWrite, *it); }; + + // Helper function to write a valid two byte UTF-8 sequence from the input stream + // Only C1 control codes (0xc2 followed by 0x80-0x9f) are handed to 'twoEscaper'; any other + // two byte sequence is left in the pending unescaped range. 
+ auto writeValid2Byte = [&]() { + uint8_t first = *it; + uint8_t second = *(it + 1); + + if (MONGO_unlikely(first == 0xc2 && second >= 0x80 && second < 0xa0)) { + twoEscaper(flushAndWrite, first, second); + } + }; + + // Helper function to write a single byte of invalid UTF-8 from the input stream, escaped by + // 'singleEscaper'. It does not advance 'it'; the loop below does that after each call. 
+ auto writeInvalid = [&](uint8_t c) { singleEscaper(flushAndWrite, c); }; + + + while (it != end) { + uint8_t c = *it; + bool bit7 = (c >> 7) & 1; + if (MONGO_likely(!bit7)) { + writeValid1Byte(); + ++it; + continue; + } + + bool bit6 = (c >> 6) & 1; + // 10xxxxxx: a continuation byte where a lead byte is expected, treated as invalid + if (MONGO_unlikely(!bit6)) { + writeInvalid(c); + ++it; + continue; + } + + bool bit5 = (c >> 5) & 1; + if (!bit5) { + // 2 byte sequence + if (MONGO_likely(isValidCodePoint(it, 2))) { + writeValid2Byte(); + it += 2; + } else { + writeInvalid(c); + ++it; + } + + continue; + } + + bool bit4 = (c >> 4) & 1; + if (!bit4) { + // 3 byte sequence, skipped over unescaped when valid + if (MONGO_likely(isValidCodePoint(it, 3))) { + it += 3; + } else { + writeInvalid(c); + ++it; + } + continue; + } + + bool bit3 = (c >> 3) & 1; + // 11111xxx: not accepted as a lead byte, treated as invalid + if (bit3) { + writeInvalid(c); + ++it; + continue; + } + + // 4 byte sequence, skipped over unescaped when valid + if (MONGO_likely(isValidCodePoint(it, 4))) { + it += 4; + } else { + writeInvalid(c); + ++it; + } + } + // Write last block + buffer.append(begin, it); +} +} // namespace +void escapeForText(fmt::memory_buffer& buffer, StringData str) { + auto singleByteHandler = [](const auto& writer, uint8_t unescaped) { + switch (unescaped) { + case '\0': + writer(1, "\\0"_sd); + break; + case 0x01: + writer(1, "\\x01"_sd); + break; + case 0x02: + writer(1, "\\x02"_sd); + break; + case 0x03: + writer(1, "\\x03"_sd); + break; + case 0x04: + writer(1, "\\x04"_sd); + break; + case 0x05: + writer(1, "\\x05"_sd); + break; + case 0x06: + writer(1, "\\x06"_sd); + break; + case 0x07: + writer(1, "\\a"_sd); + break; + case 0x08: + writer(1, "\\b"_sd); + break; + case 0x09: + writer(1, "\\t"_sd); + break; + case 0x0a: + writer(1, "\\n"_sd); + break; + case 0x0b: + writer(1, "\\v"_sd); + break; + case 0x0c: + writer(1, "\\f"_sd); + break; + case 0x0d: + writer(1, "\\r"_sd); + break; + case 0x0e: + writer(1, "\\x0e"_sd); + break; + case 0x0f: + writer(1, "\\x0f"_sd); + break; + case 0x10: + writer(1, "\\x10"_sd); + break; + case 0x11: + writer(1, "\\x11"_sd); + break; + case 0x12: 
+ writer(1, "\\x12"_sd); + break; + case 0x13: + writer(1, "\\x13"_sd); + break; + case 0x14: + writer(1, "\\x14"_sd); + break; + case 0x15: + writer(1, "\\x15"_sd); + break; + case 0x16: + writer(1, "\\x16"_sd); + break; + case 0x17: + writer(1, "\\x17"_sd); + break; + case 0x18: + writer(1, "\\x18"_sd); + break; + case 0x19: + writer(1, "\\x19"_sd); + break; + case 0x1a: + writer(1, "\\x1a"_sd); + break; + case 0x1b: + writer(1, "\\e"_sd); + break; + case 0x1c: + writer(1, "\\x1c"_sd); + break; + case 0x1d: + writer(1, "\\x1d"_sd); + break; + case 0x1e: + writer(1, "\\x1e"_sd); + break; + case 0x1f: + writer(1, "\\x1f"_sd); + break; + case '\\': + writer(1, "\\\\"_sd); + break; + case 0x7f: + writer(1, "\\x7f"_sd); + break; + default: + break; + } + }; + auto twoByteHandler = [](const auto& writer, uint8_t unescaped) { + std::array<char, 4> buffer = { + '\\', 'x', kHexChar[unescaped >> 4], kHexChar[unescaped & 0xf]}; + writer(1, StringData(buffer.data(), buffer.size())); + }; + auto twoByteEscaper = [](const auto& writer, uint8_t first, uint8_t second) { + std::array<char, 8> buffer = {'\\', + 'x', + kHexChar[first >> 4], + kHexChar[first & 0xf], + '\\', + 'x', + kHexChar[second >> 4], + kHexChar[second & 0xf]}; + writer(2, StringData(buffer.data(), buffer.size())); + }; + return escape(buffer, + str, + std::move(singleByteHandler), + std::move(twoByteHandler), + std::move(twoByteEscaper)); +} + +std::string escapeForText(StringData str) { + fmt::memory_buffer buffer; + escapeForText(buffer, str); + return fmt::to_string(buffer); +} + +// Appends 'str' to 'buffer' escaped for use inside a JSON string: ASCII control characters, +// DEL, backslash and double quote are escaped, bytes that are not valid UTF-8 are written as a +// six character unicode escape of the byte value, and C1 control codes (U+0080..U+009F) are +// written as a six character unicode escape of their code point. 
+void escapeForJSON(fmt::memory_buffer& buffer, StringData str) { + auto singleByteHandler = [](const auto& writer, uint8_t unescaped) { + switch (unescaped) { + case '\0': + writer(1, "\\u0000"_sd); + break; + case 0x01: + writer(1, "\\u0001"_sd); + break; + case 0x02: + writer(1, "\\u0002"_sd); + break; + case 0x03: + writer(1, "\\u0003"_sd); + break; + case 0x04: + writer(1, "\\u0004"_sd); + break; + case 0x05: + writer(1, 
"\\u0005"_sd); + break; + case 0x06: + writer(1, "\\u0006"_sd); + break; + case 0x07: + writer(1, "\\u0007"_sd); + break; + case 0x08: + writer(1, "\\b"_sd); + break; + case 0x09: + writer(1, "\\t"_sd); + break; + case 0x0a: + writer(1, "\\n"_sd); + break; + case 0x0b: + writer(1, "\\u000b"_sd); + break; + case 0x0c: + writer(1, "\\f"_sd); + break; + case 0x0d: + writer(1, "\\r"_sd); + break; + case 0x0e: + writer(1, "\\u000e"_sd); + break; + case 0x0f: + writer(1, "\\u000f"_sd); + break; + case 0x10: + writer(1, "\\u0010"_sd); + break; + case 0x11: + writer(1, "\\u0011"_sd); + break; + case 0x12: + writer(1, "\\u0012"_sd); + break; + case 0x13: + writer(1, "\\u0013"_sd); + break; + case 0x14: + writer(1, "\\u0014"_sd); + break; + case 0x15: + writer(1, "\\u0015"_sd); + break; + case 0x16: + writer(1, "\\u0016"_sd); + break; + case 0x17: + writer(1, "\\u0017"_sd); + break; + case 0x18: + writer(1, "\\u0018"_sd); + break; + case 0x19: + writer(1, "\\u0019"_sd); + break; + case 0x1a: + writer(1, "\\u001a"_sd); + break; + case 0x1b: + writer(1, "\\u001b"_sd); + break; + // 0x1c is U+001C, not U+000C (form feed) + case 0x1c: + writer(1, "\\u001c"_sd); + break; + case 0x1d: + writer(1, "\\u001d"_sd); + break; + case 0x1e: + writer(1, "\\u001e"_sd); + break; + case 0x1f: + writer(1, "\\u001f"_sd); + break; + case '\\': + writer(1, "\\\\"_sd); + break; + case '\"': + writer(1, "\\\""_sd); + break; + case 0x7f: + writer(1, "\\u007f"_sd); + break; + default: + break; + } + }; + auto twoByteHandler = [](const auto& writer, uint8_t unescaped) { + std::array<char, 6> buffer = { + '\\', 'u', '0', '0', kHexChar[unescaped >> 4], kHexChar[unescaped & 0xf]}; + writer(1, StringData(buffer.data(), buffer.size())); + }; + auto twoByteEscaper = [](const auto& writer, uint8_t /*first*/, uint8_t second) { + // Only invoked for C1 control codes (0xc2 followed by 0x80-0x9f), i.e. U+0080..U+009F. + // A JSON unicode escape carries the code point, not the UTF-8 bytes, and for this range + // the code point equals the value of the second byte. + std::array<char, 6> buffer = { + '\\', 'u', '0', '0', kHexChar[second >> 4], kHexChar[second & 0xf]}; + writer(2, StringData(buffer.data(), buffer.size())); + }; + return escape(buffer, + 
str, + std::move(singleByteHandler), + std::move(twoByteHandler), + std::move(twoByteEscaper)); +} +std::string escapeForJSON(StringData str) { + fmt::memory_buffer buffer; + escapeForJSON(buffer, str); + return fmt::to_string(buffer); +} +} // namespace mongo::str diff --git a/src/mongo/logv2/string_escape.h b/src/mongo/util/str_escape.h index 5cf392e32b6..47fe3d30060 100644 --- a/src/mongo/logv2/string_escape.h +++ b/src/mongo/util/str_escape.h @@ -31,9 +31,13 @@ #include "mongo/base/string_data.h" +#include <fmt/format.h> #include <string> -namespace mongo::logv2 { +namespace mongo::str { +void escapeForText(fmt::memory_buffer& buffer, StringData str); std::string escapeForText(StringData str); + +void escapeForJSON(fmt::memory_buffer& buffer, StringData str); std::string escapeForJSON(StringData str); -} // namespace mongo::logv2 +} // namespace mongo::str diff --git a/src/mongo/util/tla_plus_trace.cpp b/src/mongo/util/tla_plus_trace.cpp index d634a171a8f..9ab08e4a264 100644 --- a/src/mongo/util/tla_plus_trace.cpp +++ b/src/mongo/util/tla_plus_trace.cpp @@ -58,6 +58,6 @@ void logTlaPlusTraceEvent(const TlaPlusTraceEvent& event) { } invariant(afterTime > beforeTime, "Clock went backwards"); - log() << event.toBSON().jsonString(); + log() << event.toBSON().jsonString(JsonStringFormat::LegacyStrict); } } // namespace mongo |