diff options
author | Jess Balint <jbalint@gmail.com> | 2022-11-15 05:07:22 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-11-15 06:30:53 +0000 |
commit | 7be344b92d3af3fd8eb4542e594e084bed394fdc (patch) | |
tree | 8fb77406d0eb74fe46854c121454ddd7327cec3e | |
parent | 148c522ffefc93dcc8227579b583691335cd4f2f (diff) | |
download | mongo-7be344b92d3af3fd8eb4542e594e084bed394fdc.tar.gz |
SERVER-71292 Redact all sensitive information in query #8731
-rw-r--r-- | src/mongo/bson/bsonobj.cpp | 41 | ||||
-rw-r--r-- | src/mongo/bson/bsonobj.h | 4 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_telemetry.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/query/query_knobs.idl | 21 | ||||
-rw-r--r-- | src/mongo/db/query/telemetry.cpp | 113 | ||||
-rw-r--r-- | src/mongo/db/query/telemetry.h | 19 | ||||
-rw-r--r-- | src/mongo/db/query/telemetry_util.cpp | 16 |
7 files changed, 195 insertions, 24 deletions
diff --git a/src/mongo/bson/bsonobj.cpp b/src/mongo/bson/bsonobj.cpp index c462031bffd..88715dd68de 100644 --- a/src/mongo/bson/bsonobj.cpp +++ b/src/mongo/bson/bsonobj.cpp @@ -145,41 +145,56 @@ BSONObj BSONObj::getOwned(const BSONObj& obj) { return obj.getOwned(); } -BSONObj BSONObj::redact(bool onlyEncryptedFields) const { +BSONObj BSONObj::redact(bool onlyEncryptedFields, + std::function<std::string(const BSONElement&)> fieldNameRedactor) const { _validateUnownedSize(objsize()); // Helper to get an "internal function" to be able to do recursion struct redactor { - void appendRedactedElem(BSONObjBuilder& builder, const BSONElement& e, bool appendMask) { + void appendRedactedElem(BSONObjBuilder& builder, + const StringData& fieldNameString, + bool appendMask) { if (appendMask) { - builder.append(e.fieldNameStringData(), "###"_sd); + builder.append(fieldNameString, "###"_sd); } else { - builder.appendNull(e.fieldNameStringData()); + builder.appendNull(fieldNameString); } } void operator()(BSONObjBuilder& builder, const BSONObj& obj, bool appendMask, - bool onlyEncryptedFields) { + bool onlyEncryptedFields, + std::function<std::string(const BSONElement&)> fieldNameRedactor) { for (BSONElement e : obj) { + StringData fieldNameString; + // Temporarily allocated string that must live long enough to be copied by builder. + std::string tempString; + if (!fieldNameRedactor) { + fieldNameString = e.fieldNameStringData(); + } else { + tempString = fieldNameRedactor(e); + fieldNameString = {tempString}; + } if (e.type() == Object) { - BSONObjBuilder subBuilder = builder.subobjStart(e.fieldNameStringData()); - operator()(subBuilder, e.Obj(), appendMask, onlyEncryptedFields); + BSONObjBuilder subBuilder = builder.subobjStart(fieldNameString); + operator()( + subBuilder, e.Obj(), appendMask, onlyEncryptedFields, fieldNameRedactor); subBuilder.done(); } else if (e.type() == Array) { - BSONObjBuilder subBuilder = builder.subarrayStart(e.fieldNameStringData()); - operator()(subBuilder, e.Obj(), appendMask, onlyEncryptedFields); + BSONObjBuilder subBuilder = builder.subarrayStart(fieldNameString); + operator()( + subBuilder, e.Obj(), appendMask, onlyEncryptedFields, fieldNameRedactor); subBuilder.done(); } else { if (onlyEncryptedFields) { if (e.type() == BinData && e.binDataType() == BinDataType::Encrypt) { - appendRedactedElem(builder, e, appendMask); + appendRedactedElem(builder, fieldNameString, appendMask); } else { builder.append(e); } } else { - appendRedactedElem(builder, e, appendMask); + appendRedactedElem(builder, fieldNameString, appendMask); } } } @@ -188,7 +203,7 @@ BSONObj BSONObj::redact(bool onlyEncryptedFields) const { try { BSONObjBuilder builder; - redactor()(builder, *this, /*appendMask=*/true, onlyEncryptedFields); + redactor()(builder, *this, /*appendMask=*/true, onlyEncryptedFields, fieldNameRedactor); return builder.obj(); } catch (const ExceptionFor<ErrorCodes::BSONObjectTooLarge>&) { } @@ -198,7 +213,7 @@ BSONObj BSONObj::redact(bool onlyEncryptedFields) const { // we use BSONType::jstNull, which ensures the redacted object will not be larger than the // original. BSONObjBuilder builder; - redactor()(builder, *this, /*appendMask=*/false, onlyEncryptedFields); + redactor()(builder, *this, /*appendMask=*/false, onlyEncryptedFields, fieldNameRedactor); return builder.obj(); } diff --git a/src/mongo/bson/bsonobj.h b/src/mongo/bson/bsonobj.h index a0088c8b70d..83453bcc9e1 100644 --- a/src/mongo/bson/bsonobj.h +++ b/src/mongo/bson/bsonobj.h @@ -273,7 +273,9 @@ public: /** * @return a new full (and owned) redacted copy of the object. */ - BSONObj redact(bool onlyEncryptedFields = false) const; + BSONObj redact( + bool onlyEncryptedFields = false, + std::function<std::string(const BSONElement&)> fieldNameRedactor = nullptr) const; /** * Readable representation of a BSON object in an extended JSON-style notation. diff --git a/src/mongo/db/pipeline/document_source_telemetry.cpp b/src/mongo/db/pipeline/document_source_telemetry.cpp index 36acd596241..812261edc1d 100644 --- a/src/mongo/db/pipeline/document_source_telemetry.cpp +++ b/src/mongo/db/pipeline/document_source_telemetry.cpp @@ -101,8 +101,9 @@ void DocumentSourceTelemetry::buildTelemetryStoreIterator() { const auto partitionReadTime = Timestamp{Timestamp(Date_t::now().toMillisSinceEpoch() / 1000, 0)}; for (auto&& [key, metrics] : *partition) { - Document d{ - {{"key", key}, {"metrics", metrics.toBSON()}, {"asOf", partitionReadTime}}}; + Document d{{{"key", metrics.redactKey(key)}, + {"metrics", metrics.toBSON()}, + {"asOf", partitionReadTime}}}; _queue.push(std::move(d)); } }); diff --git a/src/mongo/db/query/query_knobs.idl b/src/mongo/db/query/query_knobs.idl index a4cdc297567..21e86666c2f 100644 --- a/src/mongo/db/query/query_knobs.idl +++ b/src/mongo/db/query/query_knobs.idl @@ -51,6 +51,16 @@ enums: # Force the Bonsai optimizer for all queries. kForceBonsai: "forceBonsai" + QueryTelemetryFieldNameRedactionStrategy: + description: "Enum for possible values of queryFieldNameRedactionStrategy." + type: string + values: + kNoRedactionStrategy: "none" + # Use the constant redaction strategy. + kConstantRedactionStrategy: "constant" + # Use a prefix of sha256 redaction strategy + kSha256RedactionStrategy: "sha256" + server_parameters: # @@ -973,6 +983,17 @@ server_parameters: validator: callback: telemetry_util::validateTelemetryStoreSize + internalQueryConfigureTelemetryFieldNameRedactionStrategy: + description: Choose between the "none" implementation which will not redact any fields names, the "constant" + implementation, which will replace all field names with a constant, and the "sha256" implementation, which + preserves the identify of a field. + set_at: [ startup, runtime ] + cpp_class: + name: QueryTelemetryControl + data: synchronized_value<QueryTelemetryFieldNameRedactionStrategyEnum> + default: + expr: QueryTelemetryFieldNameRedactionStrategyEnum::kConstantRedactionStrategy + # Note for adding additional query knobs: # diff --git a/src/mongo/db/query/telemetry.cpp b/src/mongo/db/query/telemetry.cpp index 7af325183f1..7b8c2c36c89 100644 --- a/src/mongo/db/query/telemetry.cpp +++ b/src/mongo/db/query/telemetry.cpp @@ -184,7 +184,6 @@ bool shouldCollect(const ServiceContext* serviceCtx) { if (!telemetryRateLimiter(serviceCtx)->handleRequestSlidingWindow()) { return false; } - // TODO SERVER-71244: check if it's a FLE collection here (maybe pass in the request) return true; } @@ -195,8 +194,10 @@ void addToFindKey(BSONObjBuilder& builder, const StringData& fieldName, const BS serializeBSONWhenNotEmpty(value.redact(false), fieldName, &builder); } -// Call this function from inside the redact() function on every BSONElement in the BSONObj. -void throwIfEncounteringFLEPayload(BSONElement e) { +/** + * Recognize FLE payloads in a query and throw an exception if found. + */ +void throwIfEncounteringFLEPayload(const BSONElement& e) { constexpr auto safeContentLabel = "__safeContent__"_sd; constexpr auto fieldpath = "$__safeContent__"_sd; if (e.type() == BSONType::Object) { @@ -204,12 +205,12 @@ void throwIfEncounteringFLEPayload(BSONElement e) { uassert(ErrorCodes::EncounteredFLEPayloadWhileRedacting, "Encountered __safeContent__, or an $_internalFle operator, which indicate a " "rewritten FLE2 query.", - fieldname == safeContentLabel || fieldname.startsWith("$_internalFle"_sd)); + fieldname != safeContentLabel && !fieldname.startsWith("$_internalFle"_sd)); } else if (e.type() == BSONType::String) { auto val = e.valueStringData(); uassert(ErrorCodes::EncounteredFLEPayloadWhileRedacting, "Encountered $__safeContent__ fieldpath, which indicates a rewritten FLE2 query.", - val == fieldpath); + val != fieldpath); } else if (e.type() == BSONType::BinData && e.isBinData(BinDataType::Encrypt)) { int len; auto data = e.binData(len); @@ -263,8 +264,104 @@ private: TelemetryStore::Partition _partitionLock; }; +/** + * Upon reading telemetry data, we redact some keys. This is the list. See + * TelemetryMetrics::redactKey(). + */ +const stdx::unordered_set<std::string> kKeysToRedact = {"pipeline", "find"}; + +std::string sha256FieldNameHasher(const BSONElement& e) { + auto&& fieldName = e.fieldNameStringData(); + auto hash = SHA256Block::computeHash({ConstDataRange(fieldName.rawData(), fieldName.size())}); + return hash.toString().substr(0, 12); +} + +std::string constantFieldNameHasher(const BSONElement& e) { + return {"###"}; +} + +/** + * Admittedly an abuse of the BSON redaction interface, we recognize FLE payloads here and avoid + * collecting telemetry for the query. + */ +std::string fleSafeFieldNameRedactor(const BSONElement& e) { + throwIfEncounteringFLEPayload(e); + // Ideally we would change interface to avoid copying here. + return e.fieldNameStringData().toString(); +} + } // namespace +const BSONObj& TelemetryMetrics::redactKey(const BSONObj& key) const { + if (_redactedKey) { + return *_redactedKey; + } + + auto redactionStrategy = ServerParameterSet::getNodeParameterSet() + ->get<QueryTelemetryControl>( + "internalQueryConfigureTelemetryFieldNameRedactionStrategy") + ->_data.get(); + + // The telemetry key is of the following form: + // { "<CMD_TYPE>": {...}, "namespace": "...", "applicationName": "...", ... } + // + // The part of the key we need to redact is the object in the <CMD_TYPE> element. In the case of + // an aggregate() command, it will look something like: + // > "pipeline" : [ { "$telemetry" : {} }, + // { "$addFields" : { "x" : { "$someExpr" {} } } } ], + // We should preserve the top-level stage names in the pipeline but redact all field names of + // children. + // + // The find-specific key will look like so: + // > "find" : { "find" : "###", "filter" : { "_id" : { "$ne" : "###" } } }, + // Again, we should preserve the top-level keys and redact all field names of children. + BSONObjBuilder redacted; + for (BSONElement e : key) { + if ((e.type() == Object || e.type() == Array) && + kKeysToRedact.count(e.fieldNameStringData().toString()) == 1) { + auto redactor = [&](BSONObjBuilder subObj, const BSONObj& obj) { + for (BSONElement e2 : obj) { + if (e2.type() == Object) { + switch (redactionStrategy) { + case QueryTelemetryFieldNameRedactionStrategyEnum:: + kSha256RedactionStrategy: + subObj.append(e2.fieldNameStringData(), + e2.Obj().redact(false, sha256FieldNameHasher)); + break; + case QueryTelemetryFieldNameRedactionStrategyEnum:: + kConstantRedactionStrategy: + subObj.append(e2.fieldNameStringData(), + e2.Obj().redact(false, constantFieldNameHasher)); + break; + case QueryTelemetryFieldNameRedactionStrategyEnum::kNoRedactionStrategy: + subObj.append(e2.fieldNameStringData(), e2.Obj().redact(false)); + break; + } + } else { + subObj.append(e2); + } + } + subObj.done(); + }; + + // Now we're inside the <CMD_TYPE>:{} entry and want to preserve the top-level field + // names. If it's a [pipeline] array, we redact each element in isolation. + if (e.type() == Object) { + redactor(redacted.subobjStart(e.fieldNameStringData()), e.Obj()); + } else { + BSONObjBuilder subArr = redacted.subarrayStart(e.fieldNameStringData()); + for (BSONElement stage : e.Obj()) { + redactor(subArr.subobjStart(""), stage.Obj()); + } + } + } else { + redacted.append(e); + } + } + _redactedKey = redacted.obj(); + return *_redactedKey; +} + void registerAggRequest(const AggregateCommandRequest& request, OperationContext* opCtx) { if (request.getEncryptionInformation()) { return; @@ -285,7 +382,8 @@ void registerAggRequest(const AggregateCommandRequest& request, OperationContext for (auto&& stage : request.getPipeline()) { auto el = stage.firstElement(); BSONObjBuilder stageBuilder = pipelineBuilder.subobjStart("stage"_sd); - stageBuilder.append(el.fieldNameStringData(), el.Obj().redact(false)); + stageBuilder.append(el.fieldNameStringData(), + el.Obj().redact(false, fleSafeFieldNameRedactor)); stageBuilder.done(); } pipelineBuilder.done(); @@ -340,7 +438,8 @@ void registerFindRequest(const FindCommandRequest& request, auto findBson = request.toBSON({}); for (auto&& findEntry : findBson) { if (findEntry.isABSONObj()) { - telemetryKey.append(findEntry.fieldNameStringData(), findEntry.Obj().redact(false)); + telemetryKey.append(findEntry.fieldNameStringData(), + findEntry.Obj().redact(false, fleSafeFieldNameRedactor)); } else { telemetryKey.append(findEntry.fieldNameStringData(), "###"_sd); } diff --git a/src/mongo/db/query/telemetry.h b/src/mongo/db/query/telemetry.h index 3a2f41a7e4e..0e225d2f17e 100644 --- a/src/mongo/db/query/telemetry.h +++ b/src/mongo/db/query/telemetry.h @@ -107,6 +107,11 @@ public: } /** + * Redact a given telemetry key. + */ + const BSONObj& redactKey(const BSONObj& key) const; + + /** * Timestamp for when this query shape was added to the store. Set on construction. */ const Timestamp firstSeenTimestamp; @@ -130,6 +135,12 @@ public: AggregatedMetric docsScanned; AggregatedMetric keysScanned; + +private: + /** + * We cache the redacted key the first time it's computed. + */ + mutable boost::optional<BSONObj> _redactedKey; }; struct TelemetryPartitioner { @@ -139,9 +150,15 @@ struct TelemetryPartitioner { } }; +/** + * Average key size used to pad the metrics size. We store a cached redaction of the key in the + * TelemetryMetrics object. + */ +const size_t kAverageKeySize = 100; + struct ComputeEntrySize { size_t operator()(const TelemetryMetrics& entry) { - return sizeof(TelemetryMetrics); + return sizeof(TelemetryMetrics) + kAverageKeySize; } }; diff --git a/src/mongo/db/query/telemetry_util.cpp b/src/mongo/db/query/telemetry_util.cpp index dcce8d7452b..fea9ad6587b 100644 --- a/src/mongo/db/query/telemetry_util.cpp +++ b/src/mongo/db/query/telemetry_util.cpp @@ -32,6 +32,7 @@ #include "mongo/base/status.h" #include "mongo/db/concurrency/d_concurrency.h" #include "mongo/db/query/partitioned_cache.h" +#include "mongo/db/query/query_knobs_gen.h" #include "mongo/db/query/util/memory_util.h" #include "mongo/db/service_context.h" #include "mongo/logv2/log.h" @@ -82,3 +83,18 @@ const Decorable<ServiceContext>::Decoration<std::unique_ptr<OnParamChangeUpdater telemetryStoreOnParamChangeUpdater = ServiceContext::declareDecoration<std::unique_ptr<OnParamChangeUpdater>>(); } // namespace mongo::telemetry_util + +namespace mongo { +void QueryTelemetryControl::append(OperationContext*, + BSONObjBuilder* b, + StringData name, + const boost::optional<TenantId>&) { + *b << name << QueryTelemetryFieldNameRedactionStrategy_serializer(_data.get()); +} + +Status QueryTelemetryControl::setFromString(StringData value, const boost::optional<TenantId>&) { + _data = QueryTelemetryFieldNameRedactionStrategy_parse( + IDLParserContext("internalQueryConfigureTelemetryFieldNameRedactionStrategy"), value); + return Status::OK(); +} +} // namespace mongo |