summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJess Balint <jbalint@gmail.com>2022-11-15 05:07:22 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-11-15 06:30:53 +0000
commit7be344b92d3af3fd8eb4542e594e084bed394fdc (patch)
tree8fb77406d0eb74fe46854c121454ddd7327cec3e
parent148c522ffefc93dcc8227579b583691335cd4f2f (diff)
downloadmongo-7be344b92d3af3fd8eb4542e594e084bed394fdc.tar.gz
SERVER-71292 Redact all sensitive information in query #8731
-rw-r--r--src/mongo/bson/bsonobj.cpp41
-rw-r--r--src/mongo/bson/bsonobj.h4
-rw-r--r--src/mongo/db/pipeline/document_source_telemetry.cpp5
-rw-r--r--src/mongo/db/query/query_knobs.idl21
-rw-r--r--src/mongo/db/query/telemetry.cpp113
-rw-r--r--src/mongo/db/query/telemetry.h19
-rw-r--r--src/mongo/db/query/telemetry_util.cpp16
7 files changed, 195 insertions, 24 deletions
diff --git a/src/mongo/bson/bsonobj.cpp b/src/mongo/bson/bsonobj.cpp
index c462031bffd..88715dd68de 100644
--- a/src/mongo/bson/bsonobj.cpp
+++ b/src/mongo/bson/bsonobj.cpp
@@ -145,41 +145,56 @@ BSONObj BSONObj::getOwned(const BSONObj& obj) {
return obj.getOwned();
}
-BSONObj BSONObj::redact(bool onlyEncryptedFields) const {
+BSONObj BSONObj::redact(bool onlyEncryptedFields,
+ std::function<std::string(const BSONElement&)> fieldNameRedactor) const {
_validateUnownedSize(objsize());
// Helper to get an "internal function" to be able to do recursion
struct redactor {
- void appendRedactedElem(BSONObjBuilder& builder, const BSONElement& e, bool appendMask) {
+ void appendRedactedElem(BSONObjBuilder& builder,
+ const StringData& fieldNameString,
+ bool appendMask) {
if (appendMask) {
- builder.append(e.fieldNameStringData(), "###"_sd);
+ builder.append(fieldNameString, "###"_sd);
} else {
- builder.appendNull(e.fieldNameStringData());
+ builder.appendNull(fieldNameString);
}
}
void operator()(BSONObjBuilder& builder,
const BSONObj& obj,
bool appendMask,
- bool onlyEncryptedFields) {
+ bool onlyEncryptedFields,
+ std::function<std::string(const BSONElement&)> fieldNameRedactor) {
for (BSONElement e : obj) {
+ StringData fieldNameString;
+ // Temporarily allocated string that must live long enough to be copied by builder.
+ std::string tempString;
+ if (!fieldNameRedactor) {
+ fieldNameString = e.fieldNameStringData();
+ } else {
+ tempString = fieldNameRedactor(e);
+ fieldNameString = {tempString};
+ }
if (e.type() == Object) {
- BSONObjBuilder subBuilder = builder.subobjStart(e.fieldNameStringData());
- operator()(subBuilder, e.Obj(), appendMask, onlyEncryptedFields);
+ BSONObjBuilder subBuilder = builder.subobjStart(fieldNameString);
+ operator()(
+ subBuilder, e.Obj(), appendMask, onlyEncryptedFields, fieldNameRedactor);
subBuilder.done();
} else if (e.type() == Array) {
- BSONObjBuilder subBuilder = builder.subarrayStart(e.fieldNameStringData());
- operator()(subBuilder, e.Obj(), appendMask, onlyEncryptedFields);
+ BSONObjBuilder subBuilder = builder.subarrayStart(fieldNameString);
+ operator()(
+ subBuilder, e.Obj(), appendMask, onlyEncryptedFields, fieldNameRedactor);
subBuilder.done();
} else {
if (onlyEncryptedFields) {
if (e.type() == BinData && e.binDataType() == BinDataType::Encrypt) {
- appendRedactedElem(builder, e, appendMask);
+ appendRedactedElem(builder, fieldNameString, appendMask);
} else {
builder.append(e);
}
} else {
- appendRedactedElem(builder, e, appendMask);
+ appendRedactedElem(builder, fieldNameString, appendMask);
}
}
}
@@ -188,7 +203,7 @@ BSONObj BSONObj::redact(bool onlyEncryptedFields) const {
try {
BSONObjBuilder builder;
- redactor()(builder, *this, /*appendMask=*/true, onlyEncryptedFields);
+ redactor()(builder, *this, /*appendMask=*/true, onlyEncryptedFields, fieldNameRedactor);
return builder.obj();
} catch (const ExceptionFor<ErrorCodes::BSONObjectTooLarge>&) {
}
@@ -198,7 +213,7 @@ BSONObj BSONObj::redact(bool onlyEncryptedFields) const {
// we use BSONType::jstNull, which ensures the redacted object will not be larger than the
// original.
BSONObjBuilder builder;
- redactor()(builder, *this, /*appendMask=*/false, onlyEncryptedFields);
+ redactor()(builder, *this, /*appendMask=*/false, onlyEncryptedFields, fieldNameRedactor);
return builder.obj();
}
diff --git a/src/mongo/bson/bsonobj.h b/src/mongo/bson/bsonobj.h
index a0088c8b70d..83453bcc9e1 100644
--- a/src/mongo/bson/bsonobj.h
+++ b/src/mongo/bson/bsonobj.h
@@ -273,7 +273,9 @@ public:
/**
* @return a new full (and owned) redacted copy of the object.
*/
- BSONObj redact(bool onlyEncryptedFields = false) const;
+ BSONObj redact(
+ bool onlyEncryptedFields = false,
+ std::function<std::string(const BSONElement&)> fieldNameRedactor = nullptr) const;
/**
* Readable representation of a BSON object in an extended JSON-style notation.
diff --git a/src/mongo/db/pipeline/document_source_telemetry.cpp b/src/mongo/db/pipeline/document_source_telemetry.cpp
index 36acd596241..812261edc1d 100644
--- a/src/mongo/db/pipeline/document_source_telemetry.cpp
+++ b/src/mongo/db/pipeline/document_source_telemetry.cpp
@@ -101,8 +101,9 @@ void DocumentSourceTelemetry::buildTelemetryStoreIterator() {
const auto partitionReadTime =
Timestamp{Timestamp(Date_t::now().toMillisSinceEpoch() / 1000, 0)};
for (auto&& [key, metrics] : *partition) {
- Document d{
- {{"key", key}, {"metrics", metrics.toBSON()}, {"asOf", partitionReadTime}}};
+ Document d{{{"key", metrics.redactKey(key)},
+ {"metrics", metrics.toBSON()},
+ {"asOf", partitionReadTime}}};
_queue.push(std::move(d));
}
});
diff --git a/src/mongo/db/query/query_knobs.idl b/src/mongo/db/query/query_knobs.idl
index a4cdc297567..21e86666c2f 100644
--- a/src/mongo/db/query/query_knobs.idl
+++ b/src/mongo/db/query/query_knobs.idl
@@ -51,6 +51,16 @@ enums:
# Force the Bonsai optimizer for all queries.
kForceBonsai: "forceBonsai"
+ QueryTelemetryFieldNameRedactionStrategy:
+ description: "Enum for possible values of queryFieldNameRedactionStrategy."
+ type: string
+ values:
+ kNoRedactionStrategy: "none"
+ # Use the constant redaction strategy.
+ kConstantRedactionStrategy: "constant"
+ # Use a prefix of sha256 redaction strategy
+ kSha256RedactionStrategy: "sha256"
+
server_parameters:
#
@@ -973,6 +983,17 @@ server_parameters:
validator:
callback: telemetry_util::validateTelemetryStoreSize
+ internalQueryConfigureTelemetryFieldNameRedactionStrategy:
+ description: Choose between the "none" implementation which will not redact any fields names, the "constant"
+ implementation, which will replace all field names with a constant, and the "sha256" implementation, which
+ preserves the identify of a field.
+ set_at: [ startup, runtime ]
+ cpp_class:
+ name: QueryTelemetryControl
+ data: synchronized_value<QueryTelemetryFieldNameRedactionStrategyEnum>
+ default:
+ expr: QueryTelemetryFieldNameRedactionStrategyEnum::kConstantRedactionStrategy
+
# Note for adding additional query knobs:
#
diff --git a/src/mongo/db/query/telemetry.cpp b/src/mongo/db/query/telemetry.cpp
index 7af325183f1..7b8c2c36c89 100644
--- a/src/mongo/db/query/telemetry.cpp
+++ b/src/mongo/db/query/telemetry.cpp
@@ -184,7 +184,6 @@ bool shouldCollect(const ServiceContext* serviceCtx) {
if (!telemetryRateLimiter(serviceCtx)->handleRequestSlidingWindow()) {
return false;
}
- // TODO SERVER-71244: check if it's a FLE collection here (maybe pass in the request)
return true;
}
@@ -195,8 +194,10 @@ void addToFindKey(BSONObjBuilder& builder, const StringData& fieldName, const BS
serializeBSONWhenNotEmpty(value.redact(false), fieldName, &builder);
}
-// Call this function from inside the redact() function on every BSONElement in the BSONObj.
-void throwIfEncounteringFLEPayload(BSONElement e) {
+/**
+ * Recognize FLE payloads in a query and throw an exception if found.
+ */
+void throwIfEncounteringFLEPayload(const BSONElement& e) {
constexpr auto safeContentLabel = "__safeContent__"_sd;
constexpr auto fieldpath = "$__safeContent__"_sd;
if (e.type() == BSONType::Object) {
@@ -204,12 +205,12 @@ void throwIfEncounteringFLEPayload(BSONElement e) {
uassert(ErrorCodes::EncounteredFLEPayloadWhileRedacting,
"Encountered __safeContent__, or an $_internalFle operator, which indicate a "
"rewritten FLE2 query.",
- fieldname == safeContentLabel || fieldname.startsWith("$_internalFle"_sd));
+ fieldname != safeContentLabel && !fieldname.startsWith("$_internalFle"_sd));
} else if (e.type() == BSONType::String) {
auto val = e.valueStringData();
uassert(ErrorCodes::EncounteredFLEPayloadWhileRedacting,
"Encountered $__safeContent__ fieldpath, which indicates a rewritten FLE2 query.",
- val == fieldpath);
+ val != fieldpath);
} else if (e.type() == BSONType::BinData && e.isBinData(BinDataType::Encrypt)) {
int len;
auto data = e.binData(len);
@@ -263,8 +264,104 @@ private:
TelemetryStore::Partition _partitionLock;
};
+/**
+ * Upon reading telemetry data, we redact some keys. This is the list. See
+ * TelemetryMetrics::redactKey().
+ */
+const stdx::unordered_set<std::string> kKeysToRedact = {"pipeline", "find"};
+
+std::string sha256FieldNameHasher(const BSONElement& e) {
+ auto&& fieldName = e.fieldNameStringData();
+ auto hash = SHA256Block::computeHash({ConstDataRange(fieldName.rawData(), fieldName.size())});
+ return hash.toString().substr(0, 12);
+}
+
+std::string constantFieldNameHasher(const BSONElement& e) {
+ return {"###"};
+}
+
+/**
+ * Admittedly an abuse of the BSON redaction interface, we recognize FLE payloads here and avoid
+ * collecting telemetry for the query.
+ */
+std::string fleSafeFieldNameRedactor(const BSONElement& e) {
+ throwIfEncounteringFLEPayload(e);
+ // Ideally we would change interface to avoid copying here.
+ return e.fieldNameStringData().toString();
+}
+
} // namespace
+const BSONObj& TelemetryMetrics::redactKey(const BSONObj& key) const {
+ if (_redactedKey) {
+ return *_redactedKey;
+ }
+
+ auto redactionStrategy = ServerParameterSet::getNodeParameterSet()
+ ->get<QueryTelemetryControl>(
+ "internalQueryConfigureTelemetryFieldNameRedactionStrategy")
+ ->_data.get();
+
+ // The telemetry key is of the following form:
+ // { "<CMD_TYPE>": {...}, "namespace": "...", "applicationName": "...", ... }
+ //
+ // The part of the key we need to redact is the object in the <CMD_TYPE> element. In the case of
+ // an aggregate() command, it will look something like:
+ // > "pipeline" : [ { "$telemetry" : {} },
+ // { "$addFields" : { "x" : { "$someExpr" {} } } } ],
+ // We should preserve the top-level stage names in the pipeline but redact all field names of
+ // children.
+ //
+ // The find-specific key will look like so:
+ // > "find" : { "find" : "###", "filter" : { "_id" : { "$ne" : "###" } } },
+ // Again, we should preserve the top-level keys and redact all field names of children.
+ BSONObjBuilder redacted;
+ for (BSONElement e : key) {
+ if ((e.type() == Object || e.type() == Array) &&
+ kKeysToRedact.count(e.fieldNameStringData().toString()) == 1) {
+ auto redactor = [&](BSONObjBuilder subObj, const BSONObj& obj) {
+ for (BSONElement e2 : obj) {
+ if (e2.type() == Object) {
+ switch (redactionStrategy) {
+ case QueryTelemetryFieldNameRedactionStrategyEnum::
+ kSha256RedactionStrategy:
+ subObj.append(e2.fieldNameStringData(),
+ e2.Obj().redact(false, sha256FieldNameHasher));
+ break;
+ case QueryTelemetryFieldNameRedactionStrategyEnum::
+ kConstantRedactionStrategy:
+ subObj.append(e2.fieldNameStringData(),
+ e2.Obj().redact(false, constantFieldNameHasher));
+ break;
+ case QueryTelemetryFieldNameRedactionStrategyEnum::kNoRedactionStrategy:
+ subObj.append(e2.fieldNameStringData(), e2.Obj().redact(false));
+ break;
+ }
+ } else {
+ subObj.append(e2);
+ }
+ }
+ subObj.done();
+ };
+
+ // Now we're inside the <CMD_TYPE>:{} entry and want to preserve the top-level field
+ // names. If it's a [pipeline] array, we redact each element in isolation.
+ if (e.type() == Object) {
+ redactor(redacted.subobjStart(e.fieldNameStringData()), e.Obj());
+ } else {
+ BSONObjBuilder subArr = redacted.subarrayStart(e.fieldNameStringData());
+ for (BSONElement stage : e.Obj()) {
+ redactor(subArr.subobjStart(""), stage.Obj());
+ }
+ }
+ } else {
+ redacted.append(e);
+ }
+ }
+ _redactedKey = redacted.obj();
+ return *_redactedKey;
+}
+
void registerAggRequest(const AggregateCommandRequest& request, OperationContext* opCtx) {
if (request.getEncryptionInformation()) {
return;
@@ -285,7 +382,8 @@ void registerAggRequest(const AggregateCommandRequest& request, OperationContext
for (auto&& stage : request.getPipeline()) {
auto el = stage.firstElement();
BSONObjBuilder stageBuilder = pipelineBuilder.subobjStart("stage"_sd);
- stageBuilder.append(el.fieldNameStringData(), el.Obj().redact(false));
+ stageBuilder.append(el.fieldNameStringData(),
+ el.Obj().redact(false, fleSafeFieldNameRedactor));
stageBuilder.done();
}
pipelineBuilder.done();
@@ -340,7 +438,8 @@ void registerFindRequest(const FindCommandRequest& request,
auto findBson = request.toBSON({});
for (auto&& findEntry : findBson) {
if (findEntry.isABSONObj()) {
- telemetryKey.append(findEntry.fieldNameStringData(), findEntry.Obj().redact(false));
+ telemetryKey.append(findEntry.fieldNameStringData(),
+ findEntry.Obj().redact(false, fleSafeFieldNameRedactor));
} else {
telemetryKey.append(findEntry.fieldNameStringData(), "###"_sd);
}
diff --git a/src/mongo/db/query/telemetry.h b/src/mongo/db/query/telemetry.h
index 3a2f41a7e4e..0e225d2f17e 100644
--- a/src/mongo/db/query/telemetry.h
+++ b/src/mongo/db/query/telemetry.h
@@ -107,6 +107,11 @@ public:
}
/**
+ * Redact a given telemetry key.
+ */
+ const BSONObj& redactKey(const BSONObj& key) const;
+
+ /**
* Timestamp for when this query shape was added to the store. Set on construction.
*/
const Timestamp firstSeenTimestamp;
@@ -130,6 +135,12 @@ public:
AggregatedMetric docsScanned;
AggregatedMetric keysScanned;
+
+private:
+ /**
+ * We cache the redacted key the first time it's computed.
+ */
+ mutable boost::optional<BSONObj> _redactedKey;
};
struct TelemetryPartitioner {
@@ -139,9 +150,15 @@ struct TelemetryPartitioner {
}
};
+/**
+ * Average key size used to pad the metrics size. We store a cached redaction of the key in the
+ * TelemetryMetrics object.
+ */
+const size_t kAverageKeySize = 100;
+
struct ComputeEntrySize {
size_t operator()(const TelemetryMetrics& entry) {
- return sizeof(TelemetryMetrics);
+ return sizeof(TelemetryMetrics) + kAverageKeySize;
}
};
diff --git a/src/mongo/db/query/telemetry_util.cpp b/src/mongo/db/query/telemetry_util.cpp
index dcce8d7452b..fea9ad6587b 100644
--- a/src/mongo/db/query/telemetry_util.cpp
+++ b/src/mongo/db/query/telemetry_util.cpp
@@ -32,6 +32,7 @@
#include "mongo/base/status.h"
#include "mongo/db/concurrency/d_concurrency.h"
#include "mongo/db/query/partitioned_cache.h"
+#include "mongo/db/query/query_knobs_gen.h"
#include "mongo/db/query/util/memory_util.h"
#include "mongo/db/service_context.h"
#include "mongo/logv2/log.h"
@@ -82,3 +83,18 @@ const Decorable<ServiceContext>::Decoration<std::unique_ptr<OnParamChangeUpdater
telemetryStoreOnParamChangeUpdater =
ServiceContext::declareDecoration<std::unique_ptr<OnParamChangeUpdater>>();
} // namespace mongo::telemetry_util
+
+namespace mongo {
+void QueryTelemetryControl::append(OperationContext*,
+ BSONObjBuilder* b,
+ StringData name,
+ const boost::optional<TenantId>&) {
+ *b << name << QueryTelemetryFieldNameRedactionStrategy_serializer(_data.get());
+}
+
+Status QueryTelemetryControl::setFromString(StringData value, const boost::optional<TenantId>&) {
+ _data = QueryTelemetryFieldNameRedactionStrategy_parse(
+ IDLParserContext("internalQueryConfigureTelemetryFieldNameRedactionStrategy"), value);
+ return Status::OK();
+}
+} // namespace mongo