diff options
Diffstat (limited to 'src/mongo/db/query')
-rw-r--r-- | src/mongo/db/query/find_request_shapifier.cpp | 11 | ||||
-rw-r--r-- | src/mongo/db/query/request_shapifier.h | 8 | ||||
-rw-r--r-- | src/mongo/db/query/serialization_options.h | 2 | ||||
-rw-r--r-- | src/mongo/db/query/telemetry.cpp | 65 | ||||
-rw-r--r-- | src/mongo/db/query/telemetry.h | 43 | ||||
-rw-r--r-- | src/mongo/db/query/telemetry_store_test.cpp | 79 |
6 files changed, 117 insertions, 91 deletions
diff --git a/src/mongo/db/query/find_request_shapifier.cpp b/src/mongo/db/query/find_request_shapifier.cpp index fdb96a6ad19..8002a152a13 100644 --- a/src/mongo/db/query/find_request_shapifier.cpp +++ b/src/mongo/db/query/find_request_shapifier.cpp @@ -40,11 +40,6 @@ void addNonShapeObjCmdLiterals(BSONObjBuilder* bob, const FindCommandRequest& findCommand, const SerializationOptions& opts, const boost::intrusive_ptr<ExpressionContext>& expCtx) { - - if (const auto& comment = expCtx->opCtx->getComment()) { - opts.appendLiteral(bob, "comment", *comment); - } - if (auto noCursorTimeout = findCommand.getNoCursorTimeout()) { // Capture whether noCursorTimeout was specified in the query, do not distinguish between // true or false. @@ -95,9 +90,11 @@ BSONObj FindRequestShapifier::makeTelemetryKey( _request.getAllowPartialResults().value_or(false)); } - // Fields for literal redaction. Adds comment, batchSize, maxTimeMS, and noCursorTimeOut. + // Fields for literal redaction. Adds batchSize, maxTimeMS, and noCursorTimeOut. addNonShapeObjCmdLiterals(&bob, _request, opts, expCtx); - + if (_comment) { + opts.appendLiteral(&bob, "comment", *_comment); + } if (_applicationName.has_value()) { bob.append("applicationName", _applicationName.value()); } diff --git a/src/mongo/db/query/request_shapifier.h b/src/mongo/db/query/request_shapifier.h index 6f3d79f61be..1bae8f913f9 100644 --- a/src/mongo/db/query/request_shapifier.h +++ b/src/mongo/db/query/request_shapifier.h @@ -67,8 +67,16 @@ protected: _applicationName = metadata->getApplicationName().toString(); } } + if (const auto& comment = opCtx->getComment()) { + BSONObjBuilder commentBuilder; + commentBuilder.append(*comment); + _commentObj = commentBuilder.obj(); + _comment = _commentObj.firstElement(); + } } boost::optional<std::string> _applicationName; + BSONObj _commentObj; + boost::optional<BSONElement> _comment = boost::none; }; } // namespace mongo::telemetry diff --git a/src/mongo/db/query/serialization_options.h b/src/mongo/db/query/serialization_options.h index 3b2159585c4..22e376ee4e2 100644 --- a/src/mongo/db/query/serialization_options.h +++ b/src/mongo/db/query/serialization_options.h @@ -95,6 +95,8 @@ struct SerializationOptions { applyHmacToIdentifiers(fieldNamesHmacPolicy_), identifierHmacPolicy(fieldNamesHmacPolicy_) {} + SerializationOptions(LiteralSerializationPolicy policy) : literalPolicy(policy) {} + // Helper function for removing identifiable information (like collection/db names). // Note: serializeFieldPath/serializeFieldPathFromString should be used for field // names. diff --git a/src/mongo/db/query/telemetry.cpp b/src/mongo/db/query/telemetry.cpp index 10da833147e..af17da7af02 100644 --- a/src/mongo/db/query/telemetry.cpp +++ b/src/mongo/db/query/telemetry.cpp @@ -46,6 +46,7 @@ #include "mongo/db/query/query_planner_params.h" #include "mongo/db/query/query_request_helper.h" #include "mongo/db/query/rate_limiting.h" +#include "mongo/db/query/serialization_options.h" #include "mongo/db/query/sort_pattern.h" #include "mongo/db/query/telemetry_util.h" #include "mongo/logv2/log.h" @@ -320,13 +321,13 @@ std::string fleSafeFieldNameRedactor(const BSONElement& e) { * Append the element to the builder and apply hmac to any literals within the element. The element * may be of any type. */ -void appendWithHmacAppliedLiterals(BSONObjBuilder& builder, const BSONElement& el) { +void appendWithAbstractedLiterals(BSONObjBuilder& builder, const BSONElement& el) { if (el.type() == Object) { builder.append(el.fieldNameStringData(), el.Obj().redact(false, fleSafeFieldNameRedactor)); } else if (el.type() == Array) { BSONObjBuilder arrayBuilder = builder.subarrayStart(fleSafeFieldNameRedactor(el)); for (auto&& arrayElem : el.Obj()) { - appendWithHmacAppliedLiterals(arrayBuilder, arrayElem); + appendWithAbstractedLiterals(arrayBuilder, arrayElem); } arrayBuilder.done(); } else { @@ -337,21 +338,18 @@ void appendWithHmacAppliedLiterals(BSONObjBuilder& builder, const BSONElement& e static const StringData replacementForLiteralArgs = "?"_sd; -} // namespace +std::size_t hash(const BSONObj& obj) { + return absl::hash_internal::CityHash64(obj.objdata(), obj.objsize()); +} -BSONObj TelemetryEntry::makeTelemetryKey(const BSONObj& key, - bool applyHmacToIdentifiers, - std::string hmacKey, - OperationContext* opCtx) const { - if (!applyHmacToIdentifiers) { - return key; - } +} // namespace - if (_hmacAppliedKey) { - return *_hmacAppliedKey; - } +BSONObj TelemetryEntry::computeTelemetryKey(OperationContext* opCtx, + bool applyHmacToIdentifiers, + std::string hmacKey) const { // The telemetry key for find queries is generated by serializing all the command fields - // and applied hmac if SerializationOptions indicate to do so. The resulting key is of the form: + // and applying hmac if SerializationOptions indicate to do so. The resulting key is of the + // form: // { // queryShape: { // cmdNs: {db: "...", coll: "..."}, @@ -370,7 +368,7 @@ BSONObj TelemetryEntry::makeTelemetryKey(const BSONObj& key, ? SerializationOptions( [&](StringData sd) { return sha256HmacStringDataHasher(hmacKey, sd); }, LiteralSerializationPolicy::kToDebugTypeString) - : SerializationOptions(false); + : SerializationOptions(LiteralSerializationPolicy::kToDebugTypeString); return requestShapifier->makeTelemetryKey(serializationOpts, opCtx); } @@ -384,8 +382,13 @@ BSONObj TelemetryEntry::makeTelemetryKey(const BSONObj& key, // { "$addFields" : { "x" : { "$someExpr" {} } } } ], // We should preserve the top-level stage names in the pipeline but apply hmac to all field // names of children. + + // TODO: SERVER-73152 literal and field name redaction for aggregate command. + if (!applyHmacToIdentifiers) { + return oldTelemetryKey; + } BSONObjBuilder hmacAppliedBuilder; - for (BSONElement e : key) { + for (BSONElement e : oldTelemetryKey) { if ((e.type() == Object || e.type() == Array) && kKeysToApplyHmac.count(e.fieldNameStringData().toString()) == 1) { auto hmacApplicator = [&](BSONObjBuilder subObj, const BSONObj& obj) { @@ -416,8 +419,7 @@ BSONObj TelemetryEntry::makeTelemetryKey(const BSONObj& key, hmacAppliedBuilder.append(e); } } - _hmacAppliedKey = hmacAppliedBuilder.obj(); - return *_hmacAppliedKey; + return hmacAppliedBuilder.obj(); } // The originating command/query does not persist through the end of query execution. In order to @@ -453,7 +455,7 @@ void registerAggRequest(const AggregateCommandRequest& request, OperationContext try { for (auto&& stage : request.getPipeline()) { BSONObjBuilder stageBuilder = pipelineBuilder.subobjStart("stage"_sd); - appendWithHmacAppliedLiterals(stageBuilder, stage.firstElement()); + appendWithAbstractedLiterals(stageBuilder, stage.firstElement()); stageBuilder.done(); } pipelineBuilder.done(); @@ -468,7 +470,9 @@ void registerAggRequest(const AggregateCommandRequest& request, OperationContext return; } - CurOp::get(opCtx)->debug().telemetryStoreKey = telemetryKey.obj(); + BSONObj key = telemetryKey.obj(); + CurOp::get(opCtx)->debug().telemetryStoreKeyHash = hash(key); + CurOp::get(opCtx)->debug().telemetryStoreKey = key.getOwned(); } void registerRequest(std::unique_ptr<RequestShapifier> requestShapifier, @@ -490,8 +494,8 @@ void registerRequest(std::unique_ptr<RequestShapifier> requestShapifier, SerializationOptions options; options.literalPolicy = LiteralSerializationPolicy::kToDebugTypeString; options.replacementForLiteralArgs = replacementForLiteralArgs; - CurOp::get(opCtx)->debug().telemetryStoreKey = - requestShapifier->makeTelemetryKey(options, expCtx); + CurOp::get(opCtx)->debug().telemetryStoreKeyHash = + hash(requestShapifier->makeTelemetryKey(options, expCtx)); CurOp::get(opCtx)->debug().telemetryRequestShapifier = std::move(requestShapifier); } @@ -504,26 +508,29 @@ TelemetryStore& getTelemetryStore(OperationContext* opCtx) { } void writeTelemetry(OperationContext* opCtx, + boost::optional<size_t> telemetryKeyHash, boost::optional<BSONObj> telemetryKey, std::unique_ptr<RequestShapifier> requestShapifier, const uint64_t queryExecMicros, const uint64_t docsReturned) { - if (!telemetryKey) { + if (!telemetryKeyHash) { return; } auto&& telemetryStore = getTelemetryStore(opCtx); - auto&& [statusWithMetrics, partitionLock] = telemetryStore.getWithPartitionLock(*telemetryKey); + auto&& [statusWithMetrics, partitionLock] = + telemetryStore.getWithPartitionLock(*telemetryKeyHash); std::shared_ptr<TelemetryEntry> metrics; if (statusWithMetrics.isOK()) { metrics = *statusWithMetrics.getValue(); } else { + BSONObj key = telemetryKey.value_or(BSONObj{}); size_t numEvicted = - telemetryStore.put(*telemetryKey, - std::make_shared<TelemetryEntry>(std::move(requestShapifier), - CurOp::get(opCtx)->getNSS()), + telemetryStore.put(*telemetryKeyHash, + std::make_shared<TelemetryEntry>( + std::move(requestShapifier), CurOp::get(opCtx)->getNSS(), key), partitionLock); telemetryEvictedMetric.increment(numEvicted); - auto newMetrics = partitionLock->get(*telemetryKey); + auto newMetrics = partitionLock->get(*telemetryKeyHash); if (!newMetrics.isOK()) { // This can happen if the budget is immediately exceeded. Specifically if the there is // not enough room for a single new entry if the number of partitions is too high @@ -533,7 +540,7 @@ void writeTelemetry(OperationContext* opCtx, 1, "Failed to store telemetry entry.", "status"_attr = newMetrics.getStatus(), - "rawKey"_attr = redact(*telemetryKey)); + "telemetryKeyHash"_attr = telemetryKeyHash); return; } metrics = newMetrics.getValue()->second; diff --git a/src/mongo/db/query/telemetry.h b/src/mongo/db/query/telemetry.h index 9d2fdaa7b5b..e7e0f3ccfd1 100644 --- a/src/mongo/db/query/telemetry.h +++ b/src/mongo/db/query/telemetry.h @@ -99,10 +99,13 @@ extern CounterMetric telemetryStoreSizeEstimateBytesMetric; // Used to aggregate the metrics for one telemetry key over all its executions. class TelemetryEntry { public: - TelemetryEntry(std::unique_ptr<RequestShapifier> requestShapifier, NamespaceStringOrUUID nss) + TelemetryEntry(std::unique_ptr<RequestShapifier> requestShapifier, + NamespaceStringOrUUID nss, + const BSONObj& cmdObj) : firstSeenTimestamp(Date_t::now().toMillisSinceEpoch() / 1000, 0), requestShapifier(std::move(requestShapifier)), - nss(nss) { + nss(nss), + oldTelemetryKey(cmdObj.copy()) { telemetryStoreSizeEstimateBytesMetric.increment(sizeof(TelemetryEntry) + sizeof(BSONObj)); } @@ -123,10 +126,9 @@ public: /** * Redact a given telemetry key and set _keySize. */ - BSONObj makeTelemetryKey(const BSONObj& key, - bool applyHmacToIdentifiers, - std::string hmacKey, - OperationContext* opCtx) const; + BSONObj computeTelemetryKey(OperationContext* opCtx, + bool applyHmacToIdentifiers, + std::string hmacKey) const; /** * Timestamp for when this query shape was added to the store. Set on construction. @@ -151,35 +153,31 @@ public: NamespaceStringOrUUID nss; -private: - /** - * We cache the hmac applied key the first time it's computed. - */ - mutable boost::optional<BSONObj> _hmacAppliedKey; + // TODO: SERVER-73152 remove oldTelemetryKey when RequestShapifier is used for agg. + BSONObj oldTelemetryKey; }; struct TelemetryPartitioner { // The partitioning function for use with the 'Partitioned' utility. - std::size_t operator()(const BSONObj& k, const std::size_t nPartitions) const { - return SimpleBSONObjComparator::Hasher()(k) % nPartitions; + std::size_t operator()(const std::size_t k, const std::size_t nPartitions) const { + return k % nPartitions; } }; struct TelemetryStoreEntryBudgetor { - size_t operator()(const BSONObj& key, const std::shared_ptr<TelemetryEntry>& value) { - // The buget estimator for <key,value> pair in LRU cache accounts for size of value - // (TelemetryEntry) size of the key, and size of the key's underlying data struture - // (BSONObj). - return sizeof(TelemetryEntry) + sizeof(BSONObj) + key.objsize(); + size_t operator()(const std::size_t key, const std::shared_ptr<TelemetryEntry>& value) { + // The buget estimator for <key,value> pair in LRU cache accounts for the size of the key + // and the size of the metrics, including the bson object used for generating the telemetry + // key at read time. + + return sizeof(TelemetryEntry) + sizeof(std::size_t) + value->oldTelemetryKey.objsize(); } }; -using TelemetryStore = PartitionedCache<BSONObj, +using TelemetryStore = PartitionedCache<std::size_t, std::shared_ptr<TelemetryEntry>, TelemetryStoreEntryBudgetor, - TelemetryPartitioner, - SimpleBSONObjComparator::Hasher, - SimpleBSONObjComparator::EqualTo>; + TelemetryPartitioner>; /** * Acquire a reference to the global telemetry store. @@ -208,6 +206,7 @@ void registerRequest(std::unique_ptr<RequestShapifier> requestShapifier, * Writes telemetry to the telemetry store for the operation identified by `telemetryKey`. */ void writeTelemetry(OperationContext* opCtx, + boost::optional<size_t> telemetryKeyHash, boost::optional<BSONObj> telemetryKey, std::unique_ptr<RequestShapifier> requestShapifier, uint64_t queryExecMicros, diff --git a/src/mongo/db/query/telemetry_store_test.cpp b/src/mongo/db/query/telemetry_store_test.cpp index cbf1104d29c..8d68ee566c6 100644 --- a/src/mongo/db/query/telemetry_store_test.cpp +++ b/src/mongo/db/query/telemetry_store_test.cpp @@ -46,6 +46,11 @@ namespace mongo::telemetry { std::string applyHmacForTest(StringData s) { return str::stream() << "HASH<" << s << ">"; } + +std::size_t hash(const BSONObj& obj) { + return absl::hash_internal::CityHash64(obj.objdata(), obj.objsize()); +} + class TelemetryStoreTest : public ServiceContextTest { public: BSONObj makeTelemetryKeyFindRequest( @@ -73,17 +78,18 @@ public: TEST_F(TelemetryStoreTest, BasicUsage) { TelemetryStore telStore{5000000, 1000}; - auto getMetrics = [&](BSONObj& key) { - auto lookupResult = telStore.lookup(key); + auto getMetrics = [&](const BSONObj& key) { + auto lookupResult = telStore.lookup(hash(key)); return *lookupResult.getValue(); }; auto collectMetrics = [&](BSONObj& key) { std::shared_ptr<TelemetryEntry> metrics; - auto lookupResult = telStore.lookup(key); + auto lookupResult = telStore.lookup(hash(key)); if (!lookupResult.isOK()) { - telStore.put(key, std::make_shared<TelemetryEntry>(nullptr, NamespaceString{})); - lookupResult = telStore.lookup(key); + telStore.put(hash(key), + std::make_shared<TelemetryEntry>(nullptr, NamespaceString{}, key)); + lookupResult = telStore.lookup(hash(key)); } metrics = *lookupResult.getValue(); metrics->execCount += 1; @@ -105,7 +111,7 @@ TEST_F(TelemetryStoreTest, BasicUsage) { ASSERT_EQ(getMetrics(query2)->execCount, 1); auto collectMetricsWithLock = [&](BSONObj& key) { - auto [lookupResult, lock] = telStore.getWithPartitionLock(key); + auto [lookupResult, lock] = telStore.getWithPartitionLock(hash(key)); auto metrics = *lookupResult.getValue(); metrics->execCount += 1; metrics->lastExecutionMicros += 123456; @@ -121,7 +127,7 @@ TEST_F(TelemetryStoreTest, BasicUsage) { int numKeys = 0; telStore.forEach( - [&](const BSONObj& key, const std::shared_ptr<TelemetryEntry>& entry) { numKeys++; }); + [&](std::size_t key, const std::shared_ptr<TelemetryEntry>& entry) { numKeys++; }); ASSERT_EQ(numKeys, 2); } @@ -135,13 +141,15 @@ TEST_F(TelemetryStoreTest, EvictEntries) { for (int i = 0; i < 20; i++) { auto query = BSON("query" + std::to_string(i) << 1 << "xEquals" << 42); - telStore.put(query, std::make_shared<TelemetryEntry>(nullptr, NamespaceString{})); + telStore.put(hash(query), + std::make_shared<TelemetryEntry>(nullptr, NamespaceString{}, BSONObj{})); } int numKeys = 0; telStore.forEach( - [&](const BSONObj& key, const std::shared_ptr<TelemetryEntry>& entry) { numKeys++; }); + [&](std::size_t key, const std::shared_ptr<TelemetryEntry>& entry) { numKeys++; }); - int entriesPerPartition = (cacheSize / numPartitions) / (46 + sizeof(TelemetryEntry)); + int entriesPerPartition = (cacheSize / numPartitions) / + (sizeof(std::size_t) + sizeof(TelemetryEntry) + BSONObj().objsize()); ASSERT_EQ(numKeys, entriesPerPartition * numPartitions); } @@ -683,33 +691,38 @@ TEST_F(TelemetryStoreTest, DefinesLetVariables) { const auto cmdObj = fcr.toBSON(BSON("$db" << "testDB")); TelemetryEntry testMetrics{std::make_unique<telemetry::FindRequestShapifier>(fcr, opCtx.get()), - fcr.getNamespaceOrUUID()}; + fcr.getNamespaceOrUUID(), + cmdObj}; bool applyHmacToIdentifiers = false; auto hmacApplied = - testMetrics.makeTelemetryKey(cmdObj, applyHmacToIdentifiers, std::string{}, opCtx.get()); - // As the query never moves through registerFindRequest and hmac is not enabled, - // makeTelemetryKey() never gets called and consequently the query never gets shapified. + testMetrics.computeTelemetryKey(opCtx.get(), applyHmacToIdentifiers, std::string{}); ASSERT_BSONOBJ_EQ_AUTO( // NOLINT R"({ - "find": "testColl", - "filter": { - "$expr": [ - { - "$eq": [ - "$a", - "$$var" - ] - } - ] - }, - "projection": { - "varIs": "$$var" - }, - "let": { - "var": 2 - }, - "$db": "testDB" + "queryShape": { + "cmdNs": { + "db": "testDB", + "coll": "testColl" + }, + "command": "find", + "filter": { + "$expr": [ + { + "$eq": [ + "$a", + "$$var" + ] + } + ] + }, + "let": { + "var": "?number" + }, + "projection": { + "varIs": "$$var", + "_id": true + } + } })", hmacApplied); @@ -717,7 +730,7 @@ TEST_F(TelemetryStoreTest, DefinesLetVariables) { // do the hashing, so we'll just stick with the big long strings here for now. applyHmacToIdentifiers = true; hmacApplied = - testMetrics.makeTelemetryKey(cmdObj, applyHmacToIdentifiers, std::string{}, opCtx.get()); + testMetrics.computeTelemetryKey(opCtx.get(), applyHmacToIdentifiers, std::string{}); ASSERT_BSONOBJ_EQ_AUTO( // NOLINT R"({ "queryShape": { |