diff options
Diffstat (limited to 'src/mongo/db/query')
-rw-r--r-- | src/mongo/db/query/SConscript | 2 | ||||
-rw-r--r-- | src/mongo/db/query/find_request_shapifier.cpp | 57 | ||||
-rw-r--r-- | src/mongo/db/query/find_request_shapifier.h | 55 | ||||
-rw-r--r-- | src/mongo/db/query/query_shape.cpp | 104 | ||||
-rw-r--r-- | src/mongo/db/query/query_shape.h | 13 | ||||
-rw-r--r-- | src/mongo/db/query/query_shape_test.cpp | 15 | ||||
-rw-r--r-- | src/mongo/db/query/request_shapifier.h | 64 | ||||
-rw-r--r-- | src/mongo/db/query/telemetry.cpp | 27 | ||||
-rw-r--r-- | src/mongo/db/query/telemetry.h | 11 | ||||
-rw-r--r-- | src/mongo/db/query/telemetry_store_test.cpp | 445 |
10 files changed, 727 insertions, 66 deletions
diff --git a/src/mongo/db/query/SConscript b/src/mongo/db/query/SConscript index bf41bae73f3..1a3c43dd060 100644 --- a/src/mongo/db/query/SConscript +++ b/src/mongo/db/query/SConscript @@ -138,6 +138,8 @@ env.Library( "explain_common.cpp", "find_common.cpp", "parsed_distinct.cpp", + # TODO SERVER-76557 move into new query_shape target + "find_request_shapifier.cpp", ], LIBDEPS=[ "$BUILD_DIR/mongo/base", diff --git a/src/mongo/db/query/find_request_shapifier.cpp b/src/mongo/db/query/find_request_shapifier.cpp new file mode 100644 index 00000000000..ae76951562a --- /dev/null +++ b/src/mongo/db/query/find_request_shapifier.cpp @@ -0,0 +1,57 @@ +/** + * Copyright (C) 2023-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/query/find_request_shapifier.h" + +#include "mongo/db/query/projection_ast_util.h" +#include "mongo/db/query/projection_parser.h" +#include "mongo/db/query/query_request_helper.h" +#include "mongo/db/query/query_shape.h" + +namespace mongo::telemetry { +BSONObj FindRequestShapifier::makeTelemetryKey( + const SerializationOptions& opts, const boost::intrusive_ptr<ExpressionContext>& expCtx) const { + BSONObjBuilder bob; + + bob.append("queryShape", query_shape::extractQueryShape(_request, opts, expCtx)); + + + if (auto optObj = _request.getReadConcern()) { + // Read concern should not be considered a literal. + bob.append(FindCommandRequest::kReadConcernFieldName, optObj.get()); + } + + if (_applicationName.has_value()) { + // TODO SERVER-76143 don't serialize appName + bob.append("applicationName", opts.serializeIdentifier(_applicationName.value())); + } + + return bob.obj(); +} +} // namespace mongo::telemetry diff --git a/src/mongo/db/query/find_request_shapifier.h b/src/mongo/db/query/find_request_shapifier.h new file mode 100644 index 00000000000..2885a2cd025 --- /dev/null +++ b/src/mongo/db/query/find_request_shapifier.h @@ -0,0 +1,55 @@ +/** + * Copyright (C) 2023-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/query/find_command_gen.h" +#include "mongo/db/query/request_shapifier.h" + +namespace mongo::telemetry { + +/** + * Handles shapification for FindCommandRequests. + */ +class FindRequestShapifier final : public RequestShapifier { +public: + FindRequestShapifier(const FindCommandRequest& request, + OperationContext* opCtx, + const boost::optional<std::string> applicationName = boost::none) + : RequestShapifier(opCtx, applicationName), _request(request) {} + + virtual ~FindRequestShapifier() = default; + + BSONObj makeTelemetryKey(const SerializationOptions& opts, + const boost::intrusive_ptr<ExpressionContext>& expCtx) const final; + +private: + const FindCommandRequest& _request; +}; +} // namespace mongo::telemetry diff --git a/src/mongo/db/query/query_shape.cpp b/src/mongo/db/query/query_shape.cpp index ec5179ab96d..05d91f94279 100644 --- a/src/mongo/db/query/query_shape.cpp +++ b/src/mongo/db/query/query_shape.cpp @@ -28,14 +28,12 @@ */ #include "mongo/db/query/query_shape.h" -#include "query_request_helper.h" -#include "sort_pattern.h" #include "mongo/base/status.h" #include "mongo/db/query/find_command_gen.h" -#include "mongo/db/query/plan_explainer.h" #include "mongo/db/query/projection_ast_util.h" #include "mongo/db/query/projection_parser.h" +#include "mongo/db/query/query_request_helper.h" #include "mongo/db/query/sort_pattern.h" namespace mongo::query_shape { @@ -70,9 +68,9 @@ BSONObj representativePredicateShape( return predicate->serialize(opts); } -BSONObj sortShape(const BSONObj& sortSpec, - const boost::intrusive_ptr<ExpressionContext>& expCtx, - const SerializationOptions& opts) { +BSONObj extractSortShape(const BSONObj& sortSpec, + const boost::intrusive_ptr<ExpressionContext>& expCtx, + const SerializationOptions& opts) { if (sortSpec.isEmpty()) { return sortSpec; } @@ -160,7 +158,8 @@ void addRemainingFindCommandFields(BSONObjBuilder* bob, opts.appendLiteral(bob, FindCommandRequest::kCollationFieldName, collation); } } -BSONObj redactHintComponent(BSONObj obj, const SerializationOptions& opts, bool redactValues) { + +BSONObj extractHintShape(BSONObj obj, const SerializationOptions& opts, bool redactValues) { BSONObjBuilder bob; for (BSONElement elem : obj) { if (hintSpecialField.compare(elem.fieldName()) == 0) { @@ -191,9 +190,9 @@ BSONObj redactHintComponent(BSONObj obj, const SerializationOptions& opts, bool * In a let specification all field names are variable names, and all values are either expressions * or constants. */ -BSONObj redactLetSpec(BSONObj letSpec, - const SerializationOptions& opts, - boost::intrusive_ptr<ExpressionContext> expCtx) { +BSONObj extractLetSpecShape(BSONObj letSpec, + const SerializationOptions& opts, + boost::intrusive_ptr<ExpressionContext> expCtx) { BSONObjBuilder bob; for (BSONElement elem : letSpec) { @@ -206,26 +205,30 @@ BSONObj redactLetSpec(BSONObj letSpec, return bob.obj(); } +BSONObj extractNamespaceShape(NamespaceString nss, const SerializationOptions& opts) { + BSONObjBuilder bob; + if (nss.tenantId()) { + bob.append("tenantId", opts.serializeIdentifier(nss.tenantId().value().toString())); + } + bob.append("db", opts.serializeIdentifier(nss.db())); + bob.append("coll", opts.serializeIdentifier(nss.coll())); + return bob.obj(); +} + BSONObj extractQueryShape(const FindCommandRequest& findCommand, const SerializationOptions& opts, const boost::intrusive_ptr<ExpressionContext>& expCtx) { BSONObjBuilder bob; // Serialize the namespace as part of the query shape. { - BSONObjBuilder cmdNs = bob.subobjStart("cmdNs"); auto ns = findCommand.getNamespaceOrUUID(); if (ns.nss()) { - auto nss = ns.nss().value(); - if (nss.tenantId()) { - cmdNs.append("tenantId", - opts.serializeIdentifier(nss.tenantId().value().toString())); - } - cmdNs.append("db", opts.serializeIdentifier(nss.db())); - cmdNs.append("coll", opts.serializeIdentifier(nss.coll())); + bob.append("cmdNs", extractNamespaceShape(ns.nss().value(), opts)); } else { + BSONObjBuilder cmdNs = bob.subobjStart("cmdNs"); cmdNs.append("uuid", opts.serializeIdentifier(ns.uuid()->toString())); + cmdNs.done(); } - cmdNs.done(); } // Redact the namespace of the command. @@ -256,7 +259,7 @@ BSONObj extractQueryShape(const FindCommandRequest& findCommand, // Let Spec. if (auto letSpec = findCommand.getLet()) { - auto redactedObj = redactLetSpec(letSpec.get(), opts, expCtx); + auto redactedObj = extractLetSpecShape(letSpec.get(), opts, expCtx); auto ownedObj = redactedObj.getOwned(); bob.append(FindCommandRequest::kLetFieldName, std::move(ownedObj)); } @@ -279,22 +282,22 @@ BSONObj extractQueryShape(const FindCommandRequest& findCommand, // Hint, max, and min won't serialize if the object is empty. if (!findCommand.getHint().isEmpty()) { bob.append(FindCommandRequest::kHintFieldName, - redactHintComponent(findCommand.getHint(), opts, false)); + extractHintShape(findCommand.getHint(), opts, false)); // Max/Min aren't valid without hint. if (!findCommand.getMax().isEmpty()) { bob.append(FindCommandRequest::kMaxFieldName, - redactHintComponent(findCommand.getMax(), opts, true)); + extractHintShape(findCommand.getMax(), opts, true)); } if (!findCommand.getMin().isEmpty()) { bob.append(FindCommandRequest::kMinFieldName, - redactHintComponent(findCommand.getMin(), opts, true)); + extractHintShape(findCommand.getMin(), opts, true)); } } // Sort. if (!findCommand.getSort().isEmpty()) { bob.append(FindCommandRequest::kSortFieldName, - query_shape::sortShape(findCommand.getSort(), expCtx, opts)); + query_shape::extractSortShape(findCommand.getSort(), expCtx, opts)); } // Fields for literal redaction. Adds limit, skip, batchSize, maxTimeMS, and noCursorTimeOut @@ -305,4 +308,57 @@ BSONObj extractQueryShape(const FindCommandRequest& findCommand, return bob.obj(); } + +BSONObj extractQueryShape(const AggregateCommandRequest& aggregateCommand, + const std::vector<BSONObj>& serializedPipeline, + const SerializationOptions& opts, + const boost::intrusive_ptr<ExpressionContext>& expCtx) { + BSONObjBuilder bob; + + // TODO SERVER-73152 update to newest query shape definition + + // namespace + bob.append("ns", extractNamespaceShape(aggregateCommand.getNamespace(), opts)); + bob.append(AggregateCommandRequest::kCommandName, + opts.serializeIdentifier(aggregateCommand.getNamespace().coll())); + + // pipeline + { + BSONArrayBuilder pipelineBab( + bob.subarrayStart(AggregateCommandRequest::kPipelineFieldName)); + for (const auto& stage : serializedPipeline) { + pipelineBab.append(stage); + } + pipelineBab.doneFast(); + } + + // explain + if (aggregateCommand.getExplain().has_value()) { + bob.append(AggregateCommandRequest::kExplainFieldName, true); + } + + // allowDiskUse + if (auto param = aggregateCommand.getAllowDiskUse(); param.has_value()) { + bob.append(AggregateCommandRequest::kAllowDiskUseFieldName, param.value_or(false)); + } + + // collation + if (auto param = aggregateCommand.getCollation()) { + bob.append(AggregateCommandRequest::kCollationFieldName, param.get()); + } + + // hint + if (auto hint = aggregateCommand.getHint()) { + bob.append(AggregateCommandRequest::kHintFieldName, + extractHintShape(hint.get(), opts, false)); + } + + // let + if (auto letSpec = aggregateCommand.getLet()) { + auto redactedObj = extractLetSpecShape(letSpec.get(), opts, expCtx); + auto ownedObj = redactedObj.getOwned(); + bob.append(FindCommandRequest::kLetFieldName, std::move(ownedObj)); + } + return bob.obj(); +} } // namespace mongo::query_shape diff --git a/src/mongo/db/query/query_shape.h b/src/mongo/db/query/query_shape.h index 4576739d671..e27b4795a29 100644 --- a/src/mongo/db/query/query_shape.h +++ b/src/mongo/db/query/query_shape.h @@ -30,6 +30,8 @@ #pragma once #include "mongo/db/matcher/expression.h" +#include "mongo/db/pipeline/aggregate_command_gen.h" +#include "mongo/db/query/find_command_gen.h" #include "mongo/db/query/query_request_helper.h" namespace mongo::query_shape { @@ -59,12 +61,15 @@ BSONObj representativePredicateShape( const MatchExpression* predicate, std::function<std::string(StringData)> identifierRedactionPolicy); -BSONObj sortShape(const BSONObj& sortSpec, - const boost::intrusive_ptr<ExpressionContext>& expCtx, - const SerializationOptions& opts); +BSONObj extractSortShape(const BSONObj& sortSpec, + const boost::intrusive_ptr<ExpressionContext>& expCtx, + const SerializationOptions& opts); BSONObj extractQueryShape(const FindCommandRequest& findCommand, const SerializationOptions& opts, const boost::intrusive_ptr<ExpressionContext>& expCtx); - +BSONObj extractQueryShape(const AggregateCommandRequest& aggregateCommand, + const std::vector<BSONObj>& serializedPipeline, + const SerializationOptions& opts, + const boost::intrusive_ptr<ExpressionContext>& expCtx); } // namespace mongo::query_shape diff --git a/src/mongo/db/query/query_shape_test.cpp b/src/mongo/db/query/query_shape_test.cpp index e0e02f73ac1..c62b53652d1 100644 --- a/src/mongo/db/query/query_shape_test.cpp +++ b/src/mongo/db/query/query_shape_test.cpp @@ -528,7 +528,7 @@ TEST(SortPatternShape, NormalSortPattern) { opts.replacementForLiteralArgs = query_shape::kLiteralArgString; ASSERT_BSONOBJ_EQ_AUTO( // NOLINT R"({"a.b.c":1,"foo":-1})", - query_shape::sortShape(fromjson(R"({"a.b.c": 1, "foo": -1})"), expCtx, opts)); + query_shape::extractSortShape(fromjson(R"({"a.b.c": 1, "foo": -1})"), expCtx, opts)); } TEST(SortPatternShape, NaturalSortPattern) { @@ -538,10 +538,10 @@ TEST(SortPatternShape, NaturalSortPattern) { opts.replacementForLiteralArgs = query_shape::kLiteralArgString; ASSERT_BSONOBJ_EQ_AUTO( // NOLINT R"({$natural: 1})", - query_shape::sortShape(fromjson(R"({$natural: 1})"), expCtx, opts)); + query_shape::extractSortShape(fromjson(R"({$natural: 1})"), expCtx, opts)); ASSERT_BSONOBJ_EQ_AUTO( // NOLINT R"({$natural: -1})", - query_shape::sortShape(fromjson(R"({$natural: -1})"), expCtx, opts)); + query_shape::extractSortShape(fromjson(R"({$natural: -1})"), expCtx, opts)); } TEST(SortPatternShape, NaturalSortPatternWithMeta) { @@ -551,7 +551,7 @@ TEST(SortPatternShape, NaturalSortPatternWithMeta) { opts.replacementForLiteralArgs = query_shape::kLiteralArgString; ASSERT_BSONOBJ_EQ_AUTO( // NOLINT R"({$natural: 1, x: '?'})", - query_shape::sortShape( + query_shape::extractSortShape( fromjson(R"({$natural: 1, x: {$meta: "textScore"}})"), expCtx, opts)); } @@ -562,7 +562,8 @@ TEST(SortPatternShape, MetaPatternWithoutNatural) { opts.replacementForLiteralArgs = query_shape::kLiteralArgString; ASSERT_BSONOBJ_EQ_AUTO( // NOLINT R"({"normal":1,"$computed1":{"$meta":"textScore"}})", - query_shape::sortShape(fromjson(R"({normal: 1, x: {$meta: "textScore"}})"), expCtx, opts)); + query_shape::extractSortShape( + fromjson(R"({normal: 1, x: {$meta: "textScore"}})"), expCtx, opts)); } // Here we have one test to ensure that the redaction policy is accepted and applied in the @@ -576,12 +577,12 @@ TEST(SortPatternShape, RespectsRedactionPolicy) { opts.identifierRedactionPolicy = redactFieldNameForTest; ASSERT_BSONOBJ_EQ_AUTO( // NOLINT R"({"REDACT_normal":1,"REDACT_y":1})", - query_shape::sortShape(fromjson(R"({normal: 1, y: 1})"), expCtx, opts)); + query_shape::extractSortShape(fromjson(R"({normal: 1, y: 1})"), expCtx, opts)); // No need to redact $natural. ASSERT_BSONOBJ_EQ_AUTO( // NOLINT R"({"$natural":1,"REDACT_y":1})", - query_shape::sortShape(fromjson(R"({$natural: 1, y: 1})"), expCtx, opts)); + query_shape::extractSortShape(fromjson(R"({$natural: 1, y: 1})"), expCtx, opts)); } TEST(QueryShapeIDL, ShapifyIDLStruct) { diff --git a/src/mongo/db/query/request_shapifier.h b/src/mongo/db/query/request_shapifier.h new file mode 100644 index 00000000000..2d12ede90d7 --- /dev/null +++ b/src/mongo/db/query/request_shapifier.h @@ -0,0 +1,64 @@ +/** + * Copyright (C) 2023-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/bson/bsonobj.h" +#include "mongo/db/pipeline/expression_context.h" +#include "mongo/db/query/serialization_options.h" +#include "mongo/rpc/metadata/client_metadata.h" + +namespace mongo::telemetry { + +/** + * An abstract base class to handle query shapification for telemetry. Each request type should + * define its own shapification strategy in its implementation of makeTelemetryKey(), and then a + * request should be registered with telemetry via telemetry::registerRequest(RequestShapifier). + */ +class RequestShapifier { +public: + virtual ~RequestShapifier() = default; + virtual BSONObj makeTelemetryKey( + const SerializationOptions& opts, + const boost::intrusive_ptr<ExpressionContext>& expCtx) const = 0; + +protected: + RequestShapifier(OperationContext* opCtx, + const boost::optional<std::string> applicationName = boost::none) + : _applicationName(applicationName) { + if (!_applicationName) { + if (auto metadata = ClientMetadata::get(opCtx->getClient())) { + _applicationName = metadata->getApplicationName().toString(); + } + } + } + + boost::optional<std::string> _applicationName; +}; +} // namespace mongo::telemetry diff --git a/src/mongo/db/query/telemetry.cpp b/src/mongo/db/query/telemetry.cpp index 17eb6d5c0b2..7d8f62c32d4 100644 --- a/src/mongo/db/query/telemetry.cpp +++ b/src/mongo/db/query/telemetry.cpp @@ -39,6 +39,8 @@ #include "mongo/db/pipeline/aggregate_command_gen.h" #include "mongo/db/pipeline/process_interface/stub_mongo_process_interface.h" #include "mongo/db/query/find_command_gen.h" +// TODO SERVER-76557 remove include of find_request_shapifier +#include "mongo/db/query/find_request_shapifier.h" #include "mongo/db/query/plan_explainer.h" #include "mongo/db/query/projection_ast_util.h" #include "mongo/db/query/projection_parser.h" @@ -77,6 +79,7 @@ boost::optional<std::string> getApplicationName(const OperationContext* opCtx) { } } // namespace +// TODO SERVER-76557 can remove this makeTelemetryKey BSONObj makeTelemetryKey(const FindCommandRequest& findCommand, const SerializationOptions& opts, const boost::intrusive_ptr<ExpressionContext>& expCtx, @@ -411,6 +414,7 @@ BSONObj TelemetryMetrics::redactKey(const BSONObj& key, expCtx->maxFeatureCompatibilityVersion = boost::none; // Ensure all features are allowed. expCtx->stopExpressionCounters(); + // TODO SERVER-76557 call makeTelemetryKey thru FindRequestShapifier kept in telemetry store auto key = makeTelemetryKey(*findCommand, serializationOpts, expCtx, *this); // TODO: SERVER-76526 as part of this ticket, no form of the key (redacted or not) will be // cached with TelemetryMetrics. @@ -471,12 +475,12 @@ BSONObj TelemetryMetrics::redactKey(const BSONObj& key, // is necessary to register the original query during planning and persist it after // execution. -// During planning, registerAggRequest or registerFindRequest are called to serialize the query -// shape and context (together, the telemetry context) and save it to OpDebug. Moreover, as query -// execution may span more than one request/operation and OpDebug does not persist through cursor -// iteration, it is necessary to communicate the telemetry context across operations. In this way, -// the telemetry context is registered to the cursor, so upon getMore() calls, the cursor manager -// passes the telemetry key from the pinned cursor to the new OpDebug. +// During planning, registerRequest is called to serialize the query shape and context (together, +// the telemetry context) and save it to OpDebug. Moreover, as query execution may span more than +// one request/operation and OpDebug does not persist through cursor iteration, it is necessary to +// communicate the telemetry context across operations. In this way, the telemetry context is +// registered to the cursor, so upon getMore() calls, the cursor manager passes the telemetry key +// from the pinned cursor to the new OpDebug. // Once query execution is complete, the telemetry context is grabbed from OpDebug, a telemetry key // is generated from this and metrics are paired to this key in the telemetry store. @@ -517,10 +521,10 @@ void registerAggRequest(const AggregateCommandRequest& request, OperationContext CurOp::get(opCtx)->debug().telemetryStoreKey = telemetryKey.obj(); } -void registerFindRequest(const FindCommandRequest& request, - const NamespaceString& collection, - OperationContext* opCtx, - const boost::intrusive_ptr<ExpressionContext>& expCtx) { +void registerRequest(const RequestShapifier& requestShapifier, + const NamespaceString& collection, + OperationContext* opCtx, + const boost::intrusive_ptr<ExpressionContext>& expCtx) { if (!isTelemetryEnabled(opCtx->getServiceContext())) { return; } @@ -537,7 +541,8 @@ void registerFindRequest(const FindCommandRequest& request, SerializationOptions options; options.literalPolicy = LiteralSerializationPolicy::kToDebugTypeString; options.replacementForLiteralArgs = replacementForLiteralArgs; - CurOp::get(opCtx)->debug().telemetryStoreKey = makeTelemetryKey(request, options, expCtx); + CurOp::get(opCtx)->debug().telemetryStoreKey = + requestShapifier.makeTelemetryKey(options, expCtx); } TelemetryStore& getTelemetryStore(OperationContext* opCtx) { diff --git a/src/mongo/db/query/telemetry.h b/src/mongo/db/query/telemetry.h index 5eead154300..48b6bc8e7e2 100644 --- a/src/mongo/db/query/telemetry.h +++ b/src/mongo/db/query/telemetry.h @@ -36,6 +36,7 @@ #include "mongo/db/namespace_string.h" #include "mongo/db/query/partitioned_cache.h" #include "mongo/db/query/plan_explainer.h" +#include "mongo/db/query/request_shapifier.h" #include "mongo/db/query/util/memory_util.h" #include "mongo/db/service_context.h" #include <cstdint> @@ -210,13 +211,13 @@ TelemetryStore& getTelemetryStore(OperationContext* opCtx); * * Note that calling this affects internal state. It should be called once for each request for * which telemetry may be collected. + * TODO SERVER-76557 remove request-specific registers, leave only registerRequest */ void registerAggRequest(const AggregateCommandRequest& request, OperationContext* opCtx); - -void registerFindRequest(const FindCommandRequest& request, - const NamespaceString& collection, - OperationContext* ocCtx, - const boost::intrusive_ptr<ExpressionContext>& expCtx); +void registerRequest(const RequestShapifier&, + const NamespaceString& collection, + OperationContext* opCtx, + const boost::intrusive_ptr<ExpressionContext>& expCtx); /** * Writes telemetry to the telemetry store for the operation identified by `telemetryKey`. diff --git a/src/mongo/db/query/telemetry_store_test.cpp b/src/mongo/db/query/telemetry_store_test.cpp index 9bc5bd6a64c..4d1c608b0dd 100644 --- a/src/mongo/db/query/telemetry_store_test.cpp +++ b/src/mongo/db/query/telemetry_store_test.cpp @@ -29,7 +29,9 @@ #include "mongo/bson/simple_bsonobj_comparator.h" #include "mongo/db/catalog/rename_collection.h" +#include "mongo/db/pipeline/aggregate_request_shapifier.h" #include "mongo/db/pipeline/expression_context_for_test.h" +#include "mongo/db/query/find_request_shapifier.h" #include "mongo/db/query/query_feature_flags_gen.h" #include "mongo/db/query/telemetry.h" #include "mongo/db/service_context_test_fixture.h" @@ -127,6 +129,8 @@ std::string redactFieldNameForTest(StringData s) { TEST_F(TelemetryStoreTest, CorrectlyRedactsFindCommandRequestAllFields) { auto expCtx = make_intrusive<ExpressionContextForTest>(); FindCommandRequest fcr(NamespaceStringOrUUID(NamespaceString("testDB.testColl"))); + FindRequestShapifier findShapifier(fcr, expCtx->opCtx); + fcr.setFilter(BSON("a" << 1)); SerializationOptions opts; // TODO SERVER-75419 Use only 'literalPolicy.' @@ -135,7 +139,7 @@ TEST_F(TelemetryStoreTest, CorrectlyRedactsFindCommandRequestAllFields) { opts.redactIdentifiers = true; opts.identifierRedactionPolicy = redactFieldNameForTest; - auto redacted = telemetry::makeTelemetryKey(fcr, opts, expCtx); + auto redacted = findShapifier.makeTelemetryKey(opts, expCtx); ASSERT_BSONOBJ_EQ_AUTO( // NOLINT R"({ @@ -156,7 +160,7 @@ TEST_F(TelemetryStoreTest, CorrectlyRedactsFindCommandRequestAllFields) { // Add sort. fcr.setSort(BSON("sortVal" << 1 << "otherSort" << -1)); - redacted = telemetry::makeTelemetryKey(fcr, opts, expCtx); + redacted = findShapifier.makeTelemetryKey(opts, expCtx); ASSERT_BSONOBJ_EQ_AUTO( // NOLINT R"({ "queryShape": { @@ -180,7 +184,7 @@ TEST_F(TelemetryStoreTest, CorrectlyRedactsFindCommandRequestAllFields) { // Add inclusion projection. fcr.setProjection(BSON("e" << true << "f" << true)); - redacted = telemetry::makeTelemetryKey(fcr, opts, expCtx); + redacted = findShapifier.makeTelemetryKey(opts, expCtx); ASSERT_BSONOBJ_EQ_AUTO( // NOLINT R"({ "queryShape": { @@ -212,7 +216,7 @@ TEST_F(TelemetryStoreTest, CorrectlyRedactsFindCommandRequestAllFields) { << "$a" << "var2" << "const1")); - redacted = telemetry::makeTelemetryKey(fcr, opts, expCtx); + redacted = findShapifier.makeTelemetryKey(opts, expCtx); ASSERT_BSONOBJ_EQ_AUTO( // NOLINT R"({ "queryShape": { @@ -247,7 +251,7 @@ TEST_F(TelemetryStoreTest, CorrectlyRedactsFindCommandRequestAllFields) { fcr.setHint(BSON("z" << 1 << "c" << 1)); fcr.setMax(BSON("z" << 25)); fcr.setMin(BSON("z" << 80)); - redacted = telemetry::makeTelemetryKey(fcr, opts, expCtx); + redacted = findShapifier.makeTelemetryKey(opts, expCtx); ASSERT_BSONOBJ_EQ_AUTO( // NOLINT R"({ "queryShape": { @@ -295,7 +299,7 @@ TEST_F(TelemetryStoreTest, CorrectlyRedactsFindCommandRequestAllFields) { fcr.setMaxTimeMS(1000); fcr.setNoCursorTimeout(false); - redacted = telemetry::makeTelemetryKey(fcr, opts, expCtx); + redacted = findShapifier.makeTelemetryKey(opts, expCtx); ASSERT_BSONOBJ_EQ_AUTO( // NOLINT R"({ "queryShape": { @@ -348,6 +352,7 @@ TEST_F(TelemetryStoreTest, CorrectlyRedactsFindCommandRequestAllFields) { fcr.setShowRecordId(true); fcr.setAwaitData(false); fcr.setMirrored(true); + redacted = findShapifier.makeTelemetryKey(opts, expCtx); ASSERT_BSONOBJ_EQ_AUTO( // NOLINT R"({ "queryShape": { @@ -387,14 +392,22 @@ TEST_F(TelemetryStoreTest, CorrectlyRedactsFindCommandRequestAllFields) { "limit": "?number", "skip": "?number", "batchSize": "?number", - "maxTimeMS": "?number" + "maxTimeMS": "?number", + "singleBatch": "?bool", + "allowDiskUse": "?bool", + "showRecordId": "?bool", + "awaitData": "?bool", + "allowPartialResults": "?bool", + "mirrored": "?bool" } })", redacted); } + TEST_F(TelemetryStoreTest, CorrectlyRedactsFindCommandRequestEmptyFields) { auto expCtx = make_intrusive<ExpressionContextForTest>(); FindCommandRequest fcr(NamespaceStringOrUUID(NamespaceString("testDB.testColl"))); + FindRequestShapifier findShapifier(fcr, expCtx->opCtx); fcr.setFilter(BSONObj()); fcr.setSort(BSONObj()); fcr.setProjection(BSONObj()); @@ -403,7 +416,7 @@ TEST_F(TelemetryStoreTest, CorrectlyRedactsFindCommandRequestEmptyFields) { opts.redactIdentifiers = true; opts.identifierRedactionPolicy = redactFieldNameForTest; - auto redacted = telemetry::makeTelemetryKey(fcr, opts, expCtx); + auto redacted = findShapifier.makeTelemetryKey(opts, expCtx); ASSERT_BSONOBJ_EQ_AUTO( // NOLINT R"({ "queryShape": { @@ -421,6 +434,8 @@ TEST_F(TelemetryStoreTest, CorrectlyRedactsFindCommandRequestEmptyFields) { TEST_F(TelemetryStoreTest, CorrectlyRedactsHintsWithOptions) { auto expCtx = make_intrusive<ExpressionContextForTest>(); FindCommandRequest fcr(NamespaceStringOrUUID(NamespaceString("testDB.testColl"))); + FindRequestShapifier findShapifier(fcr, expCtx->opCtx); + fcr.setFilter(BSON("b" << 1)); SerializationOptions opts; // TODO SERVER-75419 Use only 'literalPolicy.' @@ -430,7 +445,7 @@ TEST_F(TelemetryStoreTest, CorrectlyRedactsHintsWithOptions) { fcr.setMax(BSON("z" << 25)); fcr.setMin(BSON("z" << 80)); - auto redacted = telemetry::makeTelemetryKey(fcr, opts, expCtx); + auto redacted = findShapifier.makeTelemetryKey(opts, expCtx); ASSERT_BSONOBJ_EQ_AUTO( // NOLINT R"({ @@ -463,7 +478,7 @@ TEST_F(TelemetryStoreTest, CorrectlyRedactsHintsWithOptions) { fcr.setHint(BSON("$hint" << "z")); - redacted = telemetry::makeTelemetryKey(fcr, opts, expCtx); + redacted = findShapifier.makeTelemetryKey(opts, expCtx); ASSERT_BSONOBJ_EQ_AUTO( // NOLINT R"({ "queryShape": { @@ -495,7 +510,7 @@ TEST_F(TelemetryStoreTest, CorrectlyRedactsHintsWithOptions) { opts.redactIdentifiers = true; opts.replacementForLiteralArgs = boost::none; opts.literalPolicy = LiteralSerializationPolicy::kUnchanged; - redacted = telemetry::makeTelemetryKey(fcr, opts, expCtx); + redacted = findShapifier.makeTelemetryKey(opts, expCtx); ASSERT_BSONOBJ_EQ_AUTO( // NOLINT R"({ "queryShape": { @@ -526,7 +541,7 @@ TEST_F(TelemetryStoreTest, CorrectlyRedactsHintsWithOptions) { // TODO SERVER-75419 Use only 'literalPolicy.' opts.replacementForLiteralArgs = "?"; opts.literalPolicy = LiteralSerializationPolicy::kToDebugTypeString; - redacted = telemetry::makeTelemetryKey(fcr, opts, expCtx); + redacted = findShapifier.makeTelemetryKey(opts, expCtx); ASSERT_BSONOBJ_EQ_AUTO( // NOLINT R"({ "queryShape": { @@ -556,7 +571,7 @@ TEST_F(TelemetryStoreTest, CorrectlyRedactsHintsWithOptions) { // Test that $natural comes through unmodified. fcr.setHint(BSON("$natural" << -1)); - redacted = telemetry::makeTelemetryKey(fcr, opts, expCtx); + redacted = findShapifier.makeTelemetryKey(opts, expCtx); ASSERT_BSONOBJ_EQ_AUTO( // NOLINT R"({ "queryShape": { @@ -627,8 +642,8 @@ TEST_F(TelemetryStoreTest, DefinesLetVariables) { })", redacted); - // Now be sure the variable names are redacted. We don't currently expose a different way to do - // the hashing, so we'll just stick with the big long strings here for now. + // Now be sure the variable names are redacted. We don't currently expose a different + // way to do the hashing, so we'll just stick with the big long strings here for now. redactIdentifiers = true; redacted = testMetrics.redactKey(cmdObj, redactIdentifiers, std::string{}, opCtx.get()); ASSERT_BSONOBJ_EQ_AUTO( // NOLINT @@ -660,4 +675,404 @@ TEST_F(TelemetryStoreTest, DefinesLetVariables) { })", redacted); } + +TEST_F(TelemetryStoreTest, CorrectlyRedactsAggregateCommandRequestAllFieldsSimplePipeline) { + auto expCtx = make_intrusive<ExpressionContextForTest>(); + AggregateCommandRequest acr(NamespaceString("testDB.testColl")); + auto matchStage = fromjson(R"({ + $match: { + foo: { $in: ["a", "b"] }, + bar: { $gte: { $date: "2022-01-01T00:00:00Z" } } + } + })"); + auto unwindStage = fromjson("{$unwind: '$x'}"); + auto groupStage = fromjson(R"({ + $group: { + _id: "$_id", + c: { $first: "$d.e" }, + f: { $sum: 1 } + } + })"); + auto limitStage = fromjson("{$limit: 10}"); + auto outStage = fromjson(R"({$out: 'outColl'})"); + auto rawPipeline = {matchStage, unwindStage, groupStage, limitStage, outStage}; + acr.setPipeline(rawPipeline); + auto pipeline = Pipeline::parse(rawPipeline, expCtx); + AggregateRequestShapifier aggShapifier(acr, *pipeline, expCtx->opCtx); + + SerializationOptions opts; + opts.literalPolicy = LiteralSerializationPolicy::kUnchanged; + opts.redactIdentifiers = false; + opts.identifierRedactionPolicy = redactFieldNameForTest; + + auto shapified = aggShapifier.makeTelemetryKey(opts, expCtx); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "queryShape": { + "ns": { + "db": "testDB", + "coll": "testColl" + }, + "aggregate": "testColl", + "pipeline": [ + { + "$match": { + "foo": { + "$in": [ + "a", + "b" + ] + }, + "bar": { + "$gte": {"$date":"2022-01-01T00:00:00.000Z"} + } + } + }, + { + "$unwind": { + "path": "$x" + } + }, + { + "$group": { + "_id": "$_id", + "c": { + "$first": "$d.e" + }, + "f": { + "$sum": { + "$const": 1 + } + } + } + }, + { + "$limit": 10 + }, + { + "$out": { + "coll": "outColl", + "db": "test" + } + } + ] + } + })", + shapified); + + // TODO SERVER-75419 Use only 'literalPolicy.' + opts.replacementForLiteralArgs = "?"; + opts.literalPolicy = LiteralSerializationPolicy::kToDebugTypeString; + opts.redactIdentifiers = true; + shapified = aggShapifier.makeTelemetryKey(opts, expCtx); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "queryShape": { + "ns": { + "db": "HASH<testDB>", + "coll": "HASH<testColl>" + }, + "aggregate": "HASH<testColl>", + "pipeline": [ + { + "$match": { + "$and": [ + { + "HASH<foo>": { + "$in": "?array<?string>" + } + }, + { + "HASH<bar>": { + "$gte": "?date" + } + } + ] + } + }, + { + "$unwind": { + "path": "$HASH<x>" + } + }, + { + "$group": { + "_id": "$HASH<_id>", + "HASH<c>": { + "$first": "$HASH<d>.HASH<e>" + }, + "HASH<f>": { + "$sum": "?number" + } + } + }, + { + "$limit": "?" + }, + { + "$out": { + "coll": "HASH<outColl>", + "db": "HASH<test>" + } + } + ] + } + })", + shapified); + + // Add the fields that shouldn't be abstracted. + acr.setExplain(ExplainOptions::Verbosity::kExecStats); + acr.setAllowDiskUse(false); + acr.setHint(BSON("z" << 1 << "c" << 1)); + acr.setCollation(BSON("locale" + << "simple")); + shapified = aggShapifier.makeTelemetryKey(opts, expCtx); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "queryShape": { + "ns": { + "db": "HASH<testDB>", + "coll": "HASH<testColl>" + }, + "aggregate": "HASH<testColl>", + "pipeline": [ + { + "$match": { + "$and": [ + { + "HASH<foo>": { + "$in": "?array<?string>" + } + }, + { + "HASH<bar>": { + "$gte": "?date" + } + } + ] + } + }, + { + "$unwind": { + "path": "$HASH<x>" + } + }, + { + "$group": { + "_id": "$HASH<_id>", + "HASH<c>": { + "$first": "$HASH<d>.HASH<e>" + }, + "HASH<f>": { + "$sum": "?number" + } + } + }, + { + "$limit": "?" + }, + { + "$out": { + "coll": "HASH<outColl>", + "db": "HASH<test>" + } + } + ], + "explain": true, + "allowDiskUse": false, + "collation": { + "locale": "simple" + }, + "hint": { + "HASH<z>": 1, + "HASH<c>": 1 + } + } + })", + shapified); + + // Add let. + acr.setLet(BSON("var1" + << "$foo" + << "var2" + << "bar")); + shapified = aggShapifier.makeTelemetryKey(opts, expCtx); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "queryShape": { + "ns": { + "db": "HASH<testDB>", + "coll": "HASH<testColl>" + }, + "aggregate": "HASH<testColl>", + "pipeline": [ + { + "$match": { + "$and": [ + { + "HASH<foo>": { + "$in": "?array<?string>" + } + }, + { + "HASH<bar>": { + "$gte": "?date" + } + } + ] + } + }, + { + "$unwind": { + "path": "$HASH<x>" + } + }, + { + "$group": { + "_id": "$HASH<_id>", + "HASH<c>": { + "$first": "$HASH<d>.HASH<e>" + }, + "HASH<f>": { + "$sum": "?number" + } + } + }, + { + "$limit": "?" + }, + { + "$out": { + "coll": "HASH<outColl>", + "db": "HASH<test>" + } + } + ], + "explain": true, + "allowDiskUse": false, + "collation": { + "locale": "simple" + }, + "hint": { + "HASH<z>": 1, + "HASH<c>": 1 + }, + "let": { + "HASH<var1>": "$HASH<foo>", + "HASH<var2>": "?string" + } + } + })", + shapified); + + // Add the fields that should be abstracted. + auto cursorOptions = SimpleCursorOptions(); + cursorOptions.setBatchSize(10); + acr.setCursor(cursorOptions); + acr.setMaxTimeMS(500); + acr.setBypassDocumentValidation(true); + expCtx->opCtx->setComment(BSON("comment" + << "note to self")); + shapified = aggShapifier.makeTelemetryKey(opts, expCtx); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "queryShape": { + "ns": { + "db": "HASH<testDB>", + "coll": "HASH<testColl>" + }, + "aggregate": "HASH<testColl>", + "pipeline": [ + { + "$match": { + "$and": [ + { + "HASH<foo>": { + "$in": "?array<?string>" + } + }, + { + "HASH<bar>": { + "$gte": "?date" + } + } + ] + } + }, + { + "$unwind": { + "path": "$HASH<x>" + } + }, + { + "$group": { + "_id": "$HASH<_id>", + "HASH<c>": { + "$first": "$HASH<d>.HASH<e>" + }, + "HASH<f>": { + "$sum": "?number" + } + } + }, + { + "$limit": "?" + }, + { + "$out": { + "coll": "HASH<outColl>", + "db": "HASH<test>" + } + } + ], + "explain": true, + "allowDiskUse": false, + "collation": { + "locale": "simple" + }, + "hint": { + "HASH<z>": 1, + "HASH<c>": 1 + }, + "let": { + "HASH<var1>": "$HASH<foo>", + "HASH<var2>": "?string" + } + }, + "cursor": { + "batchSize": "?number" + }, + "maxTimeMS": "?number", + "bypassDocumentValidation": "?bool" + })", + shapified); +} +TEST_F(TelemetryStoreTest, CorrectlyRedactsAggregateCommandRequestEmptyFields) { + auto expCtx = make_intrusive<ExpressionContextForTest>(); + AggregateCommandRequest acr(NamespaceString("testDB.testColl")); + acr.setPipeline({}); + auto pipeline = Pipeline::parse({}, expCtx); + AggregateRequestShapifier aggShapifier(acr, *pipeline, expCtx->opCtx); + + SerializationOptions opts; + // TODO SERVER-75419 Use only 'literalPolicy.' + opts.replacementForLiteralArgs = "?"; + opts.literalPolicy = LiteralSerializationPolicy::kToDebugTypeString; + opts.redactIdentifiers = true; + opts.identifierRedactionPolicy = redactFieldNameForTest; + + auto shapified = aggShapifier.makeTelemetryKey(opts, expCtx); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "queryShape": { + "ns": { + "db": "HASH<testDB>", + "coll": "HASH<testColl>" + }, + "aggregate": "HASH<testColl>", + "pipeline": [] + } + })", + shapified); // NOLINT (test auto-update) +} } // namespace mongo::telemetry |