diff options
author | Maddie Zechar <mez2113@columbia.edu> | 2023-04-26 20:38:19 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-04-26 22:42:46 +0000 |
commit | b9de639b5fca5794c83314b1a302008569d51bd3 (patch) | |
tree | 631fc1a293057ac6ad4040ca0e368ad152587f1a /src/mongo/db/query/query_shape.cpp | |
parent | 40bdd0800be643d444094055ab86250eef0802ab (diff) | |
download | mongo-b9de639b5fca5794c83314b1a302008569d51bd3.tar.gz |
SERVER-75156 separate queryShape from telemetryKey for find requests
Diffstat (limited to 'src/mongo/db/query/query_shape.cpp')
-rw-r--r-- | src/mongo/db/query/query_shape.cpp | 210 |
1 files changed, 210 insertions, 0 deletions
diff --git a/src/mongo/db/query/query_shape.cpp b/src/mongo/db/query/query_shape.cpp index 9136af59ab1..ec5179ab96d 100644 --- a/src/mongo/db/query/query_shape.cpp +++ b/src/mongo/db/query/query_shape.cpp @@ -31,6 +31,13 @@ #include "query_request_helper.h" #include "sort_pattern.h" +#include "mongo/base/status.h" +#include "mongo/db/query/find_command_gen.h" +#include "mongo/db/query/plan_explainer.h" +#include "mongo/db/query/projection_ast_util.h" +#include "mongo/db/query/projection_parser.h" +#include "mongo/db/query/sort_pattern.h" + namespace mongo::query_shape { BSONObj debugPredicateShape(const MatchExpression* predicate) { @@ -95,4 +102,207 @@ BSONObj sortShape(const BSONObj& sortSpec, return bob.obj(); } +static std::string hintSpecialField = "$hint"; +void addLiteralFields(BSONObjBuilder* bob, + const FindCommandRequest& findCommand, + const SerializationOptions& opts) { + + if (auto limit = findCommand.getLimit()) { + opts.appendLiteral( + bob, FindCommandRequest::kLimitFieldName, static_cast<long long>(*limit)); + } + if (auto skip = findCommand.getSkip()) { + opts.appendLiteral(bob, FindCommandRequest::kSkipFieldName, static_cast<long long>(*skip)); + } + if (auto batchSize = findCommand.getBatchSize()) { + opts.appendLiteral( + bob, FindCommandRequest::kBatchSizeFieldName, static_cast<long long>(*batchSize)); + } + if (auto maxTimeMs = findCommand.getMaxTimeMS()) { + opts.appendLiteral(bob, FindCommandRequest::kMaxTimeMSFieldName, *maxTimeMs); + } + if (auto noCursorTimeout = findCommand.getNoCursorTimeout()) { + opts.appendLiteral( + bob, FindCommandRequest::kNoCursorTimeoutFieldName, bool(noCursorTimeout)); + } +} + +static std::vector< + std::pair<StringData, std::function<const OptionalBool(const FindCommandRequest&)>>> + boolArgMap = { + {FindCommandRequest::kSingleBatchFieldName, &FindCommandRequest::getSingleBatch}, + {FindCommandRequest::kAllowDiskUseFieldName, &FindCommandRequest::getAllowDiskUse}, + {FindCommandRequest::kReturnKeyFieldName, &FindCommandRequest::getReturnKey}, + {FindCommandRequest::kShowRecordIdFieldName, &FindCommandRequest::getShowRecordId}, + {FindCommandRequest::kTailableFieldName, &FindCommandRequest::getTailable}, + {FindCommandRequest::kAwaitDataFieldName, &FindCommandRequest::getAwaitData}, + {FindCommandRequest::kAllowPartialResultsFieldName, + &FindCommandRequest::getAllowPartialResults}, + {FindCommandRequest::kMirroredFieldName, &FindCommandRequest::getMirrored}, +}; +std::vector<std::pair<StringData, std::function<const BSONObj(const FindCommandRequest&)>>> + objArgMap = { + {FindCommandRequest::kCollationFieldName, &FindCommandRequest::getCollation}, + +}; + +void addRemainingFindCommandFields(BSONObjBuilder* bob, + const FindCommandRequest& findCommand, + const SerializationOptions& opts) { + for (auto [fieldName, getterFunction] : boolArgMap) { + auto optBool = getterFunction(findCommand); + if (optBool.has_value()) { + opts.appendLiteral(bob, fieldName, optBool.value_or(false)); + } + } + auto collation = findCommand.getCollation(); + if (!collation.isEmpty()) { + opts.appendLiteral(bob, FindCommandRequest::kCollationFieldName, collation); + } +} +BSONObj redactHintComponent(BSONObj obj, const SerializationOptions& opts, bool redactValues) { + BSONObjBuilder bob; + for (BSONElement elem : obj) { + if (hintSpecialField.compare(elem.fieldName()) == 0) { + tassert(7421703, + "Hinted field must be a string with $hint operator", + elem.type() == BSONType::String); + bob.append(hintSpecialField, opts.serializeFieldPathFromString(elem.String())); + continue; + } + + // $natural doesn't need to be redacted. + if (elem.fieldNameStringData().compare(query_request_helper::kNaturalSortField) == 0) { + bob.append(elem); + continue; + } + + if (opts.replacementForLiteralArgs && redactValues) { + bob.append(opts.serializeFieldPathFromString(elem.fieldName()), + opts.replacementForLiteralArgs.get()); + } else { + bob.appendAs(elem, opts.serializeFieldPathFromString(elem.fieldName())); + } + } + return bob.obj(); +} + +/** + * In a let specification all field names are variable names, and all values are either expressions + * or constants. + */ +BSONObj redactLetSpec(BSONObj letSpec, + const SerializationOptions& opts, + boost::intrusive_ptr<ExpressionContext> expCtx) { + + BSONObjBuilder bob; + for (BSONElement elem : letSpec) { + auto redactedValue = + Expression::parseOperand(expCtx.get(), elem, expCtx->variablesParseState) + ->serialize(opts); + // Note that this will throw on deeply nested let variables. + redactedValue.addToBsonObj(&bob, opts.serializeFieldPathFromString(elem.fieldName())); + } + return bob.obj(); +} + +BSONObj extractQueryShape(const FindCommandRequest& findCommand, + const SerializationOptions& opts, + const boost::intrusive_ptr<ExpressionContext>& expCtx) { + BSONObjBuilder bob; + // Serialize the namespace as part of the query shape. + { + BSONObjBuilder cmdNs = bob.subobjStart("cmdNs"); + auto ns = findCommand.getNamespaceOrUUID(); + if (ns.nss()) { + auto nss = ns.nss().value(); + if (nss.tenantId()) { + cmdNs.append("tenantId", + opts.serializeIdentifier(nss.tenantId().value().toString())); + } + cmdNs.append("db", opts.serializeIdentifier(nss.db())); + cmdNs.append("coll", opts.serializeIdentifier(nss.coll())); + } else { + cmdNs.append("uuid", opts.serializeIdentifier(ns.uuid()->toString())); + } + cmdNs.done(); + } + + // Redact the namespace of the command. + { + auto nssOrUUID = findCommand.getNamespaceOrUUID(); + std::string toSerialize; + if (nssOrUUID.uuid()) { + toSerialize = opts.serializeIdentifier(nssOrUUID.toString()); + } else { + // Database is set at the command level, only serialize the collection here. + toSerialize = opts.serializeIdentifier(nssOrUUID.nss()->coll()); + } + bob.append(FindCommandRequest::kCommandName, toSerialize); + } + + std::unique_ptr<MatchExpression> filterExpr; + // Filter. + { + auto filter = findCommand.getFilter(); + filterExpr = uassertStatusOKWithContext( + MatchExpressionParser::parse(findCommand.getFilter(), + expCtx, + ExtensionsCallbackNoop(), + MatchExpressionParser::kAllowAllSpecialFeatures), + "Failed to parse 'filter' option when making telemetry key"); + bob.append(FindCommandRequest::kFilterFieldName, filterExpr->serialize(opts)); + } + + // Let Spec. + if (auto letSpec = findCommand.getLet()) { + auto redactedObj = redactLetSpec(letSpec.get(), opts, expCtx); + auto ownedObj = redactedObj.getOwned(); + bob.append(FindCommandRequest::kLetFieldName, std::move(ownedObj)); + } + + if (!findCommand.getProjection().isEmpty()) { + // Parse to Projection + auto projection = + projection_ast::parseAndAnalyze(expCtx, + findCommand.getProjection(), + filterExpr.get(), + findCommand.getFilter(), + ProjectionPolicies::findProjectionPolicies()); + + bob.append(FindCommandRequest::kProjectionFieldName, + projection_ast::serialize(*projection.root(), opts)); + } + + // Assume the hint is correct and contains field names. It is possible that this hint + // doesn't actually represent an index, but we can't detect that here. + // Hint, max, and min won't serialize if the object is empty. + if (!findCommand.getHint().isEmpty()) { + bob.append(FindCommandRequest::kHintFieldName, + redactHintComponent(findCommand.getHint(), opts, false)); + // Max/Min aren't valid without hint. + if (!findCommand.getMax().isEmpty()) { + bob.append(FindCommandRequest::kMaxFieldName, + redactHintComponent(findCommand.getMax(), opts, true)); + } + if (!findCommand.getMin().isEmpty()) { + bob.append(FindCommandRequest::kMinFieldName, + redactHintComponent(findCommand.getMin(), opts, true)); + } + } + + // Sort. + if (!findCommand.getSort().isEmpty()) { + bob.append(FindCommandRequest::kSortFieldName, + query_shape::sortShape(findCommand.getSort(), expCtx, opts)); + } + + // Fields for literal redaction. Adds limit, skip, batchSize, maxTimeMS, and noCursorTimeOut + addLiteralFields(&bob, findCommand, opts); + + // Add the fields that require no redaction. + addRemainingFindCommandFields(&bob, findCommand, opts); + + return bob.obj(); +} } // namespace mongo::query_shape |