summaryrefslogtreecommitdiff
path: root/src/mongo/db/query/query_shape.cpp
diff options
context:
space:
mode:
author Maddie Zechar <mez2113@columbia.edu> 2023-04-26 20:38:19 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2023-04-26 22:42:46 +0000
commit b9de639b5fca5794c83314b1a302008569d51bd3 (patch)
tree 631fc1a293057ac6ad4040ca0e368ad152587f1a /src/mongo/db/query/query_shape.cpp
parent 40bdd0800be643d444094055ab86250eef0802ab (diff)
download mongo-b9de639b5fca5794c83314b1a302008569d51bd3.tar.gz
SERVER-75156 separate queryShape from telemetryKey for find requests
Diffstat (limited to 'src/mongo/db/query/query_shape.cpp')
-rw-r--r--src/mongo/db/query/query_shape.cpp210
1 files changed, 210 insertions, 0 deletions
diff --git a/src/mongo/db/query/query_shape.cpp b/src/mongo/db/query/query_shape.cpp
index 9136af59ab1..ec5179ab96d 100644
--- a/src/mongo/db/query/query_shape.cpp
+++ b/src/mongo/db/query/query_shape.cpp
@@ -31,6 +31,13 @@
#include "query_request_helper.h"
#include "sort_pattern.h"
+#include "mongo/base/status.h"
+#include "mongo/db/query/find_command_gen.h"
+#include "mongo/db/query/plan_explainer.h"
+#include "mongo/db/query/projection_ast_util.h"
+#include "mongo/db/query/projection_parser.h"
+#include "mongo/db/query/sort_pattern.h"
+
namespace mongo::query_shape {
BSONObj debugPredicateShape(const MatchExpression* predicate) {
@@ -95,4 +102,207 @@ BSONObj sortShape(const BSONObj& sortSpec,
return bob.obj();
}
+// Field name of the special `{$hint: <string>}` element recognised by redactHintComponent().
+// Declared const: it is only ever read, so there is no reason to leave it as mutable global state.
+static const std::string hintSpecialField = "$hint";
+/**
+ * Appends the literal-valued options of 'findCommand' to 'bob' through opts.appendLiteral(), in
+ * this order: limit, skip, batchSize, maxTimeMS, noCursorTimeout.
+ *
+ * limit, skip, batchSize and maxTimeMS are appended only when their getter yields a value; the
+ * first three are converted to long long first. noCursorTimeout is appended only when the
+ * getter's result converts to true, and the appended value is that same bool conversion.
+ */
+void addLiteralFields(BSONObjBuilder* bob,
+                      const FindCommandRequest& findCommand,
+                      const SerializationOptions& opts) {
+    const auto maybeLimit = findCommand.getLimit();
+    if (maybeLimit) {
+        const auto limitValue = static_cast<long long>(*maybeLimit);
+        opts.appendLiteral(bob, FindCommandRequest::kLimitFieldName, limitValue);
+    }
+
+    const auto maybeSkip = findCommand.getSkip();
+    if (maybeSkip) {
+        const auto skipValue = static_cast<long long>(*maybeSkip);
+        opts.appendLiteral(bob, FindCommandRequest::kSkipFieldName, skipValue);
+    }
+
+    const auto maybeBatchSize = findCommand.getBatchSize();
+    if (maybeBatchSize) {
+        const auto batchSizeValue = static_cast<long long>(*maybeBatchSize);
+        opts.appendLiteral(bob, FindCommandRequest::kBatchSizeFieldName, batchSizeValue);
+    }
+
+    const auto maybeMaxTimeMs = findCommand.getMaxTimeMS();
+    if (maybeMaxTimeMs) {
+        opts.appendLiteral(bob, FindCommandRequest::kMaxTimeMSFieldName, *maybeMaxTimeMs);
+    }
+
+    const auto noCursorTimeoutFlag = findCommand.getNoCursorTimeout();
+    if (noCursorTimeoutFlag) {
+        opts.appendLiteral(bob,
+                           FindCommandRequest::kNoCursorTimeoutFieldName,
+                           static_cast<bool>(noCursorTimeoutFlag));
+    }
+}
+
+// Table of optional-boolean find command options: each entry pairs the option's field name with
+// the FindCommandRequest getter that reads it. addRemainingFindCommandFields() walks this table
+// and appends every option that has a value. Const because the table is never modified after
+// construction.
+static const std::vector<
+    std::pair<StringData, std::function<const OptionalBool(const FindCommandRequest&)>>>
+    boolArgMap = {
+        {FindCommandRequest::kSingleBatchFieldName, &FindCommandRequest::getSingleBatch},
+        {FindCommandRequest::kAllowDiskUseFieldName, &FindCommandRequest::getAllowDiskUse},
+        {FindCommandRequest::kReturnKeyFieldName, &FindCommandRequest::getReturnKey},
+        {FindCommandRequest::kShowRecordIdFieldName, &FindCommandRequest::getShowRecordId},
+        {FindCommandRequest::kTailableFieldName, &FindCommandRequest::getTailable},
+        {FindCommandRequest::kAwaitDataFieldName, &FindCommandRequest::getAwaitData},
+        {FindCommandRequest::kAllowPartialResultsFieldName,
+         &FindCommandRequest::getAllowPartialResults},
+        {FindCommandRequest::kMirroredFieldName, &FindCommandRequest::getMirrored},
+};
+
+// Same layout for BSONObj-valued options.
+// NOTE(review): nothing in the visible part of this file reads objArgMap (collation is appended
+// directly by addRemainingFindCommandFields()). Its declaration is intentionally left unchanged,
+// since adding const/static would alter its linkage; confirm whether it can simply be removed.
+std::vector<std::pair<StringData, std::function<const BSONObj(const FindCommandRequest&)>>>
+    objArgMap = {
+        {FindCommandRequest::kCollationFieldName, &FindCommandRequest::getCollation},
+};
+
+/**
+ * Appends the remaining find command options to 'bob' through opts.appendLiteral():
+ *  - every option listed in boolArgMap whose getter reports has_value(), in table order;
+ *  - the collation, when it is not empty.
+ */
+void addRemainingFindCommandFields(BSONObjBuilder* bob,
+                                   const FindCommandRequest& findCommand,
+                                   const SerializationOptions& opts) {
+    // Bind each table entry by const reference: iterating by value would copy the pair, and with
+    // it the std::function getter, on every iteration.
+    for (const auto& [fieldName, getterFunction] : boolArgMap) {
+        const auto optBool = getterFunction(findCommand);
+        if (optBool.has_value()) {
+            opts.appendLiteral(bob, fieldName, optBool.value_or(false));
+        }
+    }
+
+    // A const reference is enough here; the collation object is only inspected and appended.
+    const auto& collation = findCommand.getCollation();
+    if (!collation.isEmpty()) {
+        opts.appendLiteral(bob, FindCommandRequest::kCollationFieldName, collation);
+    }
+}
+/**
+ * Builds the redacted form of a hint-like object ('hint', 'max' or 'min'), element by element:
+ *  - a `$hint` element must hold a string (tassert 7421703 otherwise); its field name is kept and
+ *    its string value is passed through opts.serializeFieldPathFromString();
+ *  - an element named query_request_helper::kNaturalSortField is copied unchanged;
+ *  - any other element has its field name passed through opts.serializeFieldPathFromString(). Its
+ *    value is replaced by opts.replacementForLiteralArgs when that is set and 'redactValues' is
+ *    true; otherwise the original value is kept.
+ */
+BSONObj redactHintComponent(BSONObj obj, const SerializationOptions& opts, bool redactValues) {
+    BSONObjBuilder out;
+    for (BSONElement field : obj) {
+        if (hintSpecialField == field.fieldName()) {
+            tassert(7421703,
+                    "Hinted field must be a string with $hint operator",
+                    field.type() == BSONType::String);
+            out.append(hintSpecialField, opts.serializeFieldPathFromString(field.String()));
+        } else if (field.fieldNameStringData().compare(
+                       query_request_helper::kNaturalSortField) == 0) {
+            // $natural doesn't need to be redacted.
+            out.append(field);
+        } else {
+            // Both remaining outcomes use the same redacted field name; only the value differs.
+            const auto redactedName = opts.serializeFieldPathFromString(field.fieldName());
+            if (opts.replacementForLiteralArgs && redactValues) {
+                out.append(redactedName, opts.replacementForLiteralArgs.get());
+            } else {
+                out.appendAs(field, redactedName);
+            }
+        }
+    }
+    return out.obj();
+}
+
+/**
+ * Produces the serialized form of a 'let' specification.
+ *
+ * In a let specification every field name is a variable name, and every value is either an
+ * expression or a constant. Each value is parsed as an expression operand against
+ * expCtx->variablesParseState and serialized again with 'opts'; each variable name is passed
+ * through opts.serializeFieldPathFromString().
+ */
+BSONObj redactLetSpec(BSONObj letSpec,
+                      const SerializationOptions& opts,
+                      boost::intrusive_ptr<ExpressionContext> expCtx) {
+    BSONObjBuilder out;
+    for (BSONElement variable : letSpec) {
+        auto parsedOperand =
+            Expression::parseOperand(expCtx.get(), variable, expCtx->variablesParseState);
+        auto serializedValue = parsedOperand->serialize(opts);
+        // Note that this will throw on deeply nested let variables.
+        serializedValue.addToBsonObj(&out,
+                                     opts.serializeFieldPathFromString(variable.fieldName()));
+    }
+    return out.obj();
+}
+
+/**
+ * Builds the query shape of a find command as a BSON object, serializing each component with
+ * 'opts'. Fields are appended in this order:
+ *  - "cmdNs": sub-object with "tenantId" (when present), "db" and "coll", or "uuid" when the
+ *    request carries no namespace string;
+ *  - FindCommandRequest::kCommandName: the serialized collection name, or the serialized
+ *    NamespaceOrUUID string when a UUID is present;
+ *  - the filter, parsed as a MatchExpression and re-serialized;
+ *  - the let specification, when set;
+ *  - the projection, when not empty;
+ *  - hint, and (only when a hint is present) max and min;
+ *  - sort, when not empty;
+ *  - the fields added by addLiteralFields() and addRemainingFindCommandFields().
+ *
+ * Throws (via uassertStatusOKWithContext) when the filter fails to parse.
+ */
+BSONObj extractQueryShape(const FindCommandRequest& findCommand,
+                          const SerializationOptions& opts,
+                          const boost::intrusive_ptr<ExpressionContext>& expCtx) {
+    BSONObjBuilder bob;
+
+    // Bound by const reference so the NamespaceOrUUID is not copied for each of the two sections
+    // below that read it.
+    const auto& nssOrUUID = findCommand.getNamespaceOrUUID();
+
+    // Serialize the namespace as part of the query shape.
+    {
+        BSONObjBuilder cmdNs = bob.subobjStart("cmdNs");
+        const auto& maybeNss = nssOrUUID.nss();
+        if (maybeNss) {
+            const auto& maybeTenantId = maybeNss->tenantId();
+            if (maybeTenantId) {
+                cmdNs.append("tenantId", opts.serializeIdentifier(maybeTenantId->toString()));
+            }
+            cmdNs.append("db", opts.serializeIdentifier(maybeNss->db()));
+            cmdNs.append("coll", opts.serializeIdentifier(maybeNss->coll()));
+        } else {
+            cmdNs.append("uuid", opts.serializeIdentifier(nssOrUUID.uuid()->toString()));
+        }
+        cmdNs.done();
+    }
+
+    // Redact the namespace of the command.
+    {
+        std::string toSerialize;
+        if (nssOrUUID.uuid()) {
+            toSerialize = opts.serializeIdentifier(nssOrUUID.toString());
+        } else {
+            // Database is set at the command level, only serialize the collection here.
+            toSerialize = opts.serializeIdentifier(nssOrUUID.nss()->coll());
+        }
+        bob.append(FindCommandRequest::kCommandName, toSerialize);
+    }
+
+    // Filter. The parsed expression is kept alive past this scope because the projection analysis
+    // below needs it.
+    std::unique_ptr<MatchExpression> filterExpr;
+    {
+        filterExpr = uassertStatusOKWithContext(
+            MatchExpressionParser::parse(findCommand.getFilter(),
+                                         expCtx,
+                                         ExtensionsCallbackNoop(),
+                                         MatchExpressionParser::kAllowAllSpecialFeatures),
+            "Failed to parse 'filter' option when making telemetry key");
+        bob.append(FindCommandRequest::kFilterFieldName, filterExpr->serialize(opts));
+    }
+
+    // Let Spec. redactLetSpec() returns a freshly built object, so it is appended directly.
+    if (const auto& letSpec = findCommand.getLet()) {
+        bob.append(FindCommandRequest::kLetFieldName, redactLetSpec(letSpec.get(), opts, expCtx));
+    }
+
+    // Projection.
+    if (!findCommand.getProjection().isEmpty()) {
+        // Parse to Projection
+        auto projection =
+            projection_ast::parseAndAnalyze(expCtx,
+                                            findCommand.getProjection(),
+                                            filterExpr.get(),
+                                            findCommand.getFilter(),
+                                            ProjectionPolicies::findProjectionPolicies());
+
+        bob.append(FindCommandRequest::kProjectionFieldName,
+                   projection_ast::serialize(*projection.root(), opts));
+    }
+
+    // Assume the hint is correct and contains field names. It is possible that this hint
+    // doesn't actually represent an index, but we can't detect that here.
+    // Hint, max, and min won't serialize if the object is empty.
+    if (!findCommand.getHint().isEmpty()) {
+        bob.append(FindCommandRequest::kHintFieldName,
+                   redactHintComponent(findCommand.getHint(), opts, false));
+        // Max/Min aren't valid without hint.
+        if (!findCommand.getMax().isEmpty()) {
+            bob.append(FindCommandRequest::kMaxFieldName,
+                       redactHintComponent(findCommand.getMax(), opts, true));
+        }
+        if (!findCommand.getMin().isEmpty()) {
+            bob.append(FindCommandRequest::kMinFieldName,
+                       redactHintComponent(findCommand.getMin(), opts, true));
+        }
+    }
+
+    // Sort.
+    if (!findCommand.getSort().isEmpty()) {
+        bob.append(FindCommandRequest::kSortFieldName,
+                   query_shape::sortShape(findCommand.getSort(), expCtx, opts));
+    }
+
+    // Fields for literal redaction. Adds limit, skip, batchSize, maxTimeMS, and noCursorTimeout.
+    addLiteralFields(&bob, findCommand, opts);
+
+    // Adds the optional boolean fields and the collation.
+    addRemainingFindCommandFields(&bob, findCommand, opts);
+
+    return bob.obj();
+}
} // namespace mongo::query_shape