From 95b038c4078a30a797f369b9df1b22ddb427de5f Mon Sep 17 00:00:00 2001 From: Ian Boros Date: Wed, 9 Oct 2019 18:12:41 +0000 Subject: SERVER-42433 use ProjectionAST in canonical query encoding --- src/mongo/db/query/canonical_query_encoder.cpp | 74 +++++++++++----------- .../db/query/canonical_query_encoder_test.cpp | 68 ++++++++++++-------- 2 files changed, 80 insertions(+), 62 deletions(-) diff --git a/src/mongo/db/query/canonical_query_encoder.cpp b/src/mongo/db/query/canonical_query_encoder.cpp index 6698e56766c..18ba78b5476 100644 --- a/src/mongo/db/query/canonical_query_encoder.cpp +++ b/src/mongo/db/query/canonical_query_encoder.cpp @@ -38,6 +38,7 @@ #include "mongo/base/simple_string_data_comparator.h" #include "mongo/db/matcher/expression_array.h" #include "mongo/db/matcher/expression_geo.h" +#include "mongo/db/query/projection.h" #include "mongo/util/log.h" namespace mongo { @@ -49,6 +50,7 @@ const char kEncodeChildrenEnd = ']'; const char kEncodeChildrenSeparator = ','; const char kEncodeCollationSection = '#'; const char kEncodeProjectionSection = '|'; +const char kEncodeProjectionRequirementSeparator = '-'; const char kEncodeRegexFlagsSeparator = '/'; const char kEncodeSortSection = '~'; @@ -65,6 +67,7 @@ void encodeUserString(StringData s, StringBuilder* keyBuilder) { case kEncodeChildrenSeparator: case kEncodeCollationSection: case kEncodeProjectionSection: + case kEncodeProjectionRequirementSeparator: case kEncodeRegexFlagsSeparator: case kEncodeSortSection: case '\\': @@ -463,51 +466,48 @@ void encodeKeyForSort(const BSONObj& sortObj, StringBuilder* keyBuilder) { } /** - * Encodes parsed projection into cache key. - * Does a simple toString() on each projected field - * in the BSON object. - * Orders the encoded elements in the projection by field name. - * This handles all the special projection types ($meta, $elemMatch, etc.) + * Encodes projection AST into a cache key. 
+ * + * For projections which have a finite set of required fields (inclusion-only projections), encodes + * those field names in order. + * + * For projections which require the entire document (exclusion projections, projections with + * expressions), the projection section is empty. */ -void encodeKeyForProj(const BSONObj& projObj, StringBuilder* keyBuilder) { - // Sorts the BSON elements by field name using a map. - std::map elements; +void encodeKeyForProj(const projection_ast::Projection* proj, StringBuilder* keyBuilder) { + if (!proj || proj->requiresDocument()) { + // Don't encode anything for the projection section to indicate the entire document is + // required. + return; + } - BSONObjIterator it(projObj); - while (it.more()) { - BSONElement elt = it.next(); - StringData fieldName = elt.fieldNameStringData(); + std::vector requiredFields = proj->getRequiredFields(); + invariant(!requiredFields.empty()); - // Internal callers may add $-prefixed fields to the projection. These are not part of a - // user query, and therefore are not considered part of the cache key. - if (fieldName[0] == '$') { - continue; - } + // Keep track of whether we appended the character marking the beginning of the projection + // section. We may not have to if every required field is "$sortKey", which is skipped below. + bool appendedStart = false; - elements[fieldName] = elt; - } + // Encode the fields required by the projection in order. + std::sort(requiredFields.begin(), requiredFields.end()); + for (auto&& requiredField : requiredFields) { + invariant(!requiredField.empty()); - if (!elements.empty()) { - *keyBuilder << kEncodeProjectionSection; - } + // Internal callers (e.g., from mongos) may add "$sortKey" to the projection. This is not + // part of the user query, and therefore is not considered part of the cache key. 
+ if (requiredField == "$sortKey") { + continue; + } - // Read elements in order of field name - for (std::map::const_iterator i = elements.begin(); - i != elements.end(); - ++i) { - const BSONElement& elt = (*i).second; + const bool isFirst = !appendedStart; - if (elt.type() != BSONType::Object) { - // For inclusion/exclusion projections, we encode as "i" or "e". - *keyBuilder << (elt.trueValue() ? "i" : "e"); + if (isFirst) { + *keyBuilder << kEncodeProjectionSection; + appendedStart = true; } else { - // For projection operators, we use the verbatim string encoding of the element. - encodeUserString(elt.toString(false, // includeFieldName - false), // full - keyBuilder); + *keyBuilder << kEncodeProjectionRequirementSeparator; } - - encodeUserString(elt.fieldName(), keyBuilder); + encodeUserString(requiredField, keyBuilder); } } } // namespace @@ -518,7 +518,7 @@ CanonicalQuery::QueryShapeString encode(const CanonicalQuery& cq) { StringBuilder keyBuilder; encodeKeyForMatch(cq.root(), &keyBuilder); encodeKeyForSort(cq.getQueryRequest().getSort(), &keyBuilder); - encodeKeyForProj(cq.getQueryRequest().getProj(), &keyBuilder); + encodeKeyForProj(cq.getProj(), &keyBuilder); encodeCollation(cq.getCollator(), &keyBuilder); return keyBuilder.str(); diff --git a/src/mongo/db/query/canonical_query_encoder_test.cpp b/src/mongo/db/query/canonical_query_encoder_test.cpp index 4086f64402f..1d647910ce8 100644 --- a/src/mongo/db/query/canonical_query_encoder_test.cpp +++ b/src/mongo/db/query/canonical_query_encoder_test.cpp @@ -123,50 +123,68 @@ TEST(CanonicalQueryEncoderTest, ComputeKey) { // With sort testComputeKey("{}", "{a: 1}", "{}", "an~aa"); testComputeKey("{}", "{a: -1}", "{}", "an~da"); - testComputeKey("{}", - "{a: {$meta: 'textScore'}}", - "{a: {$meta: 'textScore'}}", - "an~ta|{ $meta: \"textScore\" }a"); + testComputeKey("{}", "{a: {$meta: 'textScore'}}", "{a: {$meta: 'textScore'}}", "an~ta"); testComputeKey("{a: 1}", "{b: 1}", "{}", "eqa~ab"); // With projection - 
testComputeKey("{}", "{}", "{a: 1}", "an|ia"); - testComputeKey("{}", "{}", "{a: -1}", "an|ia"); - testComputeKey("{}", "{}", "{a: -1.0}", "an|ia"); - testComputeKey("{}", "{}", "{a: true}", "an|ia"); - testComputeKey("{}", "{}", "{a: 0}", "an|ea"); - testComputeKey("{}", "{}", "{a: false}", "an|ea"); - testComputeKey("{}", "{}", "{a: 99}", "an|ia"); - testComputeKey("{}", "{}", "{a: 'foo'}", "an|ia"); - testComputeKey("{}", "{}", "{a: {$slice: [3, 5]}}", "an|{ $slice: \\[ 3\\, 5 \\] }a"); - testComputeKey("{}", "{}", "{a: {$elemMatch: {x: 2}}}", "an|{ $elemMatch: { x: 2 } }a"); - testComputeKey("{}", "{}", "{a: ObjectId('507f191e810c19729de860ea')}", "an|ia"); - testComputeKey("{a: 1}", "{}", "{'a.$': 1}", "eqa|ia.$"); - testComputeKey("{a: 1}", "{}", "{a: 1}", "eqa|ia"); + testComputeKey("{}", "{}", "{a: 1}", "an|_id-a"); + testComputeKey("{}", "{}", "{a: -1}", "an|_id-a"); + testComputeKey("{}", "{}", "{a: -1.0}", "an|_id-a"); + testComputeKey("{}", "{}", "{a: true}", "an|_id-a"); + testComputeKey("{}", "{}", "{a: 0}", "an"); + testComputeKey("{}", "{}", "{a: false}", "an"); + testComputeKey("{}", "{}", "{a: 99}", "an|_id-a"); + testComputeKey("{}", "{}", "{a: 'foo'}", "an|_id-a"); + // $slice defaults to exclusion. + testComputeKey("{}", "{}", "{a: {$slice: [3, 5]}}", "an"); + testComputeKey("{}", "{}", "{a: {$slice: [3, 5]}, b: 0}", "an"); + + // But even when using $slice in an inclusion, the entire document is needed. 
+ testComputeKey("{}", "{}", "{a: {$slice: [3, 5]}, b: 1}", "an"); + + testComputeKey("{}", "{}", "{a: {$elemMatch: {x: 2}}}", "an"); + testComputeKey("{}", "{}", "{a: {$elemMatch: {x: 2}}, b: 0}", "an"); + testComputeKey("{}", "{}", "{a: {$elemMatch: {x: 2}}, b: 1}", "an"); + + testComputeKey("{}", "{}", "{a: {$slice: [3, 5]}, b: {$elemMatch: {x: 2}}}", "an"); + + testComputeKey("{}", "{}", "{a: ObjectId('507f191e810c19729de860ea')}", "an|_id-a"); + testComputeKey("{a: 1}", "{}", "{'a.$': 1}", "eqa"); + testComputeKey("{a: 1}", "{}", "{a: 1}", "eqa|_id-a"); // Projection should be order-insensitive - testComputeKey("{}", "{}", "{a: 1, b: 1}", "an|iaib"); - testComputeKey("{}", "{}", "{b: 1, a: 1}", "an|iaib"); + testComputeKey("{}", "{}", "{a: 1, b: 1}", "an|_id-a-b"); + testComputeKey("{}", "{}", "{b: 1, a: 1}", "an|_id-a-b"); + + // And should escape the separation character. + testComputeKey("{}", "{}", "{'b-1': 1, 'a-2': 1}", "an|_id-a\\-2-b\\-1"); + + // And should exclude "$sortKey", which can be added internally. + testComputeKey("{}", "{x: 1}", "{$sortKey: {$meta: 'sortKey'}}", "an~ax"); + testComputeKey("{}", "{}", "{}", "an"); + + testComputeKey("{}", "{x: 1}", "{a: 1, $sortKey: {$meta: 'sortKey'}}", "an~ax|_id-a"); + testComputeKey("{}", "{}", "{a: 1}", "an|_id-a"); // With or-elimination and projection - testComputeKey("{$or: [{a: 1}]}", "{}", "{_id: 0, a: 1}", "eqa|e_idia"); - testComputeKey("{$or: [{a: 1}]}", "{}", "{'a.$': 1}", "eqa|ia.$"); + testComputeKey("{$or: [{a: 1}]}", "{}", "{_id: 0, a: 1}", "eqa|a"); + testComputeKey("{$or: [{a: 1}]}", "{}", "{'a.$': 1}", "eqa"); } // Delimiters found in user field names or non-standard projection field values // must be escaped. TEST(CanonicalQueryEncoderTest, ComputeKeyEscaped) { // Field name in query. - testComputeKey("{'a,[]~|<>': 1}", "{}", "{}", "eqa\\,\\[\\]\\~\\|<>"); + testComputeKey("{'a,[]~|-<>': 1}", "{}", "{}", "eqa\\,\\[\\]\\~\\|\\-<>"); // Field name in sort. 
- testComputeKey("{}", "{'a,[]~|<>': 1}", "{}", "an~aa\\,\\[\\]\\~\\|<>"); + testComputeKey("{}", "{'a,[]~|-<>': 1}", "{}", "an~aa\\,\\[\\]\\~\\|\\-<>"); // Field name in projection. - testComputeKey("{}", "{}", "{'a,[]~|<>': 1}", "an|ia\\,\\[\\]\\~\\|<>"); + testComputeKey("{}", "{}", "{'a,[]~|-<>': 1}", "an|_id-a\\,\\[\\]\\~\\|\\-<>"); // Value in projection. - testComputeKey("{}", "{}", "{a: 'foo,[]~|<>'}", "an|ia"); + testComputeKey("{}", "{}", "{a: 'foo,[]~|-<>'}", "an|_id-a"); } // Cache keys for $geoWithin queries with legacy and GeoJSON coordinates should -- cgit v1.2.1