diff options
author | Ruoxin Xu <ruoxin.xu@mongodb.com> | 2022-03-15 15:40:06 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-03-15 16:40:02 +0000 |
commit | 63a4f1e011860e34b8a29b6171e3cbdd58f15c8d (patch) | |
tree | 9040f6bb43278ae541d1c647d5361b3234abb7a5 /src | |
parent | e43a65ada2aea66c9ab21134bbd72a71fa581758 (diff) | |
download | mongo-63a4f1e011860e34b8a29b6171e3cbdd58f15c8d.tar.gz |
SERVER-61421 Change SBE plan cache key encoding to reflect auto-parameterization
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/matcher/expression.h | 2 | ||||
-rw-r--r-- | src/mongo/db/pipeline/expression_walker.h | 2 | ||||
-rw-r--r-- | src/mongo/db/query/canonical_query.cpp | 9 | ||||
-rw-r--r-- | src/mongo/db/query/canonical_query_encoder.cpp | 436 | ||||
-rw-r--r-- | src/mongo/db/query/get_executor.cpp | 16 | ||||
-rw-r--r-- | src/mongo/db/query/query_feature_flags.idl | 7 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder.h | 1 |
7 files changed, 459 insertions, 14 deletions
diff --git a/src/mongo/db/matcher/expression.h b/src/mongo/db/matcher/expression.h index 812781a3ad9..b844f33dd75 100644 --- a/src/mongo/db/matcher/expression.h +++ b/src/mongo/db/matcher/expression.h @@ -208,7 +208,7 @@ public: using Iterator = MatchExpressionIterator<false>; using ConstIterator = MatchExpressionIterator<true>; - using InputParamId = int64_t; + using InputParamId = int32_t; /** * Tracks the information needed to generate a document validation error for a diff --git a/src/mongo/db/pipeline/expression_walker.h b/src/mongo/db/pipeline/expression_walker.h index 6aec35ed7af..b492bd0ad2b 100644 --- a/src/mongo/db/pipeline/expression_walker.h +++ b/src/mongo/db/pipeline/expression_walker.h @@ -60,7 +60,7 @@ template <typename Node, typename Walker> inline constexpr auto hasVoidPreVisit = stdx::is_detected_exact_v<void, PreVisit, Walker, MaybeConstPtr<std::is_const_v<Node>, Node>>; /** - * hasVoidPreVisit is a template variable indicating whether such a pointer-returning member + * hasPtrPreVisit is a template variable indicating whether such a pointer-returning member * function exists for a given Walker type when called on a pointer to our Node type. */ template <typename Node, typename Walker> diff --git a/src/mongo/db/query/canonical_query.cpp b/src/mongo/db/query/canonical_query.cpp index ec8c798ca5e..82d496b1ba9 100644 --- a/src/mongo/db/query/canonical_query.cpp +++ b/src/mongo/db/query/canonical_query.cpp @@ -203,10 +203,11 @@ Status CanonicalQuery::init(OperationContext* opCtx, } auto unavailableMetadata = validStatus.getValue(); _root = MatchExpression::normalize(std::move(root)); - if (feature_flags::gFeatureFlagSbePlanCache.isEnabledAndIgnoreFCV()) { - // TODO SERVER-61421: Call 'MatchExpression::parameterize()' on '_root' in order to enable - // auto-parameterization. This cannot be done until the SBE plan cache code is prepared to - // deal with auto-parameterized queries. 
+ if (feature_flags::gFeatureFlagSbePlanCache.isEnabledAndIgnoreFCV() && + feature_flags::gFeatureFlagAutoParameterization.isEnabledAndIgnoreFCV()) { + // Both the SBE plan cache and auto-parameterization are enabled. Add parameter markers to + // the appropriate match expression leaf nodes. + MatchExpression::parameterize(_root.get()); } // The tree must always be valid after normalization. dassert(isValid(_root.get(), *_findCommand).isOK()); diff --git a/src/mongo/db/query/canonical_query_encoder.cpp b/src/mongo/db/query/canonical_query_encoder.cpp index 4892e62fdc2..9bd348b20fe 100644 --- a/src/mongo/db/query/canonical_query_encoder.cpp +++ b/src/mongo/db/query/canonical_query_encoder.cpp @@ -29,16 +29,22 @@ #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery -#include "mongo/platform/basic.h" - #include "mongo/db/query/canonical_query_encoder.h" #include <boost/iterator/transform_iterator.hpp> #include "mongo/base/simple_string_data_comparator.h" #include "mongo/db/matcher/expression_array.h" +#include "mongo/db/matcher/expression_expr.h" #include "mongo/db/matcher/expression_geo.h" +#include "mongo/db/matcher/expression_text.h" +#include "mongo/db/matcher/expression_text_noop.h" +#include "mongo/db/matcher/expression_where.h" +#include "mongo/db/matcher/expression_where_noop.h" #include "mongo/db/query/projection.h" +#include "mongo/db/query/query_feature_flags_gen.h" +#include "mongo/db/query/query_knobs_gen.h" +#include "mongo/db/query/tree_walker.h" #include "mongo/logv2/log.h" #include "mongo/util/base64.h" @@ -78,11 +84,35 @@ const char kEncodeRegexFlagsSeparator = '/'; const char kEncodeSortSection = '~'; const char kEncodeEngineSection = '@'; +// These special bytes are used in the encoding of auto-parameterized match expressions in the SBE +// plan cache key. + +// Precedes the id number of a parameter marker. 
+const char kEncodeParamMarker = '?'; +// Precedes the encoding of a constant when that constant has not been auto-parameterized. The +// constant is typically encoded as a BSON type byte followed by a BSON value (without the +// BSONElement's field name). +const char kEncodeConstantLiteralMarker = ':'; + +/** + * AppendChar provides the compiler with a type for an "appendChar(...)" member function. + */ +template <class BuilderType> +using AppendChar = decltype(std::declval<BuilderType>().appendChar(std::declval<char>())); + +/** + * hasAppendChar is a template variable indicating whether such a void-returning member function + * exists for a 'BuilderType'. + */ +template <typename BuilderType> +inline constexpr auto hasAppendChar = stdx::is_detected_exact_v<void, AppendChar, BuilderType>; + /** - * Encode user-provided string. Cache key delimiters seen in the - * user string are escaped with a backslash. + * Encode user-provided string. Cache key delimiters seen in the user string are escaped with a + * backslash. */ -void encodeUserString(StringData s, StringBuilder* keyBuilder) { +template <class BuilderType> +void encodeUserString(StringData s, BuilderType* builder) { for (size_t i = 0; i < s.size(); ++i) { char c = s[i]; switch (c) { @@ -95,11 +125,21 @@ void encodeUserString(StringData s, StringBuilder* keyBuilder) { case kEncodeRegexFlagsSeparator: case kEncodeSortSection: case kEncodeEngineSection: + case kEncodeParamMarker: + case kEncodeConstantLiteralMarker: case '\\': - *keyBuilder << '\\'; + if constexpr (hasAppendChar<BuilderType>) { + builder->appendChar('\\'); + } else { + *builder << '\\'; + } // Fall through to default case. 
default: - *keyBuilder << c; + if constexpr (hasAppendChar<BuilderType>) { + builder->appendChar(c); + } else { + *builder << c; + } } } } @@ -621,7 +661,380 @@ CanonicalQuery::QueryShapeString encode(const CanonicalQuery& cq) { return keyBuilder.str(); } +namespace { +/** + * A visitor intended for use in combination with the corresponding walker class below to encode a + * 'MatchExpression' into the SBE plan cache key. + * + * Handles potentially parameterized queries, in which case parameter markers are encoded into the + * cache key in place of the actual constant values. + */ +class MatchExpressionSbePlanCacheKeySerializationVisitor final + : public MatchExpressionConstVisitor { +public: + explicit MatchExpressionSbePlanCacheKeySerializationVisitor(BufBuilder* builder) + : _builder(builder) { + invariant(_builder); + } + + void visit(const BitsAllClearMatchExpression* expr) final { + encodeBitTestExpression(expr); + } + void visit(const BitsAllSetMatchExpression* expr) final { + encodeBitTestExpression(expr); + } + void visit(const BitsAnyClearMatchExpression* expr) final { + encodeBitTestExpression(expr); + } + void visit(const BitsAnySetMatchExpression* expr) final { + encodeBitTestExpression(expr); + } + + void visit(const ExistsMatchExpression* expr) final { + encodeRhs(expr); + } + + void visit(const ExprMatchExpression* expr) final { + encodeFull(expr); + } + + void visit(const EqualityMatchExpression* expr) final { + encodeSingleParamPathNode(expr); + } + void visit(const GTEMatchExpression* expr) final { + encodeSingleParamPathNode(expr); + } + void visit(const GTMatchExpression* expr) final { + encodeSingleParamPathNode(expr); + } + void visit(const LTEMatchExpression* expr) final { + encodeSingleParamPathNode(expr); + } + void visit(const LTMatchExpression* expr) final { + encodeSingleParamPathNode(expr); + } + + void visit(const InMatchExpression* expr) final { + encodeSingleParamPathNode(expr); + } + + void visit(const ModMatchExpression* expr) 
final { + auto divisorParam = expr->getDivisorInputParamId(); + auto remainderParam = expr->getRemainderInputParamId(); + if (divisorParam) { + tassert(6142105, + "$mod expression had divisor param but not remainder param", + remainderParam); + encodeParamMarker(*divisorParam); + encodeParamMarker(*remainderParam); + } else { + // TODO SERVER-64137: remove this branch and assert the existence of both params once + // auto-parameterization flag is removed. + tassert(6142106, + "$mod expression had remainder param but not divisor param", + !remainderParam); + encodeRhs(expr); + } + } + + void visit(const RegexMatchExpression* expr) final { + auto sourceRegexParam = expr->getSourceRegexInputParamId(); + auto compiledRegexParam = expr->getCompiledRegexInputParamId(); + if (sourceRegexParam) { + tassert(6142107, + "regex expression had source param but not compiled param", + compiledRegexParam); + encodeParamMarker(*sourceRegexParam); + encodeParamMarker(*compiledRegexParam); + } else { + // TODO SERVER-64137: remove this branch and assert the existence of both params once + // auto-parameterization flag is removed. + tassert(6142108, + "regex expression had compiled param but not source param", + !compiledRegexParam); + encodeRhs(expr); + } + } + + void visit(const SizeMatchExpression* expr) final { + encodeSingleParamPathNode(expr); + } + + void visit(const TextMatchExpression* expr) final { + encodeFull(expr); + } + void visit(const TextNoOpMatchExpression* expr) final { + encodeFull(expr); + } + + void visit(const TypeMatchExpression* expr) final { + encodeSingleParamPathNode(expr); + } + + void visit(const WhereMatchExpression* expr) final { + encodeSingleParamNode(expr); + } + void visit(const WhereNoOpMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142109); + } + + /** + * Nothing needs to be encoded for these nodes beyond their type, their path (if they have one), + * and their children. 
+ */ + void visit(const AlwaysFalseMatchExpression* expr) final {} + void visit(const AlwaysTrueMatchExpression* expr) final {} + void visit(const AndMatchExpression* expr) final {} + void visit(const ElemMatchObjectMatchExpression* matchExpr) final {} + void visit(const NorMatchExpression* expr) final {} + void visit(const NotMatchExpression* expr) final {} + void visit(const OrMatchExpression* expr) final {} + // The 'InternalExpr*' match expressions are generated internally from a $expr, so they do not + // need to contribute anything else to the cache key. + void visit(const InternalExprEqMatchExpression* expr) final {} + void visit(const InternalExprGTEMatchExpression* expr) final {} + void visit(const InternalExprGTMatchExpression* expr) final {} + void visit(const InternalExprLTEMatchExpression* expr) final {} + void visit(const InternalExprLTMatchExpression* expr) final {} + + /** + * These node types are not yet supported in SBE. + */ + void visit(const ElemMatchValueMatchExpression* matchExpr) final { + MONGO_UNREACHABLE_TASSERT(6142110); + } + void visit(const GeoMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142111); + } + void visit(const GeoNearMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142112); + } + void visit(const InternalBucketGeoWithinMatchExpression* expr) final { + // This is only used for time-series collections, but SBE isn't yet used for querying + // time-series collections. 
+ MONGO_UNREACHABLE_TASSERT(6142113); + } + void visit(const InternalSchemaAllElemMatchFromIndexMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142114); + } + void visit(const InternalSchemaAllowedPropertiesMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142115); + } + void visit(const InternalSchemaBinDataEncryptedTypeExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142116); + } + void visit(const InternalSchemaBinDataSubTypeExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142117); + } + void visit(const InternalSchemaCondMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142118); + } + void visit(const InternalSchemaEqMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142119); + } + void visit(const InternalSchemaFmodMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142120); + } + void visit(const InternalSchemaMatchArrayIndexMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142121); + } + void visit(const InternalSchemaMaxItemsMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142122); + } + void visit(const InternalSchemaMaxLengthMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142123); + } + void visit(const InternalSchemaMaxPropertiesMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142124); + } + void visit(const InternalSchemaMinItemsMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142125); + } + void visit(const InternalSchemaMinLengthMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142126); + } + void visit(const InternalSchemaMinPropertiesMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142127); + } + void visit(const InternalSchemaObjectMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142128); + } + void visit(const InternalSchemaRootDocEqMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142129); + } + void visit(const InternalSchemaTypeExpression* expr) final { + 
MONGO_UNREACHABLE_TASSERT(6142130); + } + void visit(const InternalSchemaUniqueItemsMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142131); + } + void visit(const InternalSchemaXorMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142132); + } + // Used in the implementation of geoNear, which is not yet supported in SBE. + void visit(const TwoDPtInAnnulusExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142133); + } + +private: + /** + * Encodes a 'PathMatchExpression' node of type T whose constant can be replaced with a single + * parameter marker. If the parameter marker is not present, encodes the node's BSON constant + * into the cache key. + */ + template <typename T, + typename = std::enable_if_t<std::is_convertible_v<T*, PathMatchExpression*>>> + void encodeSingleParamPathNode(const T* expr) { + if (expr->getInputParamId()) { + encodeParamMarker(*expr->getInputParamId()); + } else { + encodeRhs(expr); + } + } + + /** + * Encodes a non-path 'MatchExpression' node of type T whose constant can be replaced with a + * single parameter marker. If the parameter marker is not present, encodes the entire node into + * the cache key. + */ + template <typename T> + void encodeSingleParamNode(const T* expr) { + static_assert(!std::is_convertible_v<T*, PathMatchExpression*>); + if (expr->getInputParamId()) { + encodeParamMarker(*expr->getInputParamId()); + } else { + encodeFull(expr); + } + } + + void encodeBitTestExpression(const BitTestMatchExpression* expr) { + auto bitPositionsParam = expr->getBitPositionsParamId(); + auto bitMaskParam = expr->getBitMaskParamId(); + if (bitPositionsParam) { + tassert(6142100, + "bit-test expression had bit positions param but not bitmask param", + bitMaskParam); + encodeParamMarker(*bitPositionsParam); + encodeParamMarker(*bitMaskParam); + } else { + // TODO SERVER-64137: remove this branch and assert the existence of both params once + // auto-parameterization flag is removed. 
+ tassert(6142101, + "bit-test expression had bitmask param but not bit positions param", + !bitMaskParam); + encodeRhs(expr); + } + } + + /** + * Adds a special parameter marker byte to the cache key, followed by a four byte integer for + * the parameter id. + */ + void encodeParamMarker(MatchExpression::InputParamId paramId) { + _builder->appendChar(kEncodeParamMarker); + _builder->appendNum(paramId); + } + + /** + * For path match expressions which can be written as {"some.path": {$operator: <RHS>}}, encodes + * the right-hand side portion of the expression verbatim. Illegal to call if 'expr' has a + * parameter marker. + */ + void encodeRhs(const PathMatchExpression* expr) { + encodeHelper(expr->getSerializedRightHandSide()); + } + + /** + * Similar to 'encodeRhs()' above, but for non-path match expressions. In this case, rather than + * encode just the right-hand side, we call 'serialize()' to get a serialized version of the + * full expression, and encode the result into the plan cache key. Illegal to call if 'expr' has + * a parameter marker. + */ + void encodeFull(const MatchExpression* expr) { + encodeHelper(expr->serialize()); + } + + void encodeHelper(BSONObj toEncode) { + tassert(6142102, "expected object to encode to be non-empty", !toEncode.isEmpty()); + BSONObjIterator objIter{toEncode}; + BSONElement firstElem = objIter.next(); + tassert(6142103, "expected object to encode to have exactly one element", !objIter.more()); + encodeBsonValue(firstElem); + } + + /** + * Encodes a special byte to mark a constant, followed by a byte for the BSON type of 'elem', + * followed by the bytes of the value part of 'elem' (for types that have such a value). + * + * Note that the element's field name is not encoded, just the type and value. 
+ */ + void encodeBsonValue(BSONElement elem) { + _builder->appendChar(kEncodeConstantLiteralMarker); + _builder->appendChar(elem.type()); + _builder->appendBuf(elem.value(), elem.valuesize()); + } + + BufBuilder* const _builder; +}; + +/** + * A tree walker which walks a 'MatchExpression' tree and encodes the corresponding portion of the + * SBE plan cache key into 'builder'. + * + * Handles potentially parameterized queries, in which case parameter markers are encoded into the + * cache key in place of the actual constant values. + */ +class MatchExpressionSbePlanCacheKeySerializationWalker { +public: + explicit MatchExpressionSbePlanCacheKeySerializationWalker(BufBuilder* builder) + : _builder{builder}, _visitor{_builder} { + invariant(_builder); + } + + void preVisit(const MatchExpression* expr) { + // Encode the type of the node as well as the path (if there is a non-empty path). + _builder->appendStr(encodeMatchType(expr->matchType())); + encodeUserString(expr->path(), _builder); + + // The node encodes itself, and then its children. + expr->acceptVisitor(&_visitor); + + if (expr->numChildren() > 0) { + _builder->appendChar(kEncodeChildrenBegin); + } + } + + void inVisit(long count, const MatchExpression* expr) { + _builder->appendChar(kEncodeChildrenSeparator); + } + + void postVisit(const MatchExpression* expr) { + if (expr->numChildren() > 0) { + _builder->appendChar(kEncodeChildrenEnd); + } + } + +private: + BufBuilder* const _builder; + MatchExpressionSbePlanCacheKeySerializationVisitor _visitor; +}; + +/** + * Given a 'matchExpr' which may have parameter markers, encodes a key into 'builder' with the + * following property: Two match expression trees which are identical after auto-parameterization + * have the same key, otherwise the keys must differ. 
+ */ +void encodeKeyForAutoParameterizedMatchSBE(MatchExpression* matchExpr, BufBuilder* builder) { + MatchExpressionSbePlanCacheKeySerializationWalker walker{builder}; + tree_walker::walk<true, MatchExpression>(matchExpr, &walker); +} +} // namespace + std::string encodeSBE(const CanonicalQuery& cq) { + tassert(6142104, + "attempting to encode SBE plan cache key for SBE-incompatible query", + cq.isSbeCompatible()); + const auto& filter = cq.getQueryObj(); const auto& proj = cq.getFindCommandRequest().getProjection(); const auto& sort = cq.getFindCommandRequest().getSort(); @@ -639,7 +1052,14 @@ std::string encodeSBE(const CanonicalQuery& cq) { kBufferSizeConstant + (let ? let->objsize() : 0); BufBuilder bufBuilder(bufSize); - bufBuilder.appendBuf(filter.objdata(), filter.objsize()); + if (feature_flags::gFeatureFlagAutoParameterization.isEnabledAndIgnoreFCV()) { + encodeKeyForAutoParameterizedMatchSBE(cq.root(), &bufBuilder); + } else { + // When auto-parameterization is off, just add the entire filter BSON to the cache key, + // including any constants. + bufBuilder.appendBuf(filter.objdata(), filter.objsize()); + } + bufBuilder.appendBuf(proj.objdata(), proj.objsize()); // TODO SERVER-62100: No need to encode the entire "let" object. if (let) { diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index 797a61f064e..106889832e8 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -281,6 +281,13 @@ void fillOutIndexEntries(OperationContext* opCtx, CanonicalQuery* canonicalQuery, const CollectionPtr& collection, std::vector<IndexEntry>& entries) { + // TODO SERVER-63352: Eliminate this check once we support auto-parameterized index scan plans. + if (feature_flags::gFeatureFlagAutoParameterization.isEnabledAndIgnoreFCV()) { + // Indexed plans are not yet supported when auto-parameterization is enabled, so make it + // look to the planner like there are no indexes. 
+ return; + } + auto ii = collection->getIndexCatalog()->getIndexIterator(opCtx, false); while (ii->more()) { const IndexCatalogEntry* ice = ii->next(); @@ -984,6 +991,15 @@ protected: invariant(descriptor); invariant(plannerParams); + // Auto-parameterization currently only works for collection scan plans, but idhack plans + // use the _id index. Therefore, we inhibit idhack when auto-parameterization is enabled. + // + // TODO SERVER-64237: Eliminate this check once we support auto-parameterized ID hack + // plans. + if (feature_flags::gFeatureFlagAutoParameterization.isEnabledAndIgnoreFCV()) { + return nullptr; + } + tassert(5536100, "SBE cannot handle query with metadata", !_cq->metadataDeps()[DocumentMetadataFields::kSortKey]); diff --git a/src/mongo/db/query/query_feature_flags.idl b/src/mongo/db/query/query_feature_flags.idl index b3565da4f5f..b60ea4d04cc 100644 --- a/src/mongo/db/query/query_feature_flags.idl +++ b/src/mongo/db/query/query_feature_flags.idl @@ -82,6 +82,13 @@ feature_flags: cpp_varname: gFeatureFlagSbePlanCache default: false + featureFlagAutoParameterization: + description: "Feature flag for enabling auto-parameterization of match expressions. This + feature is used to store auto-parameterized plans in the SBE plan cache, so this flag is only + meaningful to turn on in combination with 'featureFlagSbePlanCache'." + cpp_varname: gFeatureFlagAutoParameterization + default: false + featureFlagSortArray: description: "Feature flag for allowing use of the $sortArray aggregation expression" cpp_varname: gFeatureFlagSortArray diff --git a/src/mongo/db/query/sbe_stage_builder.h b/src/mongo/db/query/sbe_stage_builder.h index f5a0904a773..852f187c6f0 100644 --- a/src/mongo/db/query/sbe_stage_builder.h +++ b/src/mongo/db/query/sbe_stage_builder.h @@ -334,6 +334,7 @@ private: } else { debugInfo.reset(); } + inputParamToSlotMap = other.inputParamToSlotMap; } }; |