/** * Copyright (C) 2018-present MongoDB, Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the Server Side Public License, version 1, * as published by MongoDB, Inc. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * Server Side Public License for more details. * * You should have received a copy of the Server Side Public License * along with this program. If not, see * . * * As a special exception, the copyright holders give permission to link the * code of portions of this program with the OpenSSL library under certain * conditions as described in each individual source file and distribute * linked combinations including the program with the OpenSSL library. You * must comply with the Server Side Public License in all respects for * all of the code used other than as permitted herein. If you modify file(s) * with this exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do so, * delete this exception statement from your version. If you delete this * exception statement from all source files in the program, then also delete * it in the license file. */ #pragma once #include #include #include #include "mongo/base/clonable_ptr.h" #include "mongo/base/status.h" #include "mongo/bson/bsonobj.h" #include "mongo/bson/bsonobjbuilder.h" #include "mongo/db/matcher/expression_visitor.h" #include "mongo/db/matcher/match_details.h" #include "mongo/db/matcher/matchable.h" #include "mongo/db/pipeline/dependencies.h" #include "mongo/util/fail_point.h" namespace mongo { /** * Enabling the disableMatchExpressionOptimization fail point will stop match expressions from * being optimized. */ extern FailPoint disableMatchExpressionOptimization; class CollatorInterface; class MatchExpression; class TreeMatchExpression; typedef StatusWith> StatusWithMatchExpression; class MatchExpression { MatchExpression(const MatchExpression&) = delete; MatchExpression& operator=(const MatchExpression&) = delete; public: enum MatchType { // tree types AND, OR, // array types ELEM_MATCH_OBJECT, ELEM_MATCH_VALUE, SIZE, // leaf types EQ, LTE, LT, GT, GTE, REGEX, MOD, EXISTS, MATCH_IN, BITS_ALL_SET, BITS_ALL_CLEAR, BITS_ANY_SET, BITS_ANY_CLEAR, // Negations. NOT, NOR, // special types TYPE_OPERATOR, GEO, WHERE, EXPRESSION, // Boolean expressions. ALWAYS_FALSE, ALWAYS_TRUE, // Things that we parse but cannot be answered without an index. GEO_NEAR, TEXT, // Expressions that are only created internally INTERNAL_2D_POINT_IN_ANNULUS, // Used to represent an expression language equality in a match expression tree, since $eq // in the expression language has different semantics than the equality match expression. INTERNAL_EXPR_EQ, // JSON Schema expressions. INTERNAL_SCHEMA_ALLOWED_PROPERTIES, INTERNAL_SCHEMA_ALL_ELEM_MATCH_FROM_INDEX, INTERNAL_SCHEMA_BIN_DATA_ENCRYPTED_TYPE, INTERNAL_SCHEMA_BIN_DATA_SUBTYPE, INTERNAL_SCHEMA_COND, INTERNAL_SCHEMA_EQ, INTERNAL_SCHEMA_FMOD, INTERNAL_SCHEMA_MATCH_ARRAY_INDEX, INTERNAL_SCHEMA_MAX_ITEMS, INTERNAL_SCHEMA_MAX_LENGTH, INTERNAL_SCHEMA_MAX_PROPERTIES, INTERNAL_SCHEMA_MIN_ITEMS, INTERNAL_SCHEMA_MIN_LENGTH, INTERNAL_SCHEMA_MIN_PROPERTIES, INTERNAL_SCHEMA_OBJECT_MATCH, INTERNAL_SCHEMA_ROOT_DOC_EQ, INTERNAL_SCHEMA_TYPE, INTERNAL_SCHEMA_UNIQUE_ITEMS, INTERNAL_SCHEMA_XOR, }; /** * An iterator to walk through the children expressions of the given MatchExpressions. Along * with the defined 'begin()' and 'end()' functions, which take a reference to a * MatchExpression, this iterator can be used with a range-based loop. For example, * * const MatchExpression* expr = makeSomeExpression(); * for (const auto& child : *expr) { * ... * } * * When incrementing the iterator, no checks are made to ensure the iterator does not pass * beyond the boundary. The caller is responsible to compare the iterator against an iterator * referring to the past-the-end child in the given expression, which can be obtained using * the 'mongo::end(*expr)' call. */ template class MatchExpressionIterator { public: MatchExpressionIterator(const MatchExpression* expr, size_t index) : _expr(expr), _index(index) {} template > MatchExpressionIterator(const MatchExpressionIterator& other) : _expr(other._expr), _index(other._index) {} template > MatchExpressionIterator& operator=(const MatchExpressionIterator& other) { _expr = other._expr; _index = other._index; return *this; } MatchExpressionIterator& operator++() { ++_index; return *this; } MatchExpressionIterator operator++(int) { const auto ret{*this}; ++(*this); return ret; } bool operator==(const MatchExpressionIterator& other) const { return _expr == other._expr && _index == other._index; } bool operator!=(const MatchExpressionIterator& other) const { return !(*this == other); } template auto operator*() const -> std::enable_if_t { return _expr->getChild(_index); } template auto operator*() const -> std::enable_if_t { return _expr->getChild(_index); } private: const MatchExpression* _expr; size_t _index; }; using Iterator = MatchExpressionIterator; using ConstIterator = MatchExpressionIterator; /** * Tracks the information needed to generate a document validation error for a * MatchExpression node. */ struct ErrorAnnotation { /** * Enumerated type describing what action to take when generating errors for document * validation failures. */ enum class Mode { // Do not generate an error for this MatchExpression or any of its children. kIgnore, // Do not generate an error for this MatchExpression, but iterate over any children // as they may provide useful information. This is particularly useful for translated // jsonSchema keywords. kIgnoreButDescend, // Generate an error message. kGenerateError, }; /** * Constructs an annotation for a MatchExpression which does not contribute to error output. */ ErrorAnnotation(Mode mode) : operatorName(""), annotation(BSONObj()), mode(mode) { invariant(mode != Mode::kGenerateError); } /** * Constructs a complete annotation for a MatchExpression which contributes to error output. */ ErrorAnnotation(std::string operatorName, BSONObj annotation) : operatorName(std::move(operatorName)), annotation(annotation.getOwned()), mode(Mode::kGenerateError) {} std::unique_ptr clone() const { return std::make_unique(*this); } const std::string operatorName; // Tracks the original expression as specified by the user. const BSONObj annotation; const Mode mode; }; /** * Make simplifying changes to the structure of a MatchExpression tree without altering its * semantics. This function may return: * - a pointer to the original, unmodified MatchExpression, * - a pointer to the original MatchExpression that has been mutated, or * - a pointer to a new MatchExpression. * * The value of 'expression' must not be nullptr. */ static std::unique_ptr optimize(std::unique_ptr expression) { // If the disableMatchExpressionOptimization failpoint is enabled, optimizations are skipped // and the expression is left unmodified. if (MONGO_unlikely(disableMatchExpressionOptimization.shouldFail())) { return expression; } auto optimizer = expression->getOptimizer(); try { return optimizer(std::move(expression)); } catch (DBException& ex) { ex.addContext("Failed to optimize expression"); throw; } } /** * Traverses expression tree post-order. Sorts children at each non-leaf node by (MatchType, * path(), children, number of children). */ static void sortTree(MatchExpression* tree); /** * Convenience method which normalizes a MatchExpression tree by optimizing and then sorting it. */ static std::unique_ptr normalize(std::unique_ptr tree) { tree = optimize(std::move(tree)); sortTree(tree.get()); return tree; } MatchExpression(MatchType type, clonable_ptr annotation = nullptr); virtual ~MatchExpression() {} // // Structural/AST information // /** * What type is the node? See MatchType above. */ MatchType matchType() const { return _matchType; } /** * Returns the number of child MatchExpression nodes contained by this node. It is expected that * a node that does not support child nodes will return 0. */ virtual size_t numChildren() const = 0; /** * Returns the child of the current node at zero-based position 'index'. 'index' must be within * the range of [0, numChildren()). */ virtual MatchExpression* getChild(size_t index) const = 0; /** * For MatchExpression nodes that can participate in tree restructuring (like AND/OR), returns a * non-const vector of MatchExpression* child nodes. If the MatchExpression does not * participated in tree restructuring, returns boost::none. * Do not use to traverse the MatchExpression tree. Use numChildren() and getChild(), which * provide access to all nodes. */ virtual boost::optional&> getChildVector() = 0; /** * Get the path of the leaf. Returns StringData() if there is no path (node is logical). */ virtual const StringData path() const { return StringData(); } enum class MatchCategory { // Expressions that are leaves on the AST, these do not have any children. kLeaf, // Logical Expressions such as $and, $or, etc. that do not have a path and may have // one or more children. kLogical, // Expressions that operate on arrays only. kArrayMatching, // Expressions that don't fall into any particular bucket. kOther, }; virtual MatchCategory getCategory() const = 0; /** * This method will perform a clone of the entire match expression tree, but will not clone the * memory pointed to by underlying BSONElements. To perform a "deep clone" use this method and * also ensure that the buffer held by the underlying BSONObj will not be destroyed during the * lifetime of the clone. */ virtual std::unique_ptr shallowClone() const = 0; // XXX document virtual bool equivalent(const MatchExpression* other) const = 0; // // Determine if a document satisfies the tree-predicate. // virtual bool matches(const MatchableDocument* doc, MatchDetails* details = nullptr) const = 0; virtual bool matchesBSON(const BSONObj& doc, MatchDetails* details = nullptr) const; /** * Determines if 'elem' would satisfy the predicate if wrapped with the top-level field name of * the predicate. Does not check that the predicate has a single top-level field name. For * example, given the object obj={a: [5]}, the predicate {i: {$gt: 0}} would match the element * obj["a"]["0"] because it performs the match as if the element at "a.0" were the BSONObj {i: * 5}. */ virtual bool matchesBSONElement(BSONElement elem, MatchDetails* details = nullptr) const; /** * Determines if the element satisfies the tree-predicate. * Not valid for all expressions (e.g. $where); in those cases, returns false. */ virtual bool matchesSingleElement(const BSONElement& e, MatchDetails* details = nullptr) const = 0; // // Tagging mechanism: Hang data off of the tree for retrieval later. // class TagData { public: enum class Type { IndexTag, RelevantTag, OrPushdownTag }; virtual ~TagData() {} virtual void debugString(StringBuilder* builder) const = 0; virtual TagData* clone() const = 0; virtual Type getType() const = 0; }; /** * Takes ownership */ void setTag(TagData* data) { _tagData.reset(data); } TagData* getTag() const { return _tagData.get(); } virtual void resetTag() { setTag(nullptr); for (size_t i = 0; i < numChildren(); ++i) { getChild(i)->resetTag(); } } /** * Set the collator 'collator' on this match expression and all its children. * * 'collator' must outlive the match expression. */ void setCollator(const CollatorInterface* collator); /** * Add the fields required for matching to 'deps'. */ void addDependencies(DepsTracker* deps) const; /** * Serialize the MatchExpression to BSON, appending to 'out'. Output of this method is expected * to be a valid query object, that, when parsed, produces a logically equivalent * MatchExpression. If 'includePath' is false then the serialization should assume it's in a * context where the path has been serialized elsewhere, such as within an $elemMatch value. */ virtual void serialize(BSONObjBuilder* out, bool includePath = true) const = 0; /** * Convenience method which serializes this MatchExpression to a BSONObj. */ BSONObj serialize(bool includePath = true) const { BSONObjBuilder bob; serialize(&bob, includePath); return bob.obj(); } /** * Returns true if this expression will always evaluate to false, such as an $or with no * children. */ virtual bool isTriviallyFalse() const { return false; } /** * Returns true if this expression will always evaluate to true, such as an $and with no * children. */ virtual bool isTriviallyTrue() const { return false; } virtual void acceptVisitor(MatchExpressionMutableVisitor* visitor) = 0; virtual void acceptVisitor(MatchExpressionConstVisitor* visitor) const = 0; // Returns nullptr if this MatchExpression node has no annotation. ErrorAnnotation* getErrorAnnotation() const { return _errorAnnotation.get(); } void setErrorAnnotation(clonable_ptr annotation) { _errorAnnotation = std::move(annotation); } // // Debug information // /** * Returns a debug string representing the match expression tree, including any tags attached * for planning. This debug string format may spill across multiple lines, so it is not suitable * for logging at low debug levels or for error messages. */ std::string debugString() const; virtual void debugString(StringBuilder& debug, int indentationLevel = 0) const = 0; /** * Serializes this MatchExpression to BSON, and then returns a standard string representation of * the resulting BSON object. */ std::string toString() const; protected: /** * An ExpressionOptimizerFunc implements tree simplifications for a MatchExpression tree with a * specific type of MatchExpression at the root. Except for requiring a specific MatchExpression * subclass, an ExpressionOptimizerFunc has the same requirements and functionality as described * in the specification of MatchExpression::getOptimizer(std::unique_ptr). */ using ExpressionOptimizerFunc = std::function(std::unique_ptr)>; /** * Subclasses that are collation-aware must implement this method in order to capture changes * to the collator that occur after initialization time. */ virtual void _doSetCollator(const CollatorInterface* collator){}; virtual void _doAddDependencies(DepsTracker* deps) const {} void _debugAddSpace(StringBuilder& debug, int indentationLevel) const; clonable_ptr _errorAnnotation; private: /** * Subclasses should implement this function to provide an ExpressionOptimizerFunc specific to * the subclass. This function is only called by * MatchExpression::optimize(std::unique_ptr), which is responsible for calling * MatchExpression::getOptimizer() on its input MatchExpression and then passing the same * MatchExpression to the resulting ExpressionOptimizerFunc. There should be no other callers * to this function. * * Any MatchExpression subclass that stores child MatchExpression objects is responsible for * returning an ExpressionOptimizerFunc that recursively calls * MatchExpression::optimize(std::unique_ptr) on those children. * * See the descriptions of MatchExpression::optimize(std::unique_ptr) and * ExpressionOptimizerFunc for additional explanation of their interfaces and functionality. */ virtual ExpressionOptimizerFunc getOptimizer() const = 0; MatchType _matchType; std::unique_ptr _tagData; }; inline MatchExpression::Iterator begin(MatchExpression& expr) { return {&expr, 0}; } inline MatchExpression::ConstIterator begin(const MatchExpression& expr) { return {&expr, 0}; } inline MatchExpression::Iterator end(MatchExpression& expr) { return {&expr, expr.numChildren()}; } inline MatchExpression::ConstIterator end(const MatchExpression& expr) { return {&expr, expr.numChildren()}; } } // namespace mongo