diff options
18 files changed, 1254 insertions, 76 deletions
diff --git a/src/mongo/db/matcher/SConscript b/src/mongo/db/matcher/SConscript index 641bc8b8a20..b85be870ac1 100644 --- a/src/mongo/db/matcher/SConscript +++ b/src/mongo/db/matcher/SConscript @@ -19,6 +19,7 @@ env.Library( env.Library( target='expressions', source=[ + 'match_expression_util.cpp', 'doc_validation_error.cpp', 'doc_validation_util.cpp', 'expression.cpp', @@ -98,6 +99,7 @@ env.Library( env.CppUnitTest( target='db_matcher_test', source=[ + 'match_expression_util_test.cpp', 'doc_validation_error_json_schema_test.cpp', 'doc_validation_error_test.cpp', 'expression_algo_test.cpp', diff --git a/src/mongo/db/matcher/doc_validation_error.cpp b/src/mongo/db/matcher/doc_validation_error.cpp index 10ce71c01ab..87de53f4188 100644 --- a/src/mongo/db/matcher/doc_validation_error.cpp +++ b/src/mongo/db/matcher/doc_validation_error.cpp @@ -43,13 +43,20 @@ #include "mongo/db/matcher/expression_tree.h" #include "mongo/db/matcher/expression_type.h" #include "mongo/db/matcher/expression_visitor.h" +#include "mongo/db/matcher/match_expression_util.h" #include "mongo/db/matcher/match_expression_walker.h" +#include "mongo/db/matcher/schema/expression_internal_schema_all_elem_match_from_index.h" #include "mongo/db/matcher/schema/expression_internal_schema_fmod.h" +#include "mongo/db/matcher/schema/expression_internal_schema_match_array_index.h" +#include "mongo/db/matcher/schema/expression_internal_schema_max_items.h" #include "mongo/db/matcher/schema/expression_internal_schema_max_length.h" +#include "mongo/db/matcher/schema/expression_internal_schema_min_items.h" #include "mongo/db/matcher/schema/expression_internal_schema_min_length.h" #include "mongo/db/matcher/schema/expression_internal_schema_object_match.h" #include "mongo/db/matcher/schema/expression_internal_schema_str_length.h" +#include "mongo/db/matcher/schema/expression_internal_schema_unique_items.h" #include "mongo/db/matcher/schema/expression_internal_schema_xor.h" +#include "mongo/db/matcher/schema/json_schema_parser.h" namespace mongo::doc_validation_error { namespace { @@ -58,6 +65,7 @@ MONGO_INIT_REGISTER_ERROR_EXTRA_INFO(DocumentValidationFailureInfo); using ErrorAnnotation = MatchExpression::ErrorAnnotation; using AnnotationMode = ErrorAnnotation::Mode; using LeafArrayBehavior = ElementPath::LeafArrayBehavior; +using NonLeafArrayBehavior = ElementPath::NonLeafArrayBehavior; /** * Enumerated type which describes whether an error should be described normally or in an @@ -196,6 +204,10 @@ struct ValidationErrorContext { } return rootDoc; } + void setCurrentDocument(const BSONObj& document) { + invariant(!frames.empty()); + frames.top().currentDoc = document; + } InvertError getCurrentInversion() const { invariant(!frames.empty()); return frames.top().inversion; @@ -313,6 +325,39 @@ void finishLogicalOperatorChildError(const ListOfMatchExpression* expr, } /** + * Enumerated type to encode JSON Schema array keyword "items" and "additionalItems", and their + * variants. + */ +enum class ItemsKeywordType { + kItems, // 'items': {schema} + kAdditionalItemsFalse, // 'additionalItems': false + kAdditionalItemsSchema, // 'additionalItems': {schema} +}; + +/** + * Decodes the JSON Schema "items"/"additionalItems" keyword type from an error annotation of + * expression 'expr'. + */ +ItemsKeywordType toItemsKeywordType( + const InternalSchemaAllElemMatchFromIndexMatchExpression& expr) { + auto* errorAnnotation = expr.getErrorAnnotation(); + if ("items" == errorAnnotation->operatorName) { + return ItemsKeywordType::kItems; + } + if ("additionalItems" == errorAnnotation->operatorName) { + switch (errorAnnotation->annotation.firstElementType()) { + case BSONType::Bool: + return ItemsKeywordType::kAdditionalItemsFalse; + case BSONType::Object: + return ItemsKeywordType::kAdditionalItemsSchema; + default: + MONGO_UNREACHABLE; + } + } + MONGO_UNREACHABLE; +} + +/** * Visitor which is primarily responsible for error generation. */ class ValidationErrorPreVisitor final : public MatchExpressionConstVisitor { @@ -325,12 +370,16 @@ public: generateAlwaysBooleanError(*expr); } void visit(const AndMatchExpression* expr) final { + auto&& operatorName = expr->getErrorAnnotation()->operatorName; // $all is treated as a leaf operator. - auto operatorName = expr->getErrorAnnotation()->operatorName; if (operatorName == "$all") { static constexpr auto kNormalReason = "array did not contain all specified values"; static constexpr auto kInvertedReason = "array did contain all specified values"; generateLogicalLeafError(*expr, kNormalReason, kInvertedReason); + } else if (operatorName == "items") { + // $and only gets annotated as an "items" only for JSON Schema keyword "items" set to an + // array of subschemas. + generateJSONSchemaItemsSchemaArrayError(*expr); } else { preVisitTreeOperator(expr); // An AND needs its children to call 'matches' in a normal context to discern which @@ -429,7 +478,25 @@ public: generatePathError(*expr, kNormalReason, kInvertedReason); } void visit(const InternalExprEqMatchExpression* expr) final {} - void visit(const InternalSchemaAllElemMatchFromIndexMatchExpression* expr) final {} + void visit(const InternalSchemaAllElemMatchFromIndexMatchExpression* expr) final { + switch (toItemsKeywordType(*expr)) { + case ItemsKeywordType::kItems: { + static constexpr auto kNormalReason = + "At least one item did not match the sub-schema"; + generateJSONSchemaArraySingleSchemaError(expr, kNormalReason, ""); + } break; + case ItemsKeywordType::kAdditionalItemsSchema: { + static constexpr auto kNormalReason = + "At least one additional item did not match the sub-schema"; + generateJSONSchemaArraySingleSchemaError(expr, kNormalReason, ""); + } break; + case ItemsKeywordType::kAdditionalItemsFalse: + generateJSONSchemaAdditionalItemsFalseError(expr); + break; + default: + MONGO_UNREACHABLE; + } + } void visit(const InternalSchemaAllowedPropertiesMatchExpression* expr) final {} void visit(const InternalSchemaBinDataEncryptedTypeExpression* expr) final { static constexpr auto kNormalReason = "encrypted value has wrong type"; @@ -447,8 +514,7 @@ public: // appropriate error. if (elem.type() == BSONType::BinData && elem.binDataType() == BinDataType::Encrypt && _context->getCurrentInversion() == InvertError::kNormal) { - auto& builder = _context->getCurrentObjBuilder(); - appendOperatorName(*expr->getErrorAnnotation(), &builder); + appendOperatorName(*expr); appendErrorReason(kNormalReason, kInvertedReason); } else { _context->setCurrentRuntimeState(RuntimeState::kNoError); @@ -460,8 +526,7 @@ public: static constexpr auto kInvertedReason = "value was encrypted"; _context->pushNewFrame(*expr, _context->getCurrentDocument()); if (_context->shouldGenerateError(*expr)) { - auto& builder = _context->getCurrentObjBuilder(); - appendOperatorName(*expr->getErrorAnnotation(), &builder); + appendOperatorName(*expr); appendErrorReason(kNormalReason, kInvertedReason); } } @@ -482,13 +547,43 @@ public: &kExpectedTypes, LeafArrayBehavior::kNoTraversal); } - void visit(const InternalSchemaMatchArrayIndexMatchExpression* expr) final {} - void visit(const InternalSchemaMaxItemsMatchExpression* expr) final {} + void visit(const InternalSchemaMatchArrayIndexMatchExpression* expr) final { + _context->pushNewFrame(*expr, _context->getCurrentDocument()); + if (_context->shouldGenerateError(*expr)) { + // Get an element of an array. + ElementPath path( + expr->path(), LeafArrayBehavior::kNoTraversal, NonLeafArrayBehavior::kNoTraversal); + auto attributeValue = getValueAt(path); + + // Attribute should be present and be an array, since it has been ensured by handling of + // AndMatchExpression with error annotation "items". + invariant(attributeValue.type() == BSONType::Array); + auto valueAsArray = BSONArray(attributeValue.embeddedObject()); + + // If array is shorter than the index the match expression applies to, then document + // validation should not fail. + invariant(expr->arrayIndex() < valueAsArray.nFields()); + + // Append information about array element to the error. + BSONElement arrayElement = valueAsArray[expr->arrayIndex()]; + BSONObjBuilder& bob = _context->getCurrentObjBuilder(); + bob.append("itemIndex"_sd, expr->arrayIndex()); + + // Build a document corresponding to the array element for the child expression to + // operate on. + _context->setCurrentDocument(toObjectWithPlaceholder(arrayElement)); + } + } + void visit(const InternalSchemaMaxItemsMatchExpression* expr) final { + generateJSONSchemaMinItemsMaxItemsError(expr); + } void visit(const InternalSchemaMaxLengthMatchExpression* expr) final { generateStringLengthError(*expr); } void visit(const InternalSchemaMaxPropertiesMatchExpression* expr) final {} - void visit(const InternalSchemaMinItemsMatchExpression* expr) final {} + void visit(const InternalSchemaMinItemsMatchExpression* expr) final { + generateJSONSchemaMinItemsMaxItemsError(expr); + } void visit(const InternalSchemaMinLengthMatchExpression* expr) final { generateStringLengthError(*expr); } @@ -528,7 +623,21 @@ public: void visit(const InternalSchemaTypeExpression* expr) final { generateTypeError(expr, LeafArrayBehavior::kNoTraversal); } - void visit(const InternalSchemaUniqueItemsMatchExpression* expr) final {} + void visit(const InternalSchemaUniqueItemsMatchExpression* expr) final { + static constexpr auto normalReason = "found a duplicate item"; + _context->pushNewFrame(*expr, _context->getCurrentDocument()); + if (auto attributeValue = getValueForArrayKeywordExpressionIfShouldGenerateError(*expr)) { + appendErrorDetails(*expr); + appendErrorReason(normalReason, ""); + auto attributeValueAsArray = BSONArray(attributeValue.embeddedObject()); + appendConsideredValue(attributeValueAsArray); + auto duplicateValue = expr->findFirstDuplicateValue(attributeValueAsArray); + invariant(duplicateValue); + _context->getCurrentObjBuilder().appendAs(duplicateValue, "duplicatedValue"_sd); + } else { + _context->setCurrentRuntimeState(RuntimeState::kNoError); + } + } void visit(const InternalSchemaXorMatchExpression* expr) final { preVisitTreeOperator(expr); _context->setCurrentRuntimeState(RuntimeState::kErrorNeedChildrenInfo); @@ -639,11 +748,11 @@ public: private: // Set of utilities responsible for appending various fields to build a descriptive error. - void appendOperatorName(const ErrorAnnotation& annotation, BSONObjBuilder* bob) { - auto operatorName = annotation.operatorName; + void appendOperatorName(const MatchExpression& expr) { + auto operatorName = expr.getErrorAnnotation()->operatorName; // Only append the operator name if 'annotation' has one. if (!operatorName.empty()) { - bob->append("operatorName", operatorName); + _context->getCurrentObjBuilder().append("operatorName", operatorName); } } void appendSpecifiedAs(const ErrorAnnotation& annotation, BSONObjBuilder* bob) { @@ -652,7 +761,7 @@ private: void appendErrorDetails(const MatchExpression& expr) { auto annotation = expr.getErrorAnnotation(); BSONObjBuilder& bob = _context->getCurrentObjBuilder(); - appendOperatorName(*annotation, &bob); + appendOperatorName(expr); appendSpecifiedAs(*annotation, &bob); } @@ -675,6 +784,23 @@ private: } /** + * Returns a value at path 'path' in the current document, or an empty (End-Of-Object type) + * element if the value is not present. Illegal to call if, due to implicit array traversal, + * 'path' would result in multiple elements. + */ + BSONElement getValueAt(const ElementPath& path) { + BSONMatchableDocument doc(_context->getCurrentDocument()); + MatchableDocument::IteratorHolder cursor(&doc, &path); + if (cursor->more()) { + auto element = cursor->next().element(); + invariant(!cursor->more()); // We expect only 1 item. + return element; + } else { + return {}; + } + } + + /** * Appends a missing field error if 'arr' is empty. */ void appendMissingField(const BSONArray& arr) { @@ -733,6 +859,9 @@ private: bob.append("reason", invertedReason); } } + void appendConsideredValue(const BSONArray& array) { + _context->getCurrentObjBuilder().append("consideredValue"_sd, array); + } void appendConsideredValues(const BSONArray& arr) { int size = arr.nFields(); if (size == 0) { @@ -851,7 +980,7 @@ private: // Only append the operator name if it will produce an object error corresponding to // a user-facing operator. if (!_context->producesArray(*expr)) - appendOperatorName(*annotation, &_context->getCurrentObjBuilder()); + appendOperatorName(*expr); _context->getCurrentObjBuilder().appendElements(annotation->annotation); } } @@ -908,6 +1037,176 @@ private: expr, kNormalReason, kInvertedReason, &expectedTypes, LeafArrayBehavior::kNoTraversal); } + /** + * Determines if a validation error should be generated for a JSON Schema array keyword match + * expression 'expr' given the current document validation context and returns the array 'expr' + * expression applies over. If a validation error should not be generated, then the + * End-Of-Object (EOO) value is returned. If a validation error should be generated, then the + * type of the value of the returned BSONElement is always an array. + */ + BSONElement getValueForArrayKeywordExpressionIfShouldGenerateError( + const MatchExpression& expr) { + if (!_context->shouldGenerateError(expr)) { + return {}; + } + if (InvertError::kInverted == _context->getCurrentInversion()) { + // Inverted errors are not supported. + return {}; + } + + // Determine what value does 'expr' expression apply over. + ElementPath path( + expr.path(), LeafArrayBehavior::kNoTraversal, NonLeafArrayBehavior::kNoTraversal); + auto attributeValue = getValueAt(path); + + // If attribute value is either not present or is not an array, do not generate an error, + // since related match expressions do that instead. There are 4 cases of how an array + // keyword can be defined in combination with 'required' and 'type' keywords (in the + // explanation below parameter 'expr' corresponds to '(array keyword match expression)'): + // + // 1) 'required' is not present, {type: 'array'} is not present. In this case the expression + // tree corresponds to ((array keyword match expression) OR NOT (is array)) OR (NOT + // (attribute exists)). This tree can fail to match only if the attribute is present and is + // an array. + // + // 2) 'required' is not present, {type: 'array'} is present. In this case the expression + // tree corresponds to ((array keyword match expression) AND (is array)) OR (NOT (attribute + // exists)). If the input is an attribute of a non-array type, then both (array keyword + // match expression) and (is array) expressions fail to match and are asked to contribute to + // the validation error. We expect only (is array) expression, not an (array keyword match + // expression), to report a type mismatch, since otherwise the error would contain redundant + // elements. + // + // 3) 'required' is present, {type: 'array'} is not present. In this case the expression + // tree corresponds to ((array keyword match expression) OR NOT (is array)) AND (attribute + // exists). This tree can fail to match if the attribute is present and is an array, and + // fails to match when the attribute is not present. In the latter case expression part + // ((array keyword match expression) OR NOT (is array)) matches and (array keyword match + // expression) is not asked to contribute to the error. + // + // 4) 'required' is present, {type: 'array'} is present. In this case the expression tree + // corresponds to ((array keyword match expression) AND (is array)) AND (attribute exists). + // This tree can fail to match if the attribute is present and is an array, and fails to + // match when the attribute is not present or is not an array. In the case when the + // attribute is not present all parts of the expression fail to match and are asked to + // contribute to the error, but we expect only (attribute exists) expression to contribute, + // since otherwise the error would contain redundant elements. + return (attributeValue.type() == BSONType::Array) ? attributeValue : BSONElement{}; + } + + /** + * Generates an error for JSON Schema "minItems"/"maxItems" keyword match expression 'expr'. + */ + void generateJSONSchemaMinItemsMaxItemsError( + const InternalSchemaNumArrayItemsMatchExpression* expr) { + static constexpr auto normalReason = "array did not match specified length"; + _context->pushNewFrame(*expr, _context->getCurrentDocument()); + if (auto attributeValue = getValueForArrayKeywordExpressionIfShouldGenerateError(*expr)) { + appendErrorDetails(*expr); + appendErrorReason(normalReason, ""); + auto attributeValueAsArray = BSONArray(attributeValue.embeddedObject()); + appendConsideredValue(attributeValueAsArray); + } else { + _context->setCurrentRuntimeState(RuntimeState::kNoError); + } + } + + /** + * Generates an error for JSON Schema "additionalItems" keyword set to 'false'. + */ + void generateJSONSchemaAdditionalItemsFalseError( + const InternalSchemaAllElemMatchFromIndexMatchExpression* expr) { + static constexpr auto normalReason = "found additional items"; + _context->pushNewFrame(*expr, _context->getCurrentDocument()); + if (auto attributeValue = getValueForArrayKeywordExpressionIfShouldGenerateError(*expr)) { + appendErrorDetails(*expr); + appendErrorReason(normalReason, ""); + appendAdditionalItems(BSONArray(attributeValue.embeddedObject()), expr->startIndex()); + } else { + _context->setCurrentRuntimeState(RuntimeState::kNoError); + } + } + + /** + * Generates an error for JSON Schema "items" keyword set to an array of subschemas that is used + * to validate elements of the array. + */ + void generateJSONSchemaItemsSchemaArrayError(const AndMatchExpression& expr) { + _context->pushNewFrame(expr, _context->getCurrentDocument()); + + // Determine if we need to generate an error using a child of the "$and" expression, which + // must be of InternalSchemaMatchArrayIndexMatchExpression type, since "$and" does not have + // a path associated with it. + + // If 'expr' does not have any children then we have 'items':[] case and we don't need to + // generate an error. + if (expr.numChildren() == 0) { + return; + } + invariant(expr.getChild(0)->matchType() == + MatchExpression::MatchType::INTERNAL_SCHEMA_MATCH_ARRAY_INDEX); + if (getValueForArrayKeywordExpressionIfShouldGenerateError(*expr.getChild(0))) { + appendOperatorName(expr); + + // Since the "items" keyword set to an array of subschemas logically behaves as "$and", + // it needs its children to call 'matches' to discern which clauses failed. + _context->setCurrentRuntimeState(RuntimeState::kErrorNeedChildrenInfo); + } else { + // Force children match expressions to not generate any errors. + _context->setCurrentRuntimeState(RuntimeState::kNoError); + } + } + + /** + * Builds a BSON object from a BSON element 'element' using the same name placeholder as the + * JSON Schema match expressions. + */ + BSONObj toObjectWithPlaceholder(BSONElement element) { + return BSON(JSONSchemaParser::kNamePlaceholder << element); + } + + /** + * Adds elements starting from index 'startIndex' from array 'array' to the current object as + * "additionalItems" attribute. + */ + void appendAdditionalItems(const mongo::BSONArray& array, size_t startIndex) { + auto it = BSONObjIterator(array); + + // Skip first 'startIndex' elements. + match_expression_util::advanceBy(startIndex, it); + + // Add remaining array elements as "additionalItems" attribute. + auto& detailsArrayBuilder = _context->getCurrentArrayBuilder(); + while (it.more()) { + detailsArrayBuilder.append(it.next()); + } + _context->getCurrentObjBuilder().append("additionalItems"_sd, detailsArrayBuilder.arr()); + } + + /** + * Generates an error for JSON Schema array keyword set to a single schema value that is used + * to validate elements of the array. + */ + void generateJSONSchemaArraySingleSchemaError( + const InternalSchemaAllElemMatchFromIndexMatchExpression* expr, + const std::string& normalReason, + const std::string& invertedReason) { + _context->pushNewFrame(*expr, _context->getCurrentDocument()); + if (auto attributeValue = getValueForArrayKeywordExpressionIfShouldGenerateError(*expr)) { + appendOperatorName(*expr); + appendErrorReason(normalReason, invertedReason); + auto failingElement = + expr->findFirstMismatchInArray(attributeValue.embeddedObject(), nullptr); + invariant(failingElement); + _context->getCurrentObjBuilder().appendNumber( + "itemIndex"_sd, std::stoll(failingElement.fieldNameStringData().toString())); + _context->setCurrentDocument(toObjectWithPlaceholder(failingElement)); + } else { + // Disable error generation by the child expression of 'expr'. + _context->setCurrentRuntimeState(RuntimeState::kNoError); + } + } + ValidationErrorContext* _context; }; @@ -1031,6 +1330,7 @@ public: {"properties", {"propertiesNotSatisfied", ""}}, {"$jsonSchema", {"schemaRulesNotSatisfied", ""}}, {"_internalSubschema", {"", ""}}, + {"items", {"details", ""}}, {"", {"details", ""}}}; auto detailsStringPair = detailsStringMap.find(operatorName); invariant(detailsStringPair != detailsStringMap.end()); @@ -1084,7 +1384,21 @@ public: _context->finishCurrentError(expr); } void visit(const InternalExprEqMatchExpression* expr) final {} - void visit(const InternalSchemaAllElemMatchFromIndexMatchExpression* expr) final {} + void visit(const InternalSchemaAllElemMatchFromIndexMatchExpression* expr) final { + switch (toItemsKeywordType(*expr)) { + case ItemsKeywordType::kItems: + case ItemsKeywordType::kAdditionalItemsSchema: + if (_context->shouldGenerateError(*expr)) { + _context->appendLatestCompleteError(&_context->getCurrentObjBuilder()); + } + break; + case ItemsKeywordType::kAdditionalItemsFalse: + break; + default: + MONGO_UNREACHABLE; + } + _context->finishCurrentError(expr); + } void visit(const InternalSchemaAllowedPropertiesMatchExpression* expr) final {} void visit(const InternalSchemaBinDataEncryptedTypeExpression* expr) final { _context->finishCurrentError(expr); @@ -1097,13 +1411,23 @@ public: void visit(const InternalSchemaFmodMatchExpression* expr) final { _context->finishCurrentError(expr); } - void visit(const InternalSchemaMatchArrayIndexMatchExpression* expr) final {} - void visit(const InternalSchemaMaxItemsMatchExpression* expr) final {} + void visit(const InternalSchemaMatchArrayIndexMatchExpression* expr) final { + // If generating an error, append the error details. + if (_context->shouldGenerateError(*expr)) { + _context->appendLatestCompleteError(&_context->getCurrentObjBuilder()); + } + _context->finishCurrentError(expr); + } + void visit(const InternalSchemaMaxItemsMatchExpression* expr) final { + _context->finishCurrentError(expr); + } void visit(const InternalSchemaMaxLengthMatchExpression* expr) final { _context->finishCurrentError(expr); } void visit(const InternalSchemaMaxPropertiesMatchExpression* expr) final {} - void visit(const InternalSchemaMinItemsMatchExpression* expr) final {} + void visit(const InternalSchemaMinItemsMatchExpression* expr) final { + _context->finishCurrentError(expr); + } void visit(const InternalSchemaMinLengthMatchExpression* expr) final { _context->finishCurrentError(expr); } @@ -1115,7 +1439,9 @@ public: void visit(const InternalSchemaTypeExpression* expr) final { _context->finishCurrentError(expr); } - void visit(const InternalSchemaUniqueItemsMatchExpression* expr) final {} + void visit(const InternalSchemaUniqueItemsMatchExpression* expr) final { + _context->finishCurrentError(expr); + } void visit(const InternalSchemaXorMatchExpression* expr) final { static constexpr auto normalDetailString = "schemasNotSatisfied"; if (_context->getCurrentInversion() == InvertError::kNormal) { @@ -1230,6 +1556,33 @@ bool hasErrorAnnotations(const MatchExpression& validatorExpr) { return true; } +/** + * Generates a document validation error using match expression 'validatorExpr' for document + * 'doc'. + */ +BSONObj generateDocumentValidationError(const MatchExpression& validatorExpr, const BSONObj& doc) { + ValidationErrorContext context(doc); + ValidationErrorPreVisitor preVisitor{&context}; + ValidationErrorInVisitor inVisitor{&context}; + ValidationErrorPostVisitor postVisitor{&context}; + + // TODO SERVER-49446: Once all nodes have ErrorAnnotations, this check should be converted to an + // invariant check that all nodes have an annotation. Also add an invariant to the + // DocumentValidationFailureInfo constructor to check that it is initialized with a non-empty + // object. + if (!hasErrorAnnotations(validatorExpr)) { + return BSONObj(); + } + MatchExpressionWalker walker{&preVisitor, &inVisitor, &postVisitor}; + tree_walker::walk<true, MatchExpression>(&validatorExpr, &walker); + + // There should be no frames when error generation is complete as the finished error will be + // stored in 'context'. + invariant(context.frames.empty()); + auto error = context.getLatestCompleteErrorObject(); + invariant(!error.isEmpty()); + return error; +} } // namespace std::shared_ptr<const ErrorExtraInfo> DocumentValidationFailureInfo::parse(const BSONObj& obj) { @@ -1250,24 +1603,9 @@ void DocumentValidationFailureInfo::serialize(BSONObjBuilder* bob) const { const BSONObj& DocumentValidationFailureInfo::getDetails() const { return _details; } -BSONObj generateError(const MatchExpression& validatorExpr, const BSONObj& doc) { - ValidationErrorContext context(doc); - ValidationErrorPreVisitor preVisitor{&context}; - ValidationErrorInVisitor inVisitor{&context}; - ValidationErrorPostVisitor postVisitor{&context}; - // TODO SERVER-49446: Once all nodes have ErrorAnnotations, this check should be converted to an - // invariant check that all nodes have an annotation. Also add an invariant to the - // DocumentValidationFailureInfo constructor to check that it is initialized with a non-empty - // object. - if (!hasErrorAnnotations(validatorExpr)) { - return BSONObj(); - } - MatchExpressionWalker walker{&preVisitor, &inVisitor, &postVisitor}; - tree_walker::walk<true, MatchExpression>(&validatorExpr, &walker); - // There should be no frames when error generation is complete as the finished error will be - // stored in 'context'. - invariant(context.frames.empty()); +BSONObj generateError(const MatchExpression& validatorExpr, const BSONObj& doc) { + auto error = generateDocumentValidationError(validatorExpr, doc); BSONObjBuilder objBuilder; // Add document id to the error object. @@ -1276,10 +1614,7 @@ BSONObj generateError(const MatchExpression& validatorExpr, const BSONObj& doc) objBuilder.appendAs(objectIdElement, "failingDocumentId"_sd); // Add errors from match expressions. - auto error = context.getLatestCompleteErrorObject(); - invariant(!error.isEmpty()); objBuilder.append("details"_sd, std::move(error)); return objBuilder.obj(); } - } // namespace mongo::doc_validation_error diff --git a/src/mongo/db/matcher/doc_validation_error_json_schema_test.cpp b/src/mongo/db/matcher/doc_validation_error_json_schema_test.cpp index 147cd3cf8f5..22af39c869f 100644 --- a/src/mongo/db/matcher/doc_validation_error_json_schema_test.cpp +++ b/src/mongo/db/matcher/doc_validation_error_json_schema_test.cpp @@ -1442,5 +1442,603 @@ TEST(JSONSchemaLogicalKeywordValidation, CombineLogicalKeywords) { doc_validation_error::verifyGeneratedError(query, document, expectedError); } +TEST(JSONSchemaValidation, ArrayType) { + BSONObj query = fromjson( + " {'$jsonSchema':" + " {'properties': " + " {'a': {'type': 'array'}}}}"); + BSONObj document = fromjson("{'a': {'b': 1}}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$jsonSchema'," + "'schemaRulesNotSatisfied': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'a', 'details': [" + " {'operatorName': 'type'," + " 'specifiedAs': {'type': 'array'}," + " 'reason': 'type did not match'," + " 'consideredValue': {'b': 1}," + " 'consideredType': 'object'}]}]}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayMinItemsTypeArray) { + BSONObj query = fromjson( + " {'$jsonSchema':" + " {'properties': " + " {'a': {'type': 'array', 'minItems': 2}}}}"); + BSONObj document = fromjson("{'a': [1]}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$jsonSchema'," + "'schemaRulesNotSatisfied': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'a', 'details': [" + " {'operatorName': 'minItems'," + " 'specifiedAs': {'minItems': 2}," + " 'reason': 'array did not match specified length'," + " 'consideredValue': [1]}]}]}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayMinItems) { + BSONObj query = fromjson( + " {'$jsonSchema':" + " {'properties': " + " {'a': {'minItems': 2}}}}"); + BSONObj document = fromjson("{'a': [1]}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$jsonSchema'," + "'schemaRulesNotSatisfied': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'a', 'details': [" + " {'operatorName': 'minItems'," + " 'specifiedAs': {'minItems': 2}," + " 'reason': 'array did not match specified length'," + " 'consideredValue': [1]}]}]}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayMinItemsAlwaysTrue) { + BSONObj query = fromjson("{$nor: [{'$jsonSchema': {'minItems': 2}}]}"); + BSONObj document = fromjson("{}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$nor', 'clausesSatisfied': [" + " {'index': 0, 'details': " + " {'operatorName': '$jsonSchema', 'reason': 'schema matched'}}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayMinItemsTypeArrayOnNonArrayAttribute) { + BSONObj query = fromjson( + " {'$jsonSchema':" + " {'properties': " + " {'a': {'type': 'array', 'minItems': 2}}}}"); + BSONObj document = fromjson("{'a': 1}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$jsonSchema'," + "'schemaRulesNotSatisfied': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'a', 'details': [" + " {'operatorName': 'type'," + " 'specifiedAs': {'type': 'array'}," + " 'reason': 'type did not match'," + " 'consideredValue': 1," + " 'consideredType': 'int'}]}]}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayMaxItems) { + BSONObj query = fromjson( + " {'$jsonSchema':" + " {'properties': " + " {'a': {'maxItems': 2}}}}"); + BSONObj document = fromjson("{'a': [1, 2, 3]}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$jsonSchema'," + "'schemaRulesNotSatisfied': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'a', 'details': [" + " {'operatorName': 'maxItems'," + " 'specifiedAs': {'maxItems': 2}," + " 'reason': 'array did not match specified length'," + " 'consideredValue': [1, 2, 3]}]}]}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayMaxItemsAlwaysTrue) { + BSONObj query = fromjson("{$nor: [{'$jsonSchema': {'maxItems': 2}}]}"); + BSONObj document = fromjson("{}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$nor', 'clausesSatisfied': [" + " {'index': 0, 'details': " + " {'operatorName': '$jsonSchema', 'reason': 'schema matched'}}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayUniqueItems) { + BSONObj query = fromjson( + " {'$jsonSchema':" + " {'properties': " + " {'a': {'uniqueItems': true}}}}"); + BSONObj document = fromjson("{'a': [1, 2, 3, 3, 4, 4]}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$jsonSchema'," + "'schemaRulesNotSatisfied': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'a', 'details': [" + " {'operatorName': 'uniqueItems'," + " 'specifiedAs': {'uniqueItems': true}," + " 'reason': 'found a duplicate item'," + " 'consideredValue': [1, 2, 3, 3, 4, 4]," + " 'duplicatedValue': 3}]}]}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayUniqueItemTypeArray) { + BSONObj query = fromjson( + " {'$jsonSchema':" + " {'properties': " + " {'a': {'type': 'array', 'uniqueItems': true}}}}"); + BSONObj document = fromjson("{'a': [1, 2, 3, 3, 4, 4]}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$jsonSchema'," + "'schemaRulesNotSatisfied': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'a', 'details': [" + " {'operatorName': 'uniqueItems'," + " 'specifiedAs': {'uniqueItems': true}," + " 'reason': 'found a duplicate item'," + " 'consideredValue': [1, 2, 3, 3, 4, 4]," + " 'duplicatedValue': 3}]}]}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayUniqueItemsTypeArrayOnNonArrayAttribute) { + BSONObj query = fromjson( + " {'$jsonSchema':" + " {'properties': " + " {'a': {'type': 'array', 'uniqueItems': true}}}}"); + BSONObj document = fromjson("{'a': 1}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$jsonSchema'," + "'schemaRulesNotSatisfied': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'a', 'details': [" + " {'operatorName': 'type'," + " 'specifiedAs': {'type': 'array'}," + " 'reason': 'type did not match'," + " 'consideredValue': 1," + " 'consideredType': 'int'}]}]}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayUniqueItemsAlwaysTrue) { + BSONObj query = fromjson("{$nor: [{'$jsonSchema': {'uniqueItems': true}}]}"); + BSONObj document = fromjson("{}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$nor', 'clausesSatisfied': [" + " {'index': 0, 'details': " + " {'operatorName': '$jsonSchema', 'reason': 'schema matched'}}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayItemsSingleSchema) { + BSONObj query = fromjson( + " {'$jsonSchema':" + " {'properties': " + " {'a': {'items': {'type': 'string'}}}}}"); + BSONObj document = fromjson("{'a': [1, 'A', {}]}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$jsonSchema', 'schemaRulesNotSatisfied': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'a', 'details': [" + " {'operatorName': 'items', 'reason': 'At least one item did not match the " + "sub-schema', 'itemIndex': 0, 'details': [" + " {'operatorName': 'type', 'specifiedAs': {'type': 'string'}, " + "'reason': 'type did not match', 'consideredValue': 1, 'consideredType': 'int'}]}]}]}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayItemsSingleSchemaTypeArrayOnNonArrayAttribute) { + BSONObj query = fromjson( + " {'$jsonSchema':" + " {'properties': " + " {'a': {'items': {'type': 'string'}, 'type': 'array'}}}}"); + BSONObj document = fromjson("{'a': 1}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$jsonSchema'," + "'schemaRulesNotSatisfied': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'a', 'details': [" + " {'operatorName': 'type'," + " 'specifiedAs': {'type': 'array'}," + " 'reason': 'type did not match'," + " 'consideredValue': 1," + " 'consideredType': 'int'}]}]}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +// Verifies that "items" with a single schema does not produce any unwanted artifacts when it does +// not fail. We use "minItems" that fails validation to check that. +TEST(JSONSchemaValidation, ArrayItemsSingleSchemaCombinedWithMinItems) { + BSONObj query = fromjson( + " {'$jsonSchema':" + " {'properties': " + " {'a': {'items': {'type': 'string'}, 'minItems': 5}}}}"); + BSONObj document = fromjson("{'a': ['A', 'B']}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$jsonSchema'," + "'schemaRulesNotSatisfied': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'a', 'details': [" + " {'operatorName': 'minItems'," + " 'specifiedAs': {'minItems': 5}," + " 'reason': 'array did not match specified length'," + " 'consideredValue': ['A', 'B']}]}]}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayItemsSingleSchemaAlwaysTrue) { + BSONObj query = fromjson("{$nor: [{'$jsonSchema': {'items': {'type': 'string'}}}]}"); + BSONObj document = fromjson("{}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$nor', 'clausesSatisfied': [" + " {'index': 0, 'details': " + " {'operatorName': '$jsonSchema', 'reason': 'schema matched'}}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayItemsSingleSchemaNested) { + BSONObj query = fromjson( + "{'$jsonSchema': {'properties': " + " {'a': {'items': {'properties': {'b': {'minItems': 2}}}}}}}"); + BSONObj document = fromjson("{'a': [{'b': [1]}]}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$jsonSchema', 'schemaRulesNotSatisfied': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'a', 'details': [" + " {'operatorName': 'items', 'reason': 'At least one item did not match the " + "sub-schema', 'itemIndex': 0, 'details': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'b', 'details': [ " + " {'operatorName': 'minItems', 'specifiedAs': { 'minItems': 2 }, 'reason': " + "'array did not match specified length', 'consideredValue': [1]}]}]}]}]}]}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayItemsSingleSchema2DArray) { + BSONObj query = + fromjson("{'$jsonSchema': {'properties': {'a': {'items': {'items': {'minimum': 0}}}}}}"); + BSONObj document = fromjson("{'a': [[1],[],[2, 4], [-1]]}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$jsonSchema', 'schemaRulesNotSatisfied': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'a', 'details': [" + " {'operatorName': 'items', 'reason': 'At least one item did not match the " + "sub-schema', 'itemIndex': 3, 'details': [" + " {'operatorName': 'items', 'reason': 'At least one item did not match the " + "sub-schema', 'itemIndex': 0, 'details': [" + " {'operatorName': 'minimum', 'specifiedAs': {'minimum': 0}, 'reason': " + "'comparison failed', 'consideredValue': -1}]}]}]}]}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayItemsSingleSchemaNestedWithMinimum) { + BSONObj query = fromjson( + "{'$jsonSchema': {'properties': " + " {'a': {'items': {'properties': {'b': {'minimum': 2}}}}}}}"); + BSONObj document = fromjson("{'a': [{'b': 2}, {'b': 1}]}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$jsonSchema', 'schemaRulesNotSatisfied': [" + "{'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'a', 'details': [" + " {'operatorName': 'items', 'reason': 'At least one item did not match the sub-schema', " + "'itemIndex': 1, 'details': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'b', 'details': [" + " {'operatorName': 'minimum', 'specifiedAs': {'minimum': 2}, 'reason': " + "'comparison failed', 'consideredValue': 1}]}]}]}]}]}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayItemsSchemaArray) { + BSONObj query = fromjson( + " {'$jsonSchema':" + " {'properties': " + " {'a': {'items': [{'type': 'number'}, {'type': 'string'}]}}}}"); + BSONObj document = fromjson("{'a': [1, 2]}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$jsonSchema', 'schemaRulesNotSatisfied': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'a', 'details': [" + " {'operatorName': 'items', 'details': [" + " {'itemIndex': 1, 'details': [" + " {'operatorName': 'type', 'specifiedAs': {'type': 'string'}, 'reason': 'type did " + "not match', 'consideredValue': 2, 'consideredType': 'int'}]}" + "]}]}]}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +// Verifies that "items" with an array of schemas does not produce any unwanted artifacts when it +// does not fail. We use "minItems" that fails validation to check that. +TEST(JSONSchemaValidation, ArrayItemsSchemaArrayCombinedWithMinItems) { + BSONObj query = fromjson( + " {'$jsonSchema':" + " {'properties': " + " {'a': {'items': [{'type': 'number'}, {'type': 'string'}], 'minItems': 5}}}}"); + BSONObj document = fromjson("{'a': [1, 'A']}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$jsonSchema'," + "'schemaRulesNotSatisfied': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'a', 'details': [" + " {'operatorName': 'minItems'," + " 'specifiedAs': {'minItems': 5}," + " 'reason': 'array did not match specified length'," + " 'consideredValue': [1, 'A']}]}]}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +// Verifies that "items" with an array of schemas does not produce any unwanted artifacts when it +// does not fail on array elements that do not exist. We use "minItems" that fails validation to +// check that. +TEST(JSONSchemaValidation, ArrayItemsSchemaArrayCombinedWithMinItemsOnShortArray) { + BSONObj query = fromjson( + " {'$jsonSchema':" + " {'properties': " + " {'a': {'items': [{'type': 'number'}, {'type': 'string'}], 'minItems': 5}}}}"); + BSONObj document = fromjson("{'a': [1]}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$jsonSchema'," + "'schemaRulesNotSatisfied': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'a', 'details': [" + " {'operatorName': 'minItems'," + " 'specifiedAs': {'minItems': 5}," + " 'reason': 'array did not match specified length'," + " 'consideredValue': [1]}]}]}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayItemsSchemaArrayTypeArrayOnNonArrayAttribute) { + BSONObj query = fromjson( + " {'$jsonSchema':" + " {'properties': " + " {'a': {'items': [{'type': 'number'}, {'type': 'string'}], 'type': 'array'}}}}"); + BSONObj document = fromjson("{'a': 1}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$jsonSchema'," + "'schemaRulesNotSatisfied': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'a', 'details': [" + " {'operatorName': 'type'," + " 'specifiedAs': {'type': 'array'}," + " 'reason': 'type did not match'," + " 'consideredValue': 1," + " 'consideredType': 'int'}]}]}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +// Verifies that "items" with an empty array of schemas does not produce any unwanted artifacts. We +// use "minItems" that fails validation to check that. +TEST(JSONSchemaValidation, ArrayItemsEmptySchemaArrayCombinedWithMinItems) { + BSONObj query = fromjson( + " {'$jsonSchema':" + " {'properties': " + " {'a': {'items': [], 'minItems': 5}}}}"); + BSONObj document = fromjson("{'a': [1, 'A']}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$jsonSchema'," + "'schemaRulesNotSatisfied': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'a', 'details': [" + " {'operatorName': 'minItems'," + " 'specifiedAs': {'minItems': 5}," + " 'reason': 'array did not match specified length'," + " 'consideredValue': [1, 'A']}]}]}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayItemsSchemaArrayAlwaysTrue) { + BSONObj query = fromjson("{$nor: [{'$jsonSchema': {'items': [{'type': 'string'}]}}]}"); + BSONObj document = fromjson("{}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$nor', 'clausesSatisfied': [" + " {'index': 0, 'details': " + " {'operatorName': '$jsonSchema', 'reason': 'schema matched'}}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayItemsSchemaArrayOneEmptyObject) { + BSONObj query = fromjson("{$nor: [{'$jsonSchema': {'items': [{}]}}]}"); + BSONObj document = fromjson("{}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$nor', 'clausesSatisfied': [" + " {'index': 0, 'details': " + " {'operatorName': '$jsonSchema', 'reason': 'schema matched'}}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayAdditionalItemsSchema) { + BSONObj query = fromjson( + " {'$jsonSchema':" + " {'properties': " + " {'a': {'type': 'array', 'items': [{'type': 'number'}, {'type': 'string'}], " + "'additionalItems': {'type': 'object'}}}}}"); + BSONObj document = fromjson("{'a': [1, 'First', {}, 'Extra element']}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$jsonSchema', 'schemaRulesNotSatisfied': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'a', 'details': [" + " {'operatorName': 'additionalItems', 'reason': 'At least one additional item did not " + "match the sub-schema', 'itemIndex': 3, 'details': [" + " {'operatorName': 'type', 'specifiedAs': {'type': 'object'}, 'reason': 'type did " + "not match', 'consideredValue': 'Extra element', 'consideredType': 'string'}]}]}]}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayAdditionalItemsSchemaItemsAndItemsSchemaFail) { + BSONObj query = fromjson( + " {'$jsonSchema':" + " {'properties': " + " {'a': {'type': 'array', 'items': [{'type': 'number'}, {'type': 'string'}], " + "'additionalItems': {'type': 'object'}}}}}"); + BSONObj document = fromjson("{'a': ['1', 2, {}, {'b': 1}, 'Fail']}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$jsonSchema', 'schemaRulesNotSatisfied': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'a', 'details': [" + " {'operatorName': 'items', 'details': [" + " {'itemIndex': 0, 'details': [" + " {'operatorName': 'type', 'specifiedAs': {'type': 'number'}, 'reason': 'type did " + "not match', 'consideredValue': '1', 'consideredType': 'string'}]}," + " {'itemIndex': 1, 'details': [" + " {'operatorName': 'type', 'specifiedAs': {'type': 'string'}, 'reason': 'type did " + "not match', 'consideredValue': 2, 'consideredType': 'int'}]}" + "]}," + " {'operatorName': 'additionalItems', 'reason': 'At least one additional item did not " + "match the sub-schema', 'itemIndex': 4, 'details': [" + " {'operatorName': 'type', 'specifiedAs': {'type': 'object'}, 'reason': 'type did " + "not match', 'consideredValue': 'Fail', 'consideredType': 'string'}]}]}]}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayAdditionalItemsSchemaNested) { + BSONObj query = fromjson( + " {'$jsonSchema':" + " {'properties': " + " {'a': {'type': 'array', 'items': [{'type': 'string'}], 'additionalItems': " + " {'properties': {'b': {'items': {'type': 'object'}}}}}}}}"); + BSONObj document = fromjson("{'a': ['A', {'b': [{}, 'A']}]}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$jsonSchema', 'schemaRulesNotSatisfied': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'a', 'details': [" + " {'operatorName': 'additionalItems', 'reason': 'At least one additional item did not " + "match the sub-schema', 'itemIndex': 1, 'details': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'b', 'details': [" + " {'operatorName': 'items', 'reason': 'At least one item did not match the " + "sub-schema', 'itemIndex': 1, 'details': [" + " {'operatorName': 'type', 'specifiedAs': {'type': 'object'}, 'reason': " + "'type did not match', 'consideredValue': 'A', 'consideredType': 'string'}]}]}]}]}]}]}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayAdditionalItemsSchemaTypeArrayOnNonArrayAttribute) { + BSONObj query = fromjson( + " {'$jsonSchema':" + " {'properties': " + " {'a': {'type': 'array', 'items': [{'type': 'number'}, {'type': 'string'}], " + "'additionalItems': {'type': 'object'}}}}}"); + BSONObj document = fromjson("{'a': 1}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$jsonSchema'," + "'schemaRulesNotSatisfied': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'a', 'details': [" + " {'operatorName': 'type'," + " 'specifiedAs': {'type': 'array'}," + " 'reason': 'type did not match'," + " 'consideredValue': 1," + " 'consideredType': 'int'}]}]}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +// Verifies that "additionalItems" with a single schema does not produce any unwanted artifacts when +// it does not fail. We use "minItems" that fails validation to check that. +TEST(JSONSchemaValidation, ArrayAdditionalItemsSchemaCombinedWithMinItems) { + BSONObj query = fromjson( + " {'$jsonSchema':" + " {'properties': " + " {'a': {'type': 'array', 'items': [{'type': 'number'}, {'type': 'string'}], " + "'additionalItems': {'type': 'object'}, 'minItems': 5}}}}"); + BSONObj document = fromjson("{'a': [1, 'First', {}]}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$jsonSchema'," + "'schemaRulesNotSatisfied': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'a', 'details': [" + " {'operatorName': 'minItems'," + " 'specifiedAs': {'minItems': 5}," + " 'reason': 'array did not match specified length'," + " 'consideredValue': [1, 'First', {}]}]}]}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayAdditionalItemsSchemaAlwaysTrue) { + BSONObj query = fromjson( + "{$nor: [{'$jsonSchema': " + "{'items': [{'type': 'object'}], 'additionalItems': {'type': 'object'}}}]}"); + BSONObj document = fromjson("{}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$nor', 'clausesSatisfied': [" + " {'index': 0, 'details': " + " {'operatorName': '$jsonSchema', 'reason': 'schema matched'}}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayAdditionalItemsFalse) { + BSONObj query = fromjson( + "{'$jsonSchema': " + " {'properties': {'a': " + " {'items': [{'type': 'number'}, {'type': 'string'}], " + "'additionalItems': false}}}}"); + BSONObj document = fromjson("{'a': [1, 'First', 'Extra element']}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$jsonSchema', 'schemaRulesNotSatisfied': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'a', 'details': [" + " {'operatorName': 'additionalItems', 'specifiedAs': {'additionalItems': false}, " + "'reason': 'found additional items', 'additionalItems': ['Extra element']}]}]}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayAdditionalItemsFalseTypeArrayOnNonArrayAttribute) { + BSONObj query = fromjson( + "{'$jsonSchema': " + " {'properties': {'a': " + " {'items': [{'type': 'number'}], 'additionalItems': false, 'type': 'array'}}}}"); + BSONObj document = fromjson("{'a': 1}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$jsonSchema'," + "'schemaRulesNotSatisfied': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'a', 'details': [" + " {'operatorName': 'type'," + " 'specifiedAs': {'type': 'array'}," + " 'reason': 'type did not match'," + " 'consideredValue': 1," + " 'consideredType': 'int'}]}]}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayAdditionalItemsFalseCombinedWithMinItems) { + BSONObj query = fromjson( + "{'$jsonSchema': " + " {'properties': {'a': " + " {'items': [{'type': 'number'}], 'additionalItems': false, 'minItems': 5}}}}"); + BSONObj document = fromjson("{'a': [1]}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$jsonSchema'," + "'schemaRulesNotSatisfied': [" + " {'operatorName': 'properties', 'propertiesNotSatisfied': [" + " {'propertyName': 'a', 'details': [" + " {'operatorName': 'minItems'," + " 'specifiedAs': {'minItems': 5}," + " 'reason': 'array did not match specified length'," + " 'consideredValue': [1]}]}]}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} + +TEST(JSONSchemaValidation, ArrayAdditionalItemsFalseAlwaysTrue) { + BSONObj query = fromjson( + "{$nor: [{'$jsonSchema': {'items': [{'type': 'number'}], 'additionalItems': false}}]}"); + BSONObj document = fromjson("{}"); + BSONObj expectedError = fromjson( + "{'operatorName': '$nor', 'clausesSatisfied': [" + " {'index': 0, 'details': " + " {'operatorName': '$jsonSchema', 'reason': 'schema matched'}}]}"); + doc_validation_error::verifyGeneratedError(query, document, expectedError); +} } // namespace } // namespace mongo diff --git a/src/mongo/db/matcher/match_expression_util.cpp b/src/mongo/db/matcher/match_expression_util.cpp new file mode 100644 index 00000000000..397bfa4cda5 --- /dev/null +++ b/src/mongo/db/matcher/match_expression_util.cpp @@ -0,0 +1,40 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/matcher/match_expression_util.h" + +namespace mongo::match_expression_util { +void advanceBy(size_t numberOfElements, BSONObjIterator& iterator) { + for (size_t i = 0; iterator.more() && i < numberOfElements; ++i) { + iterator.next(); + } +} +} // namespace mongo::match_expression_util
\ No newline at end of file diff --git a/src/mongo/db/matcher/match_expression_util.h b/src/mongo/db/matcher/match_expression_util.h new file mode 100644 index 00000000000..191c4152d4e --- /dev/null +++ b/src/mongo/db/matcher/match_expression_util.h @@ -0,0 +1,40 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/bson/bsonobj.h" + +namespace mongo::match_expression_util { +/** + * Advances position of iterator 'iterator' by 'numberOfElements' elements but no more than the end + * of the object. + */ +void advanceBy(size_t numberOfElements, BSONObjIterator& iterator); +} // namespace mongo::match_expression_util diff --git a/src/mongo/db/matcher/match_expression_util_test.cpp b/src/mongo/db/matcher/match_expression_util_test.cpp new file mode 100644 index 00000000000..3ff67328c32 --- /dev/null +++ b/src/mongo/db/matcher/match_expression_util_test.cpp @@ -0,0 +1,54 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/matcher/match_expression_util.h" + +#include "mongo/unittest/unittest.h" + +namespace mongo::match_expression_util { +namespace { +TEST(BSONObjIteratorUtils, AdvanceBy) { + auto obj = BSON("a" << 1 << "b" << 2 << "c" << 3); + BSONObjIterator iter = BSONObjIterator(obj); + advanceBy(0, iter); + ASSERT_TRUE(iter.more()); + ASSERT_EQUALS(iter.next().fieldNameStringData(), "a"); + advanceBy(1, iter); + ASSERT_TRUE(iter.more()); + ASSERT_EQUALS(iter.next().fieldNameStringData(), "c"); + ASSERT_FALSE(iter.more()); + advanceBy(0, iter); + ASSERT_FALSE(iter.more()); + advanceBy(1, iter); + ASSERT_FALSE(iter.more()); +} +} // namespace +} // namespace mongo::match_expression_util diff --git a/src/mongo/db/matcher/schema/expression_internal_schema_all_elem_match_from_index.cpp b/src/mongo/db/matcher/schema/expression_internal_schema_all_elem_match_from_index.cpp index 1d83970844c..27b2adae68d 100644 --- a/src/mongo/db/matcher/schema/expression_internal_schema_all_elem_match_from_index.cpp +++ b/src/mongo/db/matcher/schema/expression_internal_schema_all_elem_match_from_index.cpp @@ -39,16 +39,19 @@ constexpr StringData InternalSchemaAllElemMatchFromIndexMatchExpression::kName; InternalSchemaAllElemMatchFromIndexMatchExpression:: InternalSchemaAllElemMatchFromIndexMatchExpression( - StringData path, long long index, std::unique_ptr<ExpressionWithPlaceholder> expression) - : ArrayMatchingMatchExpression(MatchExpression::INTERNAL_SCHEMA_ALL_ELEM_MATCH_FROM_INDEX, - path), + StringData path, + long long index, + std::unique_ptr<ExpressionWithPlaceholder> expression, + clonable_ptr<ErrorAnnotation> annotation) + : ArrayMatchingMatchExpression( + MatchExpression::INTERNAL_SCHEMA_ALL_ELEM_MATCH_FROM_INDEX, path, std::move(annotation)), _index(index), _expression(std::move(expression)) {} std::unique_ptr<MatchExpression> InternalSchemaAllElemMatchFromIndexMatchExpression::shallowClone() const { auto clone = std::make_unique<InternalSchemaAllElemMatchFromIndexMatchExpression>( - path(), _index, _expression->shallowClone()); + path(), _index, _expression->shallowClone(), _errorAnnotation); if (getTag()) { clone->setTag(getTag()->clone()); } diff --git a/src/mongo/db/matcher/schema/expression_internal_schema_all_elem_match_from_index.h b/src/mongo/db/matcher/schema/expression_internal_schema_all_elem_match_from_index.h index 723c1b68084..58994d4f612 100644 --- a/src/mongo/db/matcher/schema/expression_internal_schema_all_elem_match_from_index.h +++ b/src/mongo/db/matcher/schema/expression_internal_schema_all_elem_match_from_index.h @@ -34,6 +34,7 @@ #include "mongo/db/matcher/expression.h" #include "mongo/db/matcher/expression_array.h" #include "mongo/db/matcher/expression_with_placeholder.h" +#include "mongo/db/matcher/match_expression_util.h" namespace mongo { /** @@ -46,21 +47,32 @@ public: static constexpr StringData kName = "$_internalSchemaAllElemMatchFromIndex"_sd; InternalSchemaAllElemMatchFromIndexMatchExpression( - StringData path, long long index, std::unique_ptr<ExpressionWithPlaceholder> expression); + StringData path, + long long index, + std::unique_ptr<ExpressionWithPlaceholder> expression, + clonable_ptr<ErrorAnnotation> annotation = nullptr); std::unique_ptr<MatchExpression> shallowClone() const final; bool matchesArray(const BSONObj& array, MatchDetails* details) const final { + return !findFirstMismatchInArray(array, details); + } + + /** + * Finds the first element in the sub-array of array 'array' that the expression applies to that + * does not match the sub-expression. If such element does not exist, then returns empty (i.e. + * EOO) value. + */ + BSONElement findFirstMismatchInArray(const BSONObj& array, MatchDetails* details) const { auto iter = BSONObjIterator(array); - for (int i = 0; iter.more() && i < _index; i++) { - iter.next(); - } + match_expression_util::advanceBy(_index, iter); while (iter.more()) { - if (!_expression->matchesBSONElement(iter.next(), details)) { - return false; + auto element = iter.next(); + if (!_expression->matchesBSONElement(element, details)) { + return element; } } - return true; + return {}; } void debugString(StringBuilder& debug, int indentationLevel) const final; @@ -69,6 +81,13 @@ public: bool equivalent(const MatchExpression* other) const final; + /** + * Returns an index of the first element of the array this match expression applies to. + */ + long long startIndex() const { + return _index; + } + boost::optional<std::vector<MatchExpression*>&> getChildVector() final { return boost::none; } diff --git a/src/mongo/db/matcher/schema/expression_internal_schema_all_elem_match_from_index_test.cpp b/src/mongo/db/matcher/schema/expression_internal_schema_all_elem_match_from_index_test.cpp index 44f43bf2832..49e1b3b0235 100644 --- a/src/mongo/db/matcher/schema/expression_internal_schema_all_elem_match_from_index_test.cpp +++ b/src/mongo/db/matcher/schema/expression_internal_schema_all_elem_match_from_index_test.cpp @@ -109,6 +109,22 @@ TEST(InternalSchemaAllElemMatchFromIndexMatchExpression, HasSingleChild) { ASSERT(objMatch.getValue()->getChild(0)); } +TEST(InternalSchemaAllElemMatchFromIndexMatchExpression, FindsFirstMismatchInArray) { + auto query = fromjson("{a: {$_internalSchemaAllElemMatchFromIndex: [2, {a: {$lt: 5}}]}}"); + boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest()); + auto expr = MatchExpressionParser::parse(query, expCtx); + ASSERT_OK(expr.getStatus()); + auto elemMatchExpr = dynamic_cast<const InternalSchemaAllElemMatchFromIndexMatchExpression*>( + expr.getValue().get()); + ASSERT(elemMatchExpr); + ASSERT_FALSE(elemMatchExpr->findFirstMismatchInArray(BSON("a" << BSON_ARRAY(1 << 2 << 3 << 4)), + nullptr)); + auto inputArray = BSON_ARRAY(1 << 2 << 3 << 3 << 6 << 7); + auto mismatchedElement = elemMatchExpr->findFirstMismatchInArray(inputArray, nullptr); + ASSERT_TRUE(mismatchedElement); + ASSERT_EQ(mismatchedElement.Int(), 6); +} + DEATH_TEST_REGEX(InternalSchemaAllElemMatchFromIndexMatchExpression, GetChildFailsIndexGreaterThanOne, "Invariant failure.*i == 0") { diff --git a/src/mongo/db/matcher/schema/expression_internal_schema_match_array_index.cpp b/src/mongo/db/matcher/schema/expression_internal_schema_match_array_index.cpp index b529fb9188a..a09ad27ff22 100644 --- a/src/mongo/db/matcher/schema/expression_internal_schema_match_array_index.cpp +++ b/src/mongo/db/matcher/schema/expression_internal_schema_match_array_index.cpp @@ -35,8 +35,12 @@ namespace mongo { constexpr StringData InternalSchemaMatchArrayIndexMatchExpression::kName; InternalSchemaMatchArrayIndexMatchExpression::InternalSchemaMatchArrayIndexMatchExpression( - StringData path, long long index, std::unique_ptr<ExpressionWithPlaceholder> expression) - : ArrayMatchingMatchExpression(MatchExpression::INTERNAL_SCHEMA_MATCH_ARRAY_INDEX, path), + StringData path, + long long index, + std::unique_ptr<ExpressionWithPlaceholder> expression, + clonable_ptr<ErrorAnnotation> annotation) + : ArrayMatchingMatchExpression( + MatchExpression::INTERNAL_SCHEMA_MATCH_ARRAY_INDEX, path, std::move(annotation)), _index(index), _expression(std::move(expression)) { invariant(static_cast<bool>(_expression)); @@ -87,7 +91,7 @@ BSONObj InternalSchemaMatchArrayIndexMatchExpression::getSerializedRightHandSide std::unique_ptr<MatchExpression> InternalSchemaMatchArrayIndexMatchExpression::shallowClone() const { auto clone = std::make_unique<InternalSchemaMatchArrayIndexMatchExpression>( - path(), _index, _expression->shallowClone()); + path(), _index, _expression->shallowClone(), _errorAnnotation); if (getTag()) { clone->setTag(getTag()->clone()); } diff --git a/src/mongo/db/matcher/schema/expression_internal_schema_match_array_index.h b/src/mongo/db/matcher/schema/expression_internal_schema_match_array_index.h index a6c063f9244..05519e17ff3 100644 --- a/src/mongo/db/matcher/schema/expression_internal_schema_match_array_index.h +++ b/src/mongo/db/matcher/schema/expression_internal_schema_match_array_index.h @@ -45,7 +45,10 @@ public: static constexpr StringData kName = "$_internalSchemaMatchArrayIndex"_sd; InternalSchemaMatchArrayIndexMatchExpression( - StringData path, long long index, std::unique_ptr<ExpressionWithPlaceholder> expression); + StringData path, + long long index, + std::unique_ptr<ExpressionWithPlaceholder> expression, + clonable_ptr<ErrorAnnotation> annotation = nullptr); void debugString(StringBuilder& debug, int indentationLevel) const final; @@ -94,6 +97,13 @@ public: visitor->visit(this); } + /** + * Returns an index of an array element this expression applies to. + */ + long long arrayIndex() const { + return _index; + } + private: ExpressionOptimizerFunc getOptimizer() const final; diff --git a/src/mongo/db/matcher/schema/expression_internal_schema_max_items.h b/src/mongo/db/matcher/schema/expression_internal_schema_max_items.h index ce8cf2a2931..cbb4dc5da0e 100644 --- a/src/mongo/db/matcher/schema/expression_internal_schema_max_items.h +++ b/src/mongo/db/matcher/schema/expression_internal_schema_max_items.h @@ -55,7 +55,8 @@ public: std::unique_ptr<MatchExpression> shallowClone() const final { std::unique_ptr<InternalSchemaMaxItemsMatchExpression> maxItems = - std::make_unique<InternalSchemaMaxItemsMatchExpression>(path(), numItems()); + std::make_unique<InternalSchemaMaxItemsMatchExpression>( + path(), numItems(), _errorAnnotation); if (getTag()) { maxItems->setTag(getTag()->clone()); } diff --git a/src/mongo/db/matcher/schema/expression_internal_schema_min_items.h b/src/mongo/db/matcher/schema/expression_internal_schema_min_items.h index 50d4768458b..c4f317e1b95 100644 --- a/src/mongo/db/matcher/schema/expression_internal_schema_min_items.h +++ b/src/mongo/db/matcher/schema/expression_internal_schema_min_items.h @@ -55,7 +55,8 @@ public: std::unique_ptr<MatchExpression> shallowClone() const final { std::unique_ptr<InternalSchemaMinItemsMatchExpression> minItems = - std::make_unique<InternalSchemaMinItemsMatchExpression>(path(), numItems()); + std::make_unique<InternalSchemaMinItemsMatchExpression>( + path(), numItems(), _errorAnnotation); if (getTag()) { minItems->setTag(getTag()->clone()); } diff --git a/src/mongo/db/matcher/schema/expression_internal_schema_unique_items.cpp b/src/mongo/db/matcher/schema/expression_internal_schema_unique_items.cpp index dd2f9e382ff..f78dca0c37f 100644 --- a/src/mongo/db/matcher/schema/expression_internal_schema_unique_items.cpp +++ b/src/mongo/db/matcher/schema/expression_internal_schema_unique_items.cpp @@ -66,7 +66,8 @@ BSONObj InternalSchemaUniqueItemsMatchExpression::getSerializedRightHandSide() c } std::unique_ptr<MatchExpression> InternalSchemaUniqueItemsMatchExpression::shallowClone() const { - auto clone = std::make_unique<InternalSchemaUniqueItemsMatchExpression>(path()); + auto clone = + std::make_unique<InternalSchemaUniqueItemsMatchExpression>(path(), _errorAnnotation); if (getTag()) { clone->setTag(getTag()->clone()); } diff --git a/src/mongo/db/matcher/schema/expression_internal_schema_unique_items.h b/src/mongo/db/matcher/schema/expression_internal_schema_unique_items.h index f1708b92cef..2bc5c2f1671 100644 --- a/src/mongo/db/matcher/schema/expression_internal_schema_unique_items.h +++ b/src/mongo/db/matcher/schema/expression_internal_schema_unique_items.h @@ -47,8 +47,10 @@ class InternalSchemaUniqueItemsMatchExpression final : public ArrayMatchingMatch public: static constexpr StringData kName = "$_internalSchemaUniqueItems"_sd; - explicit InternalSchemaUniqueItemsMatchExpression(StringData path) - : ArrayMatchingMatchExpression(MatchExpression::INTERNAL_SCHEMA_UNIQUE_ITEMS, path) {} + explicit InternalSchemaUniqueItemsMatchExpression( + StringData path, clonable_ptr<ErrorAnnotation> annotation = nullptr) + : ArrayMatchingMatchExpression( + MatchExpression::INTERNAL_SCHEMA_UNIQUE_ITEMS, path, std::move(annotation)) {} size_t numChildren() const final { return 0; @@ -63,13 +65,17 @@ public: } bool matchesArray(const BSONObj& array, MatchDetails*) const final { + return !findFirstDuplicateValue(array); + } + + BSONElement findFirstDuplicateValue(const BSONObj& array) const { auto set = _comparator.makeBSONEltSet(); for (auto&& elem : array) { if (!std::get<bool>(set.insert(elem))) { - return false; + return elem; } } - return true; + return {}; } void debugString(StringBuilder& builder, int indentationLevel) const final; diff --git a/src/mongo/db/matcher/schema/expression_internal_schema_unique_items_test.cpp b/src/mongo/db/matcher/schema/expression_internal_schema_unique_items_test.cpp index e4412662b8f..7215499b78b 100644 --- a/src/mongo/db/matcher/schema/expression_internal_schema_unique_items_test.cpp +++ b/src/mongo/db/matcher/schema/expression_internal_schema_unique_items_test.cpp @@ -105,5 +105,15 @@ TEST(InternalSchemaUniqueItemsMatchExpression, AlwaysUsesBinaryComparisonRegardl ASSERT_TRUE(uniqueItems.matchesBSON(fromjson("{foo: [{x: 'two'}, {y: 'two'}]}"))); ASSERT_TRUE(uniqueItems.matchesBSON(fromjson("{foo: [{a: 'three'}, {a: 'THREE'}]}"))); } + +TEST(InternalSchemaUniqueItemsMatchExpression, FindsFirstDuplicateValue) { + InternalSchemaUniqueItemsMatchExpression uniqueItems(""); + auto inputArray = fromjson("[1, 2, 2, 1]}"); + auto result = uniqueItems.findFirstDuplicateValue(inputArray); + ASSERT_TRUE(result); + ASSERT_EQUALS(result.Int(), 2); + ASSERT_FALSE(uniqueItems.findFirstDuplicateValue(fromjson("[1, 2]}"))); + ASSERT_FALSE(uniqueItems.findFirstDuplicateValue(fromjson("[]}"))); +} } // namespace } // namespace mongo diff --git a/src/mongo/db/matcher/schema/json_schema_parser.cpp b/src/mongo/db/matcher/schema/json_schema_parser.cpp index 16baae059db..2eec36b36a3 100644 --- a/src/mongo/db/matcher/schema/json_schema_parser.cpp +++ b/src/mongo/db/matcher/schema/json_schema_parser.cpp @@ -86,7 +86,7 @@ const std::set<StringData> unsupportedKeywords{ "id"_sd, }; -constexpr StringData kNamePlaceholder = "i"_sd; +constexpr StringData kNamePlaceholder = JSONSchemaParser::kNamePlaceholder; /** * Parses 'schema' to the semantically equivalent match expression. If the schema has an associated @@ -898,19 +898,22 @@ StatusWithMatchExpression parseUniqueItems(const boost::intrusive_ptr<Expression BSONElement uniqueItemsElt, StringData path, InternalSchemaTypeExpression* typeExpr) { + auto errorAnnotation = doc_validation_error::createAnnotation( + expCtx, uniqueItemsElt.fieldNameStringData().toString(), uniqueItemsElt.wrap()); if (!uniqueItemsElt.isBoolean()) { return {ErrorCodes::TypeMismatch, str::stream() << "$jsonSchema keyword '" << JSONSchemaParser::kSchemaUniqueItemsKeyword << "' must be a boolean"}; } else if (path.empty()) { - return {std::make_unique<AlwaysTrueMatchExpression>()}; + return {std::make_unique<AlwaysTrueMatchExpression>(std::move(errorAnnotation))}; } else if (uniqueItemsElt.boolean()) { - auto uniqueItemsExpr = std::make_unique<InternalSchemaUniqueItemsMatchExpression>(path); + auto uniqueItemsExpr = std::make_unique<InternalSchemaUniqueItemsMatchExpression>( + path, std::move(errorAnnotation)); return makeRestriction(expCtx, BSONType::Array, path, std::move(uniqueItemsExpr), typeExpr); } - return {std::make_unique<AlwaysTrueMatchExpression>()}; + return {std::make_unique<AlwaysTrueMatchExpression>(std::move(errorAnnotation))}; } /** @@ -928,8 +931,12 @@ StatusWith<boost::optional<long long>> parseItems( boost::optional<long long> startIndexForAdditionalItems; if (itemsElt.type() == BSONType::Array) { // When "items" is an array, generate match expressions for each subschema for each position - // in the array, which are bundled together in an AndMatchExpression. - auto andExprForSubschemas = std::make_unique<AndMatchExpression>(); + // in the array, which are bundled together in an AndMatchExpression. Annotate the + // AndMatchExpression with the 'items' operator name, since it logically corresponds to the + // user visible JSON Schema "items" keyword. + auto andExprForSubschemas = + std::make_unique<AndMatchExpression>(doc_validation_error::createAnnotation( + expCtx, itemsElt.fieldNameStringData().toString(), itemsElt.wrap())); auto index = 0LL; for (auto subschema : itemsElt.embeddedObject()) { if (subschema.type() != BSONType::Object) { @@ -954,14 +961,24 @@ StatusWith<boost::optional<long long>> parseItems( auto exprWithPlaceholder = std::make_unique<ExpressionWithPlaceholder>( kNamePlaceholder.toString(), std::move(parsedSubschema.getValue())); auto matchArrayIndex = std::make_unique<InternalSchemaMatchArrayIndexMatchExpression>( - path, index, std::move(exprWithPlaceholder)); + path, + index, + std::move(exprWithPlaceholder), + doc_validation_error::createAnnotation( + expCtx, + "" /* 'andExprForSubschemas' carries the operator name, not this expression */, + BSONObj())); andExprForSubschemas->add(matchArrayIndex.release()); ++index; } startIndexForAdditionalItems = index; if (path.empty()) { - andExpr->add(std::make_unique<AlwaysTrueMatchExpression>().release()); + andExpr->add( + std::make_unique<AlwaysTrueMatchExpression>( + doc_validation_error::createAnnotation( + expCtx, itemsElt.fieldNameStringData().toString(), itemsElt.wrap())) + .release()); } else { andExpr->add( makeRestriction( @@ -983,13 +1000,19 @@ StatusWith<boost::optional<long long>> parseItems( auto exprWithPlaceholder = std::make_unique<ExpressionWithPlaceholder>( kNamePlaceholder.toString(), std::move(nestedItemsSchema.getValue())); + auto errorAnnotation = doc_validation_error::createAnnotation( + expCtx, itemsElt.fieldNameStringData().toString(), itemsElt.wrap()); if (path.empty()) { - andExpr->add(std::make_unique<AlwaysTrueMatchExpression>().release()); + andExpr->add( + std::make_unique<AlwaysTrueMatchExpression>(std::move(errorAnnotation)).release()); } else { constexpr auto startIndexForItems = 0LL; auto allElemMatch = std::make_unique<InternalSchemaAllElemMatchFromIndexMatchExpression>( - path, startIndexForItems, std::move(exprWithPlaceholder)); + path, + startIndexForItems, + std::move(exprWithPlaceholder), + std::move(errorAnnotation)); andExpr->add( makeRestriction(expCtx, BSONType::Array, path, std::move(allElemMatch), typeExpr) .release()); @@ -1014,12 +1037,18 @@ Status parseAdditionalItems(const boost::intrusive_ptr<ExpressionContext>& expCt std::unique_ptr<ExpressionWithPlaceholder> otherwiseExpr; if (additionalItemsElt.type() == BSONType::Bool) { const auto emptyPlaceholder = boost::none; + // Ignore the expression, since InternalSchemaAllElemMatchFromIndexMatchExpression reports + // the details in this case. + auto errorAnnotation = + doc_validation_error::createAnnotation(expCtx, AnnotationMode::kIgnore); if (additionalItemsElt.boolean()) { otherwiseExpr = std::make_unique<ExpressionWithPlaceholder>( - emptyPlaceholder, std::make_unique<AlwaysTrueMatchExpression>()); + emptyPlaceholder, + std::make_unique<AlwaysTrueMatchExpression>(std::move(errorAnnotation))); } else { otherwiseExpr = std::make_unique<ExpressionWithPlaceholder>( - emptyPlaceholder, std::make_unique<AlwaysFalseMatchExpression>()); + emptyPlaceholder, + std::make_unique<AlwaysFalseMatchExpression>(std::move(errorAnnotation))); } } else if (additionalItemsElt.type() == BSONType::Object) { auto parsedOtherwiseExpr = _parse(expCtx, @@ -1042,12 +1071,18 @@ Status parseAdditionalItems(const boost::intrusive_ptr<ExpressionContext>& expCt // Only generate a match expression if needed. if (startIndexForAdditionalItems) { + auto errorAnnotation = doc_validation_error::createAnnotation( + expCtx, additionalItemsElt.fieldNameStringData().toString(), additionalItemsElt.wrap()); if (path.empty()) { - andExpr->add(std::make_unique<AlwaysTrueMatchExpression>().release()); + andExpr->add( + std::make_unique<AlwaysTrueMatchExpression>(std::move(errorAnnotation)).release()); } else { auto allElemMatch = std::make_unique<InternalSchemaAllElemMatchFromIndexMatchExpression>( - path, *startIndexForAdditionalItems, std::move(otherwiseExpr)); + path, + *startIndexForAdditionalItems, + std::move(otherwiseExpr), + std::move(errorAnnotation)); andExpr->add( makeRestriction(expCtx, BSONType::Array, path, std::move(allElemMatch), typeExpr) .release()); diff --git a/src/mongo/db/matcher/schema/json_schema_parser.h b/src/mongo/db/matcher/schema/json_schema_parser.h index b1fd403dac4..62bc46a7343 100644 --- a/src/mongo/db/matcher/schema/json_schema_parser.h +++ b/src/mongo/db/matcher/schema/json_schema_parser.h @@ -83,6 +83,9 @@ public: static constexpr StringData kSchemaEncryptKeyword = "encrypt"_sd; static constexpr StringData kSchemaEncryptMetadataKeyword = "encryptMetadata"_sd; + // A name of placeholder used in ExpressionWithPlaceholder expressions. + static constexpr StringData kNamePlaceholder = "i"_sd; + /** * Converts a JSON schema, represented as BSON, into a semantically equivalent match expression * tree. Returns a non-OK status if the schema is invalid or cannot be parsed. |