diff options
| | | |
|---|---|---|
| author | Yuhong Zhang <yuhong.zhang@mongodb.com> | 2022-07-26 00:34:01 +0000 |
| committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-07-26 01:33:46 +0000 |
| commit | 541802d0bf8547f1d93e909858f884b6f83c8f42 (patch) | |
| tree | ed1a7b3ec3df29392ae9d2970d1e1b8541cace89 | |
| parent | b8c57455bd0dd2ffea40e2b4f6e05c84f1ef9d6d (diff) | |
| download | mongo-541802d0bf8547f1d93e909858f884b6f83c8f42.tar.gz | |
SERVER-67561 Create the interface to enable `validateBSON()` to run more thorough checks
-rw-r--r-- | src/mongo/bson/bson_validate.cpp | 69 |
-rw-r--r-- | src/mongo/bson/bson_validate.h | 27 |
-rw-r--r-- | src/mongo/db/catalog/validate_adaptor.cpp | 5 |
3 files changed, 89 insertions, 12 deletions
diff --git a/src/mongo/bson/bson_validate.cpp b/src/mongo/bson/bson_validate.cpp index 8725dfbc645..5bfbe2f4865 100644 --- a/src/mongo/bson/bson_validate.cpp +++ b/src/mongo/bson/bson_validate.cpp @@ -27,12 +27,13 @@ * it in the license file. */ +#include "mongo/bson/bson_validate.h" + #include <cstring> #include <vector> #include "mongo/base/data_view.h" #include "mongo/bson/bson_depth.h" -#include "mongo/bson/bson_validate.h" #include "mongo/bson/bsonelement.h" #include "mongo/logv2/log.h" @@ -82,10 +83,42 @@ MONGO_STATIC_ASSERT(sizeof(kTypeInfoTable) == 32); constexpr ErrorCodes::Error InvalidBSON = ErrorCodes::InvalidBSON; -template <bool precise> +class DefaultValidator { +public: + void checkNonConformantElem(const char* ptr, uint8_t type) {} + + void checkUTF8Char() {} + + void checkDuplicateFieldName() {} +}; + +class ExtendedValidator { +public: + void checkNonConformantElem(const char* ptr, uint8_t type) { + // TODO: Add checks for different BSON types. + } + + void checkUTF8Char() {} + + void checkDuplicateFieldName() {} +}; + +class FullValidator : public ExtendedValidator { +public: + void checkNonConformantElem(const char* ptr, uint8_t type) { + // TODO: Add checks for different BSON types. + } + + void checkUTF8Char() {} + + void checkDuplicateFieldName() {} +}; + +template <bool precise, typename BSONValidator> class ValidateBuffer { public: - ValidateBuffer(const char* data, uint64_t maxLength) : _data(data), _maxLength(maxLength) { + ValidateBuffer(const char* data, uint64_t maxLength, BSONValidator validator) + : _data(data), _maxLength(maxLength), _validator(validator) { if constexpr (precise) _frames.resize(BSONDepth::getMaxAllowableDepth() + 1); } @@ -264,6 +297,10 @@ private: cursor.ptr += len + 1; cursor.ptr = _validateElem(cursor, type); + // Check if the data is compliant to other BSON specifications if the element is + // structurally correct. 
+ _validator.checkNonConformantElem(_currElem + len + 1, type); + if constexpr (precise) { // See if the _id field was just validated. If so, set the global scope element. if (_currFrame == _frames.begin() && StringData(_currElem + 1) == "_id"_sd) @@ -304,16 +341,34 @@ private: const char* _currElem = nullptr; // Element to validate: only the name is known to be good. typename Frames::iterator _currFrame; // Frame currently being validated. Frames _frames; // Has end pointers to check and the containing element for precise mode. + BSONValidator _validator; }; -} // namespace -Status validateBSON(const char* originalBuffer, uint64_t maxLength) noexcept { +template <typename BSONValidator> +Status _doValidate(const char* originalBuffer, uint64_t maxLength, BSONValidator validator) { // First try validating using the fast but less precise version. That version will return // a not-OK status for objects with CodeWScope or nesting exceeding 32 levels. These cases and // actual failures will rerun the precise version that gives a detailed error context. 
- if (MONGO_likely(ValidateBuffer<false>(originalBuffer, maxLength).validate().isOK())) + if (MONGO_likely((ValidateBuffer<false, BSONValidator>(originalBuffer, maxLength, validator) + .validate() + .isOK()))) return Status::OK(); - return ValidateBuffer<true>(originalBuffer, maxLength).validate(); + return ValidateBuffer<true, BSONValidator>(originalBuffer, maxLength, validator).validate(); +} +} // namespace + +Status validateBSON(const char* originalBuffer, + uint64_t maxLength, + BSONValidateMode mode) noexcept { + if (MONGO_likely(mode == BSONValidateMode::kDefault)) + return _doValidate(originalBuffer, maxLength, DefaultValidator()); + else if (mode == BSONValidateMode::kExtended) + return _doValidate(originalBuffer, maxLength, ExtendedValidator()); + else if (mode == BSONValidateMode::kFull) + return ValidateBuffer<true, FullValidator>(originalBuffer, maxLength, FullValidator()) + .validate(); + else + MONGO_UNREACHABLE; } } // namespace mongo diff --git a/src/mongo/bson/bson_validate.h b/src/mongo/bson/bson_validate.h index e169f852754..a748d372d39 100644 --- a/src/mongo/bson/bson_validate.h +++ b/src/mongo/bson/bson_validate.h @@ -36,18 +36,37 @@ namespace mongo { +enum class BSONValidateMode { + // Only fast structural BSON consistency checks. + kDefault, + // Structural BSON consistency and extra fast checks on BSON specifications. + kExtended, + // Structural BSON consistency and extra comprehensive checks on BSON specifications. + kFull, +}; + /** * Checks that the buf holds a BSON object as defined in http://bsonspec.org/spec.html. * Note that maxLength is the buffer size, NOT the BSON size. * Validation errors result in returning an InvalidBSON or Overflow status. - * The checks are structural only, and include: + * For the default validation mode, the checks are structural only, and include: * - String, Object, Array, BinData, DBRef, Code, Symbol and CodeWScope lengths are correct. 
* - Field names, String, Object, Array, DBRef, Code, Symbol, and CodeWScope end with NUL. * - Bool values are false (0) or true (1). * - Correct nesting, not exceeding maximum allowable nesting depth. - * They do not include validity of UTF-8 strings, contents of array indices, regular expression - * validity, code validity, correct length and formatting of binary subtypes, etc. + * For the extended validation mode, the checks include everything above and: + * - Deprecated types are not used. + * - Contents of array indices are consecutively numbered from zero. + * - Correct UUID and MD5 lengths. + * - Structurally correct encrypted BSON values. + * - Valid regular expression options. + * For the full validation mode, the checks include everything above and: + * - Field names are not duplicated in the same level. + * - Validity of UTF-8 strings. + * - Valid compressed BSON columns. * Length is only limited by the buffer's maxLength and the inherent 2GB - 1 format limitation. */ -Status validateBSON(const char* buf, uint64_t maxLength) noexcept; +Status validateBSON(const char* buf, + uint64_t maxLength, + BSONValidateMode mode = BSONValidateMode::kDefault) noexcept; } // namespace mongo diff --git a/src/mongo/db/catalog/validate_adaptor.cpp b/src/mongo/db/catalog/validate_adaptor.cpp index e4e03057066..473ba784fb7 100644 --- a/src/mongo/db/catalog/validate_adaptor.cpp +++ b/src/mongo/db/catalog/validate_adaptor.cpp @@ -326,7 +326,10 @@ Status ValidateAdaptor::validateRecord(OperationContext* opCtx, const RecordData& record, size_t* dataSize, ValidateResults* results) { - const Status status = validateBSON(record.data(), record.size()); + auto validateBSONMode = _validateState->isCheckingBSONConsistencies() + ? BSONValidateMode::kFull + : BSONValidateMode::kExtended; + const Status status = validateBSON(record.data(), record.size(), validateBSONMode); if (!status.isOK()) return status; |