summary refs log tree commit diff
diff options
context:
space:
mode:
author Yuhong Zhang <yuhong.zhang@mongodb.com> 2022-07-26 00:34:01 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-07-26 01:33:46 +0000
commit 541802d0bf8547f1d93e909858f884b6f83c8f42 (patch)
tree ed1a7b3ec3df29392ae9d2970d1e1b8541cace89
parent b8c57455bd0dd2ffea40e2b4f6e05c84f1ef9d6d (diff)
download mongo-541802d0bf8547f1d93e909858f884b6f83c8f42.tar.gz
SERVER-67561 Create the interface to enable `validateBSON()` to run more thorough checks
-rw-r--r-- src/mongo/bson/bson_validate.cpp 69
-rw-r--r-- src/mongo/bson/bson_validate.h 27
-rw-r--r-- src/mongo/db/catalog/validate_adaptor.cpp 5
3 files changed, 89 insertions, 12 deletions
diff --git a/src/mongo/bson/bson_validate.cpp b/src/mongo/bson/bson_validate.cpp
index 8725dfbc645..5bfbe2f4865 100644
--- a/src/mongo/bson/bson_validate.cpp
+++ b/src/mongo/bson/bson_validate.cpp
@@ -27,12 +27,13 @@
* it in the license file.
*/
+#include "mongo/bson/bson_validate.h"
+
#include <cstring>
#include <vector>
#include "mongo/base/data_view.h"
#include "mongo/bson/bson_depth.h"
-#include "mongo/bson/bson_validate.h"
#include "mongo/bson/bsonelement.h"
#include "mongo/logv2/log.h"
@@ -82,10 +83,42 @@ MONGO_STATIC_ASSERT(sizeof(kTypeInfoTable) == 32);
constexpr ErrorCodes::Error InvalidBSON = ErrorCodes::InvalidBSON;
-template <bool precise>
+class DefaultValidator {
+public:
+ void checkNonConformantElem(const char* ptr, uint8_t type) {}
+
+ void checkUTF8Char() {}
+
+ void checkDuplicateFieldName() {}
+};
+
+class ExtendedValidator {
+public:
+ void checkNonConformantElem(const char* ptr, uint8_t type) {
+ // TODO: Add checks for different BSON types.
+ }
+
+ void checkUTF8Char() {}
+
+ void checkDuplicateFieldName() {}
+};
+
+class FullValidator : public ExtendedValidator {
+public:
+ void checkNonConformantElem(const char* ptr, uint8_t type) {
+ // TODO: Add checks for different BSON types.
+ }
+
+ void checkUTF8Char() {}
+
+ void checkDuplicateFieldName() {}
+};
+
+template <bool precise, typename BSONValidator>
class ValidateBuffer {
public:
- ValidateBuffer(const char* data, uint64_t maxLength) : _data(data), _maxLength(maxLength) {
+ ValidateBuffer(const char* data, uint64_t maxLength, BSONValidator validator)
+ : _data(data), _maxLength(maxLength), _validator(validator) {
if constexpr (precise)
_frames.resize(BSONDepth::getMaxAllowableDepth() + 1);
}
@@ -264,6 +297,10 @@ private:
cursor.ptr += len + 1;
cursor.ptr = _validateElem(cursor, type);
+ // If the element is structurally correct, check that its data also complies with the
+ // other BSON specifications.
+ _validator.checkNonConformantElem(_currElem + len + 1, type);
+
if constexpr (precise) {
// See if the _id field was just validated. If so, set the global scope element.
if (_currFrame == _frames.begin() && StringData(_currElem + 1) == "_id"_sd)
@@ -304,16 +341,34 @@ private:
const char* _currElem = nullptr; // Element to validate: only the name is known to be good.
typename Frames::iterator _currFrame; // Frame currently being validated.
Frames _frames; // Has end pointers to check and the containing element for precise mode.
+ BSONValidator _validator;
};
-} // namespace
-Status validateBSON(const char* originalBuffer, uint64_t maxLength) noexcept {
+template <typename BSONValidator>
+Status _doValidate(const char* originalBuffer, uint64_t maxLength, BSONValidator validator) {
// First try validating using the fast but less precise version. That version will return
// a not-OK status for objects with CodeWScope or nesting exceeding 32 levels. These cases and
// actual failures will rerun the precise version that gives a detailed error context.
- if (MONGO_likely(ValidateBuffer<false>(originalBuffer, maxLength).validate().isOK()))
+ if (MONGO_likely((ValidateBuffer<false, BSONValidator>(originalBuffer, maxLength, validator)
+ .validate()
+ .isOK())))
return Status::OK();
- return ValidateBuffer<true>(originalBuffer, maxLength).validate();
+ return ValidateBuffer<true, BSONValidator>(originalBuffer, maxLength, validator).validate();
+}
+} // namespace
+
+Status validateBSON(const char* originalBuffer,
+ uint64_t maxLength,
+ BSONValidateMode mode) noexcept {
+ if (MONGO_likely(mode == BSONValidateMode::kDefault))
+ return _doValidate(originalBuffer, maxLength, DefaultValidator());
+ else if (mode == BSONValidateMode::kExtended)
+ return _doValidate(originalBuffer, maxLength, ExtendedValidator());
+ else if (mode == BSONValidateMode::kFull)
+ return ValidateBuffer<true, FullValidator>(originalBuffer, maxLength, FullValidator())
+ .validate();
+ else
+ MONGO_UNREACHABLE;
}
} // namespace mongo
diff --git a/src/mongo/bson/bson_validate.h b/src/mongo/bson/bson_validate.h
index e169f852754..a748d372d39 100644
--- a/src/mongo/bson/bson_validate.h
+++ b/src/mongo/bson/bson_validate.h
@@ -36,18 +36,37 @@
namespace mongo {
+enum class BSONValidateMode {
+ // Only fast structural BSON consistency checks.
+ kDefault,
+ // Structural BSON consistency and extra fast checks on BSON specifications.
+ kExtended,
+ // Structural BSON consistency and extra comprehensive checks on BSON specifications.
+ kFull,
+};
+
/**
* Checks that the buf holds a BSON object as defined in http://bsonspec.org/spec.html.
* Note that maxLength is the buffer size, NOT the BSON size.
* Validation errors result in returning an InvalidBSON or Overflow status.
- * The checks are structural only, and include:
+ * For the default validation mode, the checks are structural only, and include:
* - String, Object, Array, BinData, DBRef, Code, Symbol and CodeWScope lengths are correct.
* - Field names, String, Object, Array, DBRef, Code, Symbol, and CodeWScope end with NUL.
* - Bool values are false (0) or true (1).
* - Correct nesting, not exceeding maximum allowable nesting depth.
- * They do not include validity of UTF-8 strings, contents of array indices, regular expression
- * validity, code validity, correct length and formatting of binary subtypes, etc.
+ * For the extended validation mode, the checks include everything above and:
+ * - Deprecated types are not used.
+ * - Array indices are consecutively numbered from zero.
+ * - Correct UUID and MD5 lengths.
+ * - Structurally correct encrypted BSON values.
+ * - Valid regular expression options.
+ * For the full validation mode, the checks include everything above and:
+ * - Field names are not duplicated at the same level.
+ * - Validity of UTF-8 strings.
+ * - Valid compressed BSON columns.
* Length is only limited by the buffer's maxLength and the inherent 2GB - 1 format limitation.
*/
-Status validateBSON(const char* buf, uint64_t maxLength) noexcept;
+Status validateBSON(const char* buf,
+ uint64_t maxLength,
+ BSONValidateMode mode = BSONValidateMode::kDefault) noexcept;
} // namespace mongo
diff --git a/src/mongo/db/catalog/validate_adaptor.cpp b/src/mongo/db/catalog/validate_adaptor.cpp
index e4e03057066..473ba784fb7 100644
--- a/src/mongo/db/catalog/validate_adaptor.cpp
+++ b/src/mongo/db/catalog/validate_adaptor.cpp
@@ -326,7 +326,10 @@ Status ValidateAdaptor::validateRecord(OperationContext* opCtx,
const RecordData& record,
size_t* dataSize,
ValidateResults* results) {
- const Status status = validateBSON(record.data(), record.size());
+ auto validateBSONMode = _validateState->isCheckingBSONConsistencies()
+ ? BSONValidateMode::kFull
+ : BSONValidateMode::kExtended;
+ const Status status = validateBSON(record.data(), record.size(), validateBSONMode);
if (!status.isOK())
return status;