summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Hausman <richard.hausman@mongodb.com>2022-08-20 01:10:31 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-08-20 01:45:22 +0000
commit3f777924c271f91fdc9a878a2e70cc4f068d9014 (patch)
treed59fa770192347fb01073b057780bda7b1b4390d
parenta96f1436e02feea955565ac0ea39f8b56f087639 (diff)
downloadmongo-3f777924c271f91fdc9a878a2e70cc4f068d9014.tar.gz
SERVER-67521 Check for duplicate field names in BSON documents during validation
-rw-r--r--jstests/disk/validate_bson_inconsistency.js11
-rw-r--r--src/mongo/bson/bson_bm.cpp24
-rw-r--r--src/mongo/bson/bson_validate.cpp48
-rw-r--r--src/mongo/bson/bson_validate_test.cpp26
4 files changed, 102 insertions, 7 deletions
diff --git a/jstests/disk/validate_bson_inconsistency.js b/jstests/disk/validate_bson_inconsistency.js
index e2d7675da90..91e406f6f25 100644
--- a/jstests/disk/validate_bson_inconsistency.js
+++ b/jstests/disk/validate_bson_inconsistency.js
@@ -31,9 +31,20 @@ resetDbpath(dbpath);
mongod = startMongodOnExistingPath(dbpath);
db = mongod.getDB(baseName);
testColl = db[collName];
+ testColl.insert({a: 1, b: 2, c: {b: 3}, d: {a: [2, 3, 4], b: {a: 2}}});
+ testColl.insert({a: 1, b: 1});
+ // Warnings should be triggered iff checkBSONConsistency is set to true.
let res = assert.commandWorked(testColl.validate());
assert(res.valid, tojson(res));
+ assert.eq(res.nNonCompliantDocuments, 0);
+ assert.eq(res.warnings.length, 0);
+
+ res = assert.commandWorked(testColl.validate({checkBSONConsistency: true}));
+ assert(res.valid, tojson(res));
+ assert.eq(res.nNonCompliantDocuments, numDocs);
+ assert.eq(res.warnings.length, 1);
+
MongoRunner.stopMongod(mongod, null, {skipValidation: true});
})();
diff --git a/src/mongo/bson/bson_bm.cpp b/src/mongo/bson/bson_bm.cpp
index f4a1064dbac..29d12bbd6d3 100644
--- a/src/mongo/bson/bson_bm.cpp
+++ b/src/mongo/bson/bson_bm.cpp
@@ -114,8 +114,32 @@ void BM_validate(benchmark::State& state) {
state.SetBytesProcessed(totalSize);
}
+void BM_validate_contents(benchmark::State& state) {
+ BSONArrayBuilder builder;
+ auto len = state.range(0);
+ size_t totalSize = 0;
+ for (auto j = 0; j < len; j++)
+ builder.append(buildSampleObj(j));
+ BSONObj array = builder.done();
+
+ const auto& elem = array[0].Obj();
+ auto status = validateBSON(elem.objdata(), elem.objsize(), BSONValidateMode::kFull);
+ if (!status.isOK())
+ LOGV2(6752100, "Validate failed", "elem"_attr = elem, "status"_attr = status);
+ invariant(status.isOK());
+
+ for (auto _ : state) {
+ benchmark::ClobberMemory();
+ benchmark::DoNotOptimize(
+ validateBSON(array.objdata(), array.objsize(), BSONValidateMode::kFull));
+ totalSize += array.objsize();
+ }
+ state.SetBytesProcessed(totalSize);
+}
+
BENCHMARK(BM_arrayBuilder)->Ranges({{{1}, {100'000}}});
BENCHMARK(BM_arrayLookup)->Ranges({{{1}, {100'000}}});
BENCHMARK(BM_validate)->Ranges({{{1}, {1'000}}});
+BENCHMARK(BM_validate_contents)->Ranges({{{1}, {1'000}}});
} // namespace mongo
diff --git a/src/mongo/bson/bson_validate.cpp b/src/mongo/bson/bson_validate.cpp
index f69700696b1..370dc14dac6 100644
--- a/src/mongo/bson/bson_validate.cpp
+++ b/src/mongo/bson/bson_validate.cpp
@@ -122,7 +122,7 @@ public:
_checkRegexOptions(options);
break;
}
- case BSONType::BinData:
+ case BSONType::BinData: {
uint8_t subtype =
ConstDataView(ptr + sizeof(uint32_t)).read<LittleEndian<uint8_t>>();
switch (subtype) {
@@ -153,6 +153,7 @@ public:
}
}
break;
+ }
}
}
@@ -223,8 +224,17 @@ protected:
class FullValidator : private ExtendedValidator {
public:
void checkNonConformantElem(const char* ptr, uint32_t offsetToValue, uint8_t type) {
+ registerFieldName(ptr + 1);
ExtendedValidator::checkNonConformantElem(ptr, offsetToValue, type);
switch (type) {
+ case BSONType::Array: {
+ objFrames.push_back({std::vector<std::string>(), false});
+ break;
+ }
+ case BSONType::Object: {
+ objFrames.push_back({std::vector<std::string>(), true});
+ break;
+ };
case BSONType::BinData: {
uint8_t subtype = ConstDataView(ptr + offsetToValue + sizeof(uint32_t))
.read<LittleEndian<uint8_t>>();
@@ -245,12 +255,37 @@ public:
}
}
- void checkUTF8Char() {}
-
- void checkDuplicateFieldName() {}
+ void checkDuplicateFieldName() {
+ invariant(!objFrames.empty());
+ auto& curr = objFrames.back().first;
+ // If curr is not an object frame, it will always be empty, so no need to check.
+ if (curr.empty()) {
+ objFrames.pop_back();
+ return;
+ }
+ invariant(objFrames.back().second);
+ std::sort(curr.begin(), curr.end());
+ auto duplicate = std::adjacent_find(curr.begin(), curr.end());
+ uassert(NonConformantBSON,
+ fmt::format("A BSON document contains a duplicate field name : {}", *duplicate),
+ duplicate == curr.end());
+ objFrames.pop_back();
+ }
void popLevel() {
ExtendedValidator::popLevel();
+ checkDuplicateFieldName();
+ }
+
+private:
+ // A given frame is an object if and only if frame.second == true.
+ std::vector<std::pair<std::vector<std::string>, bool>> objFrames = {
+ {std::vector<std::string>(), true}};
+
+ void registerFieldName(std::string str) {
+ if (objFrames.back().second) {
+ objFrames.back().first.emplace_back(str);
+ };
}
};
@@ -354,10 +389,10 @@ private:
}
bool _popFrame() {
- _validator.popLevel();
if (_currFrame == _frames.begin())
return false;
--_currFrame;
+ _validator.popLevel();
return true;
}
@@ -453,6 +488,9 @@ private:
uassert(InvalidBSON, "incorrect BSON length", ++cursor.ptr == _currFrame->end);
_maybePopCodeWithScope(cursor);
} while (_popFrame()); // Finished when there are no frames left.
+
+ // Check the top level field names.
+ _validator.checkDuplicateFieldName();
}
/**
diff --git a/src/mongo/bson/bson_validate_test.cpp b/src/mongo/bson/bson_validate_test.cpp
index 1c9d7b0d046..ad097ffbc43 100644
--- a/src/mongo/bson/bson_validate_test.cpp
+++ b/src/mongo/bson/bson_validate_test.cpp
@@ -309,12 +309,12 @@ TEST(BSONValidateExtended, BSONArrayIndexes) {
ASSERT_EQ(status, ErrorCodes::NonConformantBSON);
x1 = BSON("validNestedArraysAndObjects"
- << BSON("arr" << BSONArray(BSON("0" << BSON("2" << 1 << "1" << 0 << "1"
+ << BSON("arr" << BSONArray(BSON("0" << BSON("2" << 1 << "1" << 0 << "3"
<< BSONArray(BSON("0"
<< "a"
<< "1"
<< "b"))
- << "1"
+ << "4"
<< "b")))));
ASSERT_OK(validateBSON(x1.objdata(), x1.objsize(), mongo::BSONValidateMode::kExtended));
ASSERT_OK(validateBSON(x1.objdata(), x1.objsize(), mongo::BSONValidateMode::kFull));
@@ -659,6 +659,28 @@ TEST(BSONValidateExtended, DeprecatedTypes) {
ASSERT_EQ(status.code(), ErrorCodes::NonConformantBSON);
}
+TEST(BSONValidateExtended, DuplicateFieldNames) {
+ std::pair<Status, Status> stats{Status::OK(), Status::OK()};
+ auto fullyValidate = [&](BSONObj obj) {
+ return std::pair{validateBSON(obj.objdata(), obj.objsize(), BSONValidateMode::kExtended),
+ validateBSON(obj.objdata(), obj.objsize(), BSONValidateMode::kFull)};
+ };
+ BSONObj x = BSON("a" << 1 << "b" << 2);
+ stats = fullyValidate(x);
+ ASSERT_OK(stats.first);
+ ASSERT_OK(stats.second);
+
+ x = BSON("a" << 1 << "b" << 1 << "a" << 3);
+ stats = fullyValidate(x);
+ ASSERT_OK(stats.first);
+ ASSERT_EQ(stats.second, ErrorCodes::NonConformantBSON);
+
+ x = BSON("a" << 1 << "b" << BSON("a" << 1 << "b" << BSON("a" << 1) << "a" << 3) << "c" << 3);
+ stats = fullyValidate(x);
+ ASSERT_OK(stats.first);
+ ASSERT_EQ(stats.second, ErrorCodes::NonConformantBSON);
+}
+
TEST(BSONValidateExtended, BSONColumn) {
BSONColumnBuilder cb("example"_sd);
cb.append(BSON("a"