diff options
author | Richard Hausman <richard.hausman@mongodb.com> | 2022-08-20 01:10:31 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-08-20 01:45:22 +0000 |
commit | 3f777924c271f91fdc9a878a2e70cc4f068d9014 (patch) | |
tree | d59fa770192347fb01073b057780bda7b1b4390d | |
parent | a96f1436e02feea955565ac0ea39f8b56f087639 (diff) | |
download | mongo-3f777924c271f91fdc9a878a2e70cc4f068d9014.tar.gz |
SERVER-67521 Check for duplicate field names in BSON documents during validation
-rw-r--r-- | jstests/disk/validate_bson_inconsistency.js | 11 | ||||
-rw-r--r-- | src/mongo/bson/bson_bm.cpp | 24 | ||||
-rw-r--r-- | src/mongo/bson/bson_validate.cpp | 48 | ||||
-rw-r--r-- | src/mongo/bson/bson_validate_test.cpp | 26 |
4 files changed, 102 insertions, 7 deletions
diff --git a/jstests/disk/validate_bson_inconsistency.js b/jstests/disk/validate_bson_inconsistency.js index e2d7675da90..91e406f6f25 100644 --- a/jstests/disk/validate_bson_inconsistency.js +++ b/jstests/disk/validate_bson_inconsistency.js @@ -31,9 +31,20 @@ resetDbpath(dbpath); mongod = startMongodOnExistingPath(dbpath); db = mongod.getDB(baseName); testColl = db[collName]; + testColl.insert({a: 1, b: 2, c: {b: 3}, d: {a: [2, 3, 4], b: {a: 2}}}); + testColl.insert({a: 1, b: 1}); + // Warnings should be triggered iff checkBSONConsistency is set to true. let res = assert.commandWorked(testColl.validate()); assert(res.valid, tojson(res)); + assert.eq(res.nNonCompliantDocuments, 0); + assert.eq(res.warnings.length, 0); + + res = assert.commandWorked(testColl.validate({checkBSONConsistency: true})); + assert(res.valid, tojson(res)); + assert.eq(res.nNonCompliantDocuments, numDocs); + assert.eq(res.warnings.length, 1); + MongoRunner.stopMongod(mongod, null, {skipValidation: true}); })(); diff --git a/src/mongo/bson/bson_bm.cpp b/src/mongo/bson/bson_bm.cpp index f4a1064dbac..29d12bbd6d3 100644 --- a/src/mongo/bson/bson_bm.cpp +++ b/src/mongo/bson/bson_bm.cpp @@ -114,8 +114,32 @@ void BM_validate(benchmark::State& state) { state.SetBytesProcessed(totalSize); } +void BM_validate_contents(benchmark::State& state) { + BSONArrayBuilder builder; + auto len = state.range(0); + size_t totalSize = 0; + for (auto j = 0; j < len; j++) + builder.append(buildSampleObj(j)); + BSONObj array = builder.done(); + + const auto& elem = array[0].Obj(); + auto status = validateBSON(elem.objdata(), elem.objsize(), BSONValidateMode::kFull); + if (!status.isOK()) + LOGV2(6752100, "Validate failed", "elem"_attr = elem, "status"_attr = status); + invariant(status.isOK()); + + for (auto _ : state) { + benchmark::ClobberMemory(); + benchmark::DoNotOptimize( + validateBSON(array.objdata(), array.objsize(), BSONValidateMode::kFull)); + totalSize += array.objsize(); + } + state.SetBytesProcessed(totalSize); +} + BENCHMARK(BM_arrayBuilder)->Ranges({{{1}, {100'000}}}); BENCHMARK(BM_arrayLookup)->Ranges({{{1}, {100'000}}}); BENCHMARK(BM_validate)->Ranges({{{1}, {1'000}}}); +BENCHMARK(BM_validate_contents)->Ranges({{{1}, {1'000}}}); } // namespace mongo diff --git a/src/mongo/bson/bson_validate.cpp b/src/mongo/bson/bson_validate.cpp index f69700696b1..370dc14dac6 100644 --- a/src/mongo/bson/bson_validate.cpp +++ b/src/mongo/bson/bson_validate.cpp @@ -122,7 +122,7 @@ public: _checkRegexOptions(options); break; } - case BSONType::BinData: + case BSONType::BinData: { uint8_t subtype = ConstDataView(ptr + sizeof(uint32_t)).read<LittleEndian<uint8_t>>(); switch (subtype) { @@ -153,6 +153,7 @@ public: } } break; + } } } @@ -223,8 +224,17 @@ protected: class FullValidator : private ExtendedValidator { public: void checkNonConformantElem(const char* ptr, uint32_t offsetToValue, uint8_t type) { + registerFieldName(ptr + 1); ExtendedValidator::checkNonConformantElem(ptr, offsetToValue, type); switch (type) { + case BSONType::Array: { + objFrames.push_back({std::vector<std::string>(), false}); + break; + } + case BSONType::Object: { + objFrames.push_back({std::vector<std::string>(), true}); + break; + }; case BSONType::BinData: { uint8_t subtype = ConstDataView(ptr + offsetToValue + sizeof(uint32_t)) .read<LittleEndian<uint8_t>>(); @@ -245,12 +255,37 @@ public: } } - void checkUTF8Char() {} - - void checkDuplicateFieldName() {} + void checkDuplicateFieldName() { + invariant(!objFrames.empty()); + auto& curr = objFrames.back().first; + // If curr is not an object frame, it will always be empty, so no need to check. + if (curr.empty()) { + objFrames.pop_back(); + return; + } + invariant(objFrames.back().second); + std::sort(curr.begin(), curr.end()); + auto duplicate = std::adjacent_find(curr.begin(), curr.end()); + uassert(NonConformantBSON, + fmt::format("A BSON document contains a duplicate field name : {}", *duplicate), + duplicate == curr.end()); + objFrames.pop_back(); + } void popLevel() { ExtendedValidator::popLevel(); + checkDuplicateFieldName(); + } + +private: + // A given frame is an object if and only if frame.second == true. + std::vector<std::pair<std::vector<std::string>, bool>> objFrames = { + {std::vector<std::string>(), true}}; + + void registerFieldName(std::string str) { + if (objFrames.back().second) { + objFrames.back().first.emplace_back(str); + }; } }; @@ -354,10 +389,10 @@ private: } bool _popFrame() { - _validator.popLevel(); if (_currFrame == _frames.begin()) return false; --_currFrame; + _validator.popLevel(); return true; } @@ -453,6 +488,9 @@ private: uassert(InvalidBSON, "incorrect BSON length", ++cursor.ptr == _currFrame->end); _maybePopCodeWithScope(cursor); } while (_popFrame()); // Finished when there are no frames left. + + // Check the top level field names. + _validator.checkDuplicateFieldName(); } /** diff --git a/src/mongo/bson/bson_validate_test.cpp b/src/mongo/bson/bson_validate_test.cpp index 1c9d7b0d046..ad097ffbc43 100644 --- a/src/mongo/bson/bson_validate_test.cpp +++ b/src/mongo/bson/bson_validate_test.cpp @@ -309,12 +309,12 @@ TEST(BSONValidateExtended, BSONArrayIndexes) { ASSERT_EQ(status, ErrorCodes::NonConformantBSON); x1 = BSON("validNestedArraysAndObjects" - << BSON("arr" << BSONArray(BSON("0" << BSON("2" << 1 << "1" << 0 << "1" + << BSON("arr" << BSONArray(BSON("0" << BSON("2" << 1 << "1" << 0 << "3" << BSONArray(BSON("0" << "a" << "1" << "b")) - << "1" + << "4" << "b"))))); ASSERT_OK(validateBSON(x1.objdata(), x1.objsize(), mongo::BSONValidateMode::kExtended)); ASSERT_OK(validateBSON(x1.objdata(), x1.objsize(), mongo::BSONValidateMode::kFull)); @@ -659,6 +659,28 @@ TEST(BSONValidateExtended, DeprecatedTypes) { ASSERT_EQ(status.code(), ErrorCodes::NonConformantBSON); } +TEST(BSONValidateExtended, DuplicateFieldNames) { + std::pair<Status, Status> stats{Status::OK(), Status::OK()}; + auto fullyValidate = [&](BSONObj obj) { + return std::pair{validateBSON(obj.objdata(), obj.objsize(), BSONValidateMode::kExtended), + validateBSON(obj.objdata(), obj.objsize(), BSONValidateMode::kFull)}; + }; + BSONObj x = BSON("a" << 1 << "b" << 2); + stats = fullyValidate(x); + ASSERT_OK(stats.first); + ASSERT_OK(stats.second); + + x = BSON("a" << 1 << "b" << 1 << "a" << 3); + stats = fullyValidate(x); + ASSERT_OK(stats.first); + ASSERT_EQ(stats.second, ErrorCodes::NonConformantBSON); + + x = BSON("a" << 1 << "b" << BSON("a" << 1 << "b" << BSON("a" << 1) << "a" << 3) << "c" << 3); + stats = fullyValidate(x); + ASSERT_OK(stats.first); + ASSERT_EQ(stats.second, ErrorCodes::NonConformantBSON); +} + TEST(BSONValidateExtended, BSONColumn) { BSONColumnBuilder cb("example"_sd); cb.append(BSON("a" |