diff options
author | Gregory Wlodarek <gregory.wlodarek@mongodb.com> | 2021-10-30 01:04:13 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-10-30 01:38:21 +0000 |
commit | 2455e1c112c89a3bdde41d718cadf7f9cc9b5bf0 (patch) | |
tree | 895d6d415dd3037262f3519490a34bba507ec4e8 | |
parent | aa3c6eb2c7b4f76521004461e41b4a8b2022586e (diff) | |
download | mongo-2455e1c112c89a3bdde41d718cadf7f9cc9b5bf0.tar.gz |
SERVER-60577 Add functionality to detect mixed-schema data in time-series bucket collections
-rw-r--r-- | src/mongo/db/catalog/SConscript | 2 | ||||
-rw-r--r-- | src/mongo/db/catalog/collection.h | 6 | ||||
-rw-r--r-- | src/mongo/db/catalog/collection_impl.cpp | 47 | ||||
-rw-r--r-- | src/mongo/db/catalog/collection_impl.h | 2 | ||||
-rw-r--r-- | src/mongo/db/catalog/collection_mock.h | 4 | ||||
-rw-r--r-- | src/mongo/db/catalog/collection_test.cpp | 127 |
6 files changed, 188 insertions, 0 deletions
diff --git a/src/mongo/db/catalog/SConscript b/src/mongo/db/catalog/SConscript
index 31a07bf6278..ca5635e34a8 100644
--- a/src/mongo/db/catalog/SConscript
+++ b/src/mongo/db/catalog/SConscript
@@ -239,6 +239,7 @@ env.Library(
         '$BUILD_DIR/mongo/db/service_context',
         '$BUILD_DIR/mongo/db/storage/storage_options',
         '$BUILD_DIR/mongo/db/storage/write_unit_of_work',
+        '$BUILD_DIR/mongo/db/timeseries/timeseries_conversion_util',
         '$BUILD_DIR/mongo/idl/server_parameter',
         '$BUILD_DIR/mongo/util/fail_point',
         '$BUILD_DIR/mongo/util/log_and_backoff',
@@ -578,6 +579,7 @@ if wiredtiger:
             '$BUILD_DIR/mongo/db/service_context_d_test_fixture',
             '$BUILD_DIR/mongo/db/service_context_test_fixture',
             '$BUILD_DIR/mongo/db/storage/wiredtiger/storage_wiredtiger',
+            '$BUILD_DIR/mongo/db/timeseries/timeseries_options',
             '$BUILD_DIR/mongo/unittest/unittest',
             '$BUILD_DIR/mongo/util/clock_source_mock',
             '$BUILD_DIR/mongo/util/fail_point',
diff --git a/src/mongo/db/catalog/collection.h b/src/mongo/db/catalog/collection.h
index 0ba948c2afe..90c9a7fdfbb 100644
--- a/src/mongo/db/catalog/collection.h
+++ b/src/mongo/db/catalog/collection.h
@@ -541,6 +541,12 @@ public:
     virtual void setTimeseriesBucketsMayHaveMixedSchemaData(OperationContext* opCtx,
                                                             boost::optional<bool> setting) = 0;
 
+    /**
+     * Returns true if the passed in time-series bucket document contains mixed-schema data.
+     */
+    virtual bool doesTimeseriesBucketsDocContainMixedSchemaData(
+        const BSONObj& bucketsDoc) const = 0;
+
     /*
      * Returns true if this collection is clustered. That is, its RecordIds store the value of the
      * cluster key. If the collection is clustered on _id, there is no separate _id index.
diff --git a/src/mongo/db/catalog/collection_impl.cpp b/src/mongo/db/catalog/collection_impl.cpp
index 33426a8804b..5bb5a14435d 100644
--- a/src/mongo/db/catalog/collection_impl.cpp
+++ b/src/mongo/db/catalog/collection_impl.cpp
@@ -76,6 +76,7 @@
 #include "mongo/db/storage/key_string.h"
 #include "mongo/db/storage/record_store.h"
 #include "mongo/db/storage/storage_parameters_gen.h"
+#include "mongo/db/timeseries/timeseries_constants.h"
 #include "mongo/db/timeseries/timeseries_index_schema_conversion_functions.h"
 #include "mongo/db/transaction_participant.h"
 #include "mongo/db/ttl_collection_cache.h"
@@ -314,6 +315,39 @@ bool indexTypeSupportsPathLevelMultikeyTracking(StringData accessMethod) {
     return accessMethod == IndexNames::BTREE || accessMethod == IndexNames::GEO_2DSPHERE;
 }
 
+bool doesMinMaxHaveMixedSchemaData(const BSONObj& min, const BSONObj& max) {  // Walks both in lockstep.
+    auto minIt = min.begin();
+    auto minEnd = min.end();
+    auto maxIt = max.begin();
+    auto maxEnd = max.end();
+
+    while (minIt != minEnd && maxIt != maxEnd) {
+        bool typeMatch = minIt->canonicalType() == maxIt->canonicalType();
+        if (!typeMatch) {  // Paired elements differ in canonical type: report mixed-schema data.
+            return true;
+        } else if (minIt->type() == Object) {  // Same canonical type; recurse into the sub-objects.
+            // The 'control.min' and 'control.max' fields have the same ordering.
+            invariant(minIt->fieldNameStringData() == maxIt->fieldNameStringData());
+            if (doesMinMaxHaveMixedSchemaData(minIt->Obj(), maxIt->Obj())) {
+                return true;
+            }
+        } else if (minIt->type() == Array) {  // Recurse into the arrays; no field-name check here.
+            if (doesMinMaxHaveMixedSchemaData(minIt->Obj(), maxIt->Obj())) {
+                return true;
+            }
+        }
+
+        invariant(typeMatch);  // Holds here: the mismatch case returned above.
+        minIt++;
+        maxIt++;
+    }
+
+    // The 'control.min' and 'control.max' fields have the same cardinality.
+    invariant(minIt == minEnd && maxIt == maxEnd);  // Both iterators must finish together.
+
+    return false;  // Every paired element had a matching canonical type.
+}
+
 }  // namespace
 
 CollectionImpl::SharedState::SharedState(CollectionImpl* collection,
@@ -1364,6 +1398,19 @@ void CollectionImpl::setTimeseriesBucketsMayHaveMixedSchemaData(OperationContext
     });
 }
 
+bool CollectionImpl::doesTimeseriesBucketsDocContainMixedSchemaData(
+    const BSONObj& bucketsDoc) const {
+    if (!getTimeseriesOptions()) {  // No time-series options on this collection: answer false.
+        return false;
+    }
+
+    const BSONObj controlObj = bucketsDoc.getObjectField(timeseries::kBucketControlFieldName);
+    const BSONObj minObj = controlObj.getObjectField(timeseries::kBucketControlMinFieldName);
+    const BSONObj maxObj = controlObj.getObjectField(timeseries::kBucketControlMaxFieldName);
+
+    return doesMinMaxHaveMixedSchemaData(minObj, maxObj);  // Compare the control min/max objects.
+}
+
 bool CollectionImpl::isClustered() const {
     return getClusteredInfo().is_initialized();
 }
diff --git a/src/mongo/db/catalog/collection_impl.h b/src/mongo/db/catalog/collection_impl.h
index 763fd6aae56..55684e7e275 100644
--- a/src/mongo/db/catalog/collection_impl.h
+++ b/src/mongo/db/catalog/collection_impl.h
@@ -325,6 +325,8 @@ public:
     void setTimeseriesBucketsMayHaveMixedSchemaData(OperationContext* opCtx,
                                                     boost::optional<bool> setting) final;
 
+    bool doesTimeseriesBucketsDocContainMixedSchemaData(const BSONObj& bucketsDoc) const final;
+
     /**
      * isClustered() relies on the object returned from getClusteredInfo(). If
      * ClusteredCollectionInfo exists, the collection is clustered.
diff --git a/src/mongo/db/catalog/collection_mock.h b/src/mongo/db/catalog/collection_mock.h
index d7dcd523a5f..3340930e5dc 100644
--- a/src/mongo/db/catalog/collection_mock.h
+++ b/src/mongo/db/catalog/collection_mock.h
@@ -246,6 +246,10 @@ public:
         std::abort();
     }
 
+    bool doesTimeseriesBucketsDocContainMixedSchemaData(const BSONObj& bucketsDoc) const {
+        std::abort();  // Aborts unconditionally, like the neighbouring mock methods shown here.
+    }
+
     bool isClustered() const {
         std::abort();
     }
diff --git a/src/mongo/db/catalog/collection_test.cpp b/src/mongo/db/catalog/collection_test.cpp
index cc778470713..6480eef202b 100644
--- a/src/mongo/db/catalog/collection_test.cpp
+++ b/src/mongo/db/catalog/collection_test.cpp
@@ -57,6 +57,7 @@ using namespace mongo;
 class CollectionTest : public CatalogTestFixture {
 protected:
     void makeCapped(NamespaceString nss, long long cappedSize = 8192);
+    void makeTimeseries(NamespaceString nss);
     void makeCollectionForMultikey(NamespaceString nss, StringData indexName);
 };
 
@@ -67,6 +68,12 @@ void CollectionTest::makeCapped(NamespaceString nss, long long cappedSize) {
     ASSERT_OK(storageInterface()->createCollection(operationContext(), nss, options));
 }
 
+void CollectionTest::makeTimeseries(NamespaceString nss) {  // Creates 'nss' with time-series options.
+    CollectionOptions options;
+    options.timeseries = TimeseriesOptions(/*timeField=*/"t");
+    ASSERT_OK(storageInterface()->createCollection(operationContext(), nss, options));
+}
+
 TEST_F(CollectionTest, CappedNotifierKillAndIsDead) {
     NamespaceString nss("test.t");
     makeCapped(nss);
@@ -334,6 +341,126 @@ TEST_F(CollectionTest, ForceSetIndexIsMultikeyRemovesUncommittedChangesOnRollbac
     }
 }
 
+TEST_F(CollectionTest, CheckTimeseriesBucketDocsForMixedSchemaData) {
+    NamespaceString nss("test.system.buckets.ts");
+    makeTimeseries(nss);  // The checks below need a collection that has time-series options.
+
+    auto opCtx = operationContext();
+    AutoGetCollection autoColl(opCtx, nss, MODE_IX);
+    const auto& coll = autoColl.getCollection();
+    ASSERT(coll);
+    ASSERT(coll->getTimeseriesOptions());
+
+    // These are the min/max control fields generated prior to the change in SERVER-60565 in order
+    // to test the detection of mixed-schema data in time-series buckets from earlier versions.
+    std::vector<BSONObj> mixedSchemaControlDocs = {
+        // Insert -> {x: NumberLong(1)}, {x: {y: "z"}}, {x: "abc"}
+        ::mongo::fromjson(
+            R"({ "control" : { "min" : { "x" : NumberLong(1) },
+                               "max" : { "x" : { "y" : "z" } } } })"),
+        // Insert -> {x: NumberLong(1)}, {x: [1, 2, 3]}, {x: "abc"}
+        ::mongo::fromjson(
+            R"({ "control" : { "min" : { "x" : NumberLong(1) },
+                               "max" : { "x" : [ 1, 2, 3 ] } } })"),
+        // Insert -> {x: {y: 1}}, {x: {y: 2}}, {x: {y: [1, 2]}}
+        ::mongo::fromjson(
+            R"({ "control" : { "min" : { "x" : { "y" : 1 } },
+                               "max" : { "x" : { "y" : [ 1, 2 ] } } } })"),
+        // Insert -> {x: 1}, {x: {y: 10}}, {x: true}
+        ::mongo::fromjson(R"({ "control" : { "min" : { "x" : 1 },
+                                             "max" : { "x" : true } } })"),
+        // Insert -> {x: {y: 1}}, {x: {y: 2}}, {x: {y: null}}
+        ::mongo::fromjson(
+            R"({ "control" : { "min" : { "x" : { "y" : null } },
+                               "max" : { "x" : { "y" : 2 } } } })"),
+        // Insert -> {x: {y: true}}, {x: {y: false}}, {x: {y: null}}
+        ::mongo::fromjson(
+            R"({ "control" : { "min" : { "x" : { "y" : null } },
+                               "max" : { "x" : { "y" : true } } } })"),
+        // Insert -> {x: NumberLong(1)}, {x: {y: NumberDecimal(1.5)}}, {x: NumberLong(2)}
+        ::mongo::fromjson(
+            R"({ "control" : { "min" : { "x" : NumberLong(1) },
+                               "max" : { "x" : { "y" : NumberDecimal("1.50000000000000") } } } })"),
+        // Insert -> {x: ["abc"]}, {x: [123]}
+        ::mongo::fromjson(R"({ "control" : { "min" : { "x" : [ 123 ] },
+                                             "max" : { "x" : [ "abc" ] } } })"),
+        // Insert -> {x: ["abc", 123]}, {x: [123, "abc"]}
+        ::mongo::fromjson(
+            R"({ "control" : { "min" : { "x" : [ 123, 123 ] },
+                               "max" : { "x" : [ "abc", "abc" ] } } })"),
+        // Insert -> {x: {y: 1}}, {x: {y: {z: 5}}}, {x: {y: [1, 2]}}
+        ::mongo::fromjson(
+            R"({ "control" : { "min" : { "x" : { "y" : 1 } },
+                               "max" : { "x" : { "y" : [ 1, 2 ] } } } })"),
+        // Insert -> {x: Number(1.0)}, {x: {y: "z"}}, {x: NumberLong(10)}
+        ::mongo::fromjson(R"({ "control" : { "min" : { "x" : 1 },
+                                             "max" : { "x" : { "y" : "z" } } } })"),
+        // Insert -> {x: Number(1.0)}, {x: [Number(2.0), Number(3.0)]}, {x: NumberLong(10)}
+        ::mongo::fromjson(R"({ "control" : { "min" : { "x" : 1 },
+                                             "max" : { "x" : [ 2, 3 ] } } })")};
+
+    for (const auto& controlDoc : mixedSchemaControlDocs) {  // Each doc must be flagged as mixed.
+        ASSERT_TRUE(coll->doesTimeseriesBucketsDocContainMixedSchemaData(controlDoc));
+    }
+
+    std::vector<BSONObj> nonMixedSchemaControlDocs = {
+        // Insert -> {x: 1}, {x: 2}, {x: 3}
+        ::mongo::fromjson(R"({ "control" : { "min" : { "x" : 1 },
+                                             "max" : { "x" : 3 } } })"),
+        // Insert -> {x: 1}, {x: 1.5}
+        ::mongo::fromjson(R"({ "control" : { "min" : { "x" : 1 },
+                                             "max" : { "x" : 1.5 } } })"),
+        // Insert -> {x: NumberLong(1)}, {x: NumberDecimal(2)}
+        ::mongo::fromjson(
+            R"({ "control" : { "min" : { "x" : NumberLong(1) },
+                               "max" : { "x" : NumberDecimal("2.00000000000000") } } })"),
+        // Insert -> {x: NumberInt(1)}, {x: NumberDecimal(1.5)}, {x: NumberLong(2)}
+        ::mongo::fromjson(R"({ "control" : { "min" : { "x" : 1 },
+                                             "max" : { "x" : NumberLong(2) } } })"),
+        // Insert -> {x: NumberLong(1)}, {x: NumberDecimal(1.5)}, {x: NumberLong(2)}
+        ::mongo::fromjson(
+            R"({ "control" : { "min" : { "x" : NumberLong(1) },
+                               "max" : { "x" : NumberLong(2) } } })"),
+        // Insert -> {x: {y: true}}, {x: {y: false}}
+        ::mongo::fromjson(
+            R"({ "control" : { "min" : { "x" : { "y" : false } },
+                               "max" : { "x" : { "y" : true } } } })"),
+        // Insert -> {x: [1, 2, 3]}, {x: [4, 5, 6]}
+        ::mongo::fromjson(
+            R"({ "control" : { "min" : { "x" : [ 1, 2, 3 ] },
+                               "max" : { "x" : [ 4, 5, 6 ] } } })"),
+        // Insert -> {x: [{x: 1}, {z: false}]}, {x: [{x: 5}, {y: "abc"}]}
+        ::mongo::fromjson(
+            R"({ "control" : { "min" : { "x" : [ { "x" : 1 }, { "y" : "abc", "z" : false } ] },
+                               "max" : { "x" : [ { "x" : 5 }, { "y" : "abc", "z" : false } ] } } })"),
+        // Insert -> {x: 1}, {y: 1}
+        ::mongo::fromjson(R"({ "control" : { "min" : { "x" : 1, "y" : 1 },
+                                             "max" : { "x" : 1, "y" : 1 } } })"),
+        // Insert -> {x: ["a"]}, {y: [1]}
+        ::mongo::fromjson(
+            R"({ control : { min : { x : [ "a" ], y : [ 1 ] },
+                             max : { x : [ "a" ], y : [ 1 ] } } })"),
+        // Insert -> {x: {y: [{a: Number(1.0)}, [{b: NumberLong(10)}]]}},
+        //           {x: {y: [{a: Number(5.0)}, [{b: NumberLong(50)}]]}}
+        ::mongo::fromjson(
+            R"({ "control" : { "min" : { "x" : { "y" : [ { "a" : 1 }, [ { "b" : NumberLong(10) } ] ] } },
+                               "max" : { "x" : { "y" : [ { "a" : 5 }, [ { "b" : NumberLong(50) } ] ] } } } })"),
+        // Insert -> {x: Number(1.0)}, {x: NumberLong(10)}
+        ::mongo::fromjson(R"({ "control" : { "min" : { "x" : 1 },
+                                             "max" : { "x" : NumberLong(10) } } })"),
+
+        // Insert -> {x: {y: [{a: Number(1.5)}, [{b: NumberLong(10)}]]}},
+        //           {x: {y: [{a: Number(2.5)}, [{b: Number(3.5)}]]}}
+        ::mongo::fromjson(
+            R"({ "control" : { "min" : { "x" : { "y" : [ { "a" : 1.5 }, [ { "b" : 3.5 } ] ] } },
+                               "max" : { "x" : { "y" : [ { "a" : 2.5 }, [ { "b" : NumberLong(10) } ] ] } } } })")};
+
+
+    for (const auto& controlDoc : nonMixedSchemaControlDocs) {  // None of these may be flagged.
+        ASSERT_FALSE(coll->doesTimeseriesBucketsDocContainMixedSchemaData(controlDoc));
+    }
+}
+
 TEST_F(CatalogTestFixture, CollectionPtrNoYieldTag) {
     CollectionMock mock(NamespaceString("test.t"));
 
|