summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Gregory Wlodarek <gregory.wlodarek@mongodb.com> 2021-10-30 01:04:13 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-10-30 01:38:21 +0000
commit 2455e1c112c89a3bdde41d718cadf7f9cc9b5bf0 (patch)
tree 895d6d415dd3037262f3519490a34bba507ec4e8
parent aa3c6eb2c7b4f76521004461e41b4a8b2022586e (diff)
download mongo-2455e1c112c89a3bdde41d718cadf7f9cc9b5bf0.tar.gz
SERVER-60577 Add functionality to detect mixed-schema data in time-series bucket collections
-rw-r--r-- src/mongo/db/catalog/SConscript | 2
-rw-r--r-- src/mongo/db/catalog/collection.h | 6
-rw-r--r-- src/mongo/db/catalog/collection_impl.cpp | 47
-rw-r--r-- src/mongo/db/catalog/collection_impl.h | 2
-rw-r--r-- src/mongo/db/catalog/collection_mock.h | 4
-rw-r--r-- src/mongo/db/catalog/collection_test.cpp | 127
6 files changed, 188 insertions, 0 deletions
diff --git a/src/mongo/db/catalog/SConscript b/src/mongo/db/catalog/SConscript
index 31a07bf6278..ca5635e34a8 100644
--- a/src/mongo/db/catalog/SConscript
+++ b/src/mongo/db/catalog/SConscript
@@ -239,6 +239,7 @@ env.Library(
'$BUILD_DIR/mongo/db/service_context',
'$BUILD_DIR/mongo/db/storage/storage_options',
'$BUILD_DIR/mongo/db/storage/write_unit_of_work',
+ '$BUILD_DIR/mongo/db/timeseries/timeseries_conversion_util',
'$BUILD_DIR/mongo/idl/server_parameter',
'$BUILD_DIR/mongo/util/fail_point',
'$BUILD_DIR/mongo/util/log_and_backoff',
@@ -578,6 +579,7 @@ if wiredtiger:
'$BUILD_DIR/mongo/db/service_context_d_test_fixture',
'$BUILD_DIR/mongo/db/service_context_test_fixture',
'$BUILD_DIR/mongo/db/storage/wiredtiger/storage_wiredtiger',
+ '$BUILD_DIR/mongo/db/timeseries/timeseries_options',
'$BUILD_DIR/mongo/unittest/unittest',
'$BUILD_DIR/mongo/util/clock_source_mock',
'$BUILD_DIR/mongo/util/fail_point',
diff --git a/src/mongo/db/catalog/collection.h b/src/mongo/db/catalog/collection.h
index 0ba948c2afe..90c9a7fdfbb 100644
--- a/src/mongo/db/catalog/collection.h
+++ b/src/mongo/db/catalog/collection.h
@@ -541,6 +541,12 @@ public:
virtual void setTimeseriesBucketsMayHaveMixedSchemaData(OperationContext* opCtx,
boost::optional<bool> setting) = 0;
+ /**
+ * Returns true if the passed in time-series bucket document contains mixed-schema data.
+ *
+ * In the CollectionImpl implementation, "mixed-schema" means that some element of the
+ * bucket's 'control.min' summary and the element at the same position in 'control.max' have
+ * different canonical BSON types (for example a number in one and an object, array, boolean
+ * or null in the other). Nested objects and arrays are compared recursively. Numeric values
+ * of different numeric types (int, long, double, decimal) are not treated as mixed.
+ *
+ * CollectionImpl returns false when the collection has no time-series options.
+ */
+ virtual bool doesTimeseriesBucketsDocContainMixedSchemaData(
+ const BSONObj& bucketsDoc) const = 0;
+
/*
* Returns true if this collection is clustered. That is, its RecordIds store the value of the
* cluster key. If the collection is clustered on _id, there is no separate _id index.
diff --git a/src/mongo/db/catalog/collection_impl.cpp b/src/mongo/db/catalog/collection_impl.cpp
index 33426a8804b..5bb5a14435d 100644
--- a/src/mongo/db/catalog/collection_impl.cpp
+++ b/src/mongo/db/catalog/collection_impl.cpp
@@ -76,6 +76,7 @@
#include "mongo/db/storage/key_string.h"
#include "mongo/db/storage/record_store.h"
#include "mongo/db/storage/storage_parameters_gen.h"
+#include "mongo/db/timeseries/timeseries_constants.h"
#include "mongo/db/timeseries/timeseries_index_schema_conversion_functions.h"
#include "mongo/db/transaction_participant.h"
#include "mongo/db/ttl_collection_cache.h"
@@ -314,6 +315,39 @@ bool indexTypeSupportsPathLevelMultikeyTracking(StringData accessMethod) {
return accessMethod == IndexNames::BTREE || accessMethod == IndexNames::GEO_2DSPHERE;
}
+bool doesMinMaxHaveMixedSchemaData(const BSONObj& min, const BSONObj& max) {  // True if any positionally paired elements of 'min' and 'max' differ in canonical type.
+ auto minIt = min.begin();
+ auto minEnd = min.end();
+ auto maxIt = max.begin();
+ auto maxEnd = max.end();
+
+ while (minIt != minEnd && maxIt != maxEnd) {  // Walk both objects in lockstep, pairing elements by position.
+ bool typeMatch = minIt->canonicalType() == maxIt->canonicalType();
+ if (!typeMatch) {
+ return true;  // Canonical types differ at the same position: mixed-schema data.
+ } else if (minIt->type() == Object) {
+ // 'control.min' and 'control.max' list fields in the same order, so paired names must match.
+ invariant(minIt->fieldNameStringData() == maxIt->fieldNameStringData());
+ if (doesMinMaxHaveMixedSchemaData(minIt->Obj(), maxIt->Obj())) {  // Recurse into the paired sub-objects.
+ return true;
+ }
+ } else if (minIt->type() == Array) {  // Arrays recurse like objects, but without the field-name check.
+ if (doesMinMaxHaveMixedSchemaData(minIt->Obj(), maxIt->Obj())) {
+ return true;
+ }
+ }
+
+ invariant(typeMatch);  // Always holds here: the mismatch case returned above.
+ minIt++;
+ maxIt++;
+ }
+
+ // 'control.min' and 'control.max' have the same cardinality, so both must be exhausted together.
+ invariant(minIt == minEnd && maxIt == maxEnd);
+
+ return false;  // Every paired element (recursively) had a matching canonical type.
+}
+
} // namespace
CollectionImpl::SharedState::SharedState(CollectionImpl* collection,
@@ -1364,6 +1398,19 @@ void CollectionImpl::setTimeseriesBucketsMayHaveMixedSchemaData(OperationContext
});
}
+bool CollectionImpl::doesTimeseriesBucketsDocContainMixedSchemaData(
+ const BSONObj& bucketsDoc) const {  // Reports whether the 'control' min/max summaries of 'bucketsDoc' disagree on canonical type.
+ if (!getTimeseriesOptions()) {  // No time-series options on this collection: nothing to check.
+ return false;
+ }
+
+ const BSONObj controlObj = bucketsDoc.getObjectField(timeseries::kBucketControlFieldName);  // NOTE(review): presumably an empty object when the field is absent - confirm.
+ const BSONObj minObj = controlObj.getObjectField(timeseries::kBucketControlMinFieldName);
+ const BSONObj maxObj = controlObj.getObjectField(timeseries::kBucketControlMaxFieldName);
+
+ return doesMinMaxHaveMixedSchemaData(minObj, maxObj);  // Recursive, position-by-position type comparison.
+}
+
bool CollectionImpl::isClustered() const {
return getClusteredInfo().is_initialized();
}
diff --git a/src/mongo/db/catalog/collection_impl.h b/src/mongo/db/catalog/collection_impl.h
index 763fd6aae56..55684e7e275 100644
--- a/src/mongo/db/catalog/collection_impl.h
+++ b/src/mongo/db/catalog/collection_impl.h
@@ -325,6 +325,8 @@ public:
void setTimeseriesBucketsMayHaveMixedSchemaData(OperationContext* opCtx,
boost::optional<bool> setting) final;
+ bool doesTimeseriesBucketsDocContainMixedSchemaData(const BSONObj& bucketsDoc) const final;  // Compares the bucket's 'control' min/max summaries; false for non-time-series collections.
+
/**
* isClustered() relies on the object returned from getClusteredInfo(). If
* ClusteredCollectionInfo exists, the collection is clustered.
diff --git a/src/mongo/db/catalog/collection_mock.h b/src/mongo/db/catalog/collection_mock.h
index d7dcd523a5f..3340930e5dc 100644
--- a/src/mongo/db/catalog/collection_mock.h
+++ b/src/mongo/db/catalog/collection_mock.h
@@ -246,6 +246,10 @@ public:
std::abort();
}
+ bool doesTimeseriesBucketsDocContainMixedSchemaData(const BSONObj& bucketsDoc) const {  // Not implemented by the mock.
+ std::abort();  // Calling this on the mock terminates the process.
+ }
+
bool isClustered() const {
std::abort();
}
diff --git a/src/mongo/db/catalog/collection_test.cpp b/src/mongo/db/catalog/collection_test.cpp
index cc778470713..6480eef202b 100644
--- a/src/mongo/db/catalog/collection_test.cpp
+++ b/src/mongo/db/catalog/collection_test.cpp
@@ -57,6 +57,7 @@ using namespace mongo;
class CollectionTest : public CatalogTestFixture {
protected:
void makeCapped(NamespaceString nss, long long cappedSize = 8192);
+ void makeTimeseries(NamespaceString nss);
void makeCollectionForMultikey(NamespaceString nss, StringData indexName);
};
@@ -67,6 +68,12 @@ void CollectionTest::makeCapped(NamespaceString nss, long long cappedSize) {
ASSERT_OK(storageInterface()->createCollection(operationContext(), nss, options));
}
+void CollectionTest::makeTimeseries(NamespaceString nss) {  // Test helper: creates collection 'nss' with time-series options.
+ CollectionOptions options;
+ options.timeseries = TimeseriesOptions(/*timeField=*/"t");  // Only the time field ("t") is specified.
+ ASSERT_OK(storageInterface()->createCollection(operationContext(), nss, options));  // Fails the test if creation fails.
+}
+
TEST_F(CollectionTest, CappedNotifierKillAndIsDead) {
NamespaceString nss("test.t");
makeCapped(nss);
@@ -334,6 +341,126 @@ TEST_F(CollectionTest, ForceSetIndexIsMultikeyRemovesUncommittedChangesOnRollbac
}
}
+TEST_F(CollectionTest, CheckTimeseriesBucketDocsForMixedSchemaData) {  // Checks mixed-schema detection against hand-written 'control' min/max documents.
+ NamespaceString nss("test.system.buckets.ts");
+ makeTimeseries(nss);  // The collection needs time-series options, otherwise the check always returns false.
+
+ auto opCtx = operationContext();
+ AutoGetCollection autoColl(opCtx, nss, MODE_IX);
+ const auto& coll = autoColl.getCollection();
+ ASSERT(coll);
+ ASSERT(coll->getTimeseriesOptions());
+
+ // These are the min/max control fields as generated prior to the change in SERVER-60565; they
+ // test the detection of mixed-schema data in time-series buckets written by earlier versions.
+ std::vector<BSONObj> mixedSchemaControlDocs = {  // Each entry must be reported as mixed-schema.
+ // Insert -> {x: NumberLong(1)}, {x: {y: "z"}}, {x: "abc"}
+ ::mongo::fromjson(
+ R"({ "control" : { "min" : { "x" : NumberLong(1) },
+ "max" : { "x" : { "y" : "z" } } } })"),
+ // Insert -> {x: NumberLong(1)}, {x: [1, 2, 3]}, {x: "abc"}
+ ::mongo::fromjson(
+ R"({ "control" : { "min" : { "x" : NumberLong(1) },
+ "max" : { "x" : [ 1, 2, 3 ] } } })"),
+ // Insert -> {x: {y: 1}}, {x: {y: 2}}, {x: {y: [1, 2]}}
+ ::mongo::fromjson(
+ R"({ "control" : { "min" : { "x" : { "y" : 1 } },
+ "max" : { "x" : { "y" : [ 1, 2 ] } } } })"),
+ // Insert -> {x: 1}, {x: {y: 10}}, {x: true}
+ ::mongo::fromjson(R"({ "control" : { "min" : { "x" : 1 },
+ "max" : { "x" : true } } })"),
+ // Insert -> {x: {y: 1}}, {x: {y: 2}}, {x: {y: null}}
+ ::mongo::fromjson(
+ R"({ "control" : { "min" : { "x" : { "y" : null } },
+ "max" : { "x" : { "y" : 2 } } } })"),
+ // Insert -> {x: {y: true}}, {x: {y: false}}, {x: {y: null}}
+ ::mongo::fromjson(
+ R"({ "control" : { "min" : { "x" : { "y" : null } },
+ "max" : { "x" : { "y" : true } } } })"),
+ // Insert -> {x: NumberLong(1)}, {x: {y: NumberDecimal(1.5)}}, {x: NumberLong(2)}
+ ::mongo::fromjson(
+ R"({ "control" : { "min" : { "x" : NumberLong(1) },
+ "max" : { "x" : { "y" : NumberDecimal("1.50000000000000") } } } })"),
+ // Insert -> {x: ["abc"]}, {x: [123]}
+ ::mongo::fromjson(R"({ "control" : { "min" : { "x" : [ 123 ] },
+ "max" : { "x" : [ "abc" ] } } })"),
+ // Insert -> {x: ["abc", 123]}, {x: [123, "abc"]}
+ ::mongo::fromjson(
+ R"({ "control" : { "min" : { "x" : [ 123, 123 ] },
+ "max" : { "x" : [ "abc", "abc" ] } } })"),
+ // Insert -> {x: {y: 1}}, {x: {y: {z: 5}}}, {x: {y: [1, 2]}}
+ ::mongo::fromjson(
+ R"({ "control" : { "min" : { "x" : { "y" : 1 } },
+ "max" : { "x" : { "y" : [ 1, 2 ] } } } })"),
+ // Insert -> {x: Number(1.0)}, {x: {y: "z"}}, {x: NumberLong(10)}
+ ::mongo::fromjson(R"({ "control" : { "min" : { "x" : 1 },
+ "max" : { "x" : { "y" : "z" } } } })"),
+ // Insert -> {x: Number(1.0)}, {x: [Number(2.0), Number(3.0)]}, {x: NumberLong(10)}
+ ::mongo::fromjson(R"({ "control" : { "min" : { "x" : 1 },
+ "max" : { "x" : [ 2, 3 ] } } })")};
+
+ for (const auto& controlDoc : mixedSchemaControlDocs) {
+ ASSERT_TRUE(coll->doesTimeseriesBucketsDocContainMixedSchemaData(controlDoc));
+ }
+
+ std::vector<BSONObj> nonMixedSchemaControlDocs = {  // Each entry must be reported as NOT mixed-schema.
+ // Insert -> {x: 1}, {x: 2}, {x: 3}
+ ::mongo::fromjson(R"({ "control" : { "min" : { "x" : 1 },
+ "max" : { "x" : 3 } } })"),
+ // Insert -> {x: 1}, {x: 1.5}
+ ::mongo::fromjson(R"({ "control" : { "min" : { "x" : 1 },
+ "max" : { "x" : 1.5 } } })"),
+ // Insert -> {x: NumberLong(1)}, {x: NumberDecimal(2)}
+ ::mongo::fromjson(
+ R"({ "control" : { "min" : { "x" : NumberLong(1) },
+ "max" : { "x" : NumberDecimal("2.00000000000000") } } })"),
+ // Insert -> {x: NumberInt(1)}, {x: NumberDecimal(1.5)}, {x: NumberLong(2)}
+ ::mongo::fromjson(R"({ "control" : { "min" : { "x" : 1 },
+ "max" : { "x" : NumberLong(2) } } })"),
+ // Insert -> {x: NumberLong(1)}, {x: NumberDecimal(1.5)}, {x: NumberLong(2)}
+ ::mongo::fromjson(
+ R"({ "control" : { "min" : { "x" : NumberLong(1) },
+ "max" : { "x" : NumberLong(2) } } })"),
+ // Insert -> {x: {y: true}}, {x: {y: false}}
+ ::mongo::fromjson(
+ R"({ "control" : { "min" : { "x" : { "y" : false } },
+ "max" : { "x" : { "y" : true } } } })"),
+ // Insert -> {x: [1, 2, 3]}, {x: [4, 5, 6]}
+ ::mongo::fromjson(
+ R"({ "control" : { "min" : { "x" : [ 1, 2, 3 ] },
+ "max" : { "x" : [ 4, 5, 6 ] } } })"),
+ // Insert -> {x: [{x: 1}, {z: false}]}, {x: [{x: 5}, {y: "abc"}]}
+ ::mongo::fromjson(
+ R"({ "control" : { "min" : { "x" : [ { "x" : 1 }, { "y" : "abc", "z" : false } ] },
+ "max" : { "x" : [ { "x" : 5 }, { "y" : "abc", "z" : false } ] } } })"),
+ // Insert -> {x: 1}, {y: 1}
+ ::mongo::fromjson(R"({ "control" : { "min" : { "x" : 1, "y" : 1 },
+ "max" : { "x" : 1, "y" : 1 } } })"),
+ // Insert -> {x: ["a"]}, {y: [1]}
+ ::mongo::fromjson(
+ R"({ control : { min : { x : [ "a" ], y : [ 1 ] },
+ max : { x : [ "a" ], y : [ 1 ] } } })"),
+ // Insert -> {x: {y: [{a: Number(1.0)}, [{b: NumberLong(10)}]]}},
+ // {x: {y: [{a: Number(5.0)}, [{b: NumberLong(50)}]]}}
+ ::mongo::fromjson(
+ R"({ "control" : { "min" : { "x" : { "y" : [ { "a" : 1 }, [ { "b" : NumberLong(10) } ] ] } },
+ "max" : { "x" : { "y" : [ { "a" : 5 }, [ { "b" : NumberLong(50) } ] ] } } } })"),
+ // Insert -> {x: Number(1.0)}, {x: NumberLong(10)}
+ ::mongo::fromjson(R"({ "control" : { "min" : { "x" : 1 },
+ "max" : { "x" : NumberLong(10) } } })"),
+
+ // Insert -> {x: {y: [{a: Number(1.5)}, [{b: NumberLong(10)}]]}},
+ // {x: {y: [{a: Number(2.5)}, [{b: Number(3.5)}]]}}
+ ::mongo::fromjson(
+ R"({ "control" : { "min" : { "x" : { "y" : [ { "a" : 1.5 }, [ { "b" : 3.5 } ] ] } },
+ "max" : { "x" : { "y" : [ { "a" : 2.5 }, [ { "b" : NumberLong(10) } ] ] } } } })")};
+
+
+ for (const auto& controlDoc : nonMixedSchemaControlDocs) {
+ ASSERT_FALSE(coll->doesTimeseriesBucketsDocContainMixedSchemaData(controlDoc));
+ }
+}
+
TEST_F(CatalogTestFixture, CollectionPtrNoYieldTag) {
CollectionMock mock(NamespaceString("test.t"));