summaryrefslogtreecommitdiff
path: root/src/mongo/db
diff options
context:
space:
mode:
author Gregory Wlodarek <gregory.wlodarek@mongodb.com> 2022-05-26 22:03:01 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-05-26 22:42:33 +0000
commit 21305d9aedb10717eeeb997ff49d28d42457b120 (patch)
tree 98a0e9361f453f86f4c797611a08d9ce6f5811a4 /src/mongo/db
parent 4fe31381927908464d79624aa748bcf30006eb79 (diff)
download mongo-21305d9aedb10717eeeb997ff49d28d42457b120.tar.gz
SERVER-66681 Initialize schema representation from an existing bucket document
Diffstat (limited to 'src/mongo/db')
-rw-r--r-- src/mongo/db/timeseries/bucket_catalog_helpers.cpp 41
-rw-r--r-- src/mongo/db/timeseries/bucket_catalog_helpers.h 9
-rw-r--r-- src/mongo/db/timeseries/bucket_catalog_helpers_test.cpp 104
-rw-r--r-- src/mongo/db/timeseries/flat_bson.cpp 45
-rw-r--r-- src/mongo/db/timeseries/flat_bson.h 10
5 files changed, 204 insertions, 5 deletions
diff --git a/src/mongo/db/timeseries/bucket_catalog_helpers.cpp b/src/mongo/db/timeseries/bucket_catalog_helpers.cpp
index da17039f7e1..cc5bc65f3c1 100644
--- a/src/mongo/db/timeseries/bucket_catalog_helpers.cpp
+++ b/src/mongo/db/timeseries/bucket_catalog_helpers.cpp
@@ -33,8 +33,9 @@
namespace mongo::timeseries {
-StatusWith<MinMax> generateMinMaxFromBucketDoc(const BSONObj& bucketDoc,
- const CollatorInterface* collator) {
+namespace {
+
+StatusWith<std::pair<const BSONObj, const BSONObj>> extractMinAndMax(const BSONObj& bucketDoc) {
const BSONObj& controlObj = bucketDoc.getObjectField(kBucketControlFieldName);
if (controlObj.isEmpty()) {
return {ErrorCodes::BadValue,
@@ -50,7 +51,41 @@ StatusWith<MinMax> generateMinMaxFromBucketDoc(const BSONObj& bucketDoc,
<< redact(bucketDoc)};
}
- return MinMax::parseFromBSON(minObj, maxObj, collator);
+ return std::make_pair(minObj, maxObj);
+}
+
+} // namespace
+
+StatusWith<MinMax> generateMinMaxFromBucketDoc(const BSONObj& bucketDoc,
+ const CollatorInterface* collator) {  // Status-returning wrapper around MinMax::parseFromBSON.
+ auto swDocs = extractMinAndMax(bucketDoc);  // min/max objects taken from the bucket's control field
+ if (!swDocs.isOK()) {
+ return swDocs.getStatus();  // extraction failed: hand the error back unchanged
+ }
+
+ const auto& [minObj, maxObj] = swDocs.getValue();
+
+ try {
+ return MinMax::parseFromBSON(minObj, maxObj, collator);
+ } catch (...) {
+ return exceptionToStatus();  // parseFromBSON reports a failed update via uassert; surface it as a Status
+ }
+}
+
+StatusWith<Schema> generateSchemaFromBucketDoc(const BSONObj& bucketDoc,
+ const CollatorInterface* collator) {  // Status-returning wrapper around Schema::parseFromBSON.
+ auto swDocs = extractMinAndMax(bucketDoc);  // min/max objects taken from the bucket's control field
+ if (!swDocs.isOK()) {
+ return swDocs.getStatus();  // extraction failed: hand the error back unchanged
+ }
+
+ const auto& [minObj, maxObj] = swDocs.getValue();
+
+ try {
+ return Schema::parseFromBSON(minObj, maxObj, collator);
+ } catch (...) {
+ return exceptionToStatus();  // parseFromBSON reports a failed update via uassert; surface it as a Status
+ }
}
} // namespace mongo::timeseries
diff --git a/src/mongo/db/timeseries/bucket_catalog_helpers.h b/src/mongo/db/timeseries/bucket_catalog_helpers.h
index 8af35376748..015cae8ef66 100644
--- a/src/mongo/db/timeseries/bucket_catalog_helpers.h
+++ b/src/mongo/db/timeseries/bucket_catalog_helpers.h
@@ -45,4 +45,13 @@ namespace mongo::timeseries {
StatusWith<MinMax> generateMinMaxFromBucketDoc(const BSONObj& bucketDoc,
const CollatorInterface* collator);
+/**
+ * Generates and returns a Schema object from an existing bucket document. Avoids unpacking the
+ * bucket document and relies on the control.min and control.max summary fields.
+ *
+ * 'collator' is forwarded to Schema::parseFromBSON as the string comparator.
+ *
+ * Returns a bad status if the bucket document is malformed or contains mixed schema measurements.
+ */
+StatusWith<Schema> generateSchemaFromBucketDoc(const BSONObj& bucketDoc,
+ const CollatorInterface* collator);
+
} // namespace mongo::timeseries
diff --git a/src/mongo/db/timeseries/bucket_catalog_helpers_test.cpp b/src/mongo/db/timeseries/bucket_catalog_helpers_test.cpp
index 74c2e3ce442..97f29ddf6e5 100644
--- a/src/mongo/db/timeseries/bucket_catalog_helpers_test.cpp
+++ b/src/mongo/db/timeseries/bucket_catalog_helpers_test.cpp
@@ -179,5 +179,109 @@ TEST_F(BucketCatalogHelpersTest, GenerateMinMaxSucceedsWithMixedSchemaBucketDocu
}
}
+TEST_F(BucketCatalogHelpersTest, GenerateSchemaFailsWithMixedSchemaBucketDocumentTest) {
+ ASSERT_OK(createCollection(operationContext(),
+ kNss.db().toString(),
+ BSON("create" << kNss.coll() << "timeseries"
+ << BSON("timeField"
+ << "time"))));
+
+ AutoGetCollection autoColl(operationContext(), kNss.makeTimeseriesBucketsNamespace(), MODE_IS);
+ const CollatorInterface* collator = autoColl->getDefaultCollator();
+ // In every bucket below, control.min and control.max hold different BSON types for field 'a'.
+ std::vector<BSONObj> docs = {::mongo::fromjson(R"({control:{min: {a: 1}, max: {a: {}}}})"),
+ ::mongo::fromjson(R"({control:{min: {a: {}}, max: {a: 1}}})"),
+ ::mongo::fromjson(R"({control:{min: {a: []}, max: {a: {}}}})"),
+ ::mongo::fromjson(R"({control:{min: {a: 1}, max: {a: "foo"}}})")};
+ // Schema generation must return a non-OK status for each of them.
+ for (const BSONObj& doc : docs) {
+ StatusWith<timeseries::Schema> swSchema =
+ timeseries::generateSchemaFromBucketDoc(doc, collator);
+ ASSERT_NOT_OK(swSchema.getStatus());
+ }
+}
+
+TEST_F(BucketCatalogHelpersTest, GenerateSchemaWithInvalidMeasurementsTest) {
+ ASSERT_OK(createCollection(operationContext(),
+ kNss.db().toString(),
+ BSON("create" << kNss.coll() << "timeseries"
+ << BSON("timeField"
+ << "time"))));
+
+ AutoGetCollection autoColl(operationContext(), kNss.makeTimeseriesBucketsNamespace(), MODE_IS);
+ const CollatorInterface* collator = autoColl->getDefaultCollator();
+
+ // First item: Bucket document to generate the schema representation of.
+ // Second item: measurement that is incompatible with the generated schema.
+ std::vector<std::pair<BSONObj, BSONObj>> docs = {
+ {::mongo::fromjson(R"({control:{min: {a: 1}, max: {a: 2}}})"),
+ ::mongo::fromjson(R"({a: {}})")},
+ {::mongo::fromjson(R"({control:{min: {a: 1}, max: {a: 2}}})"),
+ ::mongo::fromjson(R"({a: []})")},
+ {::mongo::fromjson(R"({control:{min: {a: 1}, max: {a: 2}}})"),
+ ::mongo::fromjson(R"({a: "1"})")},
+ {::mongo::fromjson(R"({control:{min: {a: "a"}, max: {a: "aa"}}})"),
+ ::mongo::fromjson(R"({a: 123})")},
+ {::mongo::fromjson(R"({control:{min: {a: [1, 2, 3]}, max: {a: [4, 5, 6]}}})"),
+ ::mongo::fromjson(R"({a: {}})")},
+ {::mongo::fromjson(R"({control:{min: {a: [1, 2, 3]}, max: {a: [4, 5, 6]}}})"),
+ ::mongo::fromjson(R"({a: 123})")},
+ {::mongo::fromjson(R"({control:{min: {a: [1, 2, 3]}, max: {a: [4, 5, 6]}}})"),
+ ::mongo::fromjson(R"({a: "abc"})")},
+ {::mongo::fromjson(R"({control:{min: {a: {b: 1}}, max: {a: {b: 2}}}})"),
+ ::mongo::fromjson(R"({a: []})")},
+ {::mongo::fromjson(R"({control:{min: {a: {b: 1}}, max: {a: {b: 2}}}})"),
+ ::mongo::fromjson(R"({a: {b: "abc"}})")},
+ {::mongo::fromjson(R"({control:{min: {a: {b: 1}}, max: {a: {b: 2}}}})"),
+ ::mongo::fromjson(R"({a: {b: []}})")}};
+ // The schema itself is generated successfully; updating it with the measurement must fail.
+ for (const auto& [minMaxDoc, measurementDoc] : docs) {
+ StatusWith<timeseries::Schema> swSchema =
+ timeseries::generateSchemaFromBucketDoc(minMaxDoc, collator);
+ ASSERT_OK(swSchema.getStatus());
+
+ timeseries::Schema schema = std::move(swSchema.getValue());
+
+ auto result = schema.update(measurementDoc, /*metaField=*/boost::none, collator);
+ ASSERT(result == timeseries::Schema::UpdateStatus::Failed);
+ }
+}
+
+TEST_F(BucketCatalogHelpersTest, GenerateSchemaWithValidMeasurementsTest) {
+ ASSERT_OK(createCollection(operationContext(),
+ kNss.db().toString(),
+ BSON("create" << kNss.coll() << "timeseries"
+ << BSON("timeField"
+ << "time"))));
+
+ AutoGetCollection autoColl(operationContext(), kNss.makeTimeseriesBucketsNamespace(), MODE_IS);
+ const CollatorInterface* collator = autoColl->getDefaultCollator();
+
+ // First item: Bucket document to generate the schema representation of.
+ // Second item: measurement that is compatible with the generated schema.
+ std::vector<std::pair<BSONObj, BSONObj>> docs = {
+ {::mongo::fromjson(R"({control:{min: {a: 1}, max: {a: 2}}})"),
+ ::mongo::fromjson(R"({a: 1})")},
+ {::mongo::fromjson(R"({control:{min: {a: 1}, max: {a: 2}}})"),
+ ::mongo::fromjson(R"({a: 5})")},
+ {::mongo::fromjson(R"({control:{min: {a: "a"}, max: {a: "aa"}}})"),
+ ::mongo::fromjson(R"({a: "aaa"})")},
+ {::mongo::fromjson(R"({control:{min: {a: [1, 2, 3]}, max: {a: [4, 5, 6]}}})"),
+ ::mongo::fromjson(R"({a: [7, 8, 9]})")},
+ {::mongo::fromjson(R"({control:{min: {a: {b: 1}}, max: {a: {b: 2}}}})"),
+ ::mongo::fromjson(R"({a: {b: 3}})")}};
+ // Schema generation must succeed, and updating with the measurement must report Updated.
+ for (const auto& [minMaxDoc, measurementDoc] : docs) {
+ StatusWith<timeseries::Schema> swSchema =
+ timeseries::generateSchemaFromBucketDoc(minMaxDoc, collator);
+ ASSERT_OK(swSchema.getStatus());
+
+ timeseries::Schema schema = std::move(swSchema.getValue());
+
+ auto result = schema.update(measurementDoc, /*metaField=*/boost::none, collator);
+ ASSERT(result == timeseries::Schema::UpdateStatus::Updated);
+ }
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/timeseries/flat_bson.cpp b/src/mongo/db/timeseries/flat_bson.cpp
index 7160abe46c9..ba4b7caeeec 100644
--- a/src/mongo/db/timeseries/flat_bson.cpp
+++ b/src/mongo/db/timeseries/flat_bson.cpp
@@ -340,6 +340,21 @@ FlatBSONStore<Element, Value>::FlatBSONStore() {
}
template <class Derived, class Element, class Value>
+typename std::string FlatBSON<Derived, Element, Value>::updateStatusString(
+ UpdateStatus updateStatus) {  // Text form of an UpdateStatus, used when building error messages.
+ switch (updateStatus) {
+ case UpdateStatus::Updated:
+ return "updated";
+ case UpdateStatus::Failed:
+ return "failed";
+ case UpdateStatus::NoChange:
+ return "no change";
+ }
+ // Each enumerator returns inside the switch, so control never gets here for a valid value.
+ MONGO_UNREACHABLE;
+}
+
+template <class Derived, class Element, class Value>
typename FlatBSON<Derived, Element, Value>::UpdateStatus FlatBSON<Derived, Element, Value>::update(
const BSONObj& doc,
boost::optional<StringData> omitField,
@@ -849,8 +864,15 @@ MinMax MinMax::parseFromBSON(const BSONObj& min,
MinMax minmax;
// The metadata field is already excluded from generated min/max summaries.
- minmax.update(min, /*metaField=*/boost::none, stringComparator);
- minmax.update(max, /*metaField=*/boost::none, stringComparator);
+ UpdateStatus status = minmax.update(min, /*metaField=*/boost::none, stringComparator);
+ uassert(ErrorCodes::BadValue,
+ str::stream() << "Failed to update min: " << updateStatusString(status),
+ status != UpdateStatus::Failed);
+
+ status = minmax.update(max, /*metaField=*/boost::none, stringComparator);
+ uassert(ErrorCodes::BadValue,
+ str::stream() << "Failed to update max: " << updateStatusString(status),
+ status != UpdateStatus::Failed);
// Clear the updated state as we're only constructing the object from an existing document.
[[maybe_unused]] auto minUpdates = minmax.minUpdates();
@@ -871,6 +893,25 @@ const SchemaStore::Data& SchemaElement::data() const {
return _data;
}
+Schema Schema::parseFromBSON(const BSONObj& min,
+ const BSONObj& max,
+ const StringData::ComparatorInterface* stringComparator) {
+ Schema schema;  // populated below from 'min' first, then 'max'
+
+ // The metadata field is already excluded from generated min/max summaries.
+ UpdateStatus status = schema.update(min, /*metaField=*/boost::none, stringComparator);
+ uassert(ErrorCodes::BadValue,
+ str::stream() << "Failed to update min: " << updateStatusString(status),
+ status != UpdateStatus::Failed);  // only Failed is an error; Updated and NoChange both pass
+
+ status = schema.update(max, /*metaField=*/boost::none, stringComparator);
+ uassert(ErrorCodes::BadValue,
+ str::stream() << "Failed to update max: " << updateStatusString(status),
+ status != UpdateStatus::Failed);  // same rule for the second document
+
+ return schema;
+}
+
std::pair<Schema::UpdateStatus, SchemaElement::UpdateContext> Schema::_shouldUpdateObj(
SchemaStore::Obj& obj, const BSONElement& elem, SchemaElement::UpdateContext) {
UpdateStatus status{UpdateStatus::Updated};
diff --git a/src/mongo/db/timeseries/flat_bson.h b/src/mongo/db/timeseries/flat_bson.h
index 47db34b6933..8ea65aec0d4 100644
--- a/src/mongo/db/timeseries/flat_bson.h
+++ b/src/mongo/db/timeseries/flat_bson.h
@@ -272,6 +272,8 @@ template <class Derived, class Element, class Value>
class FlatBSON {
public:
enum class UpdateStatus { Updated, Failed, NoChange };
+ static std::string updateStatusString(UpdateStatus updateStatus);  // "updated", "failed" or "no change"
+
/**
* Updates the stored fields provided by 'doc', ignoring the 'metaField' field.
*/
@@ -507,6 +509,14 @@ private:
class Schema : public FlatBSON<Schema, SchemaElement, BSONTypeValue> {
friend class FlatBSON<Schema, SchemaElement, BSONTypeValue>;
+public:
+ /**
+ * Generates and returns a Schema object from the passed in min and max documents.
+ *
+ * Fails a uassert with ErrorCodes::BadValue if updating the schema from either document fails.
+ */
+ static Schema parseFromBSON(const BSONObj& min,
+ const BSONObj& max,
+ const StringData::ComparatorInterface* stringComparator);
+
protected:
static std::pair<UpdateStatus, typename SchemaElement::UpdateContext> _shouldUpdateObj(
SchemaStore::Obj& obj, const BSONElement& elem, SchemaElement::UpdateContext updateContext);