diff options
author | Gregory Wlodarek <gregory.wlodarek@mongodb.com> | 2022-05-26 22:03:01 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-05-26 22:42:33 +0000 |
commit | 21305d9aedb10717eeeb997ff49d28d42457b120 (patch) | |
tree | 98a0e9361f453f86f4c797611a08d9ce6f5811a4 /src/mongo | |
parent | 4fe31381927908464d79624aa748bcf30006eb79 (diff) | |
download | mongo-21305d9aedb10717eeeb997ff49d28d42457b120.tar.gz |
SERVER-66681 Initialize schema representation from an existing bucket document
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/db/timeseries/bucket_catalog_helpers.cpp | 41 |
-rw-r--r-- | src/mongo/db/timeseries/bucket_catalog_helpers.h | 9 |
-rw-r--r-- | src/mongo/db/timeseries/bucket_catalog_helpers_test.cpp | 104 |
-rw-r--r-- | src/mongo/db/timeseries/flat_bson.cpp | 45 |
-rw-r--r-- | src/mongo/db/timeseries/flat_bson.h | 10 |
5 files changed, 204 insertions, 5 deletions
diff --git a/src/mongo/db/timeseries/bucket_catalog_helpers.cpp b/src/mongo/db/timeseries/bucket_catalog_helpers.cpp
index da17039f7e1..cc5bc65f3c1 100644
--- a/src/mongo/db/timeseries/bucket_catalog_helpers.cpp
+++ b/src/mongo/db/timeseries/bucket_catalog_helpers.cpp
@@ -33,8 +33,9 @@
 
 namespace mongo::timeseries {
 
-StatusWith<MinMax> generateMinMaxFromBucketDoc(const BSONObj& bucketDoc,
-                                               const CollatorInterface* collator) {
+namespace {
+
+StatusWith<std::pair<const BSONObj, const BSONObj>> extractMinAndMax(const BSONObj& bucketDoc) {
     const BSONObj& controlObj = bucketDoc.getObjectField(kBucketControlFieldName);
     if (controlObj.isEmpty()) {
         return {ErrorCodes::BadValue,
@@ -50,7 +51,41 @@ StatusWith<MinMax> generateMinMaxFromBucketDoc(const BSONObj& bucketDoc,
                               << redact(bucketDoc)};
     }
 
-    return MinMax::parseFromBSON(minObj, maxObj, collator);
+    return std::make_pair(minObj, maxObj);
+}
+
+}  // namespace
+
+StatusWith<MinMax> generateMinMaxFromBucketDoc(const BSONObj& bucketDoc,
+                                               const CollatorInterface* collator) {
+    auto swDocs = extractMinAndMax(bucketDoc);
+    if (!swDocs.isOK()) {
+        return swDocs.getStatus();
+    }
+
+    const auto& [minObj, maxObj] = swDocs.getValue();
+
+    try {
+        return MinMax::parseFromBSON(minObj, maxObj, collator);
+    } catch (...) {
+        return exceptionToStatus();
+    }
+}
+
+StatusWith<Schema> generateSchemaFromBucketDoc(const BSONObj& bucketDoc,
+                                               const CollatorInterface* collator) {
+    auto swDocs = extractMinAndMax(bucketDoc);
+    if (!swDocs.isOK()) {
+        return swDocs.getStatus();
+    }
+
+    const auto& [minObj, maxObj] = swDocs.getValue();
+
+    try {
+        return Schema::parseFromBSON(minObj, maxObj, collator);
+    } catch (...) {
+        return exceptionToStatus();
+    }
 }
 
 }  // namespace mongo::timeseries
diff --git a/src/mongo/db/timeseries/bucket_catalog_helpers.h b/src/mongo/db/timeseries/bucket_catalog_helpers.h
index 8af35376748..015cae8ef66 100644
--- a/src/mongo/db/timeseries/bucket_catalog_helpers.h
+++ b/src/mongo/db/timeseries/bucket_catalog_helpers.h
@@ -45,4 +45,13 @@ namespace mongo::timeseries {
 StatusWith<MinMax> generateMinMaxFromBucketDoc(const BSONObj& bucketDoc,
                                                const CollatorInterface* collator);
 
+/**
+ * Generates and returns a Schema object from an existing bucket document. Avoids unpacking the
+ * bucket document and relies on the control.min and control.max summary fields.
+ *
+ * Returns a bad status if the bucket document is malformed or contains mixed schema measurements.
+ */
+StatusWith<Schema> generateSchemaFromBucketDoc(const BSONObj& bucketDoc,
+                                               const CollatorInterface* collator);
+
 }  // namespace mongo::timeseries
diff --git a/src/mongo/db/timeseries/bucket_catalog_helpers_test.cpp b/src/mongo/db/timeseries/bucket_catalog_helpers_test.cpp
index 74c2e3ce442..97f29ddf6e5 100644
--- a/src/mongo/db/timeseries/bucket_catalog_helpers_test.cpp
+++ b/src/mongo/db/timeseries/bucket_catalog_helpers_test.cpp
@@ -179,5 +179,109 @@ TEST_F(BucketCatalogHelpersTest, GenerateMinMaxSucceedsWithMixedSchemaBucketDocu
     }
 }
 
+TEST_F(BucketCatalogHelpersTest, GenerateSchemaFailsWithMixedSchemaBucketDocumentTest) {
+    ASSERT_OK(createCollection(operationContext(),
+                               kNss.db().toString(),
+                               BSON("create" << kNss.coll() << "timeseries"
+                                             << BSON("timeField"
+                                                     << "time"))));
+
+    AutoGetCollection autoColl(operationContext(), kNss.makeTimeseriesBucketsNamespace(), MODE_IS);
+    const CollatorInterface* collator = autoColl->getDefaultCollator();
+
+    std::vector<BSONObj> docs = {::mongo::fromjson(R"({control:{min: {a: 1}, max: {a: {}}}})"),
+                                 ::mongo::fromjson(R"({control:{min: {a: {}}, max: {a: 1}}})"),
+                                 ::mongo::fromjson(R"({control:{min: {a: []}, max: {a: {}}}})"),
+                                 ::mongo::fromjson(R"({control:{min: {a: 1}, max: {a: "foo"}}})")};
+
+    for (const BSONObj& doc : docs) {
+        StatusWith<timeseries::Schema> swSchema =
+            timeseries::generateSchemaFromBucketDoc(doc, collator);
+        ASSERT_NOT_OK(swSchema.getStatus());
+    }
+}
+
+TEST_F(BucketCatalogHelpersTest, GenerateSchemaWithInvalidMeasurementsTest) {
+    ASSERT_OK(createCollection(operationContext(),
+                               kNss.db().toString(),
+                               BSON("create" << kNss.coll() << "timeseries"
+                                             << BSON("timeField"
+                                                     << "time"))));
+
+    AutoGetCollection autoColl(operationContext(), kNss.makeTimeseriesBucketsNamespace(), MODE_IS);
+    const CollatorInterface* collator = autoColl->getDefaultCollator();
+
+    // First item: Bucket document to generate the schema representation of.
+    // Second item: measurement that is incompatible with the generated schema.
+    std::vector<std::pair<BSONObj, BSONObj>> docs = {
+        {::mongo::fromjson(R"({control:{min: {a: 1}, max: {a: 2}}})"),
+         ::mongo::fromjson(R"({a: {}})")},
+        {::mongo::fromjson(R"({control:{min: {a: 1}, max: {a: 2}}})"),
+         ::mongo::fromjson(R"({a: []})")},
+        {::mongo::fromjson(R"({control:{min: {a: 1}, max: {a: 2}}})"),
+         ::mongo::fromjson(R"({a: "1"})")},
+        {::mongo::fromjson(R"({control:{min: {a: "a"}, max: {a: "aa"}}})"),
+         ::mongo::fromjson(R"({a: 123})")},
+        {::mongo::fromjson(R"({control:{min: {a: [1, 2, 3]}, max: {a: [4, 5, 6]}}})"),
+         ::mongo::fromjson(R"({a: {}})")},
+        {::mongo::fromjson(R"({control:{min: {a: [1, 2, 3]}, max: {a: [4, 5, 6]}}})"),
+         ::mongo::fromjson(R"({a: 123})")},
+        {::mongo::fromjson(R"({control:{min: {a: [1, 2, 3]}, max: {a: [4, 5, 6]}}})"),
+         ::mongo::fromjson(R"({a: "abc"})")},
+        {::mongo::fromjson(R"({control:{min: {a: {b: 1}}, max: {a: {b: 2}}}})"),
+         ::mongo::fromjson(R"({a: []})")},
+        {::mongo::fromjson(R"({control:{min: {a: {b: 1}}, max: {a: {b: 2}}}})"),
+         ::mongo::fromjson(R"({a: {b: "abc"}})")},
+        {::mongo::fromjson(R"({control:{min: {a: {b: 1}}, max: {a: {b: 2}}}})"),
+         ::mongo::fromjson(R"({a: {b: []}})")}};
+
+    for (const auto& [minMaxDoc, measurementDoc] : docs) {
+        StatusWith<timeseries::Schema> swSchema =
+            timeseries::generateSchemaFromBucketDoc(minMaxDoc, collator);
+        ASSERT_OK(swSchema.getStatus());
+
+        timeseries::Schema schema = std::move(swSchema.getValue());
+
+        auto result = schema.update(measurementDoc, /*metaField=*/boost::none, collator);
+        ASSERT(result == timeseries::Schema::UpdateStatus::Failed);
+    }
+}
+
+TEST_F(BucketCatalogHelpersTest, GenerateSchemaWithValidMeasurementsTest) {
+    ASSERT_OK(createCollection(operationContext(),
+                               kNss.db().toString(),
+                               BSON("create" << kNss.coll() << "timeseries"
+                                             << BSON("timeField"
+                                                     << "time"))));
+
+    AutoGetCollection autoColl(operationContext(), kNss.makeTimeseriesBucketsNamespace(), MODE_IS);
+    const CollatorInterface* collator = autoColl->getDefaultCollator();
+
+    // First item: Bucket document to generate the schema representation of.
+    // Second item: measurement that is compatible with the generated schema.
+    std::vector<std::pair<BSONObj, BSONObj>> docs = {
+        {::mongo::fromjson(R"({control:{min: {a: 1}, max: {a: 2}}})"),
+         ::mongo::fromjson(R"({a: 1})")},
+        {::mongo::fromjson(R"({control:{min: {a: 1}, max: {a: 2}}})"),
+         ::mongo::fromjson(R"({a: 5})")},
+        {::mongo::fromjson(R"({control:{min: {a: "a"}, max: {a: "aa"}}})"),
+         ::mongo::fromjson(R"({a: "aaa"})")},
+        {::mongo::fromjson(R"({control:{min: {a: [1, 2, 3]}, max: {a: [4, 5, 6]}}})"),
+         ::mongo::fromjson(R"({a: [7, 8, 9]})")},
+        {::mongo::fromjson(R"({control:{min: {a: {b: 1}}, max: {a: {b: 2}}}})"),
+         ::mongo::fromjson(R"({a: {b: 3}})")}};
+
+    for (const auto& [minMaxDoc, measurementDoc] : docs) {
+        StatusWith<timeseries::Schema> swSchema =
+            timeseries::generateSchemaFromBucketDoc(minMaxDoc, collator);
+        ASSERT_OK(swSchema.getStatus());
+
+        timeseries::Schema schema = std::move(swSchema.getValue());
+
+        auto result = schema.update(measurementDoc, /*metaField=*/boost::none, collator);
+        ASSERT(result == timeseries::Schema::UpdateStatus::Updated);
+    }
+}
+
 }  // namespace
 }  // namespace mongo
diff --git a/src/mongo/db/timeseries/flat_bson.cpp b/src/mongo/db/timeseries/flat_bson.cpp
index 7160abe46c9..ba4b7caeeec 100644
--- a/src/mongo/db/timeseries/flat_bson.cpp
+++ b/src/mongo/db/timeseries/flat_bson.cpp
@@ -340,6 +340,21 @@ FlatBSONStore<Element, Value>::FlatBSONStore() {
 }
 
 template <class Derived, class Element, class Value>
+typename std::string FlatBSON<Derived, Element, Value>::updateStatusString(
+    UpdateStatus updateStatus) {
+    switch (updateStatus) {
+        case UpdateStatus::Updated:
+            return "updated";
+        case UpdateStatus::Failed:
+            return "failed";
+        case UpdateStatus::NoChange:
+            return "no change";
+    }
+
+    MONGO_UNREACHABLE;
+}
+
+template <class Derived, class Element, class Value>
 typename FlatBSON<Derived, Element, Value>::UpdateStatus FlatBSON<Derived, Element, Value>::update(
     const BSONObj& doc,
     boost::optional<StringData> omitField,
@@ -849,8 +864,15 @@ MinMax MinMax::parseFromBSON(const BSONObj& min,
     MinMax minmax;
 
     // The metadata field is already excluded from generated min/max summaries.
-    minmax.update(min, /*metaField=*/boost::none, stringComparator);
-    minmax.update(max, /*metaField=*/boost::none, stringComparator);
+    UpdateStatus status = minmax.update(min, /*metaField=*/boost::none, stringComparator);
+    uassert(ErrorCodes::BadValue,
+            str::stream() << "Failed to update min: " << updateStatusString(status),
+            status != UpdateStatus::Failed);
+
+    status = minmax.update(max, /*metaField=*/boost::none, stringComparator);
+    uassert(ErrorCodes::BadValue,
+            str::stream() << "Failed to update max: " << updateStatusString(status),
+            status != UpdateStatus::Failed);
 
     // Clear the updated state as we're only constructing the object from an existing document.
     [[maybe_unused]] auto minUpdates = minmax.minUpdates();
@@ -871,6 +893,25 @@ const SchemaStore::Data& SchemaElement::data() const {
     return _data;
 }
 
+Schema Schema::parseFromBSON(const BSONObj& min,
+                             const BSONObj& max,
+                             const StringData::ComparatorInterface* stringComparator) {
+    Schema schema;
+
+    // The metadata field is already excluded from generated min/max summaries.
+    UpdateStatus status = schema.update(min, /*metaField=*/boost::none, stringComparator);
+    uassert(ErrorCodes::BadValue,
+            str::stream() << "Failed to update min: " << updateStatusString(status),
+            status != UpdateStatus::Failed);
+
+    status = schema.update(max, /*metaField=*/boost::none, stringComparator);
+    uassert(ErrorCodes::BadValue,
+            str::stream() << "Failed to update max: " << updateStatusString(status),
+            status != UpdateStatus::Failed);
+
+    return schema;
+}
+
 std::pair<Schema::UpdateStatus, SchemaElement::UpdateContext> Schema::_shouldUpdateObj(
     SchemaStore::Obj& obj, const BSONElement& elem, SchemaElement::UpdateContext) {
     UpdateStatus status{UpdateStatus::Updated};
diff --git a/src/mongo/db/timeseries/flat_bson.h b/src/mongo/db/timeseries/flat_bson.h
index 47db34b6933..8ea65aec0d4 100644
--- a/src/mongo/db/timeseries/flat_bson.h
+++ b/src/mongo/db/timeseries/flat_bson.h
@@ -272,6 +272,8 @@ template <class Derived, class Element, class Value>
 class FlatBSON {
 public:
     enum class UpdateStatus { Updated, Failed, NoChange };
+    static std::string updateStatusString(UpdateStatus updateStatus);
+
     /**
      * Updates the stored fields provided by 'doc', ignoring the 'metaField' field.
      */
@@ -507,6 +509,14 @@ private:
 class Schema : public FlatBSON<Schema, SchemaElement, BSONTypeValue> {
     friend class FlatBSON<Schema, SchemaElement, BSONTypeValue>;
 
+public:
+    /**
+     * Generates and returns a Schema object from the passed in min and max documents.
+     */
+    static Schema parseFromBSON(const BSONObj& min,
+                                const BSONObj& max,
+                                const StringData::ComparatorInterface* stringComparator);
+
 protected:
     static std::pair<UpdateStatus, typename SchemaElement::UpdateContext> _shouldUpdateObj(
         SchemaStore::Obj& obj, const BSONElement& elem, SchemaElement::UpdateContext updateContext);