diff options
author | Kris Satya <kris.satya@mongodb.com> | 2021-06-14 14:48:10 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-06-15 21:24:14 +0000 |
commit | 6d66d9116bdee519bd42123a5ae30439c54dc213 (patch) | |
tree | c01ccbb1a2753d1335036d68ead5f9186f131185 | |
parent | 02feb4df5a0e4bf9ac915aaafd8842ce9c738d73 (diff) | |
download | mongo-6d66d9116bdee519bd42123a5ae30439c54dc213.tar.gz |
SERVER-56492 Normalize objects within arrays in time-series metadata
-rw-r--r-- | jstests/core/timeseries/timeseries_metadata.js | 33 | ||||
-rw-r--r-- | src/mongo/db/timeseries/bucket_catalog.cpp | 36 | ||||
-rw-r--r-- | src/mongo/db/timeseries/bucket_catalog_test.cpp | 94 |
3 files changed, 157 insertions, 6 deletions
diff --git a/jstests/core/timeseries/timeseries_metadata.js b/jstests/core/timeseries/timeseries_metadata.js index 21047da3826..9fe8de0cf0e 100644 --- a/jstests/core/timeseries/timeseries_metadata.js +++ b/jstests/core/timeseries/timeseries_metadata.js @@ -140,5 +140,38 @@ TimeseriesTest.run((insert) => { [ {_id: 3, time: t[3], meta: {a: 1}, x: 30}, ]); + + runTest( + // Metadata field contains an array within an object. + [ + {_id: 0, time: t[0], meta: {a: [{b: 1, c: 0}]}, x: 0}, + {_id: 1, time: t[1], meta: {a: [{c: 0, b: 1}]}, x: 10}, + ], + [ + {_id: 2, time: t[2], meta: {a: [{b: 2, c: 0}]}, x: 20}, + {_id: 3, time: t[3], meta: {a: [{c: 0, b: 2}]}, x: 30}, + ]); + + runTest( + // Metadata field contains a nested array. + [ + {_id: 0, time: t[0], meta: {a: [{b: 1, c: 0}, [{e: 1, f: 0}]]}, x: 0}, + {_id: 1, time: t[1], meta: {a: [{c: 0, b: 1}, [{f: 0, e: 1}]]}, x: 10}, + ], + [ + {_id: 2, time: t[2], meta: {a: [[{e: 1, f: 0}], {b: 1, c: 0}]}, x: 20}, + {_id: 3, time: t[3], meta: {a: [[{f: 0, e: 1}], {c: 0, b: 1}]}, x: 30}, + ]); + + runTest( + // Metadata field contains an array. + [ + {_id: 0, time: t[0], meta: {a: [1, 2, 3]}, x: 0}, + {_id: 1, time: t[1], meta: {a: [1, 2, 3]}, x: 10}, + ], + [ + {_id: 2, time: t[2], meta: {a: [2, 1, 3]}, x: 20}, + {_id: 3, time: t[3], meta: {a: [2, 1, 3]}, x: 30}, + ]); }); })(); diff --git a/src/mongo/db/timeseries/bucket_catalog.cpp b/src/mongo/db/timeseries/bucket_catalog.cpp index 4c579b49be2..cee6c1ad4db 100644 --- a/src/mongo/db/timeseries/bucket_catalog.cpp +++ b/src/mongo/db/timeseries/bucket_catalog.cpp @@ -46,6 +46,10 @@ namespace mongo { namespace { + +void normalizeArray(BSONArrayBuilder* builder, const BSONObj& obj); +void normalizeObject(BSONObjBuilder* builder, const BSONObj& obj); + const auto getBucketCatalog = ServiceContext::declareDecoration<BucketCatalog>(); MONGO_FAIL_POINT_DEFINE(hangTimeseriesDirectModificationBeforeWriteConflict); @@ -58,6 +62,20 @@ uint8_t numDigits(uint32_t num) { return numDigits; } +void normalizeArray(BSONArrayBuilder* builder, const BSONObj& obj) { + for (auto& arrayElem : obj) { + if (arrayElem.type() == BSONType::Array) { + BSONArrayBuilder subArray = builder->subarrayStart(); + normalizeArray(&subArray, arrayElem.Obj()); + } else if (arrayElem.type() == BSONType::Object) { + BSONObjBuilder subObject = builder->subobjStart(); + normalizeObject(&subObject, arrayElem.Obj()); + } else { + builder->append(arrayElem); + } + } +} + void normalizeObject(BSONObjBuilder* builder, const BSONObj& obj) { // BSONObjIteratorSorted provides an abstraction similar to what this function does. However it // is using a lexical comparison that is slower than just doing a binary comparison of the field @@ -96,21 +114,27 @@ void normalizeObject(BSONObjBuilder* builder, const BSONObj& obj) { std::sort(it, end); for (; it != end; ++it) { auto elem = it->element(); - if (elem.type() != BSONType::Object) { - builder->append(elem); - } else { + if (elem.type() == BSONType::Array) { + BSONArrayBuilder subArray(builder->subarrayStart(elem.fieldNameStringData())); + normalizeArray(&subArray, elem.Obj()); + } else if (elem.type() == BSONType::Object) { BSONObjBuilder subObject(builder->subobjStart(elem.fieldNameStringData())); normalizeObject(&subObject, elem.Obj()); + } else { + builder->append(elem); } } } void normalizeTopLevel(BSONObjBuilder* builder, const BSONElement& elem) { - if (elem.type() != BSONType::Object) { - builder->append(elem); - } else { + if (elem.type() == BSONType::Array) { + BSONArrayBuilder subArray(builder->subarrayStart(elem.fieldNameStringData())); + normalizeArray(&subArray, elem.Obj()); + } else if (elem.type() == BSONType::Object) { BSONObjBuilder subObject(builder->subobjStart(elem.fieldNameStringData())); normalizeObject(&subObject, elem.Obj()); + } else { + builder->append(elem); } } diff --git a/src/mongo/db/timeseries/bucket_catalog_test.cpp b/src/mongo/db/timeseries/bucket_catalog_test.cpp index 6414a52fca6..5c6f23baf34 100644 --- a/src/mongo/db/timeseries/bucket_catalog_test.cpp +++ b/src/mongo/db/timeseries/bucket_catalog_test.cpp @@ -271,6 +271,100 @@ TEST_F(BucketCatalogTest, InsertIntoDifferentBuckets) { } } +TEST_F(BucketCatalogTest, InsertIntoSameBucketArray) { + auto result1 = _bucketCatalog->insert( + _opCtx, + _ns1, + _getCollator(_ns1), + _getTimeseriesOptions(_ns1), + BSON(_timeField << Date_t::now() << _metaField << BSON_ARRAY(BSON("a" << 0 << "b" << 1))), + BucketCatalog::CombineWithInsertsFromOtherClients::kAllow); + auto result2 = _bucketCatalog->insert( + _opCtx, + _ns1, + _getCollator(_ns1), + _getTimeseriesOptions(_ns1), + BSON(_timeField << Date_t::now() << _metaField << BSON_ARRAY(BSON("b" << 1 << "a" << 0))), + BucketCatalog::CombineWithInsertsFromOtherClients::kAllow); + + ASSERT_EQ(result1.getValue(), result2.getValue()); + + // Check metadata in buckets. + ASSERT_BSONOBJ_EQ(BSON(_metaField << BSON_ARRAY(BSON("a" << 0 << "b" << 1))), + _bucketCatalog->getMetadata(result1.getValue()->bucket())); + ASSERT_BSONOBJ_EQ(BSON(_metaField << BSON_ARRAY(BSON("a" << 0 << "b" << 1))), + _bucketCatalog->getMetadata(result2.getValue()->bucket())); +} + +TEST_F(BucketCatalogTest, InsertIntoSameBucketObjArray) { + auto result1 = _bucketCatalog->insert( + _opCtx, + _ns1, + _getCollator(_ns1), + _getTimeseriesOptions(_ns1), + BSON(_timeField << Date_t::now() << _metaField + << BSONObj(BSON("c" << BSON_ARRAY(BSON("a" << 0 << "b" << 1) + << BSON("f" << 1 << "g" << 0))))), + BucketCatalog::CombineWithInsertsFromOtherClients::kAllow); + auto result2 = _bucketCatalog->insert( + _opCtx, + _ns1, + _getCollator(_ns1), + _getTimeseriesOptions(_ns1), + BSON(_timeField << Date_t::now() << _metaField + << BSONObj(BSON("c" << BSON_ARRAY(BSON("b" << 1 << "a" << 0) + << BSON("g" << 0 << "f" << 1))))), + BucketCatalog::CombineWithInsertsFromOtherClients::kAllow); + + ASSERT_EQ(result1.getValue(), result2.getValue()); + + // Check metadata in buckets. + ASSERT_BSONOBJ_EQ( + BSON(_metaField << BSONObj(BSON( + "c" << BSON_ARRAY(BSON("a" << 0 << "b" << 1) << BSON("f" << 1 << "g" << 0))))), + _bucketCatalog->getMetadata(result1.getValue()->bucket())); + ASSERT_BSONOBJ_EQ( + BSON(_metaField << BSONObj(BSON( + "c" << BSON_ARRAY(BSON("a" << 0 << "b" << 1) << BSON("f" << 1 << "g" << 0))))), + _bucketCatalog->getMetadata(result2.getValue()->bucket())); +} + + +TEST_F(BucketCatalogTest, InsertIntoSameBucketNestedArray) { + auto result1 = _bucketCatalog->insert( + _opCtx, + _ns1, + _getCollator(_ns1), + _getTimeseriesOptions(_ns1), + BSON(_timeField << Date_t::now() << _metaField + << BSONObj(BSON("c" << BSON_ARRAY(BSON("a" << 0 << "b" << 1) + << BSON_ARRAY("123" + << "456"))))), + BucketCatalog::CombineWithInsertsFromOtherClients::kAllow); + auto result2 = _bucketCatalog->insert( + _opCtx, + _ns1, + _getCollator(_ns1), + _getTimeseriesOptions(_ns1), + BSON(_timeField << Date_t::now() << _metaField + << BSONObj(BSON("c" << BSON_ARRAY(BSON("b" << 1 << "a" << 0) + << BSON_ARRAY("123" + << "456"))))), + BucketCatalog::CombineWithInsertsFromOtherClients::kAllow); + + ASSERT_EQ(result1.getValue(), result2.getValue()); + + // Check metadata in buckets. + ASSERT_BSONOBJ_EQ(BSON(_metaField << BSONObj(BSON("c" << BSON_ARRAY(BSON("a" << 0 << "b" << 1) + << BSON_ARRAY("123" + << "456"))))), + _bucketCatalog->getMetadata(result1.getValue()->bucket())); + ASSERT_BSONOBJ_EQ(BSON(_metaField << BSONObj(BSON("c" << BSON_ARRAY(BSON("a" << 0 << "b" << 1) + << BSON_ARRAY("123" + << "456"))))), + _bucketCatalog->getMetadata(result2.getValue()->bucket())); +} + TEST_F(BucketCatalogTest, InsertNullAndMissingMetaFieldIntoDifferentBuckets) { auto result1 = _bucketCatalog->insert(_opCtx, |