summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Kris Satya <kris.satya@mongodb.com> 2021-06-14 14:48:10 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-08-16 18:48:50 +0000
commit: 5d5aaabcc133510a24959446ad2aa6ed6ea1f04b (patch)
tree: ceb87d6b45242cd326fc9cf84cf441599739fa2b
parent: d423ad12e4d2d2b242c2cf1f8c84d389869b5e13 (diff)
download: mongo-5d5aaabcc133510a24959446ad2aa6ed6ea1f04b.tar.gz
SERVER-56492 Normalize objects within arrays in time-series metadata
(cherry picked from commit 6d66d9116bdee519bd42123a5ae30439c54dc213)
-rw-r--r-- jstests/core/timeseries/timeseries_metadata.js 33
-rw-r--r-- src/mongo/db/timeseries/bucket_catalog.cpp 36
-rw-r--r-- src/mongo/db/timeseries/bucket_catalog_test.cpp 94
3 files changed, 157 insertions, 6 deletions
diff --git a/jstests/core/timeseries/timeseries_metadata.js b/jstests/core/timeseries/timeseries_metadata.js
index 21047da3826..9fe8de0cf0e 100644
--- a/jstests/core/timeseries/timeseries_metadata.js
+++ b/jstests/core/timeseries/timeseries_metadata.js
@@ -140,5 +140,38 @@ TimeseriesTest.run((insert) => {
[
{_id: 3, time: t[3], meta: {a: 1}, x: 30},
]);
+
+ runTest(
+ // Metadata holds an array of objects; within each list the metadata differs only in key order.
+ [
+ {_id: 0, time: t[0], meta: {a: [{b: 1, c: 0}]}, x: 0},
+ {_id: 1, time: t[1], meta: {a: [{c: 0, b: 1}]}, x: 10},
+ ],
+ [
+ {_id: 2, time: t[2], meta: {a: [{b: 2, c: 0}]}, x: 20},
+ {_id: 3, time: t[3], meta: {a: [{c: 0, b: 2}]}, x: 30},
+ ]);
+
+ runTest(
+ // Array holds an object and a nested array: lists differ by element order, docs by key order.
+ [
+ {_id: 0, time: t[0], meta: {a: [{b: 1, c: 0}, [{e: 1, f: 0}]]}, x: 0},
+ {_id: 1, time: t[1], meta: {a: [{c: 0, b: 1}, [{f: 0, e: 1}]]}, x: 10},
+ ],
+ [
+ {_id: 2, time: t[2], meta: {a: [[{e: 1, f: 0}], {b: 1, c: 0}]}, x: 20},
+ {_id: 3, time: t[3], meta: {a: [[{f: 0, e: 1}], {c: 0, b: 1}]}, x: 30},
+ ]);
+
+ runTest(
+ // Metadata field contains an array of scalars; the two lists differ only in element order.
+ [
+ {_id: 0, time: t[0], meta: {a: [1, 2, 3]}, x: 0},
+ {_id: 1, time: t[1], meta: {a: [1, 2, 3]}, x: 10},
+ ],
+ [
+ {_id: 2, time: t[2], meta: {a: [2, 1, 3]}, x: 20},
+ {_id: 3, time: t[3], meta: {a: [2, 1, 3]}, x: 30},
+ ]);
});
})();
diff --git a/src/mongo/db/timeseries/bucket_catalog.cpp b/src/mongo/db/timeseries/bucket_catalog.cpp
index 653b5583117..a0353b46449 100644
--- a/src/mongo/db/timeseries/bucket_catalog.cpp
+++ b/src/mongo/db/timeseries/bucket_catalog.cpp
@@ -46,6 +46,10 @@
namespace mongo {
namespace {
+
+void normalizeArray(BSONArrayBuilder* builder, const BSONObj& obj);  // Forward declarations:
+void normalizeObject(BSONObjBuilder* builder, const BSONObj& obj);  // the two call each other.
+
const auto getBucketCatalog = ServiceContext::declareDecoration<BucketCatalog>();
MONGO_FAIL_POINT_DEFINE(hangTimeseriesDirectModificationBeforeWriteConflict);
@@ -58,6 +62,20 @@ uint8_t numDigits(uint32_t num) {
return numDigits;
}
+void normalizeArray(BSONArrayBuilder* builder, const BSONObj& obj) {  // Copies obj's elements into builder in iteration order.
+ for (auto& arrayElem : obj) {
+ if (arrayElem.type() == BSONType::Array) {  // Nested array: recurse into a sub-array.
+ BSONArrayBuilder subArray = builder->subarrayStart();
+ normalizeArray(&subArray, arrayElem.Obj());
+ } else if (arrayElem.type() == BSONType::Object) {  // Embedded object: delegate to normalizeObject.
+ BSONObjBuilder subObject = builder->subobjStart();
+ normalizeObject(&subObject, arrayElem.Obj());
+ } else {  // Any other element type is appended unchanged.
+ builder->append(arrayElem);
+ }
+ }
+}
+
void normalizeObject(BSONObjBuilder* builder, const BSONObj& obj) {
// BSONObjIteratorSorted provides an abstraction similar to what this function does. However it
// is using a lexical comparison that is slower than just doing a binary comparison of the field
@@ -96,21 +114,27 @@ void normalizeObject(BSONObjBuilder* builder, const BSONObj& obj) {
std::sort(it, end);
for (; it != end; ++it) {
auto elem = it->element();
- if (elem.type() != BSONType::Object) {
- builder->append(elem);
- } else {
+ if (elem.type() == BSONType::Array) {
+ BSONArrayBuilder subArray(builder->subarrayStart(elem.fieldNameStringData()));
+ normalizeArray(&subArray, elem.Obj());
+ } else if (elem.type() == BSONType::Object) {
BSONObjBuilder subObject(builder->subobjStart(elem.fieldNameStringData()));
normalizeObject(&subObject, elem.Obj());
+ } else {
+ builder->append(elem);
}
}
}
void normalizeTopLevel(BSONObjBuilder* builder, const BSONElement& elem) {
- if (elem.type() != BSONType::Object) {
- builder->append(elem);
- } else {
+ if (elem.type() == BSONType::Array) {  // Array value: rebuilt under the same field name via normalizeArray.
+ BSONArrayBuilder subArray(builder->subarrayStart(elem.fieldNameStringData()));
+ normalizeArray(&subArray, elem.Obj());
+ } else if (elem.type() == BSONType::Object) {  // Object value: rebuilt under the same field name via normalizeObject.
BSONObjBuilder subObject(builder->subobjStart(elem.fieldNameStringData()));
normalizeObject(&subObject, elem.Obj());
+ } else {  // Any other type is appended as-is.
+ builder->append(elem);
}
}
diff --git a/src/mongo/db/timeseries/bucket_catalog_test.cpp b/src/mongo/db/timeseries/bucket_catalog_test.cpp
index b76619d9eb1..8658b6807dd 100644
--- a/src/mongo/db/timeseries/bucket_catalog_test.cpp
+++ b/src/mongo/db/timeseries/bucket_catalog_test.cpp
@@ -282,6 +282,100 @@ TEST_F(BucketCatalogTest, InsertIntoDifferentBuckets) {
}
}
+TEST_F(BucketCatalogTest, InsertIntoSameBucketArray) {  // Metadata is an array holding one object.
+ auto result1 = _bucketCatalog->insert(
+ _opCtx,
+ _ns1,
+ _getCollator(_ns1),
+ _getTimeseriesOptions(_ns1),
+ BSON(_timeField << Date_t::now() << _metaField << BSON_ARRAY(BSON("a" << 0 << "b" << 1))),
+ BucketCatalog::CombineWithInsertsFromOtherClients::kAllow);
+ auto result2 = _bucketCatalog->insert(
+ _opCtx,
+ _ns1,
+ _getCollator(_ns1),
+ _getTimeseriesOptions(_ns1),
+ BSON(_timeField << Date_t::now() << _metaField << BSON_ARRAY(BSON("b" << 1 << "a" << 0))),
+ BucketCatalog::CombineWithInsertsFromOtherClients::kAllow);
+
+ ASSERT_EQ(result1.getValue(), result2.getValue());  // Differing key order must not change the insert result.
+
+ // Check metadata in buckets: both report the object's fields in (a, b) order.
+ ASSERT_BSONOBJ_EQ(BSON(_metaField << BSON_ARRAY(BSON("a" << 0 << "b" << 1))),
+ _bucketCatalog->getMetadata(result1.getValue()->bucket()));
+ ASSERT_BSONOBJ_EQ(BSON(_metaField << BSON_ARRAY(BSON("a" << 0 << "b" << 1))),
+ _bucketCatalog->getMetadata(result2.getValue()->bucket()));
+}
+
+TEST_F(BucketCatalogTest, InsertIntoSameBucketObjArray) {  // Metadata is an object whose array field holds two objects.
+ auto result1 = _bucketCatalog->insert(
+ _opCtx,
+ _ns1,
+ _getCollator(_ns1),
+ _getTimeseriesOptions(_ns1),
+ BSON(_timeField << Date_t::now() << _metaField
+ << BSONObj(BSON("c" << BSON_ARRAY(BSON("a" << 0 << "b" << 1)
+ << BSON("f" << 1 << "g" << 0))))),
+ BucketCatalog::CombineWithInsertsFromOtherClients::kAllow);
+ auto result2 = _bucketCatalog->insert(
+ _opCtx,
+ _ns1,
+ _getCollator(_ns1),
+ _getTimeseriesOptions(_ns1),
+ BSON(_timeField << Date_t::now() << _metaField
+ << BSONObj(BSON("c" << BSON_ARRAY(BSON("b" << 1 << "a" << 0)
+ << BSON("g" << 0 << "f" << 1))))),
+ BucketCatalog::CombineWithInsertsFromOtherClients::kAllow);
+
+ ASSERT_EQ(result1.getValue(), result2.getValue());  // Differing key order must not change the insert result.
+
+ // Check metadata in buckets: element order is kept; keys come back as (a, b) and (f, g).
+ ASSERT_BSONOBJ_EQ(
+ BSON(_metaField << BSONObj(BSON(
+ "c" << BSON_ARRAY(BSON("a" << 0 << "b" << 1) << BSON("f" << 1 << "g" << 0))))),
+ _bucketCatalog->getMetadata(result1.getValue()->bucket()));
+ ASSERT_BSONOBJ_EQ(
+ BSON(_metaField << BSONObj(BSON(
+ "c" << BSON_ARRAY(BSON("a" << 0 << "b" << 1) << BSON("f" << 1 << "g" << 0))))),
+ _bucketCatalog->getMetadata(result2.getValue()->bucket()));
+}
+
+
+TEST_F(BucketCatalogTest, InsertIntoSameBucketNestedArray) {  // Metadata array holds an object and a nested string array.
+ auto result1 = _bucketCatalog->insert(
+ _opCtx,
+ _ns1,
+ _getCollator(_ns1),
+ _getTimeseriesOptions(_ns1),
+ BSON(_timeField << Date_t::now() << _metaField
+ << BSONObj(BSON("c" << BSON_ARRAY(BSON("a" << 0 << "b" << 1)
+ << BSON_ARRAY("123"
+ << "456"))))),
+ BucketCatalog::CombineWithInsertsFromOtherClients::kAllow);
+ auto result2 = _bucketCatalog->insert(
+ _opCtx,
+ _ns1,
+ _getCollator(_ns1),
+ _getTimeseriesOptions(_ns1),
+ BSON(_timeField << Date_t::now() << _metaField
+ << BSONObj(BSON("c" << BSON_ARRAY(BSON("b" << 1 << "a" << 0)
+ << BSON_ARRAY("123"
+ << "456"))))),
+ BucketCatalog::CombineWithInsertsFromOtherClients::kAllow);
+
+ ASSERT_EQ(result1.getValue(), result2.getValue());  // Differing key order must not change the insert result.
+
+ // Check metadata in buckets: object keys come back as (a, b); the nested array is unchanged.
+ ASSERT_BSONOBJ_EQ(BSON(_metaField << BSONObj(BSON("c" << BSON_ARRAY(BSON("a" << 0 << "b" << 1)
+ << BSON_ARRAY("123"
+ << "456"))))),
+ _bucketCatalog->getMetadata(result1.getValue()->bucket()));
+ ASSERT_BSONOBJ_EQ(BSON(_metaField << BSONObj(BSON("c" << BSON_ARRAY(BSON("a" << 0 << "b" << 1)
+ << BSON_ARRAY("123"
+ << "456"))))),
+ _bucketCatalog->getMetadata(result2.getValue()->bucket()));
+}
+
TEST_F(BucketCatalogTest, InsertNullAndMissingMetaFieldIntoDifferentBuckets) {
auto result1 =
_bucketCatalog->insert(_opCtx,