author     Dan Larkin-York <dan.larkin-york@mongodb.com>      2022-05-06 18:09:55 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>   2022-05-06 18:34:06 +0000
commit     c4577480b324634daf96ce685faf73f106f33e0d (patch)
tree       bf0623d7333cd52f7c28964b6c438bbbe7c5f5d3
parent     8adb070dcb43842e61da3e15415ad4e132d1655a (diff)
download   mongo-c4577480b324634daf96ce685faf73f106f33e0d.tar.gz

SERVER-65406 Handle compressed buckets in timeseries dotted path support library

-rw-r--r--  jstests/core/timeseries/timeseries_metric_index_2dsphere.js      |  35
-rw-r--r--  src/mongo/db/index/expression_keys_private.cpp                    |  13
-rw-r--r--  src/mongo/db/index/s2_bucket_key_generator_test.cpp               |   8
-rw-r--r--  src/mongo/db/timeseries/SConscript                                |   3
-rw-r--r--  src/mongo/db/timeseries/bucket_compression.cpp                    |  21
-rw-r--r--  src/mongo/db/timeseries/bucket_compression.h                      |   5
-rw-r--r--  src/mongo/db/timeseries/timeseries_dotted_path_support.cpp       | 300
-rw-r--r--  src/mongo/db/timeseries/timeseries_dotted_path_support.h          |  30
-rw-r--r--  src/mongo/db/timeseries/timeseries_dotted_path_support_test.cpp  | 355
9 files changed, 523 insertions(+), 247 deletions(-)
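
The heart of the change: a v1 bucket stores each field under 'data' as a plain object
keyed by row number, while a v2 (compressed) bucket stores a BinData-encoded BSONColumn
per field, so the dotted path support library must now decompress columns before it can
extract elements. A minimal sketch of probing the format, assuming the mongo source tree
(fromjson comes from mongo/bson/json.h; isCompressedBucket is added by this patch in
bucket_compression.h):

    #include "mongo/bson/json.h"
    #include "mongo/db/timeseries/bucket_compression.h"
    #include "mongo/util/assert_util.h"

    void sketch() {
        // v1: uncompressed; 'data.a' maps row number -> value.
        mongo::BSONObj v1 = mongo::fromjson(
            "{control: {version: 1}, data: {a: {'0': 1, '1': 2}}}");
        invariant(!mongo::timeseries::isCompressedBucket(v1));

        // A v2 bucket carries {control: {version: 2}} with BinData columns
        // under 'data'; compressBucket() (changed below) produces that format.
    }
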
diff --git a/jstests/core/timeseries/timeseries_metric_index_2dsphere.js b/jstests/core/timeseries/timeseries_metric_index_2dsphere.js
index 09ff02c8999..e47119b5bbd 100644
--- a/jstests/core/timeseries/timeseries_metric_index_2dsphere.js
+++ b/jstests/core/timeseries/timeseries_metric_index_2dsphere.js
@@ -66,33 +66,52 @@ TimeseriesTest.run((insert) => {
const twoDSphereDocs = [
{
_id: 0,
- [timeFieldName]: ISODate(),
+ [timeFieldName]: ISODate("2022-04-01T00:00:00.000Z"),
[metaFieldName]: "m1",
location: {type: "Point", coordinates: [40, -70]}
},
{
_id: 1,
- [timeFieldName]: ISODate(),
+ [timeFieldName]: ISODate("2022-04-01T00:01:00.000Z"), // should land in same bucket
[metaFieldName]: "m1",
location: {type: "Point", coordinates: [40.1, -70.1]}
},
{
_id: 2,
- [timeFieldName]: ISODate(),
+ [timeFieldName]: ISODate("2022-04-01T00:00:00.000Z"),
[metaFieldName]: "m2",
location: {type: "Point", coordinates: [40.2, -70.2]}
},
{
_id: 3,
- [timeFieldName]: ISODate(),
+ [timeFieldName]: ISODate("2022-04-01T00:01:00.000Z"), // should land in same bucket
[metaFieldName]: "m2",
- location: {type: "Point", coordinates: [40.2, -70.2]}
+ location: {type: "Point", coordinates: [40.3, -70.3]}
+ },
+ {
+ _id: 4,
+ [timeFieldName]: ISODate("2022-04-01T00:02:00.000Z"), // should land in same bucket
+ [metaFieldName]: "m2",
+ location: {type: "Point", coordinates: [40.4, -70.4]}
+ },
+ {
+ _id: 5,
+ [timeFieldName]:
+ ISODate("2022-04-01T02:00:00.000Z"), // should open new bucket and compress old one
+ [metaFieldName]: "m2",
+ location: {type: "Point", coordinates: [40.5, -70.5]}
+ },
+ {
+ _id: 6,
+ [timeFieldName]: ISODate("2022-04-01T02:01:00.000Z"), // should land in same bucket
+ [metaFieldName]: "m2"
},
- {_id: 4, [timeFieldName]: ISODate(), [metaFieldName]: "m2"},
- {_id: 5, [timeFieldName]: ISODate(), [metaFieldName]: "m3"},
+ {_id: 7, [timeFieldName]: ISODate("2022-04-01T00:00:00.000Z"), [metaFieldName]: "m3"},
];
assert.commandWorked(insert(timeseriescoll, twoDSphereDocs),
'Failed to insert twoDSphereDocs: ' + tojson(twoDSphereDocs));
+ assert.eq(bucketscoll.count(), 4);
+ printjson(bucketscoll.find({}).toArray());
// Test invalid documents
const docWithInvalidCoordinates = {
@@ -140,7 +159,7 @@ TimeseriesTest.run((insert) => {
timeseriescoll.find({location: {$geoWithin: {$center: [[40, -70], .15]}}}).toArray().length,
geoWithinPlan2d);
- assert.eq(4,
+ assert.eq(6,
timeseriescoll
.aggregate([{
$geoNear: {
diff --git a/src/mongo/db/index/expression_keys_private.cpp b/src/mongo/db/index/expression_keys_private.cpp
index 4bc13b10421..3d957f15519 100644
--- a/src/mongo/db/index/expression_keys_private.cpp
+++ b/src/mongo/db/index/expression_keys_private.cpp
@@ -706,12 +706,13 @@ void ExpressionKeysPrivate::getS2Keys(SharedBufferFragmentBuilder& pooledBufferB
std::vector<KeyString::HeapBuilder> updatedKeysToAdd;
if (IndexNames::GEO_2DSPHERE_BUCKET == keyElem.str()) {
- timeseries::dotted_path_support::extractAllElementsAlongBucketPath(
- obj,
- keyElem.fieldName(),
- fieldElements,
- expandArrayOnTrailingField,
- arrayComponents);
+ auto elementStorage =
+ timeseries::dotted_path_support::extractAllElementsAlongBucketPath(
+ obj,
+ keyElem.fieldName(),
+ fieldElements,
+ expandArrayOnTrailingField,
+ arrayComponents);
// null, undefined, {} and [] should all behave like there is no geo field. So we
// look for these cases and ignore those measurements if we find them.
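
Why the caller now captures the result: for a compressed bucket the extraction may
decompress a column into the returned boost::optional<BSONColumn>, and the BSONElements
deposited into 'fieldElements' can point into that storage. A keep-alive sketch under the
new signature (names mirror the surrounding caller):

    BSONElementSet fieldElements;
    auto elementStorage =
        timeseries::dotted_path_support::extractAllElementsAlongBucketPath(
            obj, keyElem.fieldName(), fieldElements, expandArrayOnTrailingField, arrayComponents);
    for (auto&& elem : fieldElements) {
        // 'elem' is only guaranteed valid while 'elementStorage' is alive.
    }
    // 'elementStorage' (and any decompressed data) is released here.
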
diff --git a/src/mongo/db/index/s2_bucket_key_generator_test.cpp b/src/mongo/db/index/s2_bucket_key_generator_test.cpp
index f501f5cc781..89187d275b0 100644
--- a/src/mongo/db/index/s2_bucket_key_generator_test.cpp
+++ b/src/mongo/db/index/s2_bucket_key_generator_test.cpp
@@ -137,7 +137,7 @@ struct S2BucketKeyGeneratorTest : public unittest::Test {
TEST_F(S2BucketKeyGeneratorTest, GetS2BucketKeys) {
BSONObj keyPattern = fromjson("{'data.geo': '2dsphere_bucket'}");
BSONObj genKeysFrom = fromjson(
- "{data: {geo: {"
+ "{control: {version: 1}, data: {geo: {"
"'0': {type: 'Point', coordinates: [0, 0]},"
"'1': {type: 'Point', coordinates: [3, 3]}"
"}}}");
@@ -165,7 +165,7 @@ TEST_F(S2BucketKeyGeneratorTest, GetS2BucketKeys) {
TEST_F(S2BucketKeyGeneratorTest, GetS2BucketKeysSubField) {
BSONObj keyPattern = fromjson("{'data.geo.sub': '2dsphere_bucket'}");
BSONObj genKeysFrom = fromjson(
- "{data: {geo: {"
+ "{control: {version: 1}, data: {geo: {"
"'0': {sub: {type: 'Point', coordinates: [0, 0]}},"
"'1': {sub: {type: 'Point', coordinates: [3, 3]}}"
"}}}");
@@ -194,7 +194,7 @@ TEST_F(S2BucketKeyGeneratorTest, GetS2BucketKeysSubField) {
TEST_F(S2BucketKeyGeneratorTest, GetS2BucketKeysDeepSubField) {
BSONObj keyPattern = fromjson("{'data.geo.sub1.sub2.sub3': '2dsphere_bucket'}");
BSONObj genKeysFrom = fromjson(
- "{data: {geo: {"
+ "{control: {version: 1}, data: {geo: {"
"'0': {sub1: {sub2: {sub3: {type: 'Point', coordinates: [0, 0]}}}},"
"'1': {sub1: {sub2: {sub3: {type: 'Point', coordinates: [3, 3]}}}}"
"}}}");
@@ -223,7 +223,7 @@ TEST_F(S2BucketKeyGeneratorTest, GetS2BucketKeysDeepSubField) {
TEST_F(S2BucketKeyGeneratorTest, GetS2BucketKeysSubFieldSomeMissing) {
BSONObj keyPattern = fromjson("{'data.geo.sub': '2dsphere_bucket'}");
BSONObj genKeysFrom = fromjson(
- "{data: {geo: {"
+ "{control: {version: 1}, data: {geo: {"
"'0': {sub: {type: 'Point', coordinates: [0, 0]}},"
"'1': {sub: {}},"
"'2': {sub: null},"
diff --git a/src/mongo/db/timeseries/SConscript b/src/mongo/db/timeseries/SConscript
index 519725d38b4..aa103c25a8b 100644
--- a/src/mongo/db/timeseries/SConscript
+++ b/src/mongo/db/timeseries/SConscript
@@ -88,12 +88,14 @@ env.Library(
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/base',
'$BUILD_DIR/mongo/bson/mutable/mutable_bson',
+ '$BUILD_DIR/mongo/bson/util/bson_column',
'$BUILD_DIR/mongo/db/exec/bucket_unpacker',
'$BUILD_DIR/mongo/db/index_commands_idl',
'$BUILD_DIR/mongo/db/index_names',
'$BUILD_DIR/mongo/db/namespace_string',
'$BUILD_DIR/mongo/db/ops/write_ops_parsers',
'$BUILD_DIR/mongo/db/storage/storage_options',
+ 'bucket_compression',
'timeseries_options',
],
)
@@ -130,6 +132,7 @@ env.CppUnitTest(
'$BUILD_DIR/mongo/db/catalog/catalog_test_fixture',
'$BUILD_DIR/mongo/db/catalog_raii',
'bucket_catalog',
+ 'bucket_compression',
'timeseries_conversion_util',
'timeseries_options',
],
diff --git a/src/mongo/db/timeseries/bucket_compression.cpp b/src/mongo/db/timeseries/bucket_compression.cpp
index 2f8eefd3a84..7d6e6961687 100644
--- a/src/mongo/db/timeseries/bucket_compression.cpp
+++ b/src/mongo/db/timeseries/bucket_compression.cpp
@@ -342,5 +342,26 @@ CompressionResult compressBucket(const BSONObj& bucketDoc,
return {};
}
+bool isCompressedBucket(const BSONObj& bucketDoc) {
+ auto&& controlField = bucketDoc[timeseries::kBucketControlFieldName];
+ uassert(6540600,
+ "Time-series bucket documents must have 'control' object present",
+ controlField && controlField.type() == BSONType::Object);
+
+ auto&& versionField = controlField.Obj()[timeseries::kBucketControlVersionFieldName];
+ uassert(6540601,
+ "Time-series bucket documents must have 'control.version' field present",
+ versionField && isNumericBSONType(versionField.type()));
+ auto version = versionField.Number();
+
+ if (version == 1) {
+ return false;
+ } else if (version == 2) {
+ return true;
+ } else {
+ uasserted(6540602, "Invalid bucket version");
+ }
+}
+
} // namespace timeseries
} // namespace mongo
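
A round-trip sketch pairing the new predicate with the existing compressor (signature as
declared in bucket_compression.h; 'bucket' and 'nss' are illustrative):

    NamespaceString nss{"test"};
    auto result =
        timeseries::compressBucket(bucket, "time", nss, /*validateDecompression=*/true);
    if (result.compressedBucket) {
        // compressBucket() rewrites control.version from 1 to 2, so the
        // predicate now reports the bucket as compressed.
        invariant(timeseries::isCompressedBucket(*result.compressedBucket));
    }
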
diff --git a/src/mongo/db/timeseries/bucket_compression.h b/src/mongo/db/timeseries/bucket_compression.h
index 7921bca1227..e70bec965d7 100644
--- a/src/mongo/db/timeseries/bucket_compression.h
+++ b/src/mongo/db/timeseries/bucket_compression.h
@@ -60,5 +60,10 @@ CompressionResult compressBucket(const BSONObj& bucketDoc,
const NamespaceString& nss,
bool validateDecompression);
+/**
+ * Returns whether a timeseries bucket has been compressed to the v2 format.
+ */
+bool isCompressedBucket(const BSONObj& bucketDoc);
+
} // namespace timeseries
} // namespace mongo
diff --git a/src/mongo/db/timeseries/timeseries_dotted_path_support.cpp b/src/mongo/db/timeseries/timeseries_dotted_path_support.cpp
index fc388e5fa0c..b28e198429e 100644
--- a/src/mongo/db/timeseries/timeseries_dotted_path_support.cpp
+++ b/src/mongo/db/timeseries/timeseries_dotted_path_support.cpp
@@ -37,6 +37,8 @@
#include "mongo/bson/bsonmisc.h"
#include "mongo/bson/bsonobj.h"
#include "mongo/bson/bsonobjbuilder.h"
+#include "mongo/bson/util/bsoncolumn.h"
+#include "mongo/db/timeseries/bucket_compression.h"
#include "mongo/db/timeseries/timeseries_constants.h"
#include "mongo/util/ctype.h"
@@ -58,93 +60,130 @@ boost::optional<std::pair<StringData, StringData>> _splitPath(StringData path) {
return std::make_pair(left, next);
}
-template <typename BSONElementColl>
-void _extractAllElementsAlongBucketPath(const BSONObj& obj,
- StringData path,
- BSONElementColl& elements,
- bool expandArrayOnTrailingField,
- BSONDepthIndex depth,
- MultikeyComponents* arrayComponents) {
- auto handleElement = [&](BSONElement elem, StringData path) -> void {
- if (elem.eoo()) {
- size_t idx = path.find('.');
- if (idx != std::string::npos) {
- invariant(depth != std::numeric_limits<BSONDepthIndex>::max());
- StringData left = path.substr(0, idx);
- StringData next = path.substr(idx + 1, path.size());
+void _handleElementForExtractAllElementsOnBucketPath(const BSONObj& obj,
+ BSONElement elem,
+ StringData path,
+ BSONElementSet& elements,
+ bool expandArrayOnTrailingField,
+ BSONDepthIndex depth,
+ MultikeyComponents* arrayComponents) {
+ if (elem.eoo()) {
+ size_t idx = path.find('.');
+ if (idx != std::string::npos) {
+ invariant(depth != std::numeric_limits<BSONDepthIndex>::max());
+ StringData left = path.substr(0, idx);
+ StringData next = path.substr(idx + 1, path.size());
- BSONElement e = obj.getField(left);
+ BSONElement e = obj.getField(left);
- if (e.type() == Object) {
- _extractAllElementsAlongBucketPath(e.embeddedObject(),
- next,
- elements,
- expandArrayOnTrailingField,
- depth + 1,
- arrayComponents);
- } else if (e.type() == Array) {
- bool allDigits = false;
- if (next.size() > 0 && ctype::isDigit(next[0])) {
- unsigned temp = 1;
- while (temp < next.size() && ctype::isDigit(next[temp]))
- temp++;
- allDigits = temp == next.size() || next[temp] == '.';
- }
- if (allDigits) {
- _extractAllElementsAlongBucketPath(e.embeddedObject(),
- next,
- elements,
- expandArrayOnTrailingField,
- depth + 1,
- arrayComponents);
- } else {
- BSONObjIterator i(e.embeddedObject());
- while (i.more()) {
- BSONElement e2 = i.next();
- if (e2.type() == Object || e2.type() == Array)
- _extractAllElementsAlongBucketPath(e2.embeddedObject(),
- next,
- elements,
- expandArrayOnTrailingField,
- depth + 1,
- arrayComponents);
- }
- if (arrayComponents) {
- arrayComponents->insert(depth);
+ if (e.type() == Object) {
+ BSONObj embedded = e.embeddedObject();
+ _handleElementForExtractAllElementsOnBucketPath(embedded,
+ embedded.getField(next),
+ next,
+ elements,
+ expandArrayOnTrailingField,
+ depth + 1,
+ arrayComponents);
+ } else if (e.type() == Array) {
+ bool allDigits = false;
+ if (next.size() > 0 && ctype::isDigit(next[0])) {
+ unsigned temp = 1;
+ while (temp < next.size() && ctype::isDigit(next[temp]))
+ temp++;
+ allDigits = temp == next.size() || next[temp] == '.';
+ }
+ if (allDigits) {
+ BSONObj embedded = e.embeddedObject();
+ _handleElementForExtractAllElementsOnBucketPath(embedded,
+ embedded.getField(next),
+ next,
+ elements,
+ expandArrayOnTrailingField,
+ depth + 1,
+ arrayComponents);
+ } else {
+ BSONObjIterator i(e.embeddedObject());
+ while (i.more()) {
+ BSONElement e2 = i.next();
+ if (e2.type() == Object || e2.type() == Array) {
+ BSONObj embedded = e2.embeddedObject();
+ _handleElementForExtractAllElementsOnBucketPath(
+ embedded,
+ embedded.getField(next),
+ next,
+ elements,
+ expandArrayOnTrailingField,
+ depth + 1,
+ arrayComponents);
}
}
- } else {
- // do nothing: no match
- }
- }
- } else {
- if (elem.type() == Array && expandArrayOnTrailingField) {
- BSONObjIterator i(elem.embeddedObject());
- while (i.more()) {
- elements.insert(i.next());
- }
- if (arrayComponents) {
- arrayComponents->insert(depth);
+ if (arrayComponents) {
+ arrayComponents->insert(depth);
+ }
}
} else {
- elements.insert(elem);
+ // do nothing: no match
}
}
- };
+ } else {
+ if (elem.type() == Array && expandArrayOnTrailingField) {
+ BSONObjIterator i(elem.embeddedObject());
+ while (i.more()) {
+ elements.insert(i.next());
+ }
+ if (arrayComponents) {
+ arrayComponents->insert(depth);
+ }
+ } else {
+ elements.insert(elem);
+ }
+ }
+}
+boost::optional<BSONColumn> _extractAllElementsAlongBucketPath(
+ const BSONObj& obj,
+ StringData path,
+ BSONElementSet& elements,
+ bool expandArrayOnTrailingField,
+ bool isCompressed,
+ BSONDepthIndex depth,
+ MultikeyComponents* arrayComponents) {
switch (depth) {
case 0:
case 1: {
if (auto res = _splitPath(path)) {
auto& [left, next] = *res;
BSONElement e = obj.getField(left);
- if (e.type() == Object && (depth > 0 || left == timeseries::kBucketDataFieldName)) {
- _extractAllElementsAlongBucketPath(e.embeddedObject(),
- next,
- elements,
- expandArrayOnTrailingField,
- depth + 1,
- arrayComponents);
+ if (depth > 0 || left == timeseries::kBucketDataFieldName) {
+ if (e.type() == Object) {
+ return _extractAllElementsAlongBucketPath(e.embeddedObject(),
+ next,
+ elements,
+ expandArrayOnTrailingField,
+ isCompressed,
+ depth + 1,
+ arrayComponents);
+ } else if (isCompressed && e.type() == BinData) {
+                    // Unbucketing magic happens here for nested measurement fields (e.g.
+ // data.a.b) in compressed buckets.
+ BSONColumn storage{e};
+ for (const BSONElement& e2 : storage) {
+ if (!e2.eoo()) {
+ BSONObj embedded =
+ e2.isABSONObj() ? e2.embeddedObject() : BSONObj();
+ _handleElementForExtractAllElementsOnBucketPath(
+ embedded,
+ embedded.getField(next),
+ next,
+ elements,
+ expandArrayOnTrailingField,
+ depth,
+ arrayComponents);
+ }
+ }
+ return std::move(storage);
+ }
}
} else {
BSONElement e = obj.getField(path);
@@ -153,32 +192,55 @@ void _extractAllElementsAlongBucketPath(const BSONObj& obj,
StringData(),
elements,
expandArrayOnTrailingField,
+ isCompressed,
depth + 1,
arrayComponents);
+ } else if (isCompressed && BinData == e.type()) {
+            // Unbucketing magic happens here for top-level measurement fields (e.g. data.a)
+ // in compressed buckets.
+ invariant(depth == 1);
+ BSONColumn storage{e};
+ for (const BSONElement& e2 : storage) {
+ if (!e2.eoo()) {
+ BSONObj embedded = e2.isABSONObj() ? e2.embeddedObject() : BSONObj();
+ _handleElementForExtractAllElementsOnBucketPath(
+ embedded,
+ e2,
+ ""_sd,
+ elements,
+ expandArrayOnTrailingField,
+ depth,
+ arrayComponents);
+ }
+ }
+ return std::move(storage);
}
}
break;
}
case 2: {
- // Unbucketing magic happens here.
+ // Unbucketing magic happens here for uncompressed buckets.
+ invariant(!isCompressed);
for (const BSONElement& e : obj) {
std::string subPath = e.fieldName();
if (!path.empty()) {
subPath.append("." + path);
}
- BSONElement sub = obj.getField(subPath);
- handleElement(sub, subPath);
+ _handleElementForExtractAllElementsOnBucketPath(obj,
+ obj.getField(subPath),
+ subPath,
+ elements,
+ expandArrayOnTrailingField,
+ depth,
+ arrayComponents);
}
break;
}
- default: {
- BSONElement e = obj.getField(path);
- handleElement(e, path);
- break;
- }
+ default: { MONGO_UNREACHABLE; }
}
-}
+ return boost::none;
+}
bool _haveArrayAlongBucketDataPath(const BSONObj& obj, StringData path, bool isCompressed, BSONDepthIndex depth);
@@ -198,7 +260,9 @@ bool _handleElementForHaveArrayAlongBucketDataPath(const BSONObj& obj,
BSONElement e = obj.getField(left);
if (e.type() == Object) {
- return _haveArrayAlongBucketDataPath(e.embeddedObject(), next, depth + 1);
+ auto embedded = e.embeddedObject();
+ return _handleElementForHaveArrayAlongBucketDataPath(
+ embedded, embedded.getField(next), next, depth + 1);
} else if (e.type() == Array) {
return true;
} else {
@@ -214,27 +278,56 @@ bool _handleElementForHaveArrayAlongBucketDataPath(const BSONObj& obj,
return false;
}
-bool _haveArrayAlongBucketDataPath(const BSONObj& obj, StringData path, BSONDepthIndex depth) {
+bool _haveArrayAlongBucketDataPath(const BSONObj& obj,
+ StringData path,
+ bool isCompressed,
+ BSONDepthIndex depth) {
switch (depth) {
case 0:
case 1: {
if (auto res = _splitPath(path)) {
auto& [left, next] = *res;
BSONElement e = obj.getField(left);
- if (e.type() == Object && (depth > 0 || left == timeseries::kBucketDataFieldName)) {
- return _haveArrayAlongBucketDataPath(e.embeddedObject(), next, depth + 1);
+ if (depth > 0 || left == timeseries::kBucketDataFieldName) {
+ if (e.type() == Object) {
+ return _haveArrayAlongBucketDataPath(
+ e.embeddedObject(), next, isCompressed, depth + 1);
+ } else if (isCompressed && BinData == e.type()) {
+                        // Unbucketing magic happens here for nested measurement fields (e.g.
+ // data.a.b) in compressed buckets.
+ BSONColumn column{e};
+ for (const BSONElement& e2 : column) {
+ BSONObj embedded = e2.isABSONObj() ? e2.embeddedObject() : BSONObj();
+ const bool foundArray = _handleElementForHaveArrayAlongBucketDataPath(
+ embedded, embedded.getField(next), next, depth);
+ if (foundArray) {
+ return foundArray;
+ }
+ }
+ }
}
} else {
BSONElement e = obj.getField(path);
if (Object == e.type()) {
return _haveArrayAlongBucketDataPath(
- e.embeddedObject(), StringData(), depth + 1);
+ e.embeddedObject(), StringData(), isCompressed, depth + 1);
+ } else if (BinData == e.type()) {
+                // Unbucketing magic happens here for top-level measurement fields (e.g. data.a)
+ // in compressed buckets.
+ invariant(isCompressed && depth == 1);
+ BSONColumn column{e};
+ for (const BSONElement& e2 : column) {
+ if (e2.type() == Array) {
+ return true;
+ }
+ }
}
}
return false;
}
case 2: {
- // Unbucketing magic happens here.
+ // Unbucketing magic happens here for uncompressed buckets.
+ invariant(!isCompressed);
for (const BSONElement& e : obj) {
std::string subPath = e.fieldName();
if (!path.empty()) {
@@ -367,24 +460,20 @@ Decision _fieldContainsArrayData(const BSONObj& min, const BSONObj& max, StringD
} // namespace
-void extractAllElementsAlongBucketPath(const BSONObj& obj,
- StringData path,
- BSONElementSet& elements,
- bool expandArrayOnTrailingField,
- MultikeyComponents* arrayComponents) {
- constexpr BSONDepthIndex initialDepth = 0;
- _extractAllElementsAlongBucketPath(
- obj, path, elements, expandArrayOnTrailingField, initialDepth, arrayComponents);
-}
-
-void extractAllElementsAlongBucketPath(const BSONObj& obj,
- StringData path,
- BSONElementMultiSet& elements,
- bool expandArrayOnTrailingField,
- MultikeyComponents* arrayComponents) {
+boost::optional<BSONColumn> extractAllElementsAlongBucketPath(const BSONObj& obj,
+ StringData path,
+ BSONElementSet& elements,
+ bool expandArrayOnTrailingField,
+ MultikeyComponents* arrayComponents) {
constexpr BSONDepthIndex initialDepth = 0;
- _extractAllElementsAlongBucketPath(
- obj, path, elements, expandArrayOnTrailingField, initialDepth, arrayComponents);
+ const bool isCompressed = timeseries::isCompressedBucket(obj);
+ return _extractAllElementsAlongBucketPath(obj,
+ path,
+ elements,
+ expandArrayOnTrailingField,
+ isCompressed,
+ initialDepth,
+ arrayComponents);
}
bool haveArrayAlongBucketDataPath(const BSONObj& bucketObj, StringData path) {
@@ -394,7 +483,8 @@ bool haveArrayAlongBucketDataPath(const BSONObj& bucketObj, StringData path) {
}
constexpr BSONDepthIndex initialDepth = 0;
- return _haveArrayAlongBucketDataPath(bucketObj, path, initialDepth);
+ const bool isCompressed = timeseries::isCompressedBucket(bucketObj);
+ return _haveArrayAlongBucketDataPath(bucketObj, path, isCompressed, initialDepth);
}
std::ostream& operator<<(std::ostream& s, const Decision& i) {
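
The decompression walk above, in miniature: a BinData element under 'data' is materialized
as a BSONColumn whose iterator yields one element per measurement (EOO where a measurement
lacks the field), and each yielded element is then handled like a depth-2 entry of an
uncompressed bucket. A sketch relying only on the iterator behavior used above:

    BSONColumn column{binDataElem};  // 'binDataElem': a compressed field under 'data'
    for (const BSONElement& e : column) {
        if (e.eoo())
            continue;  // this measurement has no value for the field
        // For a top-level path (data.a) collect 'e' directly; for a nested
        // path (data.a.b) descend into e.embeddedObject() for the remainder.
    }
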
diff --git a/src/mongo/db/timeseries/timeseries_dotted_path_support.h b/src/mongo/db/timeseries/timeseries_dotted_path_support.h
index 266c86a9117..fa2ebba941b 100644
--- a/src/mongo/db/timeseries/timeseries_dotted_path_support.h
+++ b/src/mongo/db/timeseries/timeseries_dotted_path_support.h
@@ -29,10 +29,12 @@
#pragma once
+#include <boost/optional.hpp>
#include <cstddef>
#include "mongo/bson/bsonelement_comparator_interface.h"
#include "mongo/bson/bsonobj.h"
+#include "mongo/bson/util/bsoncolumn.h"
#include "mongo/db/index/multikey_paths.h"
namespace mongo {
@@ -48,8 +50,15 @@ namespace dotted_path_support {
* original document. Note that the caller should include the 'data' prefix, but omit the depth-2
* numeric path entry that results from pivoting the data into the bucketed format.
*
- * Other than the bucketing and unbucketing mentioned above, the function should be have roughly
- * like `mongo::dotted_path_support::extractAllElementsAlongPath'.
+ * Other than the bucketing and unbucketing mentioned above, and the return value, the function
+ * should behave roughly like `mongo::dotted_path_support::extractAllElementsAlongPath'.
+ *
+ * In the case that the input bucket has been compressed, the function may need to decompress the
+ * data in order to examine it and extract the requested data. Since the data is returned as
+ * BSONElement, which does not own the data it references, we will need to provide storage for the
+ * decompressed data that will outlive the function call so that the returned BSONElements remain
+ * valid. This storage is provided via the optional return value. It need not be examined directly,
+ * but it must not be discarded until the extracted elements are no longer in use.
*
* An example:
*
@@ -57,17 +66,12 @@ namespace dotted_path_support {
* {b: 1} and {b: 2} would be added to the set. 'arrayComponents' would be set as
* std::set<size_t>{1U}.
*/
-void extractAllElementsAlongBucketPath(const BSONObj& obj,
- StringData path,
- BSONElementSet& elements,
- bool expandArrayOnTrailingField = true,
- MultikeyComponents* arrayComponents = nullptr);
-
-void extractAllElementsAlongBucketPath(const BSONObj& obj,
- StringData path,
- BSONElementMultiSet& elements,
- bool expandArrayOnTrailingField = true,
- MultikeyComponents* arrayComponents = nullptr);
+MONGO_WARN_UNUSED_RESULT_FUNCTION boost::optional<BSONColumn> extractAllElementsAlongBucketPath(
+ const BSONObj& obj,
+ StringData path,
+ BSONElementSet& elements,
+ bool expandArrayOnTrailingField = true,
+ MultikeyComponents* arrayComponents = nullptr);
/**
* Finds arrays in individual metrics along the specified data path.
diff --git a/src/mongo/db/timeseries/timeseries_dotted_path_support_test.cpp b/src/mongo/db/timeseries/timeseries_dotted_path_support_test.cpp
index 586b4361f29..638a2c8d6c4 100644
--- a/src/mongo/db/timeseries/timeseries_dotted_path_support_test.cpp
+++ b/src/mongo/db/timeseries/timeseries_dotted_path_support_test.cpp
@@ -31,7 +31,9 @@
#include "mongo/platform/basic.h"
#include "mongo/bson/bsonmisc.h"
+#include "mongo/db/timeseries/bucket_compression.h"
#include "mongo/db/timeseries/timeseries_dotted_path_support.h"
+#include "mongo/unittest/bson_test_util.h"
#include "mongo/unittest/unittest.h"
namespace mongo {
@@ -39,12 +41,32 @@ namespace {
namespace tdps = ::mongo::timeseries::dotted_path_support;
-TEST(TimeseriesDottedPathSupport, HaveArrayAlongBucketPath) {
- BSONObj obj = ::mongo::fromjson(R"(
+class TimeseriesDottedPathSupportTest : public unittest::Test {
+protected:
+ void runTest(const BSONObj& obj, const std::function<void(const BSONObj&)>& test) {
+ test(obj);
+
+ NamespaceString nss{"test"};
+ auto compressionResult = timeseries::compressBucket(obj, "time", nss, true);
+ ASSERT_TRUE(compressionResult.compressedBucket.has_value());
+ ASSERT_FALSE(compressionResult.decompressionFailed);
+
+ test(compressionResult.compressedBucket.get());
+ }
+};
+
+TEST_F(TimeseriesDottedPathSupportTest, HaveArrayAlongBucketPath) {
+ BSONObj input = ::mongo::fromjson(R"(
{
control: {version: 1},
bogus: [1],
data: {
+ time: {
+ "0": {"$date": "1970-01-01T00:00:00.000Z"},
+ "1": {"$date": "1970-01-01T00:00:00.001Z"},
+ "3": {"$date": "1970-01-01T00:00:00.003Z"},
+ "99": {"$date": "1970-01-01T00:00:00.099Z"}
+ },
a: {},
b: [],
c: {
@@ -70,31 +92,41 @@ TEST(TimeseriesDottedPathSupport, HaveArrayAlongBucketPath) {
{a: true}
]
}
+ },
+ h: {
+ "1": {
+ a: {
+ b: true
+ }
+ }
}
}
})");
- // Non-data fields should always be false
- ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "control"));
- ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "control.version"));
- ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "bogus"));
+ runTest(input, [this](const BSONObj& obj) {
+ // Non-data fields should always be false
+ ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "control"));
+ ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "control.version"));
+ ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "bogus"));
- ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "data"));
- ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "data.a"));
- ASSERT_FALSE(
- tdps::haveArrayAlongBucketDataPath(obj, "data.b")); // bucket expansion hides array
- ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "data.c"));
- ASSERT_TRUE(tdps::haveArrayAlongBucketDataPath(obj, "data.d"));
- ASSERT_TRUE(tdps::haveArrayAlongBucketDataPath(obj, "data.e"));
- ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "data.f"));
- ASSERT_TRUE(tdps::haveArrayAlongBucketDataPath(obj, "data.f.a"));
- ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "data.g"));
- ASSERT_TRUE(tdps::haveArrayAlongBucketDataPath(obj, "data.g.a"));
- ASSERT_TRUE(tdps::haveArrayAlongBucketDataPath(obj, "data.g.a.a"));
+ ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "data"));
+ ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "data.a"));
+ ASSERT_FALSE(
+ tdps::haveArrayAlongBucketDataPath(obj, "data.b")); // bucket expansion hides array
+ ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "data.c"));
+        ASSERT_TRUE(tdps::haveArrayAlongBucketDataPath(obj, "data.d"));
+ ASSERT_TRUE(tdps::haveArrayAlongBucketDataPath(obj, "data.e"));
+ ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "data.f"));
+ ASSERT_TRUE(tdps::haveArrayAlongBucketDataPath(obj, "data.f.a"));
+ ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "data.g"));
+ ASSERT_TRUE(tdps::haveArrayAlongBucketDataPath(obj, "data.g.a"));
+ ASSERT_TRUE(tdps::haveArrayAlongBucketDataPath(obj, "data.g.a.a"));
+ ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "data.h.a.b"));
+ });
}
-TEST(TimeseriesDottedPathSupport, fieldContainsArrayData) {
- BSONObj obj = ::mongo::fromjson(R"(
+TEST_F(TimeseriesDottedPathSupportTest, fieldContainsArrayData) {
+ BSONObj input = ::mongo::fromjson(R"(
{
control: {
min: {
@@ -169,112 +201,213 @@ TEST(TimeseriesDottedPathSupport, fieldContainsArrayData) {
g: true
}
}
+ },
+ data: {
+ time: {
+ "0": {"$date": "1970-01-01T00:00:00.000Z"}
+ }
}
})");
- // Because this function is meant as an optimization to avoid a more expensive check, we need
- // to ensure that we don't make wrong decisions, but we can always fall through to the more
- // expensive check. So if the *best* decision that the function could make with the information
- // present in the control fields is "Yes", we will accept "Yes" or "Maybe". Similarly if the
- // best decision it could make is "No", we will accept "No" or "Maybe". If there isn't enough
- // information in the control fields to make a firm decision, then it must return "Maybe".
+ runTest(input, [this](const BSONObj& obj) {
+ // Because this function is meant as an optimization to avoid a more expensive check, we
+ // need to ensure that we don't make wrong decisions, but we can always fall through to the
+ // more expensive check. So if the *best* decision that the function could make with the
+ // information present in the control fields is "Yes", we will accept "Yes" or "Maybe".
+ // Similarly if the best decision it could make is "No", we will accept "No" or "Maybe". If
+ // there isn't enough information in the control fields to make a firm decision, then it
+ // must return "Maybe".
- // A few assumptions about type orders necessary to understand the below tests:
- // eoo < double < array < bool
+ // A few assumptions about type orders necessary to understand the below tests:
+ // eoo < double < array < bool
- constexpr auto yes = tdps::Decision::Yes;
- constexpr auto no = tdps::Decision::No;
- constexpr auto maybe = tdps::Decision::Maybe;
+ constexpr auto yes = tdps::Decision::Yes;
+ constexpr auto no = tdps::Decision::No;
+ constexpr auto maybe = tdps::Decision::Maybe;
- // a: {min: double, max: bool},
- ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "a"));
+ // a: {min: double, max: bool},
+ ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "a"));
- // b: {min: bool, max: bool}
- ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "b"));
+ // b: {min: bool, max: bool}
+ ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "b"));
- // c: {min: double, max: double}
- ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "c"));
+ // c: {min: double, max: double}
+ ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "c"));
- // d: {min: double, max: array}
- ASSERT_NE(no, tdps::fieldContainsArrayData(obj, "d"));
+ // d: {min: double, max: array}
+ ASSERT_NE(no, tdps::fieldContainsArrayData(obj, "d"));
- // e: {min: array, max: bool}
- ASSERT_NE(no, tdps::fieldContainsArrayData(obj, "e"));
+ // e: {min: array, max: bool}
+ ASSERT_NE(no, tdps::fieldContainsArrayData(obj, "e"));
- // f: {min: array, max: array}
- ASSERT_NE(no, tdps::fieldContainsArrayData(obj, "f"));
+ // f: {min: array, max: array}
+ ASSERT_NE(no, tdps::fieldContainsArrayData(obj, "f"));
- // g: {min: double, max: object}
- ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "g"));
- // g.a: {min: double.eoo, max: object.bool}
- ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "g.a"));
- // g.b: {min: double.eoo, max: object.double}
- ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "g.b"));
- // g.c: {min: double.eoo, max: object.array}
- ASSERT_NE(no, tdps::fieldContainsArrayData(obj, "g.c"));
- // g.d: {min: double.eoo, max: object.eoo}
- ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "g.d"));
+ // g: {min: double, max: object}
+ ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "g"));
+ // g.a: {min: double.eoo, max: object.bool}
+ ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "g.a"));
+ // g.b: {min: double.eoo, max: object.double}
+ ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "g.b"));
+ // g.c: {min: double.eoo, max: object.array}
+ ASSERT_NE(no, tdps::fieldContainsArrayData(obj, "g.c"));
+ // g.d: {min: double.eoo, max: object.eoo}
+ ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "g.d"));
- // h: {min: object, max: bool}
- ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "h"));
- // h.a: {min: object.bool, max: bool.eoo}
- ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "h.a"));
- // h.b: {min: object.double, max: bool.eoo}
- ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "h.b"));
- // h.c: {min: object.array, max: bool.eoo}
- ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "h.c"));
- // h.d: {min: object.eoo, max: bool.eoo}
- ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "h.d"));
+ // h: {min: object, max: bool}
+ ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "h"));
+ // h.a: {min: object.bool, max: bool.eoo}
+ ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "h.a"));
+ // h.b: {min: object.double, max: bool.eoo}
+ ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "h.b"));
+ // h.c: {min: object.array, max: bool.eoo}
+ ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "h.c"));
+ // h.d: {min: object.eoo, max: bool.eoo}
+ ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "h.d"));
- // i: {min: object, max: object}
- ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i"));
- // i.a: {min: object.double, max: object.bool}
- ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.a"));
- // i.b: {min: object.array, max: object.array}
- ASSERT_NE(no, tdps::fieldContainsArrayData(obj, "i.b"));
- // i.c: {min: object.bool, max: object.bool}
- ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.c"));
- // i.d: {min: object.object, max: object.object}
- ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.d"));
- // i.d.a: {min: object.object.double, max: object.object.bool}
- ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.d.a"));
- // i.e: {min: object.object, max: object.object}
- ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.e"));
- // i.e.a: {min: object.object.double, max: object.object.object}
- ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.e.a"));
- // i.f: {min: object.double, max: object.object}
- ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.f"));
- // i.f.a: {min: object.double.eoo, max: object.object.double}
- ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.f.a"));
- // i.f.b: {min: object.double.eoo, max: object.object.object}
- ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.f.b"));
- // i.f.c: {min: object.double.eoo, max: object.object.object}
- ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.f.c"));
- // i.f.c.a: {min: object.double.eoo.eoo, max: object.object.object.double}
- ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.f.c.a"));
- // i.f.c.b: {min: object.double.eoo.eoo, max: object.object.object.bool}
- ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.f.c.b"));
- // i.f.d: {min: object.double.eoo, max: object.object.array}
- ASSERT_NE(no, tdps::fieldContainsArrayData(obj, "i.f.d"));
- // i.f.e: {min: object.double.eoo, max: object.object.bool}
- ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.f.e"));
- // i.g: {min: object.object, max: object.bool}
- ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.g"));
- // i.g.a: {min: object.object.double, max: object.bool.eoo}
- ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.g.a"));
- // i.g.b: {min: object.object.object, max: object.bool.eoo}
- ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.g.b"));
- // i.g.c: {min: object.object.object, max: object.bool.eoo}
- ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.g.c"));
- // i.g.c.a: {min: object.object.object.double, max: object.bool.eoo.eoo}
- ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.g.c.a"));
- // i.g.c.b: {min: object.object.object.bool, max: object.bool.eoo.eoo}
- ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.g.c.b"));
- // i.g.d: {min: object.object.array, max: object.bool.eoo}
- ASSERT_NE(no, tdps::fieldContainsArrayData(obj, "i.g.d"));
- // i.g.e: {min: object.object.bool, max: object.bool.eoo}
- ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.g.e"));
+ // i: {min: object, max: object}
+ ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i"));
+ // i.a: {min: object.double, max: object.bool}
+ ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.a"));
+ // i.b: {min: object.array, max: object.array}
+ ASSERT_NE(no, tdps::fieldContainsArrayData(obj, "i.b"));
+ // i.c: {min: object.bool, max: object.bool}
+ ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.c"));
+ // i.d: {min: object.object, max: object.object}
+ ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.d"));
+ // i.d.a: {min: object.object.double, max: object.object.bool}
+ ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.d.a"));
+ // i.e: {min: object.object, max: object.object}
+ ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.e"));
+ // i.e.a: {min: object.object.double, max: object.object.object}
+ ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.e.a"));
+ // i.f: {min: object.double, max: object.object}
+ ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.f"));
+ // i.f.a: {min: object.double.eoo, max: object.object.double}
+ ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.f.a"));
+ // i.f.b: {min: object.double.eoo, max: object.object.object}
+ ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.f.b"));
+ // i.f.c: {min: object.double.eoo, max: object.object.object}
+ ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.f.c"));
+ // i.f.c.a: {min: object.double.eoo.eoo, max: object.object.object.double}
+ ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.f.c.a"));
+ // i.f.c.b: {min: object.double.eoo.eoo, max: object.object.object.bool}
+ ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.f.c.b"));
+ // i.f.d: {min: object.double.eoo, max: object.object.array}
+ ASSERT_NE(no, tdps::fieldContainsArrayData(obj, "i.f.d"));
+ // i.f.e: {min: object.double.eoo, max: object.object.bool}
+ ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.f.e"));
+ // i.g: {min: object.object, max: object.bool}
+ ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.g"));
+ // i.g.a: {min: object.object.double, max: object.bool.eoo}
+ ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.g.a"));
+ // i.g.b: {min: object.object.object, max: object.bool.eoo}
+ ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.g.b"));
+ // i.g.c: {min: object.object.object, max: object.bool.eoo}
+ ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.g.c"));
+ // i.g.c.a: {min: object.object.object.double, max: object.bool.eoo.eoo}
+ ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.g.c.a"));
+ // i.g.c.b: {min: object.object.object.bool, max: object.bool.eoo.eoo}
+ ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.g.c.b"));
+ // i.g.d: {min: object.object.array, max: object.bool.eoo}
+ ASSERT_NE(no, tdps::fieldContainsArrayData(obj, "i.g.d"));
+ // i.g.e: {min: object.object.bool, max: object.bool.eoo}
+ ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.g.e"));
+ });
}
+TEST_F(TimeseriesDottedPathSupportTest, ExtractAllElementsAlongBucketPath) {
+ BSONObj input = ::mongo::fromjson(R"(
+{
+ control: {version: 1},
+ data: {
+ time: {
+ "0": {"$date": "1970-01-01T00:00:00.000Z"},
+ "1": {"$date": "1970-01-01T00:00:00.001Z"},
+ "3": {"$date": "1970-01-01T00:00:00.003Z"},
+ "99": {"$date": "1970-01-01T00:00:00.099Z"}
+ },
+ a: {},
+ b: [],
+ c: {
+ "0": true,
+ "1": false
+ },
+ d: {
+ "0": false,
+ "1": []
+ },
+ e: {
+ "3": "foo",
+ "99": [1, 2]
+ },
+ f: {
+ "1": {
+ a: [true, false]
+ }
+ },
+ g: {
+ "1": {
+ a: [
+ {a: true}
+ ]
+ }
+ },
+ h: {
+ "1": {
+ a: {
+ b: true
+ }
+ },
+ "3": {
+ a: {
+ b: false
+ }
+ }
+ }
+ }
+})");
+
+ runTest(input, [this](const BSONObj& obj) {
+ auto assertExtractionMatches = [&](StringData path, const BSONArray expectedStorage) {
+ BSONElementSet actual;
+ auto actualStorage = tdps::extractAllElementsAlongBucketPath(obj, path, actual);
+
+ BSONElementSet expected;
+ for (auto&& el : expectedStorage) {
+ expected.emplace(el);
+ }
+
+ ASSERT_EQ(actual.size(), expected.size());
+
+ auto actualIt = actual.begin();
+ auto expectedIt = expected.begin();
+ while (actualIt != actual.end() && expectedIt != expected.end()) {
+ ASSERT_FALSE(actualIt->eoo());
+ ASSERT_FALSE(expectedIt->eoo());
+ ASSERT_EQ(actualIt->woCompare(*expectedIt, 0), 0);
+ actualIt++;
+ expectedIt++;
+ }
+ };
+
+ assertExtractionMatches("data.a"_sd, BSONArray());
+ assertExtractionMatches("data.b"_sd, BSONArray());
+ assertExtractionMatches("data.c"_sd, BSON_ARRAY(true << false));
+ assertExtractionMatches("data.d"_sd, BSON_ARRAY(false));
+ assertExtractionMatches("data.e"_sd, BSON_ARRAY("foo" << 1 << 2));
+ assertExtractionMatches("data.f"_sd, BSON_ARRAY(BSON("a" << BSON_ARRAY(true << false))));
+ assertExtractionMatches("data.f.a"_sd, BSON_ARRAY(true << false));
+ assertExtractionMatches("data.g"_sd,
+ BSON_ARRAY(BSON("a" << BSON_ARRAY(BSON("a" << true)))));
+ assertExtractionMatches("data.g.a"_sd, BSON_ARRAY(BSON("a" << true)));
+ assertExtractionMatches("data.g.a.a"_sd, BSON_ARRAY(true));
+ assertExtractionMatches(
+ "data.h"_sd,
+ BSON_ARRAY(BSON("a" << BSON("b" << true)) << BSON("a" << BSON("b" << false))));
+ assertExtractionMatches("data.h.a"_sd, BSON_ARRAY(BSON("b" << true) << BSON("b" << false)));
+ assertExtractionMatches("data.h.a.b"_sd, BSON_ARRAY(true << false));
+ });
+}
} // namespace
} // namespace mongo