SERVER-65406 Handle compressed buckets in timeseries dotted path support library

author: Dan Larkin-York <dan.larkin-york@mongodb.com> 2022-05-06 18:09:55 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-05-06 18:34:06 +0000
commit: c4577480b324634daf96ce685faf73f106f33e0d (patch)
tree: bf0623d7333cd52f7c28964b6c438bbbe7c5f5d3
parent: 8adb070dcb43842e61da3e15415ad4e132d1655a (diff)
download: mongo-c4577480b324634daf96ce685faf73f106f33e0d.tar.gz
9 files changed, 523 insertions, 247 deletions
diff --git a/jstests/core/timeseries/timeseries_metric_index_2dsphere.js b/jstests/core/timeseries/timeseries_metric_index_2dsphere.js
index 09ff02c8999..e47119b5bbd 100644
--- a/jstests/core/timeseries/timeseries_metric_index_2dsphere.js
+++ b/jstests/core/timeseries/timeseries_metric_index_2dsphere.js
@@ -66,33 +66,52 @@ TimeseriesTest.run((insert) => {
     const twoDSphereDocs = [
         {
             _id: 0,
-            [timeFieldName]: ISODate(),
+            [timeFieldName]: ISODate("2022-04-01T00:00:00.000Z"),
             [metaFieldName]: "m1",
             location: {type: "Point", coordinates: [40, -70]}
         },
         {
             _id: 1,
-            [timeFieldName]: ISODate(),
+            [timeFieldName]: ISODate("2022-04-01T00:01:00.000Z"),  // should land in same bucket
             [metaFieldName]: "m1",
             location: {type: "Point", coordinates: [40.1, -70.1]}
         },
         {
             _id: 2,
-            [timeFieldName]: ISODate(),
+            [timeFieldName]: ISODate("2022-04-01T00:00:00.000Z"),
             [metaFieldName]: "m2",
             location: {type: "Point", coordinates: [40.2, -70.2]}
         },
         {
             _id: 3,
-            [timeFieldName]: ISODate(),
+            [timeFieldName]: ISODate("2022-04-01T00:01:00.000Z"),  // should land in same bucket
             [metaFieldName]: "m2",
-            location: {type: "Point", coordinates: [40.2, -70.2]}
+            location: {type: "Point", coordinates: [40.3, -70.3]}
+        },
+        {
+            _id: 4,
+            [timeFieldName]: ISODate("2022-04-01T00:02:00.000Z"),  // should land in same bucket
+            [metaFieldName]: "m2",
+            location: {type: "Point", coordinates: [40.4, -70.4]}
+        },
+        {
+            _id: 5,
+            [timeFieldName]:
+                ISODate("2022-04-01T02:00:00.000Z"),  // should open new bucket and compress old one
+            [metaFieldName]: "m2",
+            location: {type: "Point", coordinates: [40.5, -70.5]}
+        },
+        {
+            _id: 6,
+            [timeFieldName]: ISODate("2022-04-01T02:01:00.000Z"),  // should land in same bucket
+            [metaFieldName]: "m2"
         },
-        {_id: 4, [timeFieldName]: ISODate(), [metaFieldName]: "m2"},
-        {_id: 5, [timeFieldName]: ISODate(), [metaFieldName]: "m3"},
+        {_id: 7, [timeFieldName]: ISODate("2022-04-01T00:00:00.000Z"), [metaFieldName]: "m3"},
     ];
     assert.commandWorked(insert(timeseriescoll, twoDSphereDocs),
                          'Failed to insert twoDSphereDocs: ' + tojson(twoDSphereDocs));
+    assert.eq(bucketscoll.count(), 4);
+    printjson(bucketscoll.find({}).toArray());
 
     // Test invalid documents
     const docWithInvalidCoordinates = {
@@ -140,7 +159,7 @@ TimeseriesTest.run((insert) => {
         timeseriescoll.find({location: {$geoWithin: {$center: [[40, -70], .15]}}}).toArray().length,
         geoWithinPlan2d);
 
-    assert.eq(4,
+    assert.eq(6,
               timeseriescoll
                   .aggregate([{
                       $geoNear: {
diff --git a/src/mongo/db/index/expression_keys_private.cpp b/src/mongo/db/index/expression_keys_private.cpp
index 4bc13b10421..3d957f15519 100644
--- a/src/mongo/db/index/expression_keys_private.cpp
+++ b/src/mongo/db/index/expression_keys_private.cpp
@@ -706,12 +706,13 @@ void ExpressionKeysPrivate::getS2Keys(SharedBufferFragmentBuilder& pooledBufferB
             std::vector<KeyString::HeapBuilder> updatedKeysToAdd;
 
             if (IndexNames::GEO_2DSPHERE_BUCKET == keyElem.str()) {
-                timeseries::dotted_path_support::extractAllElementsAlongBucketPath(
-                    obj,
-                    keyElem.fieldName(),
-                    fieldElements,
-                    expandArrayOnTrailingField,
-                    arrayComponents);
+                auto elementStorage =
+                    timeseries::dotted_path_support::extractAllElementsAlongBucketPath(
+                        obj,
+                        keyElem.fieldName(),
+                        fieldElements,
+                        expandArrayOnTrailingField,
+                        arrayComponents);
 
                 // null, undefined, {} and [] should all behave like there is no geo field. So we
                 // look for these cases and ignore those measurements if we find them.
diff --git a/src/mongo/db/index/s2_bucket_key_generator_test.cpp b/src/mongo/db/index/s2_bucket_key_generator_test.cpp
index f501f5cc781..89187d275b0 100644
--- a/src/mongo/db/index/s2_bucket_key_generator_test.cpp
+++ b/src/mongo/db/index/s2_bucket_key_generator_test.cpp
@@ -137,7 +137,7 @@ struct S2BucketKeyGeneratorTest : public unittest::Test {
 TEST_F(S2BucketKeyGeneratorTest, GetS2BucketKeys) {
     BSONObj keyPattern = fromjson("{'data.geo': '2dsphere_bucket'}");
     BSONObj genKeysFrom = fromjson(
-        "{data: {geo: {"
+        "{control: {version: 1}, data: {geo: {"
         "'0': {type: 'Point', coordinates: [0, 0]},"
         "'1': {type: 'Point', coordinates: [3, 3]}"
         "}}}");
@@ -165,7 +165,7 @@ TEST_F(S2BucketKeyGeneratorTest, GetS2BucketKeys) {
 TEST_F(S2BucketKeyGeneratorTest, GetS2BucketKeysSubField) {
     BSONObj keyPattern = fromjson("{'data.geo.sub': '2dsphere_bucket'}");
     BSONObj genKeysFrom = fromjson(
-        "{data: {geo: {"
+        "{control: {version: 1}, data: {geo: {"
         "'0': {sub: {type: 'Point', coordinates: [0, 0]}},"
         "'1': {sub: {type: 'Point', coordinates: [3, 3]}}"
         "}}}");
@@ -194,7 +194,7 @@ TEST_F(S2BucketKeyGeneratorTest, GetS2BucketKeysSubField) {
 TEST_F(S2BucketKeyGeneratorTest, GetS2BucketKeysDeepSubField) {
     BSONObj keyPattern = fromjson("{'data.geo.sub1.sub2.sub3': '2dsphere_bucket'}");
     BSONObj genKeysFrom = fromjson(
-        "{data: {geo: {"
+        "{control: {version: 1}, data: {geo: {"
         "'0': {sub1: {sub2: {sub3: {type: 'Point', coordinates: [0, 0]}}}},"
         "'1': {sub1: {sub2: {sub3: {type: 'Point', coordinates: [3, 3]}}}}"
         "}}}");
@@ -223,7 +223,7 @@ TEST_F(S2BucketKeyGeneratorTest, GetS2BucketKeysDeepSubField) {
 TEST_F(S2BucketKeyGeneratorTest, GetS2BucketKeysSubFieldSomeMissing) {
     BSONObj keyPattern = fromjson("{'data.geo.sub': '2dsphere_bucket'}");
     BSONObj genKeysFrom = fromjson(
-        "{data: {geo: {"
+        "{control: {version: 1}, data: {geo: {"
         "'0': {sub: {type: 'Point', coordinates: [0, 0]}},"
         "'1': {sub: {}},"
         "'2': {sub: null},"
diff --git a/src/mongo/db/timeseries/SConscript b/src/mongo/db/timeseries/SConscript
index 519725d38b4..aa103c25a8b 100644
--- a/src/mongo/db/timeseries/SConscript
+++ b/src/mongo/db/timeseries/SConscript
@@ -88,12 +88,14 @@ env.Library(
     LIBDEPS_PRIVATE=[
         '$BUILD_DIR/mongo/base',
         '$BUILD_DIR/mongo/bson/mutable/mutable_bson',
+        '$BUILD_DIR/mongo/bson/util/bson_column',
         '$BUILD_DIR/mongo/db/exec/bucket_unpacker',
         '$BUILD_DIR/mongo/db/index_commands_idl',
         '$BUILD_DIR/mongo/db/index_names',
         '$BUILD_DIR/mongo/db/namespace_string',
         '$BUILD_DIR/mongo/db/ops/write_ops_parsers',
         '$BUILD_DIR/mongo/db/storage/storage_options',
+        'bucket_compression',
         'timeseries_options',
     ],
 )
@@ -130,6 +132,7 @@ env.CppUnitTest(
         '$BUILD_DIR/mongo/db/catalog/catalog_test_fixture',
         '$BUILD_DIR/mongo/db/catalog_raii',
         'bucket_catalog',
+        'bucket_compression',
         'timeseries_conversion_util',
         'timeseries_options',
     ],
diff --git a/src/mongo/db/timeseries/bucket_compression.cpp b/src/mongo/db/timeseries/bucket_compression.cpp
index 2f8eefd3a84..7d6e6961687 100644
--- a/src/mongo/db/timeseries/bucket_compression.cpp
+++ b/src/mongo/db/timeseries/bucket_compression.cpp
@@ -342,5 +342,26 @@ CompressionResult compressBucket(const BSONObj& bucketDoc,
     return {};
 }
 
+bool isCompressedBucket(const BSONObj& bucketDoc) {
+    auto&& controlField = bucketDoc[timeseries::kBucketControlFieldName];
+    uassert(6540600,
+            "Time-series bucket documents must have 'control' object present",
+            controlField && controlField.type() == BSONType::Object);
+
+    auto&& versionField = controlField.Obj()[timeseries::kBucketControlVersionFieldName];
+    uassert(6540601,
+            "Time-series bucket documents must have 'control.version' field present",
+            versionField && isNumericBSONType(versionField.type()));
+    auto version = versionField.Number();
+
+    if (version == 1) {
+        return false;
+    } else if (version == 2) {
+        return true;
+    } else {
+        uasserted(6540602, "Invalid bucket version");
+    }
+}
+
 }  // namespace timeseries
 }  // namespace mongo
diff --git a/src/mongo/db/timeseries/bucket_compression.h b/src/mongo/db/timeseries/bucket_compression.h
index 7921bca1227..e70bec965d7 100644
--- a/src/mongo/db/timeseries/bucket_compression.h
+++ b/src/mongo/db/timeseries/bucket_compression.h
@@ -60,5 +60,10 @@ CompressionResult compressBucket(const BSONObj& bucketDoc,
                                  const NamespaceString& nss,
                                  bool validateDecompression);
 
+/**
+ * Returns whether a timeseries bucket has been compressed to the v2 format.
+ */
+bool isCompressedBucket(const BSONObj& bucketDoc);
+
 }  // namespace timeseries
 }  // namespace mongo
diff --git a/src/mongo/db/timeseries/timeseries_dotted_path_support.cpp b/src/mongo/db/timeseries/timeseries_dotted_path_support.cpp
index fc388e5fa0c..b28e198429e 100644
--- a/src/mongo/db/timeseries/timeseries_dotted_path_support.cpp
+++ b/src/mongo/db/timeseries/timeseries_dotted_path_support.cpp
@@ -37,6 +37,8 @@
 #include "mongo/bson/bsonmisc.h"
 #include "mongo/bson/bsonobj.h"
 #include "mongo/bson/bsonobjbuilder.h"
+#include "mongo/bson/util/bsoncolumn.h"
+#include "mongo/db/timeseries/bucket_compression.h"
 #include "mongo/db/timeseries/timeseries_constants.h"
 #include "mongo/util/ctype.h"
 
@@ -58,93 +60,130 @@ boost::optional<std::pair<StringData, StringData>> _splitPath(StringData path) {
     return std::make_pair(left, next);
 }
 
-template <typename BSONElementColl>
-void _extractAllElementsAlongBucketPath(const BSONObj& obj,
-                                        StringData path,
-                                        BSONElementColl& elements,
-                                        bool expandArrayOnTrailingField,
-                                        BSONDepthIndex depth,
-                                        MultikeyComponents* arrayComponents) {
-    auto handleElement = [&](BSONElement elem, StringData path) -> void {
-        if (elem.eoo()) {
-            size_t idx = path.find('.');
-            if (idx != std::string::npos) {
-                invariant(depth != std::numeric_limits<BSONDepthIndex>::max());
-                StringData left = path.substr(0, idx);
-                StringData next = path.substr(idx + 1, path.size());
+void _handleElementForExtractAllElementsOnBucketPath(const BSONObj& obj,
+                                                     BSONElement elem,
+                                                     StringData path,
+                                                     BSONElementSet& elements,
+                                                     bool expandArrayOnTrailingField,
+                                                     BSONDepthIndex depth,
+                                                     MultikeyComponents* arrayComponents) {
+    if (elem.eoo()) {
+        size_t idx = path.find('.');
+        if (idx != std::string::npos) {
+            invariant(depth != std::numeric_limits<BSONDepthIndex>::max());
+            StringData left = path.substr(0, idx);
+            StringData next = path.substr(idx + 1, path.size());
 
-                BSONElement e = obj.getField(left);
+            BSONElement e = obj.getField(left);
 
-                if (e.type() == Object) {
-                    _extractAllElementsAlongBucketPath(e.embeddedObject(),
-                                                       next,
-                                                       elements,
-                                                       expandArrayOnTrailingField,
-                                                       depth + 1,
-                                                       arrayComponents);
-                } else if (e.type() == Array) {
-                    bool allDigits = false;
-                    if (next.size() > 0 && ctype::isDigit(next[0])) {
-                        unsigned temp = 1;
-                        while (temp < next.size() && ctype::isDigit(next[temp]))
-                            temp++;
-                        allDigits = temp == next.size() || next[temp] == '.';
-                    }
-                    if (allDigits) {
-                        _extractAllElementsAlongBucketPath(e.embeddedObject(),
-                                                           next,
-                                                           elements,
-                                                           expandArrayOnTrailingField,
-                                                           depth + 1,
-                                                           arrayComponents);
-                    } else {
-                        BSONObjIterator i(e.embeddedObject());
-                        while (i.more()) {
-                            BSONElement e2 = i.next();
-                            if (e2.type() == Object || e2.type() == Array)
-                                _extractAllElementsAlongBucketPath(e2.embeddedObject(),
-                                                                   next,
-                                                                   elements,
-                                                                   expandArrayOnTrailingField,
-                                                                   depth + 1,
-                                                                   arrayComponents);
-                        }
-                        if (arrayComponents) {
-                            arrayComponents->insert(depth);
+            if (e.type() == Object) {
+                BSONObj embedded = e.embeddedObject();
+                _handleElementForExtractAllElementsOnBucketPath(embedded,
+                                                                embedded.getField(next),
+                                                                next,
+                                                                elements,
+                                                                expandArrayOnTrailingField,
+                                                                depth + 1,
+                                                                arrayComponents);
+            } else if (e.type() == Array) {
+                bool allDigits = false;
+                if (next.size() > 0 && ctype::isDigit(next[0])) {
+                    unsigned temp = 1;
+                    while (temp < next.size() && ctype::isDigit(next[temp]))
+                        temp++;
+                    allDigits = temp == next.size() || next[temp] == '.';
+                }
+                if (allDigits) {
+                    BSONObj embedded = e.embeddedObject();
+                    _handleElementForExtractAllElementsOnBucketPath(embedded,
+                                                                    embedded.getField(next),
+                                                                    next,
+                                                                    elements,
+                                                                    expandArrayOnTrailingField,
+                                                                    depth + 1,
+                                                                    arrayComponents);
+                } else {
+                    BSONObjIterator i(e.embeddedObject());
+                    while (i.more()) {
+                        BSONElement e2 = i.next();
+                        if (e2.type() == Object || e2.type() == Array) {
+                            BSONObj embedded = e2.embeddedObject();
+                            _handleElementForExtractAllElementsOnBucketPath(
+                                embedded,
+                                embedded.getField(next),
+                                next,
+                                elements,
+                                expandArrayOnTrailingField,
+                                depth + 1,
+                                arrayComponents);
                         }
                     }
-                } else {
-                    // do nothing: no match
-                }
-            }
-        } else {
-            if (elem.type() == Array && expandArrayOnTrailingField) {
-                BSONObjIterator i(elem.embeddedObject());
-                while (i.more()) {
-                    elements.insert(i.next());
-                }
-                if (arrayComponents) {
-                    arrayComponents->insert(depth);
+                    if (arrayComponents) {
+                        arrayComponents->insert(depth);
+                    }
                 }
             } else {
-                elements.insert(elem);
+                // do nothing: no match
             }
         }
-    };
+    } else {
+        if (elem.type() == Array && expandArrayOnTrailingField) {
+            BSONObjIterator i(elem.embeddedObject());
+            while (i.more()) {
+                elements.insert(i.next());
+            }
+            if (arrayComponents) {
+                arrayComponents->insert(depth);
+            }
+        } else {
+            elements.insert(elem);
+        }
+    }
+}
 
+boost::optional<BSONColumn> _extractAllElementsAlongBucketPath(
+    const BSONObj& obj,
+    StringData path,
+    BSONElementSet& elements,
+    bool expandArrayOnTrailingField,
+    bool isCompressed,
+    BSONDepthIndex depth,
+    MultikeyComponents* arrayComponents) {
     switch (depth) {
         case 0:
         case 1: {
             if (auto res = _splitPath(path)) {
                 auto& [left, next] = *res;
                 BSONElement e = obj.getField(left);
-                if (e.type() == Object && (depth > 0 || left == timeseries::kBucketDataFieldName)) {
-                    _extractAllElementsAlongBucketPath(e.embeddedObject(),
-                                                       next,
-                                                       elements,
-                                                       expandArrayOnTrailingField,
-                                                       depth + 1,
-                                                       arrayComponents);
+                if (depth > 0 || left == timeseries::kBucketDataFieldName) {
+                    if (e.type() == Object) {
+                        return _extractAllElementsAlongBucketPath(e.embeddedObject(),
+                                                                  next,
+                                                                  elements,
+                                                                  expandArrayOnTrailingField,
+                                                                  isCompressed,
+                                                                  depth + 1,
+                                                                  arrayComponents);
+                    } else if (isCompressed && e.type() == BinData) {
+                        // Unbucketing magic happens here for nested measurement fields (i.e.
+                        // data.a.b) in compressed buckets.
+                        BSONColumn storage{e};
+                        for (const BSONElement& e2 : storage) {
+                            if (!e2.eoo()) {
+                                BSONObj embedded =
+                                    e2.isABSONObj() ? e2.embeddedObject() : BSONObj();
+                                _handleElementForExtractAllElementsOnBucketPath(
+                                    embedded,
+                                    embedded.getField(next),
+                                    next,
+                                    elements,
+                                    expandArrayOnTrailingField,
+                                    depth,
+                                    arrayComponents);
+                            }
+                        }
+                        return std::move(storage);
+                    }
                 }
             } else {
                 BSONElement e = obj.getField(path);
@@ -153,32 +192,55 @@ void _extractAllElementsAlongBucketPath(const BSONObj& obj,
                                                        StringData(),
                                                        elements,
                                                        expandArrayOnTrailingField,
+                                                       isCompressed,
                                                        depth + 1,
                                                        arrayComponents);
+                } else if (isCompressed && BinData == e.type()) {
+                    // Unbucketing magic happens here for top-level measurement fields (i.e. data.a)
+                    // in compressed buckets.
+                    invariant(depth == 1);
+                    BSONColumn storage{e};
+                    for (const BSONElement& e2 : storage) {
+                        if (!e2.eoo()) {
+                            BSONObj embedded = e2.isABSONObj() ? e2.embeddedObject() : BSONObj();
+                            _handleElementForExtractAllElementsOnBucketPath(
+                                embedded,
+                                e2,
+                                ""_sd,
+                                elements,
+                                expandArrayOnTrailingField,
+                                depth,
+                                arrayComponents);
+                        }
+                    }
+                    return std::move(storage);
                 }
             }
             break;
         }
         case 2: {
-            // Unbucketing magic happens here.
+            // Unbucketing magic happens here for uncompressed buckets.
+            invariant(!isCompressed);
             for (const BSONElement& e : obj) {
                 std::string subPath = e.fieldName();
                 if (!path.empty()) {
                     subPath.append("." + path);
                 }
-                BSONElement sub = obj.getField(subPath);
-                handleElement(sub, subPath);
+                _handleElementForExtractAllElementsOnBucketPath(obj,
+                                                                obj.getField(subPath),
+                                                                subPath,
+                                                                elements,
+                                                                expandArrayOnTrailingField,
+                                                                depth,
+                                                                arrayComponents);
             }
             break;
         }
-        default: {
-            BSONElement e = obj.getField(path);
-            handleElement(e, path);
-            break;
-        }
+        default: { MONGO_UNREACHABLE; }
     }
-}
 
+    return boost::none;
+}
 
 bool _haveArrayAlongBucketDataPath(const BSONObj& obj, StringData path, BSONDepthIndex depth);
 
@@ -198,7 +260,9 @@ bool _handleElementForHaveArrayAlongBucketDataPath(const BSONObj& obj,
             BSONElement e = obj.getField(left);
 
             if (e.type() == Object) {
-                return _haveArrayAlongBucketDataPath(e.embeddedObject(), next, depth + 1);
+                auto embedded = e.embeddedObject();
+                return _handleElementForHaveArrayAlongBucketDataPath(
+                    embedded, embedded.getField(next), next, depth + 1);
             } else if (e.type() == Array) {
                 return true;
             } else {
@@ -214,27 +278,56 @@ bool _handleElementForHaveArrayAlongBucketDataPath(const BSONObj& obj,
     return false;
 }
 
-bool _haveArrayAlongBucketDataPath(const BSONObj& obj, StringData path, BSONDepthIndex depth) {
+bool _haveArrayAlongBucketDataPath(const BSONObj& obj,
+                                   StringData path,
+                                   bool isCompressed,
+                                   BSONDepthIndex depth) {
     switch (depth) {
         case 0:
         case 1: {
             if (auto res = _splitPath(path)) {
                 auto& [left, next] = *res;
                 BSONElement e = obj.getField(left);
-                if (e.type() == Object && (depth > 0 || left == timeseries::kBucketDataFieldName)) {
-                    return _haveArrayAlongBucketDataPath(e.embeddedObject(), next, depth + 1);
+                if (depth > 0 || left == timeseries::kBucketDataFieldName) {
+                    if (e.type() == Object) {
+                        return _haveArrayAlongBucketDataPath(
+                            e.embeddedObject(), next, isCompressed, depth + 1);
+                    } else if (isCompressed && BinData == e.type()) {
+                        // Unbucketing magic happens here for nested measurement fields (i.e.
+                        // data.a.b) in compressed buckets.
+                        BSONColumn column{e};
+                        for (const BSONElement& e2 : column) {
+                            BSONObj embedded = e2.isABSONObj() ? e2.embeddedObject() : BSONObj();
+                            const bool foundArray = _handleElementForHaveArrayAlongBucketDataPath(
+                                embedded, embedded.getField(next), next, depth);
+                            if (foundArray) {
+                                return foundArray;
+                            }
+                        }
+                    }
                 }
             } else {
                 BSONElement e = obj.getField(path);
                 if (Object == e.type()) {
                     return _haveArrayAlongBucketDataPath(
-                        e.embeddedObject(), StringData(), depth + 1);
+                        e.embeddedObject(), StringData(), isCompressed, depth + 1);
+                } else if (BinData == e.type()) {
+                    // Unbucketing magic happens here for top-level measurement fields (i.e. data.a)
+                    // in compressed buckets.
+                    invariant(isCompressed && depth == 1);
+                    BSONColumn column{e};
+                    for (const BSONElement& e2 : column) {
+                        if (e2.type() == Array) {
+                            return true;
+                        }
+                    }
                 }
             }
             return false;
         }
         case 2: {
-            // Unbucketing magic happens here.
+            // Unbucketing magic happens here for uncompressed buckets.
+            invariant(!isCompressed);
             for (const BSONElement& e : obj) {
                 std::string subPath = e.fieldName();
                 if (!path.empty()) {
@@ -367,24 +460,20 @@ Decision _fieldContainsArrayData(const BSONObj& min, const BSONObj& max, StringD
 
 }  // namespace
 
-void extractAllElementsAlongBucketPath(const BSONObj& obj,
-                                       StringData path,
-                                       BSONElementSet& elements,
-                                       bool expandArrayOnTrailingField,
-                                       MultikeyComponents* arrayComponents) {
-    constexpr BSONDepthIndex initialDepth = 0;
-    _extractAllElementsAlongBucketPath(
-        obj, path, elements, expandArrayOnTrailingField, initialDepth, arrayComponents);
-}
-
-void extractAllElementsAlongBucketPath(const BSONObj& obj,
-                                       StringData path,
-                                       BSONElementMultiSet& elements,
-                                       bool expandArrayOnTrailingField,
-                                       MultikeyComponents* arrayComponents) {
+boost::optional<BSONColumn> extractAllElementsAlongBucketPath(const BSONObj& obj,
+                                                              StringData path,
+                                                              BSONElementSet& elements,
+                                                              bool expandArrayOnTrailingField,
+                                                              MultikeyComponents* arrayComponents) {
     constexpr BSONDepthIndex initialDepth = 0;
-    _extractAllElementsAlongBucketPath(
-        obj, path, elements, expandArrayOnTrailingField, initialDepth, arrayComponents);
+    const bool isCompressed = timeseries::isCompressedBucket(obj);
+    return _extractAllElementsAlongBucketPath(obj,
+                                              path,
+                                              elements,
+                                              expandArrayOnTrailingField,
+                                              isCompressed,
+                                              initialDepth,
+                                              arrayComponents);
 }
 
 bool haveArrayAlongBucketDataPath(const BSONObj& bucketObj, StringData path) {
@@ -394,7 +483,8 @@ bool haveArrayAlongBucketDataPath(const BSONObj& bucketObj, StringData path) {
     }
 
     constexpr BSONDepthIndex initialDepth = 0;
-    return _haveArrayAlongBucketDataPath(bucketObj, path, initialDepth);
+    const bool isCompressed = timeseries::isCompressedBucket(bucketObj);
+    return _haveArrayAlongBucketDataPath(bucketObj, path, isCompressed, initialDepth);
 }
 
 std::ostream& operator<<(std::ostream& s, const Decision& i) {
diff --git a/src/mongo/db/timeseries/timeseries_dotted_path_support.h b/src/mongo/db/timeseries/timeseries_dotted_path_support.h
index 266c86a9117..fa2ebba941b 100644
--- a/src/mongo/db/timeseries/timeseries_dotted_path_support.h
+++ b/src/mongo/db/timeseries/timeseries_dotted_path_support.h
@@ -29,10 +29,12 @@
 
 #pragma once
 
+#include "boost/any.hpp"
 #include <cstddef>
 
 #include "mongo/bson/bsonelement_comparator_interface.h"
 #include "mongo/bson/bsonobj.h"
+#include "mongo/bson/util/bsoncolumn.h"
 #include "mongo/db/index/multikey_paths.h"
 
 namespace mongo {
@@ -48,8 +50,15 @@ namespace dotted_path_support {
  * original document. Note that the caller should include the 'data' prefix, but omit the depth-2
  * numeric path entry that results from pivoting the data into the bucketed format.
  *
- * Other than the bucketing and unbucketing mentioned above, the function should be have roughly
- * like `mongo::dotted_path_support::extractAllElementsAlongPath'.
+ * Other than the bucketing and unbucketing mentioned above, and the return value, the function
+ * should behave roughly like `mongo::dotted_path_support::extractAllElementsAlongPath'.
+ *
+ * In the case that the input bucket has been compressed, the function may need to decompress the
+ * data in order to examine it and extract the requested data. Since the data is returned as
+ * BSONElement, which does not own the data it references, we will need to provide storage for the
+ * decompressed data that will outlive the function call so that the returned BSONElements remain
+ * valid. This storage is provided via the optional return value. It need not be examined directly,
+ * but it should not be discarded until after the extracted elements.
  *
  * An example:
  *
@@ -57,17 +66,12 @@ namespace dotted_path_support {
  *   {b: 1} and {b: 2}  would be added to the set. 'arrayComponents' would be set as
  *   std::set<size_t>{1U}.
  */
-void extractAllElementsAlongBucketPath(const BSONObj& obj,
-                                       StringData path,
-                                       BSONElementSet& elements,
-                                       bool expandArrayOnTrailingField = true,
-                                       MultikeyComponents* arrayComponents = nullptr);
-
-void extractAllElementsAlongBucketPath(const BSONObj& obj,
-                                       StringData path,
-                                       BSONElementMultiSet& elements,
-                                       bool expandArrayOnTrailingField = true,
-                                       MultikeyComponents* arrayComponents = nullptr);
+MONGO_WARN_UNUSED_RESULT_FUNCTION boost::optional<BSONColumn> extractAllElementsAlongBucketPath(
+    const BSONObj& obj,
+    StringData path,
+    BSONElementSet& elements,
+    bool expandArrayOnTrailingField = true,
+    MultikeyComponents* arrayComponents = nullptr);
 
 /**
  * Finds arrays in individual metrics along the specified data path.
diff --git a/src/mongo/db/timeseries/timeseries_dotted_path_support_test.cpp b/src/mongo/db/timeseries/timeseries_dotted_path_support_test.cpp
index 586b4361f29..638a2c8d6c4 100644
--- a/src/mongo/db/timeseries/timeseries_dotted_path_support_test.cpp
+++ b/src/mongo/db/timeseries/timeseries_dotted_path_support_test.cpp
@@ -31,7 +31,9 @@
 #include "mongo/platform/basic.h"
 
 #include "mongo/bson/bsonmisc.h"
+#include "mongo/db/timeseries/bucket_compression.h"
 #include "mongo/db/timeseries/timeseries_dotted_path_support.h"
+#include "mongo/unittest/bson_test_util.h"
 #include "mongo/unittest/unittest.h"
 
 namespace mongo {
@@ -39,12 +41,32 @@ namespace {
 
 namespace tdps = ::mongo::timeseries::dotted_path_support;
 
-TEST(TimeseriesDottedPathSupport, HaveArrayAlongBucketPath) {
-    BSONObj obj = ::mongo::fromjson(R"(
+class TimeseriesDottedPathSupportTest : public unittest::Test {
+protected:
+    void runTest(const BSONObj& obj, const std::function<void(const BSONObj&)>& test) {
+        test(obj);
+
+        NamespaceString nss{"test"};
+        auto compressionResult = timeseries::compressBucket(obj, "time", nss, true);
+        ASSERT_TRUE(compressionResult.compressedBucket.has_value());
+        ASSERT_FALSE(compressionResult.decompressionFailed);
+
+        test(compressionResult.compressedBucket.get());
+    }
+};
+
+TEST_F(TimeseriesDottedPathSupportTest, HaveArrayAlongBucketPath) {
+    BSONObj input = ::mongo::fromjson(R"(
 {
     control: {version: 1},
     bogus: [1],
     data: {
+        time: {
+            "0": {"$date": "1970-01-01T00:00:00.000Z"},
+            "1": {"$date": "1970-01-01T00:00:00.001Z"},
+            "3": {"$date": "1970-01-01T00:00:00.003Z"},
+            "99": {"$date": "1970-01-01T00:00:00.099Z"}
+        },
         a: {},
         b: [],
         c: {
@@ -70,31 +92,41 @@ TEST(TimeseriesDottedPathSupport, HaveArrayAlongBucketPath) {
                     {a: true}
                 ]
             }
+        },
+        h: {
+            "1": {
+                a: {
+                    b: true
+                }
+            }
         }
     }
 })");
 
-    // Non-data fields should always be false
-    ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "control"));
-    ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "control.version"));
-    ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "bogus"));
+    runTest(input, [this](const BSONObj& obj) {
+        // Non-data fields should always be false
+        ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "control"));
+        ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "control.version"));
+        ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "bogus"));
 
-    ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "data"));
-    ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "data.a"));
-    ASSERT_FALSE(
-        tdps::haveArrayAlongBucketDataPath(obj, "data.b"));  // bucket expansion hides array
-    ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "data.c"));
-    ASSERT_TRUE(tdps::haveArrayAlongBucketDataPath(obj, "data.d"));
-    ASSERT_TRUE(tdps::haveArrayAlongBucketDataPath(obj, "data.e"));
-    ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "data.f"));
-    ASSERT_TRUE(tdps::haveArrayAlongBucketDataPath(obj, "data.f.a"));
-    ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "data.g"));
-    ASSERT_TRUE(tdps::haveArrayAlongBucketDataPath(obj, "data.g.a"));
-    ASSERT_TRUE(tdps::haveArrayAlongBucketDataPath(obj, "data.g.a.a"));
+        ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "data"));
+        ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "data.a"));
+        ASSERT_FALSE(
+            tdps::haveArrayAlongBucketDataPath(obj, "data.b"));  // bucket expansion hides array
+        ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "data.c"));
+        invariant(tdps::haveArrayAlongBucketDataPath(obj, "data.d"));
+        ASSERT_TRUE(tdps::haveArrayAlongBucketDataPath(obj, "data.e"));
+        ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "data.f"));
+        ASSERT_TRUE(tdps::haveArrayAlongBucketDataPath(obj, "data.f.a"));
+        ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "data.g"));
+        ASSERT_TRUE(tdps::haveArrayAlongBucketDataPath(obj, "data.g.a"));
+        ASSERT_TRUE(tdps::haveArrayAlongBucketDataPath(obj, "data.g.a.a"));
+        ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "data.h.a.b"));
+    });
 }
 
-TEST(TimeseriesDottedPathSupport, fieldContainsArrayData) {
-    BSONObj obj = ::mongo::fromjson(R"(
+TEST_F(TimeseriesDottedPathSupportTest, fieldContainsArrayData) {
+    BSONObj input = ::mongo::fromjson(R"(
 {
     control: {
         min: {
@@ -169,112 +201,213 @@ TEST(TimeseriesDottedPathSupport, fieldContainsArrayData) {
                 g: true
             }
         }
+    },
+    data: {
+        time: {
+            "0": {"$date": "1970-01-01T00:00:00.000Z"}
+        }
     }
 })");
 
-    // Because this function is meant as an optimization to avoid a more expensive check, we need
-    // to ensure that we don't make wrong decisions, but we can always fall through to the more
-    // expensive check. So if the *best* decision that the function could make with the information
-    // present in the control fields is "Yes", we will accept "Yes" or "Maybe". Similarly if the
-    // best decision it could make is "No", we will accept "No" or "Maybe". If there isn't enough
-    // information in the control fields to make a firm decision, then it must return "Maybe".
+    runTest(input, [this](const BSONObj& obj) {
+        // Because this function is meant as an optimization to avoid a more expensive check, we
+        // need to ensure that we don't make wrong decisions, but we can always fall through to the
+        // more expensive check. So if the *best* decision that the function could make with the
+        // information present in the control fields is "Yes", we will accept "Yes" or "Maybe".
+        // Similarly if the best decision it could make is "No", we will accept "No" or "Maybe". If
+        // there isn't enough information in the control fields to make a firm decision, then it
+        // must return "Maybe".
 
-    // A few assumptions about type orders necessary to understand the below tests:
-    //    eoo < double < array < bool
+        // A few assumptions about type orders necessary to understand the below tests:
+        //    eoo < double < array < bool
 
-    constexpr auto yes = tdps::Decision::Yes;
-    constexpr auto no = tdps::Decision::No;
-    constexpr auto maybe = tdps::Decision::Maybe;
+        constexpr auto yes = tdps::Decision::Yes;
+        constexpr auto no = tdps::Decision::No;
+        constexpr auto maybe = tdps::Decision::Maybe;
 
-    // a: {min: double, max: bool},
-    ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "a"));
+        // a: {min: double, max: bool},
+        ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "a"));
 
-    // b: {min: bool, max: bool}
-    ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "b"));
+        // b: {min: bool, max: bool}
+        ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "b"));
 
-    // c: {min: double, max: double}
-    ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "c"));
+        // c: {min: double, max: double}
+        ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "c"));
 
-    // d: {min: double, max: array}
-    ASSERT_NE(no, tdps::fieldContainsArrayData(obj, "d"));
+        // d: {min: double, max: array}
+        ASSERT_NE(no, tdps::fieldContainsArrayData(obj, "d"));
 
-    // e: {min: array, max: bool}
-    ASSERT_NE(no, tdps::fieldContainsArrayData(obj, "e"));
+        // e: {min: array, max: bool}
+        ASSERT_NE(no, tdps::fieldContainsArrayData(obj, "e"));
 
-    // f: {min: array, max: array}
-    ASSERT_NE(no, tdps::fieldContainsArrayData(obj, "f"));
+        // f: {min: array, max: array}
+        ASSERT_NE(no, tdps::fieldContainsArrayData(obj, "f"));
 
-    // g: {min: double, max: object}
-    ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "g"));
-    // g.a: {min: double.eoo, max: object.bool}
-    ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "g.a"));
-    // g.b: {min: double.eoo, max: object.double}
-    ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "g.b"));
-    // g.c: {min: double.eoo, max: object.array}
-    ASSERT_NE(no, tdps::fieldContainsArrayData(obj, "g.c"));
-    // g.d: {min: double.eoo, max: object.eoo}
-    ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "g.d"));
+        // g: {min: double, max: object}
+        ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "g"));
+        // g.a: {min: double.eoo, max: object.bool}
+        ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "g.a"));
+        // g.b: {min: double.eoo, max: object.double}
+        ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "g.b"));
+        // g.c: {min: double.eoo, max: object.array}
+        ASSERT_NE(no, tdps::fieldContainsArrayData(obj, "g.c"));
+        // g.d: {min: double.eoo, max: object.eoo}
+        ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "g.d"));
 
-    // h: {min: object, max: bool}
-    ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "h"));
-    // h.a: {min: object.bool, max: bool.eoo}
-    ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "h.a"));
-    // h.b: {min: object.double, max: bool.eoo}
-    ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "h.b"));
-    // h.c: {min: object.array, max: bool.eoo}
-    ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "h.c"));
-    // h.d: {min: object.eoo, max: bool.eoo}
-    ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "h.d"));
+        // h: {min: object, max: bool}
+        ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "h"));
+        // h.a: {min: object.bool, max: bool.eoo}
+        ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "h.a"));
+        // h.b: {min: object.double, max: bool.eoo}
+        ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "h.b"));
+        // h.c: {min: object.array, max: bool.eoo}
+        ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "h.c"));
+        // h.d: {min: object.eoo, max: bool.eoo}
+        ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "h.d"));
 
-    // i: {min: object, max: object}
-    ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i"));
-    // i.a: {min: object.double, max: object.bool}
-    ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.a"));
-    // i.b: {min: object.array, max: object.array}
-    ASSERT_NE(no, tdps::fieldContainsArrayData(obj, "i.b"));
-    // i.c: {min: object.bool, max: object.bool}
-    ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.c"));
-    // i.d: {min: object.object, max: object.object}
-    ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.d"));
-    // i.d.a: {min: object.object.double, max: object.object.bool}
-    ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.d.a"));
-    // i.e: {min: object.object, max: object.object}
-    ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.e"));
-    // i.e.a: {min: object.object.double, max: object.object.object}
-    ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.e.a"));
-    // i.f: {min: object.double, max: object.object}
-    ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.f"));
-    // i.f.a: {min: object.double.eoo, max: object.object.double}
-    ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.f.a"));
-    // i.f.b: {min: object.double.eoo, max: object.object.object}
-    ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.f.b"));
-    // i.f.c: {min: object.double.eoo, max: object.object.object}
-    ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.f.c"));
-    // i.f.c.a: {min: object.double.eoo.eoo, max: object.object.object.double}
-    ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.f.c.a"));
-    // i.f.c.b: {min: object.double.eoo.eoo, max: object.object.object.bool}
-    ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.f.c.b"));
-    // i.f.d: {min: object.double.eoo, max: object.object.array}
-    ASSERT_NE(no, tdps::fieldContainsArrayData(obj, "i.f.d"));
-    // i.f.e: {min: object.double.eoo, max: object.object.bool}
-    ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.f.e"));
-    // i.g: {min: object.object, max: object.bool}
-    ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.g"));
-    // i.g.a: {min: object.object.double, max: object.bool.eoo}
-    ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.g.a"));
-    // i.g.b: {min: object.object.object, max: object.bool.eoo}
-    ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.g.b"));
-    // i.g.c: {min: object.object.object, max: object.bool.eoo}
-    ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.g.c"));
-    // i.g.c.a: {min: object.object.object.double, max: object.bool.eoo.eoo}
-    ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.g.c.a"));
-    // i.g.c.b: {min: object.object.object.bool, max: object.bool.eoo.eoo}
-    ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.g.c.b"));
-    // i.g.d: {min: object.object.array, max: object.bool.eoo}
-    ASSERT_NE(no, tdps::fieldContainsArrayData(obj, "i.g.d"));
-    // i.g.e: {min: object.object.bool, max: object.bool.eoo}
-    ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.g.e"));
+        // i: {min: object, max: object}
+        ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i"));
+        // i.a: {min: object.double, max: object.bool}
+        ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.a"));
+        // i.b: {min: object.array, max: object.array}
+        ASSERT_NE(no, tdps::fieldContainsArrayData(obj, "i.b"));
+        // i.c: {min: object.bool, max: object.bool}
+        ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.c"));
+        // i.d: {min: object.object, max: object.object}
+        ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.d"));
+        // i.d.a: {min: object.object.double, max: object.object.bool}
+        ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.d.a"));
+        // i.e: {min: object.object, max: object.object}
+        ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.e"));
+        // i.e.a: {min: object.object.double, max: object.object.object}
+        ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.e.a"));
+        // i.f: {min: object.double, max: object.object}
+        ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.f"));
+        // i.f.a: {min: object.double.eoo, max: object.object.double}
+        ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.f.a"));
+        // i.f.b: {min: object.double.eoo, max: object.object.object}
+        ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.f.b"));
+        // i.f.c: {min: object.double.eoo, max: object.object.object}
+        ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.f.c"));
+        // i.f.c.a: {min: object.double.eoo.eoo, max: object.object.object.double}
+        ASSERT_NE(yes, tdps::fieldContainsArrayData(obj, "i.f.c.a"));
+        // i.f.c.b: {min: object.double.eoo.eoo, max: object.object.object.bool}
+        ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.f.c.b"));
+        // i.f.d: {min: object.double.eoo, max: object.object.array}
+        ASSERT_NE(no, tdps::fieldContainsArrayData(obj, "i.f.d"));
+        // i.f.e: {min: object.double.eoo, max: object.object.bool}
+        ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.f.e"));
+        // i.g: {min: object.object, max: object.bool}
+        ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.g"));
+        // i.g.a: {min: object.object.double, max: object.bool.eoo}
+        ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.g.a"));
+        // i.g.b: {min: object.object.object, max: object.bool.eoo}
+        ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.g.b"));
+        // i.g.c: {min: object.object.object, max: object.bool.eoo}
+        ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.g.c"));
+        // i.g.c.a: {min: object.object.object.double, max: object.bool.eoo.eoo}
+        ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.g.c.a"));
+        // i.g.c.b: {min: object.object.object.bool, max: object.bool.eoo.eoo}
+        ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.g.c.b"));
+        // i.g.d: {min: object.object.array, max: object.bool.eoo}
+        ASSERT_NE(no, tdps::fieldContainsArrayData(obj, "i.g.d"));
+        // i.g.e: {min: object.object.bool, max: object.bool.eoo}
+        ASSERT_EQ(maybe, tdps::fieldContainsArrayData(obj, "i.g.e"));
+    });
 }
 
+TEST_F(TimeseriesDottedPathSupportTest, ExtractAllElementsAlongBucketPath) {
+    BSONObj input = ::mongo::fromjson(R"(
+{
+    control: {version: 1},
+    data: {
+        time: {
+            "0": {"$date": "1970-01-01T00:00:00.000Z"},
+            "1": {"$date": "1970-01-01T00:00:00.001Z"},
+            "3": {"$date": "1970-01-01T00:00:00.003Z"},
+            "99": {"$date": "1970-01-01T00:00:00.099Z"}
+        },
+        a: {},
+        b: [],
+        c: {
+            "0": true,
+            "1": false
+        },
+        d: {
+            "0": false,
+            "1": []
+        },
+        e: {
+            "3": "foo",
+            "99": [1, 2]
+        },
+        f: {
+            "1": {
+                a: [true, false]
+            }
+        },
+        g: {
+            "1": {
+                a: [
+                    {a: true}
+                ]
+            }
+        },
+        h: {
+            "1": {
+                a: {
+                    b: true
+                }
+            },
+            "3": {
+                a: {
+                    b: false
+                }
+            }
+        }
+    }
+})");
+
+    runTest(input, [this](const BSONObj& obj) {
+        auto assertExtractionMatches = [&](StringData path, const BSONArray expectedStorage) {
+            BSONElementSet actual;
+            auto actualStorage = tdps::extractAllElementsAlongBucketPath(obj, path, actual);
+
+            BSONElementSet expected;
+            for (auto&& el : expectedStorage) {
+                expected.emplace(el);
+            }
+
+            ASSERT_EQ(actual.size(), expected.size());
+
+            auto actualIt = actual.begin();
+            auto expectedIt = expected.begin();
+            while (actualIt != actual.end() && expectedIt != expected.end()) {
+                ASSERT_FALSE(actualIt->eoo());
+                ASSERT_FALSE(expectedIt->eoo());
+                ASSERT_EQ(actualIt->woCompare(*expectedIt, 0), 0);
+                actualIt++;
+                expectedIt++;
+            }
+        };
+
+        assertExtractionMatches("data.a"_sd, BSONArray());
+        assertExtractionMatches("data.b"_sd, BSONArray());
+        assertExtractionMatches("data.c"_sd, BSON_ARRAY(true << false));
+        assertExtractionMatches("data.d"_sd, BSON_ARRAY(false));
+        assertExtractionMatches("data.e"_sd, BSON_ARRAY("foo" << 1 << 2));
+        assertExtractionMatches("data.f"_sd, BSON_ARRAY(BSON("a" << BSON_ARRAY(true << false))));
+        assertExtractionMatches("data.f.a"_sd, BSON_ARRAY(true << false));
+        assertExtractionMatches("data.g"_sd,
+                                BSON_ARRAY(BSON("a" << BSON_ARRAY(BSON("a" << true)))));
+        assertExtractionMatches("data.g.a"_sd, BSON_ARRAY(BSON("a" << true)));
+        assertExtractionMatches("data.g.a.a"_sd, BSON_ARRAY(true));
+        assertExtractionMatches(
+            "data.h"_sd,
+            BSON_ARRAY(BSON("a" << BSON("b" << true)) << BSON("a" << BSON("b" << false))));
+        assertExtractionMatches("data.h.a"_sd, BSON_ARRAY(BSON("b" << true) << BSON("b" << false)));
+        assertExtractionMatches("data.h.a.b"_sd, BSON_ARRAY(true << false));
+    });
+}
 }  // namespace
 }  // namespace mongo
author	Dan Larkin-York <dan.larkin-york@mongodb.com>	2022-05-06 18:09:55 +0000
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2022-05-06 18:34:06 +0000
commit	c4577480b324634daf96ce685faf73f106f33e0d (patch)
tree	bf0623d7333cd52f7c28964b6c438bbbe7c5f5d3
parent	8adb070dcb43842e61da3e15415ad4e132d1655a (diff)
download	mongo-c4577480b324634daf96ce685faf73f106f33e0d.tar.gz