SERVER-66686 Optimize fetching archived buckets using OID

author: Faustoleyva54 <fausto.leyva@mongodb.com> 2022-09-21 02:22:39 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-09-21 02:53:24 +0000
commit: 361789ed8a613a2dc0335a821ead0ab6205fbdaa (patch)
tree: 042617c5229c3569ab50042f426e4eae6aa841e8
parent: ad87c2153e5c8aea754cf5fab3eb56eadd78d877 (diff)
download: mongo-361789ed8a613a2dc0335a821ead0ab6205fbdaa.tar.gz
4 files changed, 86 insertions, 0 deletions
diff --git a/src/mongo/db/timeseries/SConscript b/src/mongo/db/timeseries/SConscript
index 560eeddbc98..18b64b990bf 100644
--- a/src/mongo/db/timeseries/SConscript
+++ b/src/mongo/db/timeseries/SConscript
@@ -32,6 +32,7 @@ env.Library(
         '$BUILD_DIR/mongo/db/catalog/database_holder',
         '$BUILD_DIR/mongo/db/commands/server_status_core',
         '$BUILD_DIR/mongo/db/dbdirectclient',
+        '$BUILD_DIR/mongo/db/record_id_helpers',
         '$BUILD_DIR/mongo/db/server_base',
         '$BUILD_DIR/mongo/db/server_options_core',
         '$BUILD_DIR/mongo/db/storage/storage_options',
diff --git a/src/mongo/db/timeseries/bucket_catalog_helpers.cpp b/src/mongo/db/timeseries/bucket_catalog_helpers.cpp
index 4b0c5cf2209..8890b9d1bdc 100644
--- a/src/mongo/db/timeseries/bucket_catalog_helpers.cpp
+++ b/src/mongo/db/timeseries/bucket_catalog_helpers.cpp
@@ -29,6 +29,7 @@
 
 #include "mongo/db/timeseries/bucket_catalog_helpers.h"
 #include "mongo/db/dbdirectclient.h"
+#include "mongo/db/record_id_helpers.h"
 #include "mongo/db/timeseries/timeseries_constants.h"
 #include "mongo/logv2/redaction.h"
 
@@ -135,6 +136,14 @@ StatusWith<std::pair<Date_t, boost::optional<BSONElement>>> extractTimeAndMeta(
     return std::make_pair(time, boost::none);
 }
 
+BSONObj findDocFromOID(OperationContext* opCtx, const Collection* coll, const OID& bucketId) {
+    // Resolve the bucket via the record id key derived from its OID; empty object means no match.
+    const auto recordId = record_id_helpers::keyForOID(bucketId);
+    Snapshotted<BSONObj> snapshot;
+
+    return coll->findDoc(opCtx, recordId, &snapshot) ? snapshot.value() : BSONObj();
+}
+
 BSONObj findSuitableBucket(OperationContext* opCtx,
                            const NamespaceString& bucketNss,
                            const TimeseriesOptions& options,
diff --git a/src/mongo/db/timeseries/bucket_catalog_helpers.h b/src/mongo/db/timeseries/bucket_catalog_helpers.h
index bf1c8b2c949..141df5d308f 100644
--- a/src/mongo/db/timeseries/bucket_catalog_helpers.h
+++ b/src/mongo/db/timeseries/bucket_catalog_helpers.h
@@ -32,6 +32,7 @@
 #include "mongo/base/status_with.h"
 #include "mongo/base/string_data_comparator_interface.h"
 #include "mongo/bson/bsonobj.h"
+#include "mongo/db/catalog/collection.h"
 #include "mongo/db/timeseries/flat_bson.h"
 #include "mongo/db/timeseries/timeseries_options.h"
 
@@ -63,6 +64,12 @@ StatusWith<Schema> generateSchemaFromBucketDoc(const BSONObj& bucketDoc,
 StatusWith<std::pair<Date_t, boost::optional<BSONElement>>> extractTimeAndMeta(
     const BSONObj& doc, const TimeseriesOptions& options);
 
+
+/**
+ * Fetches the document keyed by 'bucketId' from 'coll'; returns an empty BSONObj if none exists.
+ */
+BSONObj findDocFromOID(OperationContext* opCtx, const Collection* coll, const OID& bucketId);
+
 /**
  * Executes a 'find' query on the timeseries bucket collection to find a bucket eligible to
  * receive a new measurement specified by a document's metadata and timestamp (measurementTs).
diff --git a/src/mongo/db/timeseries/bucket_catalog_helpers_test.cpp b/src/mongo/db/timeseries/bucket_catalog_helpers_test.cpp
index 54356c1cd1f..695a5f9c89d 100644
--- a/src/mongo/db/timeseries/bucket_catalog_helpers_test.cpp
+++ b/src/mongo/db/timeseries/bucket_catalog_helpers_test.cpp
@@ -485,5 +485,74 @@ TEST_F(BucketCatalogHelpersTest, IncompatibleBucketsForNewMeasurements) {
     }
 }
 
+// Checks that timeseries::findDocFromOID() returns the stored bucket document for an OID that
+// exists in the buckets collection, and an empty BSONObj for OIDs that were never inserted.
+TEST_F(BucketCatalogHelpersTest, FindDocumentFromOID) {
+    ASSERT_OK(createCollection(
+        operationContext(),
+        kNss.dbName(),
+        BSON("create" << kNss.coll() << "timeseries"
+                      << BSON("timeField" << _timeField << "metaField" << _metaField))));
+
+    AutoGetCollection autoColl(operationContext(), kNss.makeTimeseriesBucketsNamespace(), MODE_IX);
+    ASSERT(autoColl->getTimeseriesOptions() && autoColl->getTimeseriesOptions()->getMetaField());
+
+    std::vector<BSONObj> bucketDocs = {mongo::fromjson(
+                                           R"({
+            "_id":{"$oid":"62e7e6ec27c28d338ab29200"},
+            "control":{"version":1,"min":{"_id":1,"time":{"$date":"2021-08-01T11:00:00Z"},"a":1},
+                                   "max":{"_id":3,"time":{"$date":"2021-08-01T12:00:00Z"},"a":3},
+                       "closed":false},
+            "meta":1,
+            "data":{"time":{"0":{"$date":"2021-08-01T11:00:00Z"},
+                            "1":{"$date":"2021-08-01T11:00:00Z"},
+                            "2":{"$date":"2021-08-01T11:00:00Z"}},
+                    "a":{"0":1,"1":2,"2":3}}})"),
+                                       mongo::fromjson(
+                                           R"(
+            {"_id":{"$oid":"62e7eee4f33f295800073138"},
+            "control":{"version":1,"min":{"_id":7,"time":{"$date":"2022-08-01T12:00:00Z"},"a":1},
+                                   "max":{"_id":10,"time":{"$date":"2022-08-01T13:00:00Z"},"a":3}},
+            "meta":2,
+            "data":{"time":{"0":{"$date":"2022-08-01T12:00:00Z"},
+                            "1":{"$date":"2022-08-01T12:00:00Z"},
+                            "2":{"$date":"2022-08-01T12:00:00Z"}},
+                    "a":{"0":1,"1":2,"2":3}}})"),
+                                       mongo::fromjson(
+                                           R"({
+            "_id":{"$oid":"629e1e680958e279dc29a517"},
+            "control":{"version":1,"min":{"_id":7,"time":{"$date":"2023-08-01T13:00:00Z"},"a":1},
+                                   "max":{"_id":10,"time":{"$date":"2023-08-01T14:00:00Z"},"a":3},
+                       "closed":false},
+            "meta":3,
+            "data":{"time":{"0":{"$date":"2023-08-01T13:00:00Z"},
+                            "1":{"$date":"2023-08-01T13:00:00Z"},
+                            "2":{"$date":"2023-08-01T13:00:00Z"}},
+                    "a":{"0":1,"1":2,"2":3}}})")};
+    // Insert bucket documents into the system.buckets collection.
+    for (const auto& doc : bucketDocs) {
+        _insertIntoBucketColl(doc);
+    }
+    // Given a valid OID for a bucket document, we should be able to retrieve the full bucket
+    // document.
+    for (const auto& doc : bucketDocs) {
+        const auto bucketId = doc["_id"].OID();
+        auto retrievedBucket =
+            timeseries::findDocFromOID(operationContext(), (*autoColl).get(), bucketId);
+        ASSERT(!retrievedBucket.isEmpty());
+        ASSERT_BSONOBJ_EQ(retrievedBucket, doc);
+    }
+
+    // For OIDs that match none of the inserted buckets, the lookup must return an empty object.
+    std::vector<OID> nonExistentOIDs = {OID("26e7e6ec27c28d338ab29200"),
+                                        OID("90e7e6ec27c28d338ab29200"),
+                                        OID("00e7e6ec27c28d338ab29200")};
+    for (const auto& oid : nonExistentOIDs) {
+        auto retrievedBucket =
+            timeseries::findDocFromOID(operationContext(), (*autoColl).get(), oid);
+        ASSERT(retrievedBucket.isEmpty());
+    }
+}
+
 }  // namespace
 }  // namespace mongo
author	Faustoleyva54 <fausto.leyva@mongodb.com>	2022-09-21 02:22:39 +0000
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2022-09-21 02:53:24 +0000
commit	361789ed8a613a2dc0335a821ead0ab6205fbdaa (patch)
tree	042617c5229c3569ab50042f426e4eae6aa841e8
parent	ad87c2153e5c8aea754cf5fab3eb56eadd78d877 (diff)
download	mongo-361789ed8a613a2dc0335a821ead0ab6205fbdaa.tar.gz