SERVER-60537 Support clustering non-explicitly replicated collections by arbitrary keys

author: Josef Ahmad <josef.ahmad@mongodb.com> 2021-10-25 10:52:16 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-10-25 11:38:11 +0000
commit: 11e1a414386c0d4c670dfc306b8fdfd52a9e68e0 (patch)
tree: 8510195e51449a0c6db4ae3ae0419f1afde79e5b /src/mongo
parent: 3b168ad0f99534125d7457aa34a743a3cdce0765 (diff)
download: mongo-11e1a414386c0d4c670dfc306b8fdfd52a9e68e0.tar.gz
19 files changed, 127 insertions, 46 deletions
diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript
index f1ac5d97bed..2864d3b020f 100644
--- a/src/mongo/db/SConscript
+++ b/src/mongo/db/SConscript
@@ -1197,6 +1197,7 @@ env.Library(
         '$BUILD_DIR/mongo/base',
     ],
     LIBDEPS_PRIVATE=[
+        '$BUILD_DIR/mongo/db/catalog/clustered_collection_options',
         '$BUILD_DIR/mongo/db/storage/key_string',
     ],)
 
diff --git a/src/mongo/db/catalog/clustered_collection_util.cpp b/src/mongo/db/catalog/clustered_collection_util.cpp
index 3d9a63a129a..23ab7550f00 100644
--- a/src/mongo/db/catalog/clustered_collection_util.cpp
+++ b/src/mongo/db/catalog/clustered_collection_util.cpp
@@ -36,6 +36,17 @@ namespace clustered_util {
 
 static constexpr StringData kDefaultClusteredIndexName = "_id_"_sd;
 
+// Assigns a default name to a nameless spec: "_id_" when keyed on _id, else "<field>_1".
+void ensureClusteredIndexName(ClusteredIndexSpec& indexSpec) {
+    if (indexSpec.getName()) {
+        return;  // An explicitly provided name is kept as is.
+    }
+
+    const auto keyField = indexSpec.getKey().firstElement().fieldNameStringData();
+    const bool isIdKey = (keyField == "_id");
+    indexSpec.setName(isIdKey ? kDefaultClusteredIndexName : StringData(keyField + "_1"));
+}
+
 ClusteredCollectionInfo makeCanonicalClusteredInfoForLegacyFormat() {
     auto indexSpec = ClusteredIndexSpec{BSON("_id" << 1), true};
     indexSpec.setName(kDefaultClusteredIndexName);
@@ -43,9 +54,7 @@ ClusteredCollectionInfo makeCanonicalClusteredInfoForLegacyFormat() {
 }
 
 ClusteredCollectionInfo makeCanonicalClusteredInfo(ClusteredIndexSpec indexSpec) {
-    if (!indexSpec.getName()) {
-        indexSpec.setName(kDefaultClusteredIndexName);
-    }
+    ensureClusteredIndexName(indexSpec);
     return ClusteredCollectionInfo(std::move(indexSpec), false);
 }
 
@@ -67,6 +76,7 @@ boost::optional<ClusteredCollectionInfo> parseClusteredInfo(const BSONElement& e
     }
 
     auto indexSpec = ClusteredIndexSpec::parse({"ClusteredUtil::parseClusteredInfo"}, elem.Obj());
+    ensureClusteredIndexName(indexSpec);
     return makeCanonicalClusteredInfo(std::move(indexSpec));
 }
 
@@ -81,5 +91,15 @@ BSONObj formatClusterKeyForListIndexes(const ClusteredCollectionInfo& collInfo)
     return bob.obj();
 }
 
+bool matchesClusterKey(const BSONObj& obj,
+                       const boost::optional<ClusteredCollectionInfo>& collInfo) {
+    return collInfo && obj.firstElement().fieldNameStringData() ==  // boost::none => false
+        collInfo->getIndexSpec().getKey().firstElement().fieldNameStringData();
+}
+
+StringData getClusterKeyFieldName(const ClusteredIndexSpec& indexSpec) {
+    return indexSpec.getKey().firstElement().fieldNameStringData();  // first key field's name
+}
+
 }  // namespace clustered_util
 }  // namespace mongo
diff --git a/src/mongo/db/catalog/clustered_collection_util.h b/src/mongo/db/catalog/clustered_collection_util.h
index 2f45d70eb8e..84e4275cce5 100644
--- a/src/mongo/db/catalog/clustered_collection_util.h
+++ b/src/mongo/db/catalog/clustered_collection_util.h
@@ -60,5 +60,16 @@ bool requiresLegacyFormat(const NamespaceString& nss);
  */
 BSONObj formatClusterKeyForListIndexes(const ClusteredCollectionInfo& collInfo);
 
+/**
+ * Returns true if the first field name of 'obj' equals the collection's cluster key field name.
+ */
+bool matchesClusterKey(const BSONObj& obj,
+                       const boost::optional<ClusteredCollectionInfo>& collInfo);
+
+/**
+ * Returns the field name of a cluster key.
+ */
+StringData getClusterKeyFieldName(const ClusteredIndexSpec& indexSpec);
+
 }  // namespace clustered_util
 }  // namespace mongo
diff --git a/src/mongo/db/catalog/collection.h b/src/mongo/db/catalog/collection.h
index fee046ffe9e..051a1c46164 100644
--- a/src/mongo/db/catalog/collection.h
+++ b/src/mongo/db/catalog/collection.h
@@ -541,9 +541,9 @@ public:
     virtual void setTimeseriesBucketsMayHaveMixedSchemaData(OperationContext* opCtx,
                                                             boost::optional<bool> setting) = 0;
 
-    /**
-     * Returns true if this collection is clustered on _id values. That is, its RecordIds are _id
-     * values and has no separate _id index.
+    /**
+     * Returns true if this collection is clustered. That is, its RecordIds store the value of the
+     * cluster key. If the collection is clustered on _id, there is no separate _id index.
      */
     virtual bool isClustered() const = 0;
 
diff --git a/src/mongo/db/catalog/collection_impl.cpp b/src/mongo/db/catalog/collection_impl.cpp
index 77c73116010..a15a507b98c 100644
--- a/src/mongo/db/catalog/collection_impl.cpp
+++ b/src/mongo/db/catalog/collection_impl.cpp
@@ -777,7 +777,8 @@ Status CollectionImpl::insertDocumentForBulkLoader(
     RecordId recordId;
     if (isClustered()) {
         invariant(_shared->_recordStore->keyFormat() == KeyFormat::String);
-        recordId = uassertStatusOK(record_id_helpers::keyForDoc(doc));
+        recordId =
+            uassertStatusOK(record_id_helpers::keyForDoc(doc, getClusteredInfo()->getIndexSpec()));
     }
 
     // Using timestamp 0 for these inserts, which are non-oplog so we don't have an appropriate
@@ -856,7 +857,8 @@ Status CollectionImpl::_insertDocuments(OperationContext* opCtx,
         RecordId recordId;
         if (isClustered()) {
             invariant(_shared->_recordStore->keyFormat() == KeyFormat::String);
-            recordId = uassertStatusOK(record_id_helpers::keyForDoc(doc));
+            recordId = uassertStatusOK(
+                record_id_helpers::keyForDoc(doc, getClusteredInfo()->getIndexSpec()));
         }
 
         if (MONGO_unlikely(corruptDocumentOnInsert.shouldFail())) {
diff --git a/src/mongo/db/catalog/create_collection.cpp b/src/mongo/db/catalog/create_collection.cpp
index a238779222c..9c226916957 100644
--- a/src/mongo/db/catalog/create_collection.cpp
+++ b/src/mongo/db/catalog/create_collection.cpp
@@ -69,6 +69,7 @@ MONGO_FAIL_POINT_DEFINE(failPreimagesCollectionCreation);
 using IndexVersion = IndexDescriptor::IndexVersion;
 
 Status validateClusteredIndexSpec(OperationContext* opCtx,
+                                  const NamespaceString& nss,
                                   const ClusteredIndexSpec& spec,
                                   boost::optional<int64_t> expireAfterSeconds) {
     if (!spec.getUnique()) {
@@ -76,9 +77,33 @@ Status validateClusteredIndexSpec(OperationContext* opCtx,
                       "The clusteredIndex option requires unique: true to be specified");
     }
 
-    if (SimpleBSONObjComparator::kInstance.evaluate(spec.getKey() != BSON("_id" << 1))) {
+    bool clusterKeyOnId =
+        SimpleBSONObjComparator::kInstance.evaluate(spec.getKey() == BSON("_id" << 1));
+    if (nss.isReplicated() && !clusterKeyOnId) {
         return Status(ErrorCodes::Error(5979701),
-                      "The clusteredIndex option is only supported for key: {_id: 1}");
+                      "The clusteredIndex option is only supported for key: {_id: 1} on replicated "
+                      "collections");
+    }
+
+    if (spec.getKey().nFields() > 1) {
+        return Status(ErrorCodes::Error(6053700),
+                      "The clusteredIndex option does not support a compound cluster key");
+    }
+
+    const auto arbitraryClusterKeyField = clustered_util::getClusterKeyFieldName(spec);
+    if (arbitraryClusterKeyField.find(".", 0) != std::string::npos) {
+        return Status(
+            ErrorCodes::Error(6053701),
+            "The clusteredIndex option does not support a cluster key with nested fields");
+    }
+
+    const bool isForwardClusterKey = SimpleBSONObjComparator::kInstance.evaluate(
+        spec.getKey() == BSON(arbitraryClusterKeyField << 1));
+    if (!isForwardClusterKey) {
+        return Status(ErrorCodes::Error(6053702),
+                      str::stream()
+                          << "The clusteredIndex option supports cluster keys like {"
+                          << arbitraryClusterKeyField << ": 1}, but got " << spec.getKey());
     }
 
     if (expireAfterSeconds) {
@@ -455,7 +480,7 @@ Status _createCollection(OperationContext* opCtx,
             }
 
             auto clusteredIndexStatus = validateClusteredIndexSpec(
-                opCtx, clusteredIndex->getIndexSpec(), collectionOptions.expireAfterSeconds);
+                opCtx, nss, clusteredIndex->getIndexSpec(), collectionOptions.expireAfterSeconds);
             if (!clusteredIndexStatus.isOK()) {
                 return clusteredIndexStatus;
             }
diff --git a/src/mongo/db/catalog/index_catalog_impl.cpp b/src/mongo/db/catalog/index_catalog_impl.cpp
index 4553c696639..325014761dc 100644
--- a/src/mongo/db/catalog/index_catalog_impl.cpp
+++ b/src/mongo/db/catalog/index_catalog_impl.cpp
@@ -39,6 +39,7 @@
 #include "mongo/bson/simple_bsonelement_comparator.h"
 #include "mongo/bson/simple_bsonobj_comparator.h"
 #include "mongo/db/audit.h"
+#include "mongo/db/catalog/clustered_collection_util.h"
 #include "mongo/db/catalog/collection.h"
 #include "mongo/db/catalog/index_build_block.h"
 #include "mongo/db/catalog/index_catalog_entry_impl.h"
@@ -787,13 +788,13 @@ Status IndexCatalogImpl::_isSpecOk(OperationContext* opCtx,
     }
 
     uassert(ErrorCodes::InvalidOptions,
-            "Unique indexes are not supported on collections clustered by _id",
+            "Unique indexes are not supported on clustered collections",
             !collection->isClustered() || !spec[IndexDescriptor::kUniqueFieldName].trueValue());
 
     if (IndexDescriptor::isIdIndexPattern(key)) {
         if (collection->isClustered()) {
             return Status(ErrorCodes::CannotCreateIndex,
-                          "cannot create an _id index on a collection already clustered by _id");
+                          "cannot create the _id index on a clustered collection");
         }
 
         BSONElement uniqueElt = spec["unique"];
@@ -815,6 +816,13 @@ Status IndexCatalogImpl::_isSpecOk(OperationContext* opCtx,
             return Status(ErrorCodes::CannotCreateIndex,
                           "_id index must have the collection default collation");
         }
+    } else {
+        // Non _id index
+        if (collection->isClustered() &&
+            clustered_util::matchesClusterKey(key, collection->getClusteredInfo())) {
+            return Status(ErrorCodes::CannotCreateIndex,
+                          "cannot create an index with the same key as the cluster key");
+        }
     }
 
     // --- only storage engine checks allowed below this ----
diff --git a/src/mongo/db/catalog/validate_adaptor.cpp b/src/mongo/db/catalog/validate_adaptor.cpp
index 1c5c66587cd..f1e0ae2901b 100644
--- a/src/mongo/db/catalog/validate_adaptor.cpp
+++ b/src/mongo/db/catalog/validate_adaptor.cpp
@@ -34,6 +34,7 @@
 #include "mongo/db/catalog/validate_adaptor.h"
 
 #include "mongo/bson/bsonobj.h"
+#include "mongo/db/catalog/clustered_collection_util.h"
 #include "mongo/db/catalog/collection.h"
 #include "mongo/db/catalog/index_catalog.h"
 #include "mongo/db/catalog/index_consistency.h"
@@ -73,8 +74,9 @@ const long long kInterruptIntervalNumBytes = 50 * 1024 * 1024;  // 50MB.
 void _validateClusteredCollectionRecordId(OperationContext* opCtx,
                                           const RecordId& rid,
                                           const BSONObj& doc,
+                                          const ClusteredIndexSpec& indexSpec,
                                           ValidateResults* results) {
-    const auto ridFromDoc = record_id_helpers::keyForDoc(doc);
+    const auto ridFromDoc = record_id_helpers::keyForDoc(doc, indexSpec);
     if (!ridFromDoc.isOK()) {
         results->valid = false;
         results->errors.push_back(str::stream() << rid << " " << ridFromDoc.getStatus().reason());
@@ -86,12 +88,13 @@ void _validateClusteredCollectionRecordId(OperationContext* opCtx,
         KeyString::Builder(KeyString::Version::kLatestVersion, ridFromDoc.getValue());
     const auto ksFromRid = KeyString::Builder(KeyString::Version::kLatestVersion, rid);
 
+    const auto clusterKeyField = clustered_util::getClusterKeyFieldName(indexSpec);
     if (ksFromRid != ksFromBSON) {
         results->valid = false;
-        results->errors.push_back(str::stream()
-                                  << "Document with " << rid << " has mismatched " << doc["_id"]
-                                  << " (RecordId KeyString='" << ksFromRid.toString()
-                                  << "', cluster key KeyString='" << ksFromBSON.toString() << "')");
+        results->errors.push_back(
+            str::stream() << "Document with " << rid << " has mismatched " << doc[clusterKeyField]
+                          << " (RecordId KeyString='" << ksFromRid.toString()
+                          << "', cluster key KeyString='" << ksFromBSON.toString() << "')");
         results->corruptRecords.push_back(rid);
     }
 }
@@ -120,7 +123,8 @@ Status ValidateAdaptor::validateRecord(OperationContext* opCtx,
     }
 
     if (coll->isClustered()) {
-        _validateClusteredCollectionRecordId(opCtx, recordId, recordBson, results);
+        _validateClusteredCollectionRecordId(
+            opCtx, recordId, recordBson, coll->getClusteredInfo()->getIndexSpec(), results);
     }
 
     auto& executionCtx = StorageExecutionContext::get(opCtx);
diff --git a/src/mongo/db/exec/collection_scan.cpp b/src/mongo/db/exec/collection_scan.cpp
index 866683e3418..de3aba3ca02 100644
--- a/src/mongo/db/exec/collection_scan.cpp
+++ b/src/mongo/db/exec/collection_scan.cpp
@@ -70,7 +70,7 @@ CollectionScan::CollectionScan(ExpressionContext* expCtx,
     _specificStats.tailable = params.tailable;
     if (params.minRecord || params.maxRecord) {
         // The 'minRecord' and 'maxRecord' parameters are used for a special optimization that
-        // applies only to forwards scans of the oplog and scans on collections clustered by _id.
+        // applies only to forwards scans of the oplog and scans on clustered collections.
         invariant(!params.resumeAfterRecordId);
         if (collection->ns().isOplog()) {
             invariant(params.direction == CollectionScanParams::FORWARD);
diff --git a/src/mongo/db/exec/collection_scan_common.h b/src/mongo/db/exec/collection_scan_common.h
index 39254279e09..d332abf5615 100644
--- a/src/mongo/db/exec/collection_scan_common.h
+++ b/src/mongo/db/exec/collection_scan_common.h
@@ -44,7 +44,7 @@ struct CollectionScanParams {
     // reverse scan. A forward scan will start scanning at the document with the lowest RecordId
     // greater than or equal to minRecord. A reverse scan will stop and return EOF on the first
     // document with a RecordId less than minRecord, or a higher record if none exists. May only
-    // be used for scans on collections clustered by _id and forward oplog scans. If exclusive
+    // be used for scans on clustered collections and forward oplog scans. If exclusive
     // bounds are required, a MatchExpression must be passed to the CollectionScan stage. This field
     // cannot be used in conjunction with 'resumeAfterRecordId'
     boost::optional<RecordId> minRecord;
@@ -53,7 +53,7 @@ struct CollectionScanParams {
     // forward scan. A forward scan will stop and return EOF on the first document with a RecordId
     // greater than maxRecord. A reverse scan will start scanning at the document with the
     // highest RecordId less than or equal to maxRecord, or a lower record if none exists. May
-    // only be used for scans on collections clustered by _id and forward oplog scans. If exclusive
+    // only be used for scans on clustered collections and forward oplog scans. If exclusive
     // bounds are required, a MatchExpression must be passed to the CollectionScan stage. This field
     // cannot be used in conjunction with 'resumeAfterRecordId'.
     boost::optional<RecordId> maxRecord;
diff --git a/src/mongo/db/record_id_helpers.cpp b/src/mongo/db/record_id_helpers.cpp
index e3dee4e66bc..75663948cf6 100644
--- a/src/mongo/db/record_id_helpers.cpp
+++ b/src/mongo/db/record_id_helpers.cpp
@@ -35,6 +35,7 @@
 
 #include "mongo/bson/bson_validate.h"
 #include "mongo/bson/timestamp.h"
+#include "mongo/db/catalog/clustered_collection_util.h"
 #include "mongo/db/jsobj.h"
 #include "mongo/db/record_id.h"
 #include "mongo/db/storage/key_string.h"
@@ -82,22 +83,24 @@ StatusWith<RecordId> extractKeyOptime(const char* data, int len) {
     return keyForOptime(elem.timestamp());
 }
 
-StatusWith<RecordId> keyForDoc(const BSONObj& doc) {
-    // Build a KeyString as the RecordId using the "_id" field.
-    BSONElement idElem;
-    bool foundId = doc.getObjectID(idElem);
-    if (!foundId) {
+StatusWith<RecordId> keyForDoc(const BSONObj& doc, const ClusteredIndexSpec& indexSpec) {
+    // Get the collection's cluster key field name
+    const auto clusterKeyField = clustered_util::getClusterKeyFieldName(indexSpec);
+    // Build a RecordId using the cluster key.
+    const BSONElement keyElement = doc.getField(clusterKeyField);
+    if (keyElement.eoo()) {
         return {ErrorCodes::BadValue,
-                str::stream() << "Document " << redact(doc) << " is missing the '_id' field"};
+                str::stream() << "Document " << redact(doc) << " is missing the '"
+                              << clusterKeyField << "' field"};
     }
 
-    return keyForElem(idElem);
+    return keyForElem(keyElement);
 }
 
 RecordId keyForElem(const BSONElement& elem) {
-    // Intentionally discard the TypeBits since the type information will be stored in the _id of
-    // the original document. The consequence of this behavior is that _id values that compare
-    // similarly, but are of different types may not be used concurrently.
+    // Intentionally discard the TypeBits since the type information will be stored in the cluster
+    // key of the original document. The consequence of this behavior is that cluster key values
+    // that compare similarly, but are of different types may not be used concurrently.
     KeyString::Builder keyBuilder(KeyString::Version::kLatestVersion);
     keyBuilder.appendBSONElement(elem);
     return RecordId(keyBuilder.getBuffer(), keyBuilder.getSize());
diff --git a/src/mongo/db/record_id_helpers.h b/src/mongo/db/record_id_helpers.h
index 4e4e2d108f7..1d984c94a1e 100644
--- a/src/mongo/db/record_id_helpers.h
+++ b/src/mongo/db/record_id_helpers.h
@@ -32,6 +32,7 @@
 #include "mongo/base/status.h"
 #include "mongo/base/status_with.h"
 #include "mongo/bson/bsonobj.h"
+#include "mongo/db/catalog/clustered_collection_options_gen.h"
 #include "mongo/db/storage/key_format.h"
 
 namespace mongo {
@@ -46,9 +47,9 @@ namespace record_id_helpers {
 StatusWith<RecordId> keyForOptime(const Timestamp& opTime);
 
 /**
- * For collections that use clustering by _id, converts various values into a RecordId.
+ * For clustered collections, converts various values into a RecordId.
  */
-StatusWith<RecordId> keyForDoc(const BSONObj& doc);
+StatusWith<RecordId> keyForDoc(const BSONObj& doc, const ClusteredIndexSpec& indexSpec);
 RecordId keyForElem(const BSONElement& elem);
 RecordId keyForOID(OID oid);
 RecordId keyForDate(Date_t date);
diff --git a/src/mongo/db/repl/storage_interface_impl.cpp b/src/mongo/db/repl/storage_interface_impl.cpp
index 7263afc9127..0a49be47d85 100644
--- a/src/mongo/db/repl/storage_interface_impl.cpp
+++ b/src/mongo/db/repl/storage_interface_impl.cpp
@@ -709,7 +709,12 @@ StatusWith<std::vector<BSONObj>> _findOrDeleteDocuments(
                           makeDeleteStageParamsForDeleteDocuments(),
                           PlanYieldPolicy::YieldPolicy::NO_YIELD,
                           direction);
-            } else if (*indexName == kIdIndexName && collection->isClustered()) {
+            } else if (*indexName == kIdIndexName && collection->isClustered() &&
+                       collection->getClusteredInfo()
+                               ->getIndexSpec()
+                               .getKey()
+                               .firstElement()
+                               .fieldNameStringData() == "_id") {
                 // This collection is clustered by _id. Use a bounded collection scan, since a
                 // separate _id index is likely not available.
                 if (boundInclusion != BoundInclusion::kIncludeBothStartAndEndKeys) {
diff --git a/src/mongo/db/storage/index_entry_comparison.cpp b/src/mongo/db/storage/index_entry_comparison.cpp
index c5bd4348ad6..1e5d53eecf7 100644
--- a/src/mongo/db/storage/index_entry_comparison.cpp
+++ b/src/mongo/db/storage/index_entry_comparison.cpp
@@ -195,7 +195,8 @@ Status buildDupKeyErrorStatus(const BSONObj& key,
     sb << "E11000 duplicate key error";
     sb << " collection: " << collectionNamespace;
     if (indexName.size()) {
-        // This helper may be used for clustered collections when there is no _id index.
+        // This helper may be used for clustered collections when there is no index for the cluster
+        // key.
         sb << " index: " << indexName;
     }
     if (hasCollation) {
diff --git a/src/mongo/db/storage/record_store.h b/src/mongo/db/storage/record_store.h
index 7da8d424936..064e3907a37 100644
--- a/src/mongo/db/storage/record_store.h
+++ b/src/mongo/db/storage/record_store.h
@@ -247,7 +247,7 @@ public:
     /**
      * The key format for this RecordStore's RecordIds.
      *
-     * Collections with clustered indexes on _id may use the String format, however most
+     * Clustered collections may use the String format, however most
      * RecordStores use Long. RecordStores with the String format require callers to provide
      * RecordIds and will not generate them automatically.
      */
diff --git a/src/mongo/db/storage/record_store_test_harness.cpp b/src/mongo/db/storage/record_store_test_harness.cpp
index c6913ff6b3c..6dbd86f7d1f 100644
--- a/src/mongo/db/storage/record_store_test_harness.cpp
+++ b/src/mongo/db/storage/record_store_test_harness.cpp
@@ -427,7 +427,8 @@ TEST(RecordStoreTestHarness, ClusteredRecordStore) {
         RecordData recordData = RecordData(doc.objdata(), doc.objsize());
         recordData.makeOwned();
 
-        RecordId id = uassertStatusOK(record_id_helpers::keyForDoc(doc));
+        RecordId id = uassertStatusOK(
+            record_id_helpers::keyForDoc(doc, options.clusteredIndex->getIndexSpec()));
         records.push_back({id, recordData});
     }
 
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
index 2c278ef7f18..85bc7b91ba2 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
@@ -1485,7 +1485,8 @@ std::unique_ptr<RecordStore> WiredTigerKVEngine::getRecordStore(OperationContext
     params.engineName = _canonicalName;
     params.isCapped = options.capped;
     params.keyFormat = (options.clusteredIndex) ? KeyFormat::String : KeyFormat::Long;
-    // Record stores clustered by _id need to guarantee uniqueness by preventing overwrites.
+    // Record stores for clustered collections need to guarantee uniqueness by preventing
+    // overwrites.
     params.overwrite = options.clusteredIndex ? false : true;
     params.isEphemeral = _ephemeral;
     params.cappedCallback = nullptr;
diff --git a/src/mongo/db/ttl.cpp b/src/mongo/db/ttl.cpp
index 24c3fac00f6..81e4b4df597 100644
--- a/src/mongo/db/ttl.cpp
+++ b/src/mongo/db/ttl.cpp
@@ -467,7 +467,7 @@ private:
     }
 
     /*
-     * Removes expired documents from a collection clustered by _id using a bounded collection scan.
+     * Removes expired documents from a clustered collection using a bounded collection scan.
      * On time-series buckets collections, TTL operates on type 'ObjectId'. On general purpose
      * collections, TTL operates on type 'Date'.
      */
@@ -476,7 +476,7 @@ private:
                                    const CollectionPtr& collection) {
         const auto& collOptions = collection->getCollectionOptions();
         uassert(5400701,
-                "collection is not clustered by _id but is described as being TTL",
+                "collection is not clustered but is described as being TTL",
                 collOptions.clusteredIndex);
         invariant(collection->isClustered());
 
@@ -487,10 +487,8 @@ private:
             return;
         }
 
-        LOGV2_DEBUG(5400704,
-                    1,
-                    "running TTL job for collection clustered by _id",
-                    logAttrs(collection->ns()));
+        LOGV2_DEBUG(
+            5400704, 1, "running TTL job for clustered collection", logAttrs(collection->ns()));
 
         const auto startId = makeCollScanStartBound(collection, Date_t::min());
 
diff --git a/src/mongo/db/ttl_collection_cache.h b/src/mongo/db/ttl_collection_cache.h
index 053b9fc48a2..8c70ce9f8d8 100644
--- a/src/mongo/db/ttl_collection_cache.h
+++ b/src/mongo/db/ttl_collection_cache.h
@@ -48,7 +48,7 @@ class TTLCollectionCache {
 public:
     static TTLCollectionCache& get(ServiceContext* ctx);
 
-    // Specifies that a collection is clustered by _id and is TTL.
+    // Specifies that a collection is clustered and is TTL.
     class ClusteredId : public stdx::monostate {};
     // Names an index that is TTL.
     using IndexName = std::string;
author	Josef Ahmad <josef.ahmad@mongodb.com>	2021-10-25 10:52:16 +0000
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2021-10-25 11:38:11 +0000
commit	11e1a414386c0d4c670dfc306b8fdfd52a9e68e0 (patch)
tree	8510195e51449a0c6db4ae3ae0419f1afde79e5b /src/mongo
parent	3b168ad0f99534125d7457aa34a743a3cdce0765 (diff)
download	mongo-11e1a414386c0d4c670dfc306b8fdfd52a9e68e0.tar.gz