SERVER-61260 Support collations on clustered collections

author: Daniel Gómez Ferro <daniel.gomezferro@mongodb.com> 2022-01-18 15:38:18 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-01-18 16:58:37 +0000
commit: 754ea766abe5682738764d90b0d0c1d49600774a (patch)
tree: ef4db95e4ade57eead6d8b3566477ad974a39350 /src
parent: 31a411b86fafc6ca303f697dd0a88d7e32acc483 (diff)
download: mongo-754ea766abe5682738764d90b0d0c1d49600774a.tar.gz
19 files changed, 171 insertions, 85 deletions
diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript
index 2c6e794531f..dd869d95e19 100644
--- a/src/mongo/db/SConscript
+++ b/src/mongo/db/SConscript
@@ -1279,6 +1279,7 @@ env.Library(
     ],
     LIBDEPS_PRIVATE=[
         '$BUILD_DIR/mongo/db/catalog/clustered_collection_options',
+        '$BUILD_DIR/mongo/db/query/collation/collator_interface',
         '$BUILD_DIR/mongo/db/storage/key_string',
     ],)
 
diff --git a/src/mongo/db/catalog/clustered_collection_util.cpp b/src/mongo/db/catalog/clustered_collection_util.cpp
index d71c6360050..7a0222bfa6b 100644
--- a/src/mongo/db/catalog/clustered_collection_util.cpp
+++ b/src/mongo/db/catalog/clustered_collection_util.cpp
@@ -109,14 +109,17 @@ bool requiresLegacyFormat(const NamespaceString& nss) {
     return nss.isTimeseriesBucketsCollection() || nss.isChangeStreamPreImagesCollection();
 }
 
-BSONObj formatClusterKeyForListIndexes(const ClusteredCollectionInfo& collInfo) {
+BSONObj formatClusterKeyForListIndexes(const ClusteredCollectionInfo& collInfo,
+                                       const BSONObj& collation) {
     BSONObjBuilder bob;
     collInfo.getIndexSpec().serialize(&bob);
+    if (!collation.isEmpty()) {
+        bob.append("collation", collation);
+    }
     bob.append("clustered", true);
     return bob.obj();
 }
 
-
 bool isClusteredOnId(const boost::optional<ClusteredCollectionInfo>& collInfo) {
     return clustered_util::matchesClusterKey(BSON("_id" << 1), collInfo);
 }
diff --git a/src/mongo/db/catalog/clustered_collection_util.h b/src/mongo/db/catalog/clustered_collection_util.h
index 2dd603430de..c20418b3db8 100644
--- a/src/mongo/db/catalog/clustered_collection_util.h
+++ b/src/mongo/db/catalog/clustered_collection_util.h
@@ -69,9 +69,11 @@ bool requiresLegacyFormat(const NamespaceString& nss);
 
 /**
  * listIndexes requires the ClusteredIndexSpec be formatted with an additional field 'clustered:
- * true' to indicate it is a clustered index.
+ * true' to indicate it is a clustered index, plus a 'collation' field holding the collection's
+ * default collation. Pass an empty BSONObj when the collection has the 'simple' collation.
  */
-BSONObj formatClusterKeyForListIndexes(const ClusteredCollectionInfo& collInfo);
+BSONObj formatClusterKeyForListIndexes(const ClusteredCollectionInfo& collInfo,
+                                       const BSONObj& collation);
 
 /**
  * Returns true if the BSON object matches the collection's cluster key. Caller's should ensure
diff --git a/src/mongo/db/catalog/collection_impl.cpp b/src/mongo/db/catalog/collection_impl.cpp
index eb8212cd85b..8cf7e7aa320 100644
--- a/src/mongo/db/catalog/collection_impl.cpp
+++ b/src/mongo/db/catalog/collection_impl.cpp
@@ -824,8 +824,8 @@ Status CollectionImpl::insertDocumentForBulkLoader(
     RecordId recordId;
     if (isClustered()) {
         invariant(_shared->_recordStore->keyFormat() == KeyFormat::String);
-        recordId =
-            uassertStatusOK(record_id_helpers::keyForDoc(doc, getClusteredInfo()->getIndexSpec()));
+        recordId = uassertStatusOK(record_id_helpers::keyForDoc(
+            doc, getClusteredInfo()->getIndexSpec(), getDefaultCollator()));
     }
 
     // Using timestamp 0 for these inserts, which are non-oplog so we don't have an appropriate
@@ -908,8 +908,8 @@ Status CollectionImpl::_insertDocuments(OperationContext* opCtx,
         RecordId recordId;
         if (isClustered()) {
             invariant(_shared->_recordStore->keyFormat() == KeyFormat::String);
-            recordId = uassertStatusOK(
-                record_id_helpers::keyForDoc(doc, getClusteredInfo()->getIndexSpec()));
+            recordId = uassertStatusOK(record_id_helpers::keyForDoc(
+                doc, getClusteredInfo()->getIndexSpec(), getDefaultCollator()));
         }
 
         if (MONGO_unlikely(corruptDocumentOnInsert.shouldFail())) {
diff --git a/src/mongo/db/catalog/list_indexes.cpp b/src/mongo/db/catalog/list_indexes.cpp
index 780cc3c0b12..ce1515a4c13 100644
--- a/src/mongo/db/catalog/list_indexes.cpp
+++ b/src/mongo/db/catalog/list_indexes.cpp
@@ -79,8 +79,12 @@ std::list<BSONObj> listIndexesInLock(OperationContext* opCtx,
         collection->getAllIndexes(&indexNames);
 
         if (collection->isClustered() && !collection->ns().isTimeseriesBucketsCollection()) {
+            BSONObj collation;
+            if (auto collator = collection->getDefaultCollator()) {
+                collation = collator->getSpec().toBSON();
+            }
             auto clusteredSpec = clustered_util::formatClusterKeyForListIndexes(
-                collection->getClusteredInfo().get());
+                collection->getClusteredInfo().get(), collation);
             if (additionalInclude == ListIndexesInclude::IndexBuildInfo) {
                 indexSpecs.push_back(BSON("spec"_sd << clusteredSpec));
             } else {
diff --git a/src/mongo/db/catalog/validate_adaptor.cpp b/src/mongo/db/catalog/validate_adaptor.cpp
index 4c715746119..46b0439b29d 100644
--- a/src/mongo/db/catalog/validate_adaptor.cpp
+++ b/src/mongo/db/catalog/validate_adaptor.cpp
@@ -75,8 +75,9 @@ void _validateClusteredCollectionRecordId(OperationContext* opCtx,
                                           const RecordId& rid,
                                           const BSONObj& doc,
                                           const ClusteredIndexSpec& indexSpec,
+                                          const CollatorInterface* collator,
                                           ValidateResults* results) {
-    const auto ridFromDoc = record_id_helpers::keyForDoc(doc, indexSpec);
+    const auto ridFromDoc = record_id_helpers::keyForDoc(doc, indexSpec, collator);
     if (!ridFromDoc.isOK()) {
         results->valid = false;
         results->errors.push_back(str::stream() << rid << " " << ridFromDoc.getStatus().reason());
@@ -119,8 +120,12 @@ Status ValidateAdaptor::validateRecord(OperationContext* opCtx,
 
     const CollectionPtr& coll = _validateState->getCollection();
     if (coll->isClustered()) {
-        _validateClusteredCollectionRecordId(
-            opCtx, recordId, recordBson, coll->getClusteredInfo()->getIndexSpec(), results);
+        _validateClusteredCollectionRecordId(opCtx,
+                                             recordId,
+                                             recordBson,
+                                             coll->getClusteredInfo()->getIndexSpec(),
+                                             coll->getDefaultCollator(),
+                                             results);
     }
 
     auto& executionCtx = StorageExecutionContext::get(opCtx);
diff --git a/src/mongo/db/pipeline/change_stream_expired_pre_image_remover.cpp b/src/mongo/db/pipeline/change_stream_expired_pre_image_remover.cpp
index 75fea08ded7..c7eb0d6e33f 100644
--- a/src/mongo/db/pipeline/change_stream_expired_pre_image_remover.cpp
+++ b/src/mongo/db/pipeline/change_stream_expired_pre_image_remover.cpp
@@ -159,9 +159,10 @@ public:
         // Set up the new collection scan to start from the 'minPreImageId'.
         void setupPlanExecutor(boost::optional<ChangeStreamPreImageId> minPreImageId) {
             const auto minRecordId =
-                (minPreImageId ? boost::optional<RecordId>(record_id_helpers::keyForElem(
-                                     BSON("_id" << minPreImageId->toBSON()).firstElement()))
-                               : boost::none);
+                (minPreImageId
+                     ? boost::optional<RecordId>(record_id_helpers::keyForElem(
+                           BSON("_id" << minPreImageId->toBSON()).firstElement(), nullptr))
+                     : boost::none);
             _planExecutor =
                 InternalPlanner::collectionScan(_opCtx,
                                                 _preImagesCollPtr,
@@ -237,19 +238,20 @@ void deleteExpiredChangeStreamPreImages(Client* client) {
     for (auto it = expiredPreImages.begin(); it != expiredPreImages.end(); ++it) {
         it.saveState();
 
-        writeConflictRetry(
-            opCtx.get(),
-            "ChangeStreamExpiredPreImagesRemover",
-            NamespaceString::kChangeStreamPreImagesNamespace.ns(),
-            [&] {
-                WriteUnitOfWork wuow(opCtx.get());
-                const auto recordId =
-                    record_id_helpers::keyForElem(it->getField(ChangeStreamPreImage::kIdFieldName));
-                preImagesColl->deleteDocument(
-                    opCtx.get(), kUninitializedStmtId, recordId, &CurOp::get(*opCtx)->debug());
-                wuow.commit();
-                numberOfRemovals++;
-            });
+        writeConflictRetry(opCtx.get(),
+                           "ChangeStreamExpiredPreImagesRemover",
+                           NamespaceString::kChangeStreamPreImagesNamespace.ns(),
+                           [&] {
+                               WriteUnitOfWork wuow(opCtx.get());
+                               const auto recordId = record_id_helpers::keyForElem(
+                                   it->getField(ChangeStreamPreImage::kIdFieldName), nullptr);
+                               preImagesColl->deleteDocument(opCtx.get(),
+                                                             kUninitializedStmtId,
+                                                             recordId,
+                                                             &CurOp::get(*opCtx)->debug());
+                               wuow.commit();
+                               numberOfRemovals++;
+                           });
 
         it.restoreState();
     }
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
index 91f17d08a30..6898ff287a5 100644
--- a/src/mongo/db/query/get_executor.cpp
+++ b/src/mongo/db/query/get_executor.cpp
@@ -359,6 +359,7 @@ void fillOutPlannerParams(OperationContext* opCtx,
 
     if (collection->isClustered()) {
         plannerParams->clusteredInfo = collection->getClusteredInfo();
+        plannerParams->clusteredCollectionCollator = collection->getDefaultCollator();
     }
 }
 
diff --git a/src/mongo/db/query/internal_plans.cpp b/src/mongo/db/query/internal_plans.cpp
index 649962c31cb..0f45d3b4649 100644
--- a/src/mongo/db/query/internal_plans.cpp
+++ b/src/mongo/db/query/internal_plans.cpp
@@ -77,13 +77,14 @@ CollectionScanParams convertIndexScanParamsToCollScanParams(
 
     dassert(collection->isClustered() &&
             clustered_util::matchesClusterKey(keyPattern, collection->getClusteredInfo()));
+    invariant(collection->getDefaultCollator() == nullptr);
 
     boost::optional<RecordId> startRecord, endRecord;
     if (!startKey.isEmpty()) {
-        startRecord = RecordId(record_id_helpers::keyForElem(startKey.firstElement()));
+        startRecord = RecordId(record_id_helpers::keyForElem(startKey.firstElement(), nullptr));
     }
     if (!endKey.isEmpty()) {
-        endRecord = RecordId(record_id_helpers::keyForElem(endKey.firstElement()));
+        endRecord = RecordId(record_id_helpers::keyForElem(endKey.firstElement(), nullptr));
     }
 
     // For a forward scan, the startKey is the minRecord. For a backward scan, it is the maxRecord.
diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp
index 77f0ff9d90c..58652f73279 100644
--- a/src/mongo/db/query/planner_access.cpp
+++ b/src/mongo/db/query/planner_access.cpp
@@ -211,11 +211,41 @@ bool isOplogTsLowerBoundPred(const mongo::MatchExpression* me) {
     return me->path() == repl::OpTime::kTimestampFieldName;
 }
 
+// True if the element type is affected by a collator (i.e. it is or contains a String).
+bool affectedByCollator(const BSONElement& element) {
+    switch (element.type()) {
+        case BSONType::String:
+            return true;
+        case BSONType::Array:
+        case BSONType::Object:
+            for (const auto& sub : element.Obj()) {
+                if (affectedByCollator(sub))
+                    return true;
+            }
+            return false;
+        default:
+            return false;
+    }
+}
+
+// True if 'element' is unaffected by collation, or the query and collection collators match
+// (both simple/null, or both non-null and equal), so a cluster-key bound may use 'element'.
+bool compatibleCollator(const QueryPlannerParams& params,
+                        const CollatorInterface* queryCollator,
+                        const BSONElement& element) {
+    auto const collCollator = params.clusteredCollectionCollator;
+    bool compatible = CollatorInterface::collatorsMatch(queryCollator, collCollator);
+    return compatible || !affectedByCollator(element);
+}
+
 /**
  * Helper function that checks to see if min() or max() were provided along with the query. If so,
  * adjusts the collection scan bounds to fit the constraints.
  */
-void handleRIDRangeMinMax(const CanonicalQuery& query, CollectionScanNode* collScan) {
+void handleRIDRangeMinMax(const CanonicalQuery& query,
+                          CollectionScanNode* collScan,
+                          const QueryPlannerParams& params,
+                          const CollatorInterface* collator) {
     BSONObj minObj = query.getFindCommandRequest().getMin();
     BSONObj maxObj = query.getFindCommandRequest().getMax();
     if (minObj.isEmpty() && maxObj.isEmpty()) {
@@ -232,17 +262,17 @@ void handleRIDRangeMinMax(const CanonicalQuery& query, CollectionScanNode* collS
         collScan->direction == 1);
 
     boost::optional<RecordId> newMinRecord, newMaxRecord;
-    if (!maxObj.isEmpty()) {
+    if (!maxObj.isEmpty() && compatibleCollator(params, collator, maxObj.firstElement())) {
         // max() is exclusive.
         // Assumes clustered collection scans are only supported with the forward direction.
         collScan->boundInclusion =
             CollectionScanParams::ScanBoundInclusion::kIncludeStartRecordOnly;
-        newMaxRecord = record_id_helpers::keyForElem(maxObj.firstElement());
+        newMaxRecord = record_id_helpers::keyForElem(maxObj.firstElement(), collator);
     }
 
-    if (!minObj.isEmpty()) {
+    if (!minObj.isEmpty() && compatibleCollator(params, collator, minObj.firstElement())) {
         // The min() is inclusive as are bounded collection scans by default.
-        newMinRecord = record_id_helpers::keyForElem(minObj.firstElement());
+        newMinRecord = record_id_helpers::keyForElem(minObj.firstElement(), collator);
     }
 
     if (!collScan->minRecord) {
@@ -271,7 +301,8 @@ void handleRIDRangeMinMax(const CanonicalQuery& query, CollectionScanNode* collS
  */
 void handleRIDRangeScan(const MatchExpression* conjunct,
                         CollectionScanNode* collScan,
-                        const QueryPlannerParams& params) {
+                        const QueryPlannerParams& params,
+                        const CollatorInterface* collator) {
     invariant(params.clusteredInfo);
 
     if (conjunct == nullptr) {
@@ -281,7 +312,7 @@ void handleRIDRangeScan(const MatchExpression* conjunct,
     auto* andMatchPtr = dynamic_cast<const AndMatchExpression*>(conjunct);
     if (andMatchPtr != nullptr) {
         for (size_t index = 0; index < andMatchPtr->numChildren(); index++) {
-            handleRIDRangeScan(andMatchPtr->getChild(index), collScan, params);
+            handleRIDRangeScan(andMatchPtr->getChild(index), collScan, params, collator);
         }
         return;
     }
@@ -292,31 +323,30 @@ void handleRIDRangeScan(const MatchExpression* conjunct,
         return;
     }
 
-    const bool hasMaxRecord = collScan->maxRecord.has_value();
-    const bool hasMinRecord = collScan->minRecord.has_value();
-
-    if (!hasMinRecord && !hasMaxRecord) {
-        if (auto eq = dynamic_cast<const EqualityMatchExpression*>(conjunct)) {
-            collScan->minRecord = record_id_helpers::keyForElem(eq->getData());
-            collScan->maxRecord = collScan->minRecord;
-            return;
-        }
+    auto match = dynamic_cast<const ComparisonMatchExpression*>(conjunct);
+    if (match == nullptr) {
+        return;  // Not a comparison match expression.
     }
 
-    if (!hasMaxRecord) {
-        if (auto ltConjunct = dynamic_cast<const LTMatchExpression*>(conjunct)) {
-            collScan->maxRecord = record_id_helpers::keyForElem(ltConjunct->getData());
-        } else if (auto lteConjunct = dynamic_cast<const LTEMatchExpression*>(conjunct)) {
-            collScan->maxRecord = record_id_helpers::keyForElem(lteConjunct->getData());
-        }
+    const auto& element = match->getData();
+    bool compatible = compatibleCollator(params, collator, element);
+    if (!compatible) {
+        return;  // Collator affects probe and it's not compatible with collection's collator.
     }
 
-    if (!hasMinRecord) {
-        if (auto gtConjunct = dynamic_cast<const GTMatchExpression*>(conjunct)) {
-            collScan->minRecord = record_id_helpers::keyForElem(gtConjunct->getData());
-        } else if (auto gteConjunct = dynamic_cast<const GTEMatchExpression*>(conjunct)) {
-            collScan->minRecord = record_id_helpers::keyForElem(gteConjunct->getData());
-        }
+    auto& maxRecord = collScan->maxRecord;
+    auto& minRecord = collScan->minRecord;
+    if (dynamic_cast<const EqualityMatchExpression*>(match)) {
+        minRecord = record_id_helpers::keyForElem(element, collator);
+        maxRecord = minRecord;
+    } else if (!maxRecord &&
+               (dynamic_cast<const LTMatchExpression*>(match) ||
+                dynamic_cast<const LTEMatchExpression*>(match))) {
+        maxRecord = record_id_helpers::keyForElem(element, collator);
+    } else if (!minRecord &&
+               (dynamic_cast<const GTMatchExpression*>(match) ||
+                dynamic_cast<const GTEMatchExpression*>(match))) {
+        minRecord = record_id_helpers::keyForElem(element, collator);
     }
 }
 
@@ -404,8 +434,8 @@ std::unique_ptr<QuerySolutionNode> QueryPlannerAccess::makeCollectionScan(
     if (params.clusteredInfo && !csn->resumeAfterRecordId) {
         // This is a clustered collection. Attempt to perform an efficient, bounded collection scan
         // via minRecord and maxRecord if applicable.
-        handleRIDRangeScan(csn->filter.get(), csn.get(), params);
-        handleRIDRangeMinMax(query, csn.get());
+        handleRIDRangeScan(csn->filter.get(), csn.get(), params, query.getCollator());
+        handleRIDRangeMinMax(query, csn.get(), params, query.getCollator());
     }
 
     return csn;
diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp
index c6c670d607f..2651bd5f547 100644
--- a/src/mongo/db/query/query_planner.cpp
+++ b/src/mongo/db/query/query_planner.cpp
@@ -715,20 +715,33 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
 
                 const auto clusterKey = params.clusteredInfo->getIndexSpec().getKey();
 
-                // Since the clusteredIndex doesn't have a specific collator, check if it is
-                // compatible with the max and min using the same collator as the query.
+                // Check if the query collator is compatible with the collection collator for the
+                // provided min and max values.
                 if ((!minObj.isEmpty() &&
-                     !indexCompatibleMaxMin(
-                         minObj, query.getCollator(), query.getCollator(), clusterKey)) ||
+                     !indexCompatibleMaxMin(minObj,
+                                            query.getCollator(),
+                                            params.clusteredCollectionCollator,
+                                            clusterKey)) ||
                     (!maxObj.isEmpty() &&
-                     !indexCompatibleMaxMin(
-                         maxObj, query.getCollator(), query.getCollator(), clusterKey))) {
+                     !indexCompatibleMaxMin(maxObj,
+                                            query.getCollator(),
+                                            params.clusteredCollectionCollator,
+                                            clusterKey))) {
                     return Status(ErrorCodes::Error(6137400),
                                   "The clustered index is not compatible with the values provided "
-                                  "for min/max");
+                                  "for min/max due to the query collation");
                 }
 
-                if (!minObj.isEmpty() && !maxObj.isEmpty() && minObj.woCompare(maxObj) >= 0) {
+                auto wellSorted = [&minObj, &maxObj, collator = query.getCollator()]() {
+                    if (collator) {
+                        auto min = stripFieldNamesAndApplyCollation(minObj, collator);
+                        auto max = stripFieldNamesAndApplyCollation(maxObj, collator);
+                        return min.woCompare(max) < 0;
+                    } else {
+                        return minObj.woCompare(maxObj) < 0;
+                    }
+                };
+                if (!minObj.isEmpty() && !maxObj.isEmpty() && !wellSorted()) {
                     return Status(ErrorCodes::Error(6137401), "max() must be greater than min()");
                 }
             }
diff --git a/src/mongo/db/query/query_planner_params.h b/src/mongo/db/query/query_planner_params.h
index 8a9f14dcf7d..ad34bdd1ee6 100644
--- a/src/mongo/db/query/query_planner_params.h
+++ b/src/mongo/db/query/query_planner_params.h
@@ -33,6 +33,7 @@
 
 #include "mongo/db/catalog/clustered_collection_options_gen.h"
 #include "mongo/db/jsobj.h"
+#include "mongo/db/query/collation/collator_interface.h"
 #include "mongo/db/query/index_entry.h"
 #include "mongo/db/query/query_knobs_gen.h"
 
@@ -142,6 +143,10 @@ struct QueryPlannerParams {
     // Specifies the clusteredIndex information necessary to utilize the cluster key in bounded
     // collection scans and other query operations.
     boost::optional<ClusteredCollectionInfo> clusteredInfo;
+
+    // Default collator of the clustered collection, compared against the query's collator when
+    // bounding scans on the cluster key. nullptr when unset or when the collation is simple.
+    const CollatorInterface* clusteredCollectionCollator = nullptr;
 };
 
 }  // namespace mongo
diff --git a/src/mongo/db/record_id_helpers.cpp b/src/mongo/db/record_id_helpers.cpp
index 75663948cf6..35925a75019 100644
--- a/src/mongo/db/record_id_helpers.cpp
+++ b/src/mongo/db/record_id_helpers.cpp
@@ -37,6 +37,7 @@
 #include "mongo/bson/timestamp.h"
 #include "mongo/db/catalog/clustered_collection_util.h"
 #include "mongo/db/jsobj.h"
+#include "mongo/db/query/collation/collation_index_key.h"
 #include "mongo/db/record_id.h"
 #include "mongo/db/storage/key_string.h"
 #include "mongo/logv2/redaction.h"
@@ -83,7 +84,9 @@ StatusWith<RecordId> extractKeyOptime(const char* data, int len) {
     return keyForOptime(elem.timestamp());
 }
 
-StatusWith<RecordId> keyForDoc(const BSONObj& doc, const ClusteredIndexSpec& indexSpec) {
+StatusWith<RecordId> keyForDoc(const BSONObj& doc,
+                               const ClusteredIndexSpec& indexSpec,
+                               const CollatorInterface* collator) {
     // Get the collection's cluster key field name
     const auto clusterKeyField = clustered_util::getClusterKeyFieldName(indexSpec);
     // Build a RecordId using the cluster key.
@@ -94,15 +97,21 @@ StatusWith<RecordId> keyForDoc(const BSONObj& doc, const ClusteredIndexSpec& ind
                               << clusterKeyField << "' field"};
     }
 
-    return keyForElem(keyElement);
+    return keyForElem(keyElement, collator);
 }
 
-RecordId keyForElem(const BSONElement& elem) {
+RecordId keyForElem(const BSONElement& elem, const CollatorInterface* collator) {
     // Intentionally discard the TypeBits since the type information will be stored in the cluster
     // key of the original document. The consequence of this behavior is that cluster key values
     // that compare similarly, but are of different types may not be used concurrently.
     KeyString::Builder keyBuilder(KeyString::Version::kLatestVersion);
-    keyBuilder.appendBSONElement(elem);
+    if (collator) {
+        BSONObjBuilder out;
+        CollationIndexKey::collationAwareIndexKeyAppend(elem, collator, &out);
+        keyBuilder.appendBSONElement(out.done().firstElement());
+    } else {
+        keyBuilder.appendBSONElement(elem);
+    }
     return RecordId(keyBuilder.getBuffer(), keyBuilder.getSize());
 }
 
diff --git a/src/mongo/db/record_id_helpers.h b/src/mongo/db/record_id_helpers.h
index 1d984c94a1e..4627d134f9b 100644
--- a/src/mongo/db/record_id_helpers.h
+++ b/src/mongo/db/record_id_helpers.h
@@ -33,6 +33,7 @@
 #include "mongo/base/status_with.h"
 #include "mongo/bson/bsonobj.h"
 #include "mongo/db/catalog/clustered_collection_options_gen.h"
+#include "mongo/db/query/collation/collator_interface.h"
 #include "mongo/db/storage/key_format.h"
 
 namespace mongo {
@@ -49,8 +50,10 @@ StatusWith<RecordId> keyForOptime(const Timestamp& opTime);
 /**
  * For clustered collections, converts various values into a RecordId.
  */
-StatusWith<RecordId> keyForDoc(const BSONObj& doc, const ClusteredIndexSpec& indexSpec);
-RecordId keyForElem(const BSONElement& elem);
+StatusWith<RecordId> keyForDoc(const BSONObj& doc,
+                               const ClusteredIndexSpec& indexSpec,
+                               const CollatorInterface* collator);
+RecordId keyForElem(const BSONElement& elem, const CollatorInterface* collator);
 RecordId keyForOID(OID oid);
 RecordId keyForDate(Date_t date);
 
diff --git a/src/mongo/db/repl/collection_cloner.cpp b/src/mongo/db/repl/collection_cloner.cpp
index 773210d4cf3..0066240cc2f 100644
--- a/src/mongo/db/repl/collection_cloner.cpp
+++ b/src/mongo/db/repl/collection_cloner.cpp
@@ -212,7 +212,7 @@ BaseCloner::AfterStageBehavior CollectionCloner::listIndexesStage() {
             invariant(_collectionOptions.clusteredIndex);
             invariant(spec.getBoolField("clustered") == true);
             invariant(clustered_util::formatClusterKeyForListIndexes(
-                          _collectionOptions.clusteredIndex.get())
+                          _collectionOptions.clusteredIndex.get(), _collectionOptions.collation)
                           .woCompare(spec) == 0);
             // Skip if the spec is for the collection's clusteredIndex.
         } else if (spec.hasField("buildUUID")) {
diff --git a/src/mongo/db/repl/dbcheck.cpp b/src/mongo/db/repl/dbcheck.cpp
index 79a98205972..c24c170e988 100644
--- a/src/mongo/db/repl/dbcheck.cpp
+++ b/src/mongo/db/repl/dbcheck.cpp
@@ -247,10 +247,14 @@ DbCheckHasher::DbCheckHasher(OperationContext* opCtx,
                                            InternalPlanner::IXSCAN_FETCH);
     } else {
         CollectionScanParams params;
-        params.minRecord = uassertStatusOK(record_id_helpers::keyForDoc(
-            start.obj(), collection->getClusteredInfo()->getIndexSpec()));
-        params.maxRecord = uassertStatusOK(record_id_helpers::keyForDoc(
-            end.obj(), collection->getClusteredInfo()->getIndexSpec()));
+        params.minRecord = uassertStatusOK(
+            record_id_helpers::keyForDoc(start.obj(),
+                                         collection->getClusteredInfo()->getIndexSpec(),
+                                         collection->getDefaultCollator()));
+        params.maxRecord = uassertStatusOK(
+            record_id_helpers::keyForDoc(end.obj(),
+                                         collection->getClusteredInfo()->getIndexSpec(),
+                                         collection->getDefaultCollator()));
         params.boundInclusion = CollectionScanParams::ScanBoundInclusion::kIncludeEndRecordOnly;
         _exec = InternalPlanner::collectionScan(
             opCtx, &collection, params, PlanYieldPolicy::YieldPolicy::NO_YIELD);
diff --git a/src/mongo/db/repl/storage_interface_impl.cpp b/src/mongo/db/repl/storage_interface_impl.cpp
index 5dd66d101d2..3c61a2de652 100644
--- a/src/mongo/db/repl/storage_interface_impl.cpp
+++ b/src/mongo/db/repl/storage_interface_impl.cpp
@@ -732,13 +732,16 @@ StatusWith<std::vector<BSONObj>> _findOrDeleteDocuments(
                                   "bounded collection scans only support forward scans");
                 }
 
+                auto collator = collection->getDefaultCollator();
                 boost::optional<RecordId> minRecord, maxRecord;
                 if (!startKey.isEmpty()) {
-                    minRecord = RecordId(record_id_helpers::keyForElem(startKey.firstElement()));
+                    minRecord =
+                        RecordId(record_id_helpers::keyForElem(startKey.firstElement(), collator));
                 }
 
                 if (!endKey.isEmpty()) {
-                    maxRecord = RecordId(record_id_helpers::keyForElem(endKey.firstElement()));
+                    maxRecord =
+                        RecordId(record_id_helpers::keyForElem(endKey.firstElement(), collator));
                 }
 
                 planExecutor = isFind
diff --git a/src/mongo/db/storage/record_store_test_harness.cpp b/src/mongo/db/storage/record_store_test_harness.cpp
index dd6a586c1a0..eec2f336fc7 100644
--- a/src/mongo/db/storage/record_store_test_harness.cpp
+++ b/src/mongo/db/storage/record_store_test_harness.cpp
@@ -428,7 +428,7 @@ TEST(RecordStoreTestHarness, ClusteredRecordStore) {
         recordData.makeOwned();
 
         RecordId id = uassertStatusOK(
-            record_id_helpers::keyForDoc(doc, options.clusteredIndex->getIndexSpec()));
+            record_id_helpers::keyForDoc(doc, options.clusteredIndex->getIndexSpec(), nullptr));
         records.push_back({id, recordData});
     }
 
diff --git a/src/mongo/dbtests/query_stage_collscan.cpp b/src/mongo/dbtests/query_stage_collscan.cpp
index cd041e97f4a..c1aaff74825 100644
--- a/src/mongo/dbtests/query_stage_collscan.cpp
+++ b/src/mongo/dbtests/query_stage_collscan.cpp
@@ -209,10 +209,10 @@ public:
         _client.insert(ns.ns(), docs, ordered);
     }
 
-    // Returns the recordId generated by doc, assuming doc takes the shape of {<cluster key> :
-    // <value>};
+    // Returns the recordId generated by doc, assuming there's no collation and doc takes the shape
+    // of {<cluster key> : <value>};
     RecordId getRecordIdForClusteredDoc(const BSONObj& doc) {
-        return RecordId(record_id_helpers::keyForElem(doc.firstElement()));
+        return RecordId(record_id_helpers::keyForElem(doc.firstElement(), nullptr));
     }
 
     // Performs a bounded collection scan from 'minRecord' to 'maxRecord' in the specified
author	Daniel Gómez Ferro <daniel.gomezferro@mongodb.com>	2022-01-18 15:38:18 +0000
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2022-01-18 16:58:37 +0000
commit	754ea766abe5682738764d90b0d0c1d49600774a (patch)
tree	ef4db95e4ade57eead6d8b3566477ad974a39350 /src
parent	31a411b86fafc6ca303f697dd0a88d7e32acc483 (diff)
download	mongo-754ea766abe5682738764d90b0d0c1d49600774a.tar.gz