summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Daniel Gómez Ferro <daniel.gomezferro@mongodb.com> 2022-01-18 15:38:18 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-01-18 16:58:37 +0000
commit 754ea766abe5682738764d90b0d0c1d49600774a (patch)
tree ef4db95e4ade57eead6d8b3566477ad974a39350 /src
parent 31a411b86fafc6ca303f697dd0a88d7e32acc483 (diff)
download mongo-754ea766abe5682738764d90b0d0c1d49600774a.tar.gz
SERVER-61260 Support collations on clustered collections
Diffstat (limited to 'src')
-rw-r--r-- src/mongo/db/SConscript 1
-rw-r--r-- src/mongo/db/catalog/clustered_collection_util.cpp 7
-rw-r--r-- src/mongo/db/catalog/clustered_collection_util.h 6
-rw-r--r-- src/mongo/db/catalog/collection_impl.cpp 8
-rw-r--r-- src/mongo/db/catalog/list_indexes.cpp 6
-rw-r--r-- src/mongo/db/catalog/validate_adaptor.cpp 11
-rw-r--r-- src/mongo/db/pipeline/change_stream_expired_pre_image_remover.cpp 34
-rw-r--r-- src/mongo/db/query/get_executor.cpp 1
-rw-r--r-- src/mongo/db/query/internal_plans.cpp 5
-rw-r--r-- src/mongo/db/query/planner_access.cpp 90
-rw-r--r-- src/mongo/db/query/query_planner.cpp 29
-rw-r--r-- src/mongo/db/query/query_planner_params.h 5
-rw-r--r-- src/mongo/db/record_id_helpers.cpp 17
-rw-r--r-- src/mongo/db/record_id_helpers.h 7
-rw-r--r-- src/mongo/db/repl/collection_cloner.cpp 2
-rw-r--r-- src/mongo/db/repl/dbcheck.cpp 12
-rw-r--r-- src/mongo/db/repl/storage_interface_impl.cpp 7
-rw-r--r-- src/mongo/db/storage/record_store_test_harness.cpp 2
-rw-r--r-- src/mongo/dbtests/query_stage_collscan.cpp 6
19 files changed, 171 insertions, 85 deletions
diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript
index 2c6e794531f..dd869d95e19 100644
--- a/src/mongo/db/SConscript
+++ b/src/mongo/db/SConscript
@@ -1279,6 +1279,7 @@ env.Library(
],
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/db/catalog/clustered_collection_options',
+ '$BUILD_DIR/mongo/db/query/collation/collator_interface',
'$BUILD_DIR/mongo/db/storage/key_string',
],)
diff --git a/src/mongo/db/catalog/clustered_collection_util.cpp b/src/mongo/db/catalog/clustered_collection_util.cpp
index d71c6360050..7a0222bfa6b 100644
--- a/src/mongo/db/catalog/clustered_collection_util.cpp
+++ b/src/mongo/db/catalog/clustered_collection_util.cpp
@@ -109,14 +109,17 @@ bool requiresLegacyFormat(const NamespaceString& nss) {
return nss.isTimeseriesBucketsCollection() || nss.isChangeStreamPreImagesCollection();
}
-BSONObj formatClusterKeyForListIndexes(const ClusteredCollectionInfo& collInfo) {
+// Serializes the clustered index spec of 'collInfo' into a new BSONObj, then appends the
+// "collation" field (only when 'collation' is non-empty) followed by "clustered": true.
+BSONObj formatClusterKeyForListIndexes(const ClusteredCollectionInfo& collInfo,
+ const BSONObj& collation) {
BSONObjBuilder bob;
collInfo.getIndexSpec().serialize(&bob);
+ // An empty 'collation' is skipped, so no "collation" field is emitted in that case.
+ if (!collation.isEmpty()) {
+ bob.append("collation", collation);
+ }
bob.append("clustered", true);
return bob.obj();
}
-
bool isClusteredOnId(const boost::optional<ClusteredCollectionInfo>& collInfo) {
return clustered_util::matchesClusterKey(BSON("_id" << 1), collInfo);
}
diff --git a/src/mongo/db/catalog/clustered_collection_util.h b/src/mongo/db/catalog/clustered_collection_util.h
index 2dd603430de..c20418b3db8 100644
--- a/src/mongo/db/catalog/clustered_collection_util.h
+++ b/src/mongo/db/catalog/clustered_collection_util.h
@@ -69,9 +69,11 @@ bool requiresLegacyFormat(const NamespaceString& nss);
/**
* listIndexes requires the ClusteredIndexSpec be formatted with an additional field 'clustered:
- * true' to indicate it is a clustered index.
+ * true' to indicate it is a clustered index and with the collection's default collation. If the
+ * collection has the 'simple' collation this expects an empty BSONObj.
*/
-BSONObj formatClusterKeyForListIndexes(const ClusteredCollectionInfo& collInfo);
+BSONObj formatClusterKeyForListIndexes(const ClusteredCollectionInfo& collInfo,
+ const BSONObj& collation);
/**
* Returns true if the BSON object matches the collection's cluster key. Caller's should ensure
diff --git a/src/mongo/db/catalog/collection_impl.cpp b/src/mongo/db/catalog/collection_impl.cpp
index eb8212cd85b..8cf7e7aa320 100644
--- a/src/mongo/db/catalog/collection_impl.cpp
+++ b/src/mongo/db/catalog/collection_impl.cpp
@@ -824,8 +824,8 @@ Status CollectionImpl::insertDocumentForBulkLoader(
RecordId recordId;
if (isClustered()) {
invariant(_shared->_recordStore->keyFormat() == KeyFormat::String);
- recordId =
- uassertStatusOK(record_id_helpers::keyForDoc(doc, getClusteredInfo()->getIndexSpec()));
+ recordId = uassertStatusOK(record_id_helpers::keyForDoc(
+ doc, getClusteredInfo()->getIndexSpec(), getDefaultCollator()));
}
// Using timestamp 0 for these inserts, which are non-oplog so we don't have an appropriate
@@ -908,8 +908,8 @@ Status CollectionImpl::_insertDocuments(OperationContext* opCtx,
RecordId recordId;
if (isClustered()) {
invariant(_shared->_recordStore->keyFormat() == KeyFormat::String);
- recordId = uassertStatusOK(
- record_id_helpers::keyForDoc(doc, getClusteredInfo()->getIndexSpec()));
+ recordId = uassertStatusOK(record_id_helpers::keyForDoc(
+ doc, getClusteredInfo()->getIndexSpec(), getDefaultCollator()));
}
if (MONGO_unlikely(corruptDocumentOnInsert.shouldFail())) {
diff --git a/src/mongo/db/catalog/list_indexes.cpp b/src/mongo/db/catalog/list_indexes.cpp
index 780cc3c0b12..ce1515a4c13 100644
--- a/src/mongo/db/catalog/list_indexes.cpp
+++ b/src/mongo/db/catalog/list_indexes.cpp
@@ -79,8 +79,12 @@ std::list<BSONObj> listIndexesInLock(OperationContext* opCtx,
collection->getAllIndexes(&indexNames);
if (collection->isClustered() && !collection->ns().isTimeseriesBucketsCollection()) {
+ BSONObj collation;
+ if (auto collator = collection->getDefaultCollator()) {
+ collation = collator->getSpec().toBSON();
+ }
auto clusteredSpec = clustered_util::formatClusterKeyForListIndexes(
- collection->getClusteredInfo().get());
+ collection->getClusteredInfo().get(), collation);
if (additionalInclude == ListIndexesInclude::IndexBuildInfo) {
indexSpecs.push_back(BSON("spec"_sd << clusteredSpec));
} else {
diff --git a/src/mongo/db/catalog/validate_adaptor.cpp b/src/mongo/db/catalog/validate_adaptor.cpp
index 4c715746119..46b0439b29d 100644
--- a/src/mongo/db/catalog/validate_adaptor.cpp
+++ b/src/mongo/db/catalog/validate_adaptor.cpp
@@ -75,8 +75,9 @@ void _validateClusteredCollectionRecordId(OperationContext* opCtx,
const RecordId& rid,
const BSONObj& doc,
const ClusteredIndexSpec& indexSpec,
+ const CollatorInterface* collator,
ValidateResults* results) {
- const auto ridFromDoc = record_id_helpers::keyForDoc(doc, indexSpec);
+ const auto ridFromDoc = record_id_helpers::keyForDoc(doc, indexSpec, collator);
if (!ridFromDoc.isOK()) {
results->valid = false;
results->errors.push_back(str::stream() << rid << " " << ridFromDoc.getStatus().reason());
@@ -119,8 +120,12 @@ Status ValidateAdaptor::validateRecord(OperationContext* opCtx,
const CollectionPtr& coll = _validateState->getCollection();
if (coll->isClustered()) {
- _validateClusteredCollectionRecordId(
- opCtx, recordId, recordBson, coll->getClusteredInfo()->getIndexSpec(), results);
+ _validateClusteredCollectionRecordId(opCtx,
+ recordId,
+ recordBson,
+ coll->getClusteredInfo()->getIndexSpec(),
+ coll->getDefaultCollator(),
+ results);
}
auto& executionCtx = StorageExecutionContext::get(opCtx);
diff --git a/src/mongo/db/pipeline/change_stream_expired_pre_image_remover.cpp b/src/mongo/db/pipeline/change_stream_expired_pre_image_remover.cpp
index 75fea08ded7..c7eb0d6e33f 100644
--- a/src/mongo/db/pipeline/change_stream_expired_pre_image_remover.cpp
+++ b/src/mongo/db/pipeline/change_stream_expired_pre_image_remover.cpp
@@ -159,9 +159,10 @@ public:
// Set up the new collection scan to start from the 'minPreImageId'.
void setupPlanExecutor(boost::optional<ChangeStreamPreImageId> minPreImageId) {
const auto minRecordId =
- (minPreImageId ? boost::optional<RecordId>(record_id_helpers::keyForElem(
- BSON("_id" << minPreImageId->toBSON()).firstElement()))
- : boost::none);
+ (minPreImageId
+ ? boost::optional<RecordId>(record_id_helpers::keyForElem(
+ BSON("_id" << minPreImageId->toBSON()).firstElement(), nullptr))
+ : boost::none);
_planExecutor =
InternalPlanner::collectionScan(_opCtx,
_preImagesCollPtr,
@@ -237,19 +238,20 @@ void deleteExpiredChangeStreamPreImages(Client* client) {
for (auto it = expiredPreImages.begin(); it != expiredPreImages.end(); ++it) {
it.saveState();
- writeConflictRetry(
- opCtx.get(),
- "ChangeStreamExpiredPreImagesRemover",
- NamespaceString::kChangeStreamPreImagesNamespace.ns(),
- [&] {
- WriteUnitOfWork wuow(opCtx.get());
- const auto recordId =
- record_id_helpers::keyForElem(it->getField(ChangeStreamPreImage::kIdFieldName));
- preImagesColl->deleteDocument(
- opCtx.get(), kUninitializedStmtId, recordId, &CurOp::get(*opCtx)->debug());
- wuow.commit();
- numberOfRemovals++;
- });
+ writeConflictRetry(opCtx.get(),
+ "ChangeStreamExpiredPreImagesRemover",
+ NamespaceString::kChangeStreamPreImagesNamespace.ns(),
+ [&] {
+ WriteUnitOfWork wuow(opCtx.get());
+ const auto recordId = record_id_helpers::keyForElem(
+ it->getField(ChangeStreamPreImage::kIdFieldName), nullptr);
+ preImagesColl->deleteDocument(opCtx.get(),
+ kUninitializedStmtId,
+ recordId,
+ &CurOp::get(*opCtx)->debug());
+ wuow.commit();
+ numberOfRemovals++;
+ });
it.restoreState();
}
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
index 91f17d08a30..6898ff287a5 100644
--- a/src/mongo/db/query/get_executor.cpp
+++ b/src/mongo/db/query/get_executor.cpp
@@ -359,6 +359,7 @@ void fillOutPlannerParams(OperationContext* opCtx,
if (collection->isClustered()) {
plannerParams->clusteredInfo = collection->getClusteredInfo();
+ plannerParams->clusteredCollectionCollator = collection->getDefaultCollator();
}
}
diff --git a/src/mongo/db/query/internal_plans.cpp b/src/mongo/db/query/internal_plans.cpp
index 649962c31cb..0f45d3b4649 100644
--- a/src/mongo/db/query/internal_plans.cpp
+++ b/src/mongo/db/query/internal_plans.cpp
@@ -77,13 +77,14 @@ CollectionScanParams convertIndexScanParamsToCollScanParams(
dassert(collection->isClustered() &&
clustered_util::matchesClusterKey(keyPattern, collection->getClusteredInfo()));
+ invariant(collection->getDefaultCollator() == nullptr);
boost::optional<RecordId> startRecord, endRecord;
if (!startKey.isEmpty()) {
- startRecord = RecordId(record_id_helpers::keyForElem(startKey.firstElement()));
+ startRecord = RecordId(record_id_helpers::keyForElem(startKey.firstElement(), nullptr));
}
if (!endKey.isEmpty()) {
- endRecord = RecordId(record_id_helpers::keyForElem(endKey.firstElement()));
+ endRecord = RecordId(record_id_helpers::keyForElem(endKey.firstElement(), nullptr));
}
// For a forward scan, the startKey is the minRecord. For a backward scan, it is the maxRecord.
diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp
index 77f0ff9d90c..58652f73279 100644
--- a/src/mongo/db/query/planner_access.cpp
+++ b/src/mongo/db/query/planner_access.cpp
@@ -211,11 +211,41 @@ bool isOplogTsLowerBoundPred(const mongo::MatchExpression* me) {
return me->path() == repl::OpTime::kTimestampFieldName;
}
+// Reports whether a collator can influence 'element': true for a String, and for an Array or
+// Object that contains a String at any nesting depth. Every other type yields false.
+bool affectedByCollator(const BSONElement& element) {
+    const auto elementType = element.type();
+    if (elementType == BSONType::String) {
+        return true;
+    }
+    if (elementType != BSONType::Array && elementType != BSONType::Object) {
+        return false;
+    }
+    // Containers: recurse into each child and stop at the first one that holds a String.
+    for (const auto& child : element.Obj()) {
+        if (affectedByCollator(child)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+// Returns true when RecordId bounds may safely be derived from 'element': either the query
+// collator and the clustered collection's collator match, or 'element' contains no strings and
+// is therefore unaffected by collation.
+//
+// A null collator stands for the simple collation. Two collators match when both are null or
+// when both are non-null and compare equal. In particular, a query that uses the simple
+// collation against a collection with a non-simple collator is NOT compatible: the collection's
+// record keys are built with its collator, so bounds built without it would not line up.
+bool compatibleCollator(const QueryPlannerParams& params,
+                        const CollatorInterface* queryCollator,
+                        const BSONElement& element) {
+    const CollatorInterface* const collCollator = params.clusteredCollectionCollator;
+    // Null-safe equality: only dereference when both sides are present.
+    const bool collatorsMatch = (queryCollator && collCollator)
+        ? (*queryCollator == *collCollator)
+        : (queryCollator == collCollator);
+    return collatorsMatch || !affectedByCollator(element);
+}
+
/**
* Helper function that checks to see if min() or max() were provided along with the query. If so,
* adjusts the collection scan bounds to fit the constraints.
*/
-void handleRIDRangeMinMax(const CanonicalQuery& query, CollectionScanNode* collScan) {
+void handleRIDRangeMinMax(const CanonicalQuery& query,
+ CollectionScanNode* collScan,
+ const QueryPlannerParams& params,
+ const CollatorInterface* collator) {
BSONObj minObj = query.getFindCommandRequest().getMin();
BSONObj maxObj = query.getFindCommandRequest().getMax();
if (minObj.isEmpty() && maxObj.isEmpty()) {
@@ -232,17 +262,17 @@ void handleRIDRangeMinMax(const CanonicalQuery& query, CollectionScanNode* collS
collScan->direction == 1);
boost::optional<RecordId> newMinRecord, newMaxRecord;
- if (!maxObj.isEmpty()) {
+ if (!maxObj.isEmpty() && compatibleCollator(params, collator, maxObj.firstElement())) {
// max() is exclusive.
// Assumes clustered collection scans are only supported with the forward direction.
collScan->boundInclusion =
CollectionScanParams::ScanBoundInclusion::kIncludeStartRecordOnly;
- newMaxRecord = record_id_helpers::keyForElem(maxObj.firstElement());
+ newMaxRecord = record_id_helpers::keyForElem(maxObj.firstElement(), collator);
}
- if (!minObj.isEmpty()) {
+ if (!minObj.isEmpty() && compatibleCollator(params, collator, minObj.firstElement())) {
// The min() is inclusive as are bounded collection scans by default.
- newMinRecord = record_id_helpers::keyForElem(minObj.firstElement());
+ newMinRecord = record_id_helpers::keyForElem(minObj.firstElement(), collator);
}
if (!collScan->minRecord) {
@@ -271,7 +301,8 @@ void handleRIDRangeMinMax(const CanonicalQuery& query, CollectionScanNode* collS
*/
void handleRIDRangeScan(const MatchExpression* conjunct,
CollectionScanNode* collScan,
- const QueryPlannerParams& params) {
+ const QueryPlannerParams& params,
+ const CollatorInterface* collator) {
invariant(params.clusteredInfo);
if (conjunct == nullptr) {
@@ -281,7 +312,7 @@ void handleRIDRangeScan(const MatchExpression* conjunct,
auto* andMatchPtr = dynamic_cast<const AndMatchExpression*>(conjunct);
if (andMatchPtr != nullptr) {
for (size_t index = 0; index < andMatchPtr->numChildren(); index++) {
- handleRIDRangeScan(andMatchPtr->getChild(index), collScan, params);
+ handleRIDRangeScan(andMatchPtr->getChild(index), collScan, params, collator);
}
return;
}
@@ -292,31 +323,30 @@ void handleRIDRangeScan(const MatchExpression* conjunct,
return;
}
- const bool hasMaxRecord = collScan->maxRecord.has_value();
- const bool hasMinRecord = collScan->minRecord.has_value();
-
- if (!hasMinRecord && !hasMaxRecord) {
- if (auto eq = dynamic_cast<const EqualityMatchExpression*>(conjunct)) {
- collScan->minRecord = record_id_helpers::keyForElem(eq->getData());
- collScan->maxRecord = collScan->minRecord;
- return;
- }
+ auto match = dynamic_cast<const ComparisonMatchExpression*>(conjunct);
+ if (match == nullptr) {
+ return; // Not a comparison match expression.
}
- if (!hasMaxRecord) {
- if (auto ltConjunct = dynamic_cast<const LTMatchExpression*>(conjunct)) {
- collScan->maxRecord = record_id_helpers::keyForElem(ltConjunct->getData());
- } else if (auto lteConjunct = dynamic_cast<const LTEMatchExpression*>(conjunct)) {
- collScan->maxRecord = record_id_helpers::keyForElem(lteConjunct->getData());
- }
+ const auto& element = match->getData();
+ bool compatible = compatibleCollator(params, collator, element);
+ if (!compatible) {
+ return; // Collator affects probe and it's not compatible with collection's collator.
}
- if (!hasMinRecord) {
- if (auto gtConjunct = dynamic_cast<const GTMatchExpression*>(conjunct)) {
- collScan->minRecord = record_id_helpers::keyForElem(gtConjunct->getData());
- } else if (auto gteConjunct = dynamic_cast<const GTEMatchExpression*>(conjunct)) {
- collScan->minRecord = record_id_helpers::keyForElem(gteConjunct->getData());
- }
+ auto& maxRecord = collScan->maxRecord;
+ auto& minRecord = collScan->minRecord;
+ if (dynamic_cast<const EqualityMatchExpression*>(match)) {
+ minRecord = record_id_helpers::keyForElem(element, collator);
+ maxRecord = minRecord;
+ } else if (!maxRecord &&
+ (dynamic_cast<const LTMatchExpression*>(match) ||
+ dynamic_cast<const LTEMatchExpression*>(match))) {
+ maxRecord = record_id_helpers::keyForElem(element, collator);
+ } else if (!minRecord &&
+ (dynamic_cast<const GTMatchExpression*>(match) ||
+ dynamic_cast<const GTEMatchExpression*>(match))) {
+ minRecord = record_id_helpers::keyForElem(element, collator);
}
}
@@ -404,8 +434,8 @@ std::unique_ptr<QuerySolutionNode> QueryPlannerAccess::makeCollectionScan(
if (params.clusteredInfo && !csn->resumeAfterRecordId) {
// This is a clustered collection. Attempt to perform an efficient, bounded collection scan
// via minRecord and maxRecord if applicable.
- handleRIDRangeScan(csn->filter.get(), csn.get(), params);
- handleRIDRangeMinMax(query, csn.get());
+ handleRIDRangeScan(csn->filter.get(), csn.get(), params, query.getCollator());
+ handleRIDRangeMinMax(query, csn.get(), params, query.getCollator());
}
return csn;
diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp
index c6c670d607f..2651bd5f547 100644
--- a/src/mongo/db/query/query_planner.cpp
+++ b/src/mongo/db/query/query_planner.cpp
@@ -715,20 +715,33 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
const auto clusterKey = params.clusteredInfo->getIndexSpec().getKey();
- // Since the clusteredIndex doesn't have a specific collator, check if it is
- // compatible with the max and min using the same collator as the query.
+ // Check if the query collator is compatible with the collection collator for the
+ // provided min and max values.
if ((!minObj.isEmpty() &&
- !indexCompatibleMaxMin(
- minObj, query.getCollator(), query.getCollator(), clusterKey)) ||
+ !indexCompatibleMaxMin(minObj,
+ query.getCollator(),
+ params.clusteredCollectionCollator,
+ clusterKey)) ||
(!maxObj.isEmpty() &&
- !indexCompatibleMaxMin(
- maxObj, query.getCollator(), query.getCollator(), clusterKey))) {
+ !indexCompatibleMaxMin(maxObj,
+ query.getCollator(),
+ params.clusteredCollectionCollator,
+ clusterKey))) {
return Status(ErrorCodes::Error(6137400),
"The clustered index is not compatible with the values provided "
- "for min/max");
+ "for min/max due to the query collation");
}
- if (!minObj.isEmpty() && !maxObj.isEmpty() && minObj.woCompare(maxObj) >= 0) {
+ auto wellSorted = [&minObj, &maxObj, collator = query.getCollator()]() {
+ if (collator) {
+ auto min = stripFieldNamesAndApplyCollation(minObj, collator);
+ auto max = stripFieldNamesAndApplyCollation(maxObj, collator);
+ return min.woCompare(max) < 0;
+ } else {
+ return minObj.woCompare(maxObj) < 0;
+ }
+ };
+ if (!minObj.isEmpty() && !maxObj.isEmpty() && !wellSorted()) {
return Status(ErrorCodes::Error(6137401), "max() must be greater than min()");
}
}
diff --git a/src/mongo/db/query/query_planner_params.h b/src/mongo/db/query/query_planner_params.h
index 8a9f14dcf7d..ad34bdd1ee6 100644
--- a/src/mongo/db/query/query_planner_params.h
+++ b/src/mongo/db/query/query_planner_params.h
@@ -33,6 +33,7 @@
#include "mongo/db/catalog/clustered_collection_options_gen.h"
#include "mongo/db/jsobj.h"
+#include "mongo/db/query/collation/collator_interface.h"
#include "mongo/db/query/index_entry.h"
#include "mongo/db/query/query_knobs_gen.h"
@@ -142,6 +143,10 @@ struct QueryPlannerParams {
// Specifies the clusteredIndex information necessary to utilize the cluster key in bounded
// collection scans and other query operations.
boost::optional<ClusteredCollectionInfo> clusteredInfo;
+
+    // Specifies the collator information necessary to utilize the cluster key in bounded
+    // collection scans and other query operations. Null means the simple collation. Defaults to
+    // null so that params which never assign it (e.g. non-clustered collections) do not carry an
+    // indeterminate pointer into the planner.
+    const CollatorInterface* clusteredCollectionCollator = nullptr;
};
} // namespace mongo
diff --git a/src/mongo/db/record_id_helpers.cpp b/src/mongo/db/record_id_helpers.cpp
index 75663948cf6..35925a75019 100644
--- a/src/mongo/db/record_id_helpers.cpp
+++ b/src/mongo/db/record_id_helpers.cpp
@@ -37,6 +37,7 @@
#include "mongo/bson/timestamp.h"
#include "mongo/db/catalog/clustered_collection_util.h"
#include "mongo/db/jsobj.h"
+#include "mongo/db/query/collation/collation_index_key.h"
#include "mongo/db/record_id.h"
#include "mongo/db/storage/key_string.h"
#include "mongo/logv2/redaction.h"
@@ -83,7 +84,9 @@ StatusWith<RecordId> extractKeyOptime(const char* data, int len) {
return keyForOptime(elem.timestamp());
}
-StatusWith<RecordId> keyForDoc(const BSONObj& doc, const ClusteredIndexSpec& indexSpec) {
+StatusWith<RecordId> keyForDoc(const BSONObj& doc,
+ const ClusteredIndexSpec& indexSpec,
+ const CollatorInterface* collator) {
// Get the collection's cluster key field name
const auto clusterKeyField = clustered_util::getClusterKeyFieldName(indexSpec);
// Build a RecordId using the cluster key.
@@ -94,15 +97,21 @@ StatusWith<RecordId> keyForDoc(const BSONObj& doc, const ClusteredIndexSpec& ind
<< clusterKeyField << "' field"};
}
- return keyForElem(keyElement);
+ return keyForElem(keyElement, collator);
}
-RecordId keyForElem(const BSONElement& elem) {
+RecordId keyForElem(const BSONElement& elem, const CollatorInterface* collator) {
// Intentionally discard the TypeBits since the type information will be stored in the cluster
// key of the original document. The consequence of this behavior is that cluster key values
// that compare similarly, but are of different types may not be used concurrently.
KeyString::Builder keyBuilder(KeyString::Version::kLatestVersion);
- keyBuilder.appendBSONElement(elem);
+ // With a non-null collator, the key is built from the element that
+ // collationAwareIndexKeyAppend() writes into 'out' instead of from 'elem' directly. A null
+ // collator appends 'elem' unchanged, as before.
+ if (collator) {
+ BSONObjBuilder out;
+ CollationIndexKey::collationAwareIndexKeyAppend(elem, collator, &out);
+ keyBuilder.appendBSONElement(out.done().firstElement());
+ } else {
+ keyBuilder.appendBSONElement(elem);
+ }
return RecordId(keyBuilder.getBuffer(), keyBuilder.getSize());
}
diff --git a/src/mongo/db/record_id_helpers.h b/src/mongo/db/record_id_helpers.h
index 1d984c94a1e..4627d134f9b 100644
--- a/src/mongo/db/record_id_helpers.h
+++ b/src/mongo/db/record_id_helpers.h
@@ -33,6 +33,7 @@
#include "mongo/base/status_with.h"
#include "mongo/bson/bsonobj.h"
#include "mongo/db/catalog/clustered_collection_options_gen.h"
+#include "mongo/db/query/collation/collator_interface.h"
#include "mongo/db/storage/key_format.h"
namespace mongo {
@@ -49,8 +50,10 @@ StatusWith<RecordId> keyForOptime(const Timestamp& opTime);
/**
* For clustered collections, converts various values into a RecordId.
*/
-StatusWith<RecordId> keyForDoc(const BSONObj& doc, const ClusteredIndexSpec& indexSpec);
-RecordId keyForElem(const BSONElement& elem);
+StatusWith<RecordId> keyForDoc(const BSONObj& doc,
+ const ClusteredIndexSpec& indexSpec,
+ const CollatorInterface* collator);
+RecordId keyForElem(const BSONElement& elem, const CollatorInterface* collator);
RecordId keyForOID(OID oid);
RecordId keyForDate(Date_t date);
diff --git a/src/mongo/db/repl/collection_cloner.cpp b/src/mongo/db/repl/collection_cloner.cpp
index 773210d4cf3..0066240cc2f 100644
--- a/src/mongo/db/repl/collection_cloner.cpp
+++ b/src/mongo/db/repl/collection_cloner.cpp
@@ -212,7 +212,7 @@ BaseCloner::AfterStageBehavior CollectionCloner::listIndexesStage() {
invariant(_collectionOptions.clusteredIndex);
invariant(spec.getBoolField("clustered") == true);
invariant(clustered_util::formatClusterKeyForListIndexes(
- _collectionOptions.clusteredIndex.get())
+ _collectionOptions.clusteredIndex.get(), _collectionOptions.collation)
.woCompare(spec) == 0);
// Skip if the spec is for the collection's clusteredIndex.
} else if (spec.hasField("buildUUID")) {
diff --git a/src/mongo/db/repl/dbcheck.cpp b/src/mongo/db/repl/dbcheck.cpp
index 79a98205972..c24c170e988 100644
--- a/src/mongo/db/repl/dbcheck.cpp
+++ b/src/mongo/db/repl/dbcheck.cpp
@@ -247,10 +247,14 @@ DbCheckHasher::DbCheckHasher(OperationContext* opCtx,
InternalPlanner::IXSCAN_FETCH);
} else {
CollectionScanParams params;
- params.minRecord = uassertStatusOK(record_id_helpers::keyForDoc(
- start.obj(), collection->getClusteredInfo()->getIndexSpec()));
- params.maxRecord = uassertStatusOK(record_id_helpers::keyForDoc(
- end.obj(), collection->getClusteredInfo()->getIndexSpec()));
+ params.minRecord = uassertStatusOK(
+ record_id_helpers::keyForDoc(start.obj(),
+ collection->getClusteredInfo()->getIndexSpec(),
+ collection->getDefaultCollator()));
+ params.maxRecord = uassertStatusOK(
+ record_id_helpers::keyForDoc(end.obj(),
+ collection->getClusteredInfo()->getIndexSpec(),
+ collection->getDefaultCollator()));
params.boundInclusion = CollectionScanParams::ScanBoundInclusion::kIncludeEndRecordOnly;
_exec = InternalPlanner::collectionScan(
opCtx, &collection, params, PlanYieldPolicy::YieldPolicy::NO_YIELD);
diff --git a/src/mongo/db/repl/storage_interface_impl.cpp b/src/mongo/db/repl/storage_interface_impl.cpp
index 5dd66d101d2..3c61a2de652 100644
--- a/src/mongo/db/repl/storage_interface_impl.cpp
+++ b/src/mongo/db/repl/storage_interface_impl.cpp
@@ -732,13 +732,16 @@ StatusWith<std::vector<BSONObj>> _findOrDeleteDocuments(
"bounded collection scans only support forward scans");
}
+ auto collator = collection->getDefaultCollator();
boost::optional<RecordId> minRecord, maxRecord;
if (!startKey.isEmpty()) {
- minRecord = RecordId(record_id_helpers::keyForElem(startKey.firstElement()));
+ minRecord =
+ RecordId(record_id_helpers::keyForElem(startKey.firstElement(), collator));
}
if (!endKey.isEmpty()) {
- maxRecord = RecordId(record_id_helpers::keyForElem(endKey.firstElement()));
+ maxRecord =
+ RecordId(record_id_helpers::keyForElem(endKey.firstElement(), collator));
}
planExecutor = isFind
diff --git a/src/mongo/db/storage/record_store_test_harness.cpp b/src/mongo/db/storage/record_store_test_harness.cpp
index dd6a586c1a0..eec2f336fc7 100644
--- a/src/mongo/db/storage/record_store_test_harness.cpp
+++ b/src/mongo/db/storage/record_store_test_harness.cpp
@@ -428,7 +428,7 @@ TEST(RecordStoreTestHarness, ClusteredRecordStore) {
recordData.makeOwned();
RecordId id = uassertStatusOK(
- record_id_helpers::keyForDoc(doc, options.clusteredIndex->getIndexSpec()));
+ record_id_helpers::keyForDoc(doc, options.clusteredIndex->getIndexSpec(), nullptr));
records.push_back({id, recordData});
}
diff --git a/src/mongo/dbtests/query_stage_collscan.cpp b/src/mongo/dbtests/query_stage_collscan.cpp
index cd041e97f4a..c1aaff74825 100644
--- a/src/mongo/dbtests/query_stage_collscan.cpp
+++ b/src/mongo/dbtests/query_stage_collscan.cpp
@@ -209,10 +209,10 @@ public:
_client.insert(ns.ns(), docs, ordered);
}
- // Returns the recordId generated by doc, assuming doc takes the shape of {<cluster key> :
- // <value>};
+ // Returns the recordId generated by doc, assuming there's no collation and doc takes the shape
+ // of {<cluster key> : <value>};
RecordId getRecordIdForClusteredDoc(const BSONObj& doc) {
- return RecordId(record_id_helpers::keyForElem(doc.firstElement()));
+ return RecordId(record_id_helpers::keyForElem(doc.firstElement(), nullptr));
}
// Performs a bounded collection scan from 'minRecord' to 'maxRecord' in the specified