diff options
author | Daniel Gómez Ferro <daniel.gomezferro@mongodb.com> | 2022-01-18 15:38:18 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-01-18 16:58:37 +0000 |
commit | 754ea766abe5682738764d90b0d0c1d49600774a (patch) | |
tree | ef4db95e4ade57eead6d8b3566477ad974a39350 /src | |
parent | 31a411b86fafc6ca303f697dd0a88d7e32acc483 (diff) | |
download | mongo-754ea766abe5682738764d90b0d0c1d49600774a.tar.gz |
SERVER-61260 Support collations on clustered collections
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/catalog/clustered_collection_util.cpp | 7 | ||||
-rw-r--r-- | src/mongo/db/catalog/clustered_collection_util.h | 6 | ||||
-rw-r--r-- | src/mongo/db/catalog/collection_impl.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/catalog/list_indexes.cpp | 6 | ||||
-rw-r--r-- | src/mongo/db/catalog/validate_adaptor.cpp | 11 | ||||
-rw-r--r-- | src/mongo/db/pipeline/change_stream_expired_pre_image_remover.cpp | 34 | ||||
-rw-r--r-- | src/mongo/db/query/get_executor.cpp | 1 | ||||
-rw-r--r-- | src/mongo/db/query/internal_plans.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/query/planner_access.cpp | 90 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner.cpp | 29 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_params.h | 5 | ||||
-rw-r--r-- | src/mongo/db/record_id_helpers.cpp | 17 | ||||
-rw-r--r-- | src/mongo/db/record_id_helpers.h | 7 | ||||
-rw-r--r-- | src/mongo/db/repl/collection_cloner.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/repl/dbcheck.cpp | 12 | ||||
-rw-r--r-- | src/mongo/db/repl/storage_interface_impl.cpp | 7 | ||||
-rw-r--r-- | src/mongo/db/storage/record_store_test_harness.cpp | 2 | ||||
-rw-r--r-- | src/mongo/dbtests/query_stage_collscan.cpp | 6 |
19 files changed, 171 insertions, 85 deletions
diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript index 2c6e794531f..dd869d95e19 100644 --- a/src/mongo/db/SConscript +++ b/src/mongo/db/SConscript @@ -1279,6 +1279,7 @@ env.Library( ], LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/db/catalog/clustered_collection_options', + '$BUILD_DIR/mongo/db/query/collation/collator_interface', '$BUILD_DIR/mongo/db/storage/key_string', ],) diff --git a/src/mongo/db/catalog/clustered_collection_util.cpp b/src/mongo/db/catalog/clustered_collection_util.cpp index d71c6360050..7a0222bfa6b 100644 --- a/src/mongo/db/catalog/clustered_collection_util.cpp +++ b/src/mongo/db/catalog/clustered_collection_util.cpp @@ -109,14 +109,17 @@ bool requiresLegacyFormat(const NamespaceString& nss) { return nss.isTimeseriesBucketsCollection() || nss.isChangeStreamPreImagesCollection(); } -BSONObj formatClusterKeyForListIndexes(const ClusteredCollectionInfo& collInfo) { +BSONObj formatClusterKeyForListIndexes(const ClusteredCollectionInfo& collInfo, + const BSONObj& collation) { BSONObjBuilder bob; collInfo.getIndexSpec().serialize(&bob); + if (!collation.isEmpty()) { + bob.append("collation", collation); + } bob.append("clustered", true); return bob.obj(); } - bool isClusteredOnId(const boost::optional<ClusteredCollectionInfo>& collInfo) { return clustered_util::matchesClusterKey(BSON("_id" << 1), collInfo); } diff --git a/src/mongo/db/catalog/clustered_collection_util.h b/src/mongo/db/catalog/clustered_collection_util.h index 2dd603430de..c20418b3db8 100644 --- a/src/mongo/db/catalog/clustered_collection_util.h +++ b/src/mongo/db/catalog/clustered_collection_util.h @@ -69,9 +69,11 @@ bool requiresLegacyFormat(const NamespaceString& nss); /** * listIndexes requires the ClusteredIndexSpec be formatted with an additional field 'clustered: - * true' to indicate it is a clustered index. + * true' to indicate it is a clustered index and with the collection's default collation. If the + * collection has the 'simple' collation this expects an empty BSONObj. */ -BSONObj formatClusterKeyForListIndexes(const ClusteredCollectionInfo& collInfo); +BSONObj formatClusterKeyForListIndexes(const ClusteredCollectionInfo& collInfo, + const BSONObj& collation); /** * Returns true if the BSON object matches the collection's cluster key. Caller's should ensure diff --git a/src/mongo/db/catalog/collection_impl.cpp b/src/mongo/db/catalog/collection_impl.cpp index eb8212cd85b..8cf7e7aa320 100644 --- a/src/mongo/db/catalog/collection_impl.cpp +++ b/src/mongo/db/catalog/collection_impl.cpp @@ -824,8 +824,8 @@ Status CollectionImpl::insertDocumentForBulkLoader( RecordId recordId; if (isClustered()) { invariant(_shared->_recordStore->keyFormat() == KeyFormat::String); - recordId = - uassertStatusOK(record_id_helpers::keyForDoc(doc, getClusteredInfo()->getIndexSpec())); + recordId = uassertStatusOK(record_id_helpers::keyForDoc( + doc, getClusteredInfo()->getIndexSpec(), getDefaultCollator())); } // Using timestamp 0 for these inserts, which are non-oplog so we don't have an appropriate @@ -908,8 +908,8 @@ Status CollectionImpl::_insertDocuments(OperationContext* opCtx, RecordId recordId; if (isClustered()) { invariant(_shared->_recordStore->keyFormat() == KeyFormat::String); - recordId = uassertStatusOK( - record_id_helpers::keyForDoc(doc, getClusteredInfo()->getIndexSpec())); + recordId = uassertStatusOK(record_id_helpers::keyForDoc( + doc, getClusteredInfo()->getIndexSpec(), getDefaultCollator())); } if (MONGO_unlikely(corruptDocumentOnInsert.shouldFail())) { diff --git a/src/mongo/db/catalog/list_indexes.cpp b/src/mongo/db/catalog/list_indexes.cpp index 780cc3c0b12..ce1515a4c13 100644 --- a/src/mongo/db/catalog/list_indexes.cpp +++ b/src/mongo/db/catalog/list_indexes.cpp @@ -79,8 +79,12 @@ std::list<BSONObj> listIndexesInLock(OperationContext* opCtx, collection->getAllIndexes(&indexNames); if (collection->isClustered() && !collection->ns().isTimeseriesBucketsCollection()) { + BSONObj collation; + if (auto collator = collection->getDefaultCollator()) { + collation = collator->getSpec().toBSON(); + } auto clusteredSpec = clustered_util::formatClusterKeyForListIndexes( - collection->getClusteredInfo().get()); + collection->getClusteredInfo().get(), collation); if (additionalInclude == ListIndexesInclude::IndexBuildInfo) { indexSpecs.push_back(BSON("spec"_sd << clusteredSpec)); } else { diff --git a/src/mongo/db/catalog/validate_adaptor.cpp b/src/mongo/db/catalog/validate_adaptor.cpp index 4c715746119..46b0439b29d 100644 --- a/src/mongo/db/catalog/validate_adaptor.cpp +++ b/src/mongo/db/catalog/validate_adaptor.cpp @@ -75,8 +75,9 @@ void _validateClusteredCollectionRecordId(OperationContext* opCtx, const RecordId& rid, const BSONObj& doc, const ClusteredIndexSpec& indexSpec, + const CollatorInterface* collator, ValidateResults* results) { - const auto ridFromDoc = record_id_helpers::keyForDoc(doc, indexSpec); + const auto ridFromDoc = record_id_helpers::keyForDoc(doc, indexSpec, collator); if (!ridFromDoc.isOK()) { results->valid = false; results->errors.push_back(str::stream() << rid << " " << ridFromDoc.getStatus().reason()); @@ -119,8 +120,12 @@ Status ValidateAdaptor::validateRecord(OperationContext* opCtx, const CollectionPtr& coll = _validateState->getCollection(); if (coll->isClustered()) { - _validateClusteredCollectionRecordId( - opCtx, recordId, recordBson, coll->getClusteredInfo()->getIndexSpec(), results); + _validateClusteredCollectionRecordId(opCtx, + recordId, + recordBson, + coll->getClusteredInfo()->getIndexSpec(), + coll->getDefaultCollator(), + results); } auto& executionCtx = StorageExecutionContext::get(opCtx); diff --git a/src/mongo/db/pipeline/change_stream_expired_pre_image_remover.cpp b/src/mongo/db/pipeline/change_stream_expired_pre_image_remover.cpp index 75fea08ded7..c7eb0d6e33f 100644 --- a/src/mongo/db/pipeline/change_stream_expired_pre_image_remover.cpp +++ b/src/mongo/db/pipeline/change_stream_expired_pre_image_remover.cpp @@ -159,9 +159,10 @@ public: // Set up the new collection scan to start from the 'minPreImageId'. void setupPlanExecutor(boost::optional<ChangeStreamPreImageId> minPreImageId) { const auto minRecordId = - (minPreImageId ? boost::optional<RecordId>(record_id_helpers::keyForElem( - BSON("_id" << minPreImageId->toBSON()).firstElement())) - : boost::none); + (minPreImageId + ? boost::optional<RecordId>(record_id_helpers::keyForElem( + BSON("_id" << minPreImageId->toBSON()).firstElement(), nullptr)) + : boost::none); _planExecutor = InternalPlanner::collectionScan(_opCtx, _preImagesCollPtr, @@ -237,19 +238,20 @@ void deleteExpiredChangeStreamPreImages(Client* client) { for (auto it = expiredPreImages.begin(); it != expiredPreImages.end(); ++it) { it.saveState(); - writeConflictRetry( - opCtx.get(), - "ChangeStreamExpiredPreImagesRemover", - NamespaceString::kChangeStreamPreImagesNamespace.ns(), - [&] { - WriteUnitOfWork wuow(opCtx.get()); - const auto recordId = - record_id_helpers::keyForElem(it->getField(ChangeStreamPreImage::kIdFieldName)); - preImagesColl->deleteDocument( - opCtx.get(), kUninitializedStmtId, recordId, &CurOp::get(*opCtx)->debug()); - wuow.commit(); - numberOfRemovals++; - }); + writeConflictRetry(opCtx.get(), + "ChangeStreamExpiredPreImagesRemover", + NamespaceString::kChangeStreamPreImagesNamespace.ns(), + [&] { + WriteUnitOfWork wuow(opCtx.get()); + const auto recordId = record_id_helpers::keyForElem( + it->getField(ChangeStreamPreImage::kIdFieldName), nullptr); + preImagesColl->deleteDocument(opCtx.get(), + kUninitializedStmtId, + recordId, + &CurOp::get(*opCtx)->debug()); + wuow.commit(); + numberOfRemovals++; + }); it.restoreState(); } diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index 91f17d08a30..6898ff287a5 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -359,6 +359,7 @@ void fillOutPlannerParams(OperationContext* opCtx, if (collection->isClustered()) { plannerParams->clusteredInfo = collection->getClusteredInfo(); + plannerParams->clusteredCollectionCollator = collection->getDefaultCollator(); } } diff --git a/src/mongo/db/query/internal_plans.cpp b/src/mongo/db/query/internal_plans.cpp index 649962c31cb..0f45d3b4649 100644 --- a/src/mongo/db/query/internal_plans.cpp +++ b/src/mongo/db/query/internal_plans.cpp @@ -77,13 +77,14 @@ CollectionScanParams convertIndexScanParamsToCollScanParams( dassert(collection->isClustered() && clustered_util::matchesClusterKey(keyPattern, collection->getClusteredInfo())); + invariant(collection->getDefaultCollator() == nullptr); boost::optional<RecordId> startRecord, endRecord; if (!startKey.isEmpty()) { - startRecord = RecordId(record_id_helpers::keyForElem(startKey.firstElement())); + startRecord = RecordId(record_id_helpers::keyForElem(startKey.firstElement(), nullptr)); } if (!endKey.isEmpty()) { - endRecord = RecordId(record_id_helpers::keyForElem(endKey.firstElement())); + endRecord = RecordId(record_id_helpers::keyForElem(endKey.firstElement(), nullptr)); } // For a forward scan, the startKey is the minRecord. For a backward scan, it is the maxRecord. diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp index 77f0ff9d90c..58652f73279 100644 --- a/src/mongo/db/query/planner_access.cpp +++ b/src/mongo/db/query/planner_access.cpp @@ -211,11 +211,41 @@ bool isOplogTsLowerBoundPred(const mongo::MatchExpression* me) { return me->path() == repl::OpTime::kTimestampFieldName; } +// True if the element type is affected by a collator (i.e. it is or contains a String). +bool affectedByCollator(const BSONElement& element) { + switch (element.type()) { + case BSONType::String: + return true; + case BSONType::Array: + case BSONType::Object: + for (const auto& sub : element.Obj()) { + if (affectedByCollator(sub)) + return true; + } + return false; + default: + return false; + } +} + +// Returns whether element is not affected by collators or query and collection collators are +// compatible. +bool compatibleCollator(const QueryPlannerParams& params, + const CollatorInterface* queryCollator, + const BSONElement& element) { + auto const collCollator = params.clusteredCollectionCollator; + bool compatible = !queryCollator || (collCollator && *queryCollator == *collCollator); + return compatible || !affectedByCollator(element); +} + /** * Helper function that checks to see if min() or max() were provided along with the query. If so, * adjusts the collection scan bounds to fit the constraints. */ -void handleRIDRangeMinMax(const CanonicalQuery& query, CollectionScanNode* collScan) { +void handleRIDRangeMinMax(const CanonicalQuery& query, + CollectionScanNode* collScan, + const QueryPlannerParams& params, + const CollatorInterface* collator) { BSONObj minObj = query.getFindCommandRequest().getMin(); BSONObj maxObj = query.getFindCommandRequest().getMax(); if (minObj.isEmpty() && maxObj.isEmpty()) { @@ -232,17 +262,17 @@ void handleRIDRangeMinMax(const CanonicalQuery& query, CollectionScanNode* collS collScan->direction == 1); boost::optional<RecordId> newMinRecord, newMaxRecord; - if (!maxObj.isEmpty()) { + if (!maxObj.isEmpty() && compatibleCollator(params, collator, maxObj.firstElement())) { // max() is exclusive. // Assumes clustered collection scans are only supported with the forward direction. collScan->boundInclusion = CollectionScanParams::ScanBoundInclusion::kIncludeStartRecordOnly; - newMaxRecord = record_id_helpers::keyForElem(maxObj.firstElement()); + newMaxRecord = record_id_helpers::keyForElem(maxObj.firstElement(), collator); } - if (!minObj.isEmpty()) { + if (!minObj.isEmpty() && compatibleCollator(params, collator, minObj.firstElement())) { // The min() is inclusive as are bounded collection scans by default. - newMinRecord = record_id_helpers::keyForElem(minObj.firstElement()); + newMinRecord = record_id_helpers::keyForElem(minObj.firstElement(), collator); } if (!collScan->minRecord) { @@ -271,7 +301,8 @@ void handleRIDRangeMinMax(const CanonicalQuery& query, CollectionScanNode* collS */ void handleRIDRangeScan(const MatchExpression* conjunct, CollectionScanNode* collScan, - const QueryPlannerParams& params) { + const QueryPlannerParams& params, + const CollatorInterface* collator) { invariant(params.clusteredInfo); if (conjunct == nullptr) { @@ -281,7 +312,7 @@ void handleRIDRangeScan(const MatchExpression* conjunct, auto* andMatchPtr = dynamic_cast<const AndMatchExpression*>(conjunct); if (andMatchPtr != nullptr) { for (size_t index = 0; index < andMatchPtr->numChildren(); index++) { - handleRIDRangeScan(andMatchPtr->getChild(index), collScan, params); + handleRIDRangeScan(andMatchPtr->getChild(index), collScan, params, collator); } return; } @@ -292,31 +323,30 @@ void handleRIDRangeScan(const MatchExpression* conjunct, return; } - const bool hasMaxRecord = collScan->maxRecord.has_value(); - const bool hasMinRecord = collScan->minRecord.has_value(); - - if (!hasMinRecord && !hasMaxRecord) { - if (auto eq = dynamic_cast<const EqualityMatchExpression*>(conjunct)) { - collScan->minRecord = record_id_helpers::keyForElem(eq->getData()); - collScan->maxRecord = collScan->minRecord; - return; - } + auto match = dynamic_cast<const ComparisonMatchExpression*>(conjunct); + if (match == nullptr) { + return; // Not a comparison match expression. } - if (!hasMaxRecord) { - if (auto ltConjunct = dynamic_cast<const LTMatchExpression*>(conjunct)) { - collScan->maxRecord = record_id_helpers::keyForElem(ltConjunct->getData()); - } else if (auto lteConjunct = dynamic_cast<const LTEMatchExpression*>(conjunct)) { - collScan->maxRecord = record_id_helpers::keyForElem(lteConjunct->getData()); - } + const auto& element = match->getData(); + bool compatible = compatibleCollator(params, collator, element); + if (!compatible) { + return; // Collator affects probe and it's not compatible with collection's collator. } - if (!hasMinRecord) { - if (auto gtConjunct = dynamic_cast<const GTMatchExpression*>(conjunct)) { - collScan->minRecord = record_id_helpers::keyForElem(gtConjunct->getData()); - } else if (auto gteConjunct = dynamic_cast<const GTEMatchExpression*>(conjunct)) { - collScan->minRecord = record_id_helpers::keyForElem(gteConjunct->getData()); - } + auto& maxRecord = collScan->maxRecord; + auto& minRecord = collScan->minRecord; + if (dynamic_cast<const EqualityMatchExpression*>(match)) { + minRecord = record_id_helpers::keyForElem(element, collator); + maxRecord = minRecord; + } else if (!maxRecord && + (dynamic_cast<const LTMatchExpression*>(match) || + dynamic_cast<const LTEMatchExpression*>(match))) { + maxRecord = record_id_helpers::keyForElem(element, collator); + } else if (!minRecord && + (dynamic_cast<const GTMatchExpression*>(match) || + dynamic_cast<const GTEMatchExpression*>(match))) { + minRecord = record_id_helpers::keyForElem(element, collator); } } @@ -404,8 +434,8 @@ std::unique_ptr<QuerySolutionNode> QueryPlannerAccess::makeCollectionScan( if (params.clusteredInfo && !csn->resumeAfterRecordId) { // This is a clustered collection. Attempt to perform an efficient, bounded collection scan // via minRecord and maxRecord if applicable. - handleRIDRangeScan(csn->filter.get(), csn.get(), params); - handleRIDRangeMinMax(query, csn.get()); + handleRIDRangeScan(csn->filter.get(), csn.get(), params, query.getCollator()); + handleRIDRangeMinMax(query, csn.get(), params, query.getCollator()); } return csn; diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp index c6c670d607f..2651bd5f547 100644 --- a/src/mongo/db/query/query_planner.cpp +++ b/src/mongo/db/query/query_planner.cpp @@ -715,20 +715,33 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan( const auto clusterKey = params.clusteredInfo->getIndexSpec().getKey(); - // Since the clusteredIndex doesn't have a specific collator, check if it is - // compatible with the max and min using the same collator as the query. + // Check if the query collator is compatible with the collection collator for the + // provided min and max values. if ((!minObj.isEmpty() && - !indexCompatibleMaxMin( - minObj, query.getCollator(), query.getCollator(), clusterKey)) || + !indexCompatibleMaxMin(minObj, + query.getCollator(), + params.clusteredCollectionCollator, + clusterKey)) || (!maxObj.isEmpty() && - !indexCompatibleMaxMin( - maxObj, query.getCollator(), query.getCollator(), clusterKey))) { + !indexCompatibleMaxMin(maxObj, + query.getCollator(), + params.clusteredCollectionCollator, + clusterKey))) { return Status(ErrorCodes::Error(6137400), "The clustered index is not compatible with the values provided " - "for min/max"); + "for min/max due to the query collation"); } - if (!minObj.isEmpty() && !maxObj.isEmpty() && minObj.woCompare(maxObj) >= 0) { + auto wellSorted = [&minObj, &maxObj, collator = query.getCollator()]() { + if (collator) { + auto min = stripFieldNamesAndApplyCollation(minObj, collator); + auto max = stripFieldNamesAndApplyCollation(maxObj, collator); + return min.woCompare(max) < 0; + } else { + return minObj.woCompare(maxObj) < 0; + } + }; + if (!minObj.isEmpty() && !maxObj.isEmpty() && !wellSorted()) { return Status(ErrorCodes::Error(6137401), "max() must be greater than min()"); } } diff --git a/src/mongo/db/query/query_planner_params.h b/src/mongo/db/query/query_planner_params.h index 8a9f14dcf7d..ad34bdd1ee6 100644 --- a/src/mongo/db/query/query_planner_params.h +++ b/src/mongo/db/query/query_planner_params.h @@ -33,6 +33,7 @@ #include "mongo/db/catalog/clustered_collection_options_gen.h" #include "mongo/db/jsobj.h" +#include "mongo/db/query/collation/collator_interface.h" #include "mongo/db/query/index_entry.h" #include "mongo/db/query/query_knobs_gen.h" @@ -142,6 +143,10 @@ struct QueryPlannerParams { // Specifies the clusteredIndex information necessary to utilize the cluster key in bounded // collection scans and other query operations. boost::optional<ClusteredCollectionInfo> clusteredInfo; + + // Specifies the collator information necessary to utilize the cluster key in bounded + // collection scans and other query operations. + const CollatorInterface* clusteredCollectionCollator; }; } // namespace mongo diff --git a/src/mongo/db/record_id_helpers.cpp b/src/mongo/db/record_id_helpers.cpp index 75663948cf6..35925a75019 100644 --- a/src/mongo/db/record_id_helpers.cpp +++ b/src/mongo/db/record_id_helpers.cpp @@ -37,6 +37,7 @@ #include "mongo/bson/timestamp.h" #include "mongo/db/catalog/clustered_collection_util.h" #include "mongo/db/jsobj.h" +#include "mongo/db/query/collation/collation_index_key.h" #include "mongo/db/record_id.h" #include "mongo/db/storage/key_string.h" #include "mongo/logv2/redaction.h" @@ -83,7 +84,9 @@ StatusWith<RecordId> extractKeyOptime(const char* data, int len) { return keyForOptime(elem.timestamp()); } -StatusWith<RecordId> keyForDoc(const BSONObj& doc, const ClusteredIndexSpec& indexSpec) { +StatusWith<RecordId> keyForDoc(const BSONObj& doc, + const ClusteredIndexSpec& indexSpec, + const CollatorInterface* collator) { // Get the collection's cluster key field name const auto clusterKeyField = clustered_util::getClusterKeyFieldName(indexSpec); // Build a RecordId using the cluster key. @@ -94,15 +97,21 @@ StatusWith<RecordId> keyForDoc(const BSONObj& doc, const ClusteredIndexSpec& ind << clusterKeyField << "' field"}; } - return keyForElem(keyElement); + return keyForElem(keyElement, collator); } -RecordId keyForElem(const BSONElement& elem) { +RecordId keyForElem(const BSONElement& elem, const CollatorInterface* collator) { // Intentionally discard the TypeBits since the type information will be stored in the cluster // key of the original document. The consequence of this behavior is that cluster key values // that compare similarly, but are of different types may not be used concurrently. KeyString::Builder keyBuilder(KeyString::Version::kLatestVersion); - keyBuilder.appendBSONElement(elem); + if (collator) { + BSONObjBuilder out; + CollationIndexKey::collationAwareIndexKeyAppend(elem, collator, &out); + keyBuilder.appendBSONElement(out.done().firstElement()); + } else { + keyBuilder.appendBSONElement(elem); + } return RecordId(keyBuilder.getBuffer(), keyBuilder.getSize()); } diff --git a/src/mongo/db/record_id_helpers.h b/src/mongo/db/record_id_helpers.h index 1d984c94a1e..4627d134f9b 100644 --- a/src/mongo/db/record_id_helpers.h +++ b/src/mongo/db/record_id_helpers.h @@ -33,6 +33,7 @@ #include "mongo/base/status_with.h" #include "mongo/bson/bsonobj.h" #include "mongo/db/catalog/clustered_collection_options_gen.h" +#include "mongo/db/query/collation/collator_interface.h" #include "mongo/db/storage/key_format.h" namespace mongo { @@ -49,8 +50,10 @@ StatusWith<RecordId> keyForOptime(const Timestamp& opTime); /** * For clustered collections, converts various values into a RecordId. */ -StatusWith<RecordId> keyForDoc(const BSONObj& doc, const ClusteredIndexSpec& indexSpec); -RecordId keyForElem(const BSONElement& elem); +StatusWith<RecordId> keyForDoc(const BSONObj& doc, + const ClusteredIndexSpec& indexSpec, + const CollatorInterface* collator); +RecordId keyForElem(const BSONElement& elem, const CollatorInterface* collator); RecordId keyForOID(OID oid); RecordId keyForDate(Date_t date); diff --git a/src/mongo/db/repl/collection_cloner.cpp b/src/mongo/db/repl/collection_cloner.cpp index 773210d4cf3..0066240cc2f 100644 --- a/src/mongo/db/repl/collection_cloner.cpp +++ b/src/mongo/db/repl/collection_cloner.cpp @@ -212,7 +212,7 @@ BaseCloner::AfterStageBehavior CollectionCloner::listIndexesStage() { invariant(_collectionOptions.clusteredIndex); invariant(spec.getBoolField("clustered") == true); invariant(clustered_util::formatClusterKeyForListIndexes( - _collectionOptions.clusteredIndex.get()) + _collectionOptions.clusteredIndex.get(), _collectionOptions.collation) .woCompare(spec) == 0); // Skip if the spec is for the collection's clusteredIndex. } else if (spec.hasField("buildUUID")) { diff --git a/src/mongo/db/repl/dbcheck.cpp b/src/mongo/db/repl/dbcheck.cpp index 79a98205972..c24c170e988 100644 --- a/src/mongo/db/repl/dbcheck.cpp +++ b/src/mongo/db/repl/dbcheck.cpp @@ -247,10 +247,14 @@ DbCheckHasher::DbCheckHasher(OperationContext* opCtx, InternalPlanner::IXSCAN_FETCH); } else { CollectionScanParams params; - params.minRecord = uassertStatusOK(record_id_helpers::keyForDoc( - start.obj(), collection->getClusteredInfo()->getIndexSpec())); - params.maxRecord = uassertStatusOK(record_id_helpers::keyForDoc( - end.obj(), collection->getClusteredInfo()->getIndexSpec())); + params.minRecord = uassertStatusOK( + record_id_helpers::keyForDoc(start.obj(), + collection->getClusteredInfo()->getIndexSpec(), + collection->getDefaultCollator())); + params.maxRecord = uassertStatusOK( + record_id_helpers::keyForDoc(end.obj(), + collection->getClusteredInfo()->getIndexSpec(), + collection->getDefaultCollator())); params.boundInclusion = CollectionScanParams::ScanBoundInclusion::kIncludeEndRecordOnly; _exec = InternalPlanner::collectionScan( opCtx, &collection, params, PlanYieldPolicy::YieldPolicy::NO_YIELD); diff --git a/src/mongo/db/repl/storage_interface_impl.cpp b/src/mongo/db/repl/storage_interface_impl.cpp index 5dd66d101d2..3c61a2de652 100644 --- a/src/mongo/db/repl/storage_interface_impl.cpp +++ b/src/mongo/db/repl/storage_interface_impl.cpp @@ -732,13 +732,16 @@ StatusWith<std::vector<BSONObj>> _findOrDeleteDocuments( "bounded collection scans only support forward scans"); } + auto collator = collection->getDefaultCollator(); boost::optional<RecordId> minRecord, maxRecord; if (!startKey.isEmpty()) { - minRecord = RecordId(record_id_helpers::keyForElem(startKey.firstElement())); + minRecord = + RecordId(record_id_helpers::keyForElem(startKey.firstElement(), collator)); } if (!endKey.isEmpty()) { - maxRecord = RecordId(record_id_helpers::keyForElem(endKey.firstElement())); + maxRecord = + RecordId(record_id_helpers::keyForElem(endKey.firstElement(), collator)); } planExecutor = isFind diff --git a/src/mongo/db/storage/record_store_test_harness.cpp b/src/mongo/db/storage/record_store_test_harness.cpp index dd6a586c1a0..eec2f336fc7 100644 --- a/src/mongo/db/storage/record_store_test_harness.cpp +++ b/src/mongo/db/storage/record_store_test_harness.cpp @@ -428,7 +428,7 @@ TEST(RecordStoreTestHarness, ClusteredRecordStore) { recordData.makeOwned(); RecordId id = uassertStatusOK( - record_id_helpers::keyForDoc(doc, options.clusteredIndex->getIndexSpec())); + record_id_helpers::keyForDoc(doc, options.clusteredIndex->getIndexSpec(), nullptr)); records.push_back({id, recordData}); } diff --git a/src/mongo/dbtests/query_stage_collscan.cpp b/src/mongo/dbtests/query_stage_collscan.cpp index cd041e97f4a..c1aaff74825 100644 --- a/src/mongo/dbtests/query_stage_collscan.cpp +++ b/src/mongo/dbtests/query_stage_collscan.cpp @@ -209,10 +209,10 @@ public: _client.insert(ns.ns(), docs, ordered); } - // Returns the recordId generated by doc, assuming doc takes the shape of {<cluster key> : - // <value>}; + // Returns the recordId generated by doc, assuming there's no collation and doc takes the shape + // of {<cluster key> : <value>}; RecordId getRecordIdForClusteredDoc(const BSONObj& doc) { - return RecordId(record_id_helpers::keyForElem(doc.firstElement())); + return RecordId(record_id_helpers::keyForElem(doc.firstElement(), nullptr)); } // Performs a bounded collection scan from 'minRecord' to 'maxRecord' in the specified |