diff options
author | Katherine Wu <katherine.wu@mongodb.com> | 2021-04-09 10:03:38 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-04-30 15:51:08 +0000 |
commit | 6f026d8a741553b49177ae1fdfd8c0e967baa2d8 (patch) | |
tree | 023dd6beddb13d5dec866180b4542f0f95552815 | |
parent | 49f6fcebd3c4a97abcd05c862ad1204eed9d66ef (diff) | |
download | mongo-6f026d8a741553b49177ae1fdfd8c0e967baa2d8.tar.gz |
SERVER-55790 Ensure query rewrites for time-series collections are correct in the case of mismatched collations
-rw-r--r-- | src/mongo/db/commands/map_reduce_agg.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/commands/run_aggregate.cpp | 29 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp | 14 | ||||
-rw-r--r-- | src/mongo/db/pipeline/expression_context.h | 5 | ||||
-rw-r--r-- | src/mongo/db/pipeline/pipeline_d.h | 31 |
5 files changed, 59 insertions, 22 deletions
diff --git a/src/mongo/db/commands/map_reduce_agg.cpp b/src/mongo/db/commands/map_reduce_agg.cpp index d86b4679814..43a66fb2314 100644 --- a/src/mongo/db/commands/map_reduce_agg.cpp +++ b/src/mongo/db/commands/map_reduce_agg.cpp @@ -69,7 +69,7 @@ auto makeExpressionContext(OperationContext* opCtx, "mapReduce on a view is not supported", !ctx.getView()); - auto resolvedCollator = PipelineD::resolveCollator( + auto [resolvedCollator, _] = PipelineD::resolveCollator( opCtx, parsedMr.getCollation().get_value_or(BSONObj()), ctx.getCollection()); // The UUID of the collection for the execution namespace of this aggregation. diff --git a/src/mongo/db/commands/run_aggregate.cpp b/src/mongo/db/commands/run_aggregate.cpp index 25a87ca9d06..e458871d695 100644 --- a/src/mongo/db/commands/run_aggregate.cpp +++ b/src/mongo/db/commands/run_aggregate.cpp @@ -446,7 +446,8 @@ boost::intrusive_ptr<ExpressionContext> makeExpressionContext( OperationContext* opCtx, const AggregateCommandRequest& request, std::unique_ptr<CollatorInterface> collator, - boost::optional<UUID> uuid) { + boost::optional<UUID> uuid, + ExpressionContext::CollationMatchesDefault collationMatchesDefault) { setIgnoredShardVersionForMergeCursors(opCtx, request); boost::intrusive_ptr<ExpressionContext> expCtx = new ExpressionContext(opCtx, @@ -458,6 +459,7 @@ boost::intrusive_ptr<ExpressionContext> makeExpressionContext( CurOp::get(opCtx)->dbProfileLevel() > 0); expCtx->tempDir = storageGlobalParams.dbpath + "/_tmp"; expCtx->inMultiDocumentTransaction = opCtx->inMultiDocumentTransaction(); + expCtx->collationMatchesDefault = collationMatchesDefault; return expCtx; } @@ -522,7 +524,8 @@ std::vector<std::unique_ptr<Pipeline, PipelineDeleter>> createExchangePipelinesI request, expCtx->getCollator() ? expCtx->getCollator()->clone() : nullptr, - uuid); + uuid, + expCtx->collationMatchesDefault); // Create a new pipeline for the consumer consisting of a single // DocumentSourceExchange. @@ -579,6 +582,7 @@ Status runAggregate(OperationContext* opCtx, // The collation to use for this aggregation. boost::optional to distinguish between the case // where the collation has not yet been resolved, and where it has been resolved to nullptr. boost::optional<std::unique_ptr<CollatorInterface>> collatorToUse; + ExpressionContext::CollationMatchesDefault collatorToUseMatchesDefault; // The UUID of the collection for the execution namespace of this aggregation. boost::optional<UUID> uuid; @@ -637,8 +641,10 @@ Status runAggregate(OperationContext* opCtx, // If the user specified an explicit collation, adopt it; otherwise, use the simple // collation. We do not inherit the collection's default collation or UUID, since // the stream may be resuming from a point before the current UUID existed. - collatorToUse.emplace(PipelineD::resolveCollator( - opCtx, request.getCollation().get_value_or(BSONObj()), nullptr)); + auto [collator, match] = PipelineD::resolveCollator( + opCtx, request.getCollation().get_value_or(BSONObj()), nullptr); + collatorToUse.emplace(std::move(collator)); + collatorToUseMatchesDefault = match; // Obtain collection locks on the execution namespace; that is, the oplog. ctx.emplace(opCtx, nss, AutoGetCollectionViewMode::kViewsForbidden); @@ -654,13 +660,17 @@ Status runAggregate(OperationContext* opCtx, Top::LockType::NotLocked, AutoStatsTracker::LogMode::kUpdateTopAndCurOp, 0); - collatorToUse.emplace(PipelineD::resolveCollator( - opCtx, request.getCollation().get_value_or(BSONObj()), nullptr)); + auto [collator, match] = PipelineD::resolveCollator( + opCtx, request.getCollation().get_value_or(BSONObj()), nullptr); + collatorToUse.emplace(std::move(collator)); + collatorToUseMatchesDefault = match; } else { // This is a regular aggregation. Lock the collection or view. ctx.emplace(opCtx, nss, AutoGetCollectionViewMode::kViewsPermitted); - collatorToUse.emplace(PipelineD::resolveCollator( - opCtx, request.getCollation().get_value_or(BSONObj()), ctx->getCollection())); + auto [collator, match] = PipelineD::resolveCollator( + opCtx, request.getCollation().get_value_or(BSONObj()), ctx->getCollection()); + collatorToUse.emplace(std::move(collator)); + collatorToUseMatchesDefault = match; if (ctx->getCollection()) { uuid = ctx->getCollection()->uuid(); } @@ -752,7 +762,8 @@ Status runAggregate(OperationContext* opCtx, } invariant(collatorToUse); - expCtx = makeExpressionContext(opCtx, request, std::move(*collatorToUse), uuid); + expCtx = makeExpressionContext( + opCtx, request, std::move(*collatorToUse), uuid, collatorToUseMatchesDefault); auto pipeline = Pipeline::parse(request.getPipeline(), expCtx); diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp index 877c88fad55..5e1cd9d0b8e 100644 --- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp +++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp @@ -545,7 +545,8 @@ std::pair<BSONObj, bool> DocumentSourceInternalUnpackBucket::extractOrBuildProje std::unique_ptr<MatchExpression> createComparisonPredicate( const ComparisonMatchExpression* matchExpr, const BucketSpec& bucketSpec, - int bucketMaxSpanSeconds) { + int bucketMaxSpanSeconds, + ExpressionContext::CollationMatchesDefault collationMatchesDefault) { using namespace timeseries; // The control field's min and max are chosen using a field-order insensitive comparator, while @@ -562,6 +563,14 @@ std::unique_ptr<MatchExpression> createComparisonPredicate( if (matchExpr->getData().type() == BSONType::jstNULL) return nullptr; + // The control field's min and max are chosen based on the collation of the collection. If the + // query's collation does not match the collection's collation and the query operand is a + // string or compound type (skipped above) we will not perform this optimization. + if (collationMatchesDefault == ExpressionContext::CollationMatchesDefault::kNo && + matchExpr->getData().type() == BSONType::String) { + return nullptr; + } + // We must avoid mapping predicates on the meta field onto the control field. if (bucketSpec.metaField && (matchExpr->path() == bucketSpec.metaField.get() || @@ -658,7 +667,8 @@ DocumentSourceInternalUnpackBucket::createPredicatesOnBucketLevelField( } else if (ComparisonMatchExpression::isComparisonMatchExpression(matchExpr)) { return createComparisonPredicate(static_cast<const ComparisonMatchExpression*>(matchExpr), _bucketUnpacker.bucketSpec(), - _bucketMaxSpanSeconds); + _bucketMaxSpanSeconds, + pExpCtx->collationMatchesDefault); } return nullptr; diff --git a/src/mongo/db/pipeline/expression_context.h b/src/mongo/db/pipeline/expression_context.h index e75983b93b1..84d9f46236d 100644 --- a/src/mongo/db/pipeline/expression_context.h +++ b/src/mongo/db/pipeline/expression_context.h @@ -378,6 +378,11 @@ public: bool exprUnstableForApiV1 = false; bool exprDeprectedForApiV1 = false; + // Tracks whether the collator to use for the aggregation matches the default collation of the + // collection or view. For collectionless aggregates this is set to 'kNoDefaultCollation'. + enum class CollationMatchesDefault { kNoDefault, kYes, kNo }; + CollationMatchesDefault collationMatchesDefault = CollationMatchesDefault::kNoDefault; + protected: static const int kInterruptCheckPeriod = 128; diff --git a/src/mongo/db/pipeline/pipeline_d.h b/src/mongo/db/pipeline/pipeline_d.h index 89214cb1b10..3dad7678d33 100644 --- a/src/mongo/db/pipeline/pipeline_d.h +++ b/src/mongo/db/pipeline/pipeline_d.h @@ -134,17 +134,28 @@ public: * Resolves the collator to either the user-specified collation or, if none was specified, to * the collection-default collation. */ - static std::unique_ptr<CollatorInterface> resolveCollator(OperationContext* opCtx, - BSONObj userCollation, - const CollectionPtr& collection) { - if (!userCollation.isEmpty()) { - return uassertStatusOK(CollatorFactoryInterface::get(opCtx->getServiceContext()) - ->makeFromBSON(userCollation)); + static std::pair<std::unique_ptr<CollatorInterface>, ExpressionContext::CollationMatchesDefault> + resolveCollator(OperationContext* opCtx, + BSONObj userCollation, + const CollectionPtr& collection) { + if (!collection || !collection->getDefaultCollator()) { + return {userCollation.isEmpty() + ? nullptr + : uassertStatusOK(CollatorFactoryInterface::get(opCtx->getServiceContext()) + ->makeFromBSON(userCollation)), + ExpressionContext::CollationMatchesDefault::kNoDefault}; } - - return (collection && collection->getDefaultCollator() - ? collection->getDefaultCollator()->clone() - : nullptr); + if (userCollation.isEmpty()) { + return {collection->getDefaultCollator()->clone(), + ExpressionContext::CollationMatchesDefault::kYes}; + } + auto userCollator = uassertStatusOK( + CollatorFactoryInterface::get(opCtx->getServiceContext())->makeFromBSON(userCollation)); + return { + std::move(userCollator), + CollatorInterface::collatorsMatch(collection->getDefaultCollator(), userCollator.get()) + ? ExpressionContext::CollationMatchesDefault::kYes + : ExpressionContext::CollationMatchesDefault::kNo}; } private: |