diff options
author | samontea <merciers.merciers@gmail.com> | 2021-11-16 14:54:47 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-12-14 22:02:37 +0000 |
commit | 8fb1d8039361bcbf3a0ab77127cc6712783485ca (patch) | |
tree | 2681117e701eaffd05e48ea49f4f444044a320a5 /src | |
parent | 56d055c507a94a70e31a014142934060b76710ed (diff) | |
download | mongo-8fb1d8039361bcbf3a0ab77127cc6712783485ca.tar.gz |
SERVER-60672 Simpler pushdown when timeseries collection has no mixed-schema buckets
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp | 63 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_internal_unpack_bucket.h | 8 | ||||
-rw-r--r-- | src/mongo/db/views/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/views/resolved_view.cpp | 32 | ||||
-rw-r--r-- | src/mongo/db/views/resolved_view.h | 9 | ||||
-rw-r--r-- | src/mongo/db/views/view_catalog.cpp | 18 |
6 files changed, 114 insertions, 17 deletions
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp index c6f353f3293..ed706683ba5 100644 --- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp +++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp @@ -220,8 +220,10 @@ bool fieldIsComputed(BucketSpec spec, std::string field) { DocumentSourceInternalUnpackBucket::DocumentSourceInternalUnpackBucket( const boost::intrusive_ptr<ExpressionContext>& expCtx, BucketUnpacker bucketUnpacker, - int bucketMaxSpanSeconds) + int bucketMaxSpanSeconds, + bool assumeNoMixedSchemaData) : DocumentSource(kStageNameInternal, expCtx), + _assumeNoMixedSchemaData(assumeNoMixedSchemaData), _bucketUnpacker(std::move(bucketUnpacker)), _bucketMaxSpanSeconds{bucketMaxSpanSeconds} {} @@ -240,6 +242,7 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createF auto hasTimeField = false; auto hasBucketMaxSpanSeconds = false; auto bucketMaxSpanSeconds = 0; + auto assumeClean = false; std::vector<std::string> computedMetaProjFields; for (auto&& elem : specElem.embeddedObject()) { auto fieldName = elem.fieldNameStringData(); @@ -268,6 +271,11 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createF unpackerBehavior = fieldName == kInclude ? BucketUnpacker::Behavior::kInclude : BucketUnpacker::Behavior::kExclude; hasIncludeExclude = true; + } else if (fieldName == kAssumeNoMixedSchemaData) { + uassert(6067202, + str::stream() << "assumeClean field must be a bool, got: " << elem.type(), + elem.type() == BSONType::Bool); + assumeClean = elem.boolean(); } else if (fieldName == timeseries::kTimeFieldName) { uassert(5346504, str::stream() << "timeField field must be a string, got: " << elem.type(), @@ -326,7 +334,10 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createF hasBucketMaxSpanSeconds); return make_intrusive<DocumentSourceInternalUnpackBucket>( - expCtx, BucketUnpacker{std::move(bucketSpec), unpackerBehavior}, bucketMaxSpanSeconds); + expCtx, + BucketUnpacker{std::move(bucketSpec), unpackerBehavior}, + bucketMaxSpanSeconds, + assumeClean); } boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createFromBsonExternal( @@ -338,6 +349,7 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createF BucketSpec bucketSpec; auto hasTimeField = false; + auto assumeClean = false; for (auto&& elem : specElem.embeddedObject()) { auto fieldName = elem.fieldNameStringData(); // We only expose "timeField" and "metaField" as parameters in $_unpackBucket. @@ -356,6 +368,11 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createF str::stream() << "metaField field must be a single-element field path", metaField.find('.') == std::string::npos); bucketSpec.metaField = std::move(metaField); + } else if (fieldName == kAssumeNoMixedSchemaData) { + uassert(6067203, + str::stream() << "assumeClean field must be a bool, got: " << elem.type(), + elem.type() == BSONType::Bool); + assumeClean = elem.boolean(); } else { uasserted(5612404, str::stream() << "unrecognized parameter to $_unpackBucket: " << fieldName); @@ -366,7 +383,10 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createF hasTimeField); return make_intrusive<DocumentSourceInternalUnpackBucket>( - expCtx, BucketUnpacker{std::move(bucketSpec), BucketUnpacker::Behavior::kExclude}, 3600); + expCtx, + BucketUnpacker{std::move(bucketSpec), BucketUnpacker::Behavior::kExclude}, + 3600, + assumeClean); } void DocumentSourceInternalUnpackBucket::serializeToArray( @@ -394,6 +414,8 @@ void DocumentSourceInternalUnpackBucket::serializeToArray( out.addField(timeseries::kMetaFieldName, Value{*spec.metaField}); } out.addField(kBucketMaxSpanSeconds, Value{_bucketMaxSpanSeconds}); + if (_assumeNoMixedSchemaData) + out.addField(kAssumeNoMixedSchemaData, Value(_assumeNoMixedSchemaData)); if (!spec.computedMetaProjFields.empty()) out.addField("computedMetaProjFields", Value{[&] { @@ -545,12 +567,22 @@ std::pair<BSONObj, bool> DocumentSourceInternalUnpackBucket::extractOrBuildProje * Creates a predicate that ensures that if there exists a subpath of matchExprPath such that the * type of `control.min.subpath` is not the same as `control.max.subpath` then we will match that * document. + * + * However, if the buckets collection has no mixed-schema data then this type-equality predicate is + * unnecessary. In that case this function returns an empty, always-true predicate. */ std::unique_ptr<MatchExpression> createTypeEqualityPredicate( - boost::intrusive_ptr<ExpressionContext> pExpCtx, const StringData& matchExprPath) { + boost::intrusive_ptr<ExpressionContext> pExpCtx, + const StringData& matchExprPath, + bool assumeNoMixedSchemaData) { + + std::vector<std::unique_ptr<MatchExpression>> typeEqualityPredicates; + + if (assumeNoMixedSchemaData) + return std::make_unique<OrMatchExpression>(std::move(typeEqualityPredicates)); + FieldPath matchExprField(matchExprPath); using namespace timeseries; - std::vector<std::unique_ptr<MatchExpression>> typeEqualityPredicates; // Assume that we're generating a predicate on "a.b" for (size_t i = 0; i < matchExprField.getPathLength(); i++) { @@ -595,7 +627,8 @@ std::unique_ptr<MatchExpression> createComparisonPredicate( const BucketSpec& bucketSpec, int bucketMaxSpanSeconds, ExpressionContext::CollationMatchesDefault collationMatchesDefault, - boost::intrusive_ptr<ExpressionContext> pExpCtx) { + boost::intrusive_ptr<ExpressionContext> pExpCtx, + bool assumeNoMixedSchemaData) { using namespace timeseries; const auto matchExprPath = matchExpr->path(); const auto matchExprData = matchExpr->getData(); @@ -684,7 +717,8 @@ std::unique_ptr<MatchExpression> createComparisonPredicate( minPath, matchExprData), MatchExprPredicate<InternalExprGTEMatchExpression>( maxPath, matchExprData)), - createTypeEqualityPredicate(pExpCtx, matchExprPath))); + createTypeEqualityPredicate( + pExpCtx, matchExprPath, assumeNoMixedSchemaData))); case MatchExpression::GT: // For $gt, make a $gt predicate against 'control.max'. In addition, if the comparison @@ -706,7 +740,8 @@ std::unique_ptr<MatchExpression> createComparisonPredicate( bucketMaxSpanSeconds))) : std::make_unique<OrMatchExpression>(makeVector<std::unique_ptr<MatchExpression>>( std::make_unique<InternalExprGTMatchExpression>(maxPath, matchExprData), - createTypeEqualityPredicate(pExpCtx, matchExprPath))); + createTypeEqualityPredicate( + pExpCtx, matchExprPath, assumeNoMixedSchemaData))); case MatchExpression::GTE: // For $gte, make a $gte predicate against 'control.max'. In addition, if the comparison @@ -728,7 +763,8 @@ std::unique_ptr<MatchExpression> createComparisonPredicate( bucketMaxSpanSeconds))) : std::make_unique<OrMatchExpression>(makeVector<std::unique_ptr<MatchExpression>>( std::make_unique<InternalExprGTEMatchExpression>(maxPath, matchExprData), - createTypeEqualityPredicate(pExpCtx, matchExprPath))); + createTypeEqualityPredicate( + pExpCtx, matchExprPath, assumeNoMixedSchemaData))); case MatchExpression::LT: // For $lt, make a $lt predicate against 'control.min'. In addition, if the comparison @@ -748,7 +784,8 @@ std::unique_ptr<MatchExpression> createComparisonPredicate( bucketMaxSpanSeconds))) : std::make_unique<OrMatchExpression>(makeVector<std::unique_ptr<MatchExpression>>( std::make_unique<InternalExprLTMatchExpression>(minPath, matchExprData), - createTypeEqualityPredicate(pExpCtx, matchExprPath))); + createTypeEqualityPredicate( + pExpCtx, matchExprPath, assumeNoMixedSchemaData))); case MatchExpression::LTE: // For $lte, make a $lte predicate against 'control.min'. In addition, if the comparison @@ -768,7 +805,8 @@ std::unique_ptr<MatchExpression> createComparisonPredicate( bucketMaxSpanSeconds))) : std::make_unique<OrMatchExpression>(makeVector<std::unique_ptr<MatchExpression>>( std::make_unique<InternalExprLTEMatchExpression>(minPath, matchExprData), - createTypeEqualityPredicate(pExpCtx, matchExprPath))); + createTypeEqualityPredicate( + pExpCtx, matchExprPath, assumeNoMixedSchemaData))); default: MONGO_UNREACHABLE_TASSERT(5348302); @@ -797,7 +835,8 @@ DocumentSourceInternalUnpackBucket::createPredicatesOnBucketLevelField( _bucketUnpacker.bucketSpec(), _bucketMaxSpanSeconds, pExpCtx->collationMatchesDefault, - pExpCtx); + pExpCtx, + _assumeNoMixedSchemaData); } else if (matchExpr->matchType() == MatchExpression::GEO) { auto& geoExpr = static_cast<const GeoMatchExpression*>(matchExpr)->getGeoExpression(); if (geoExpr.getPred() == GeoExpression::WITHIN || diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h index 40e154c5478..c1b59078fae 100644 --- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h +++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h @@ -43,6 +43,7 @@ public: static constexpr StringData kStageNameExternal = "$_unpackBucket"_sd; static constexpr StringData kInclude = "include"_sd; static constexpr StringData kExclude = "exclude"_sd; + static constexpr StringData kAssumeNoMixedSchemaData = "assumeNoMixedSchemaData"_sd; static constexpr StringData kBucketMaxSpanSeconds = "bucketMaxSpanSeconds"_sd; static boost::intrusive_ptr<DocumentSource> createFromBsonInternal( @@ -52,7 +53,8 @@ public: DocumentSourceInternalUnpackBucket(const boost::intrusive_ptr<ExpressionContext>& expCtx, BucketUnpacker bucketUnpacker, - int bucketMaxSpanSeconds); + int bucketMaxSpanSeconds, + bool assumeNoMixedSchemaData = false); const char* getSourceName() const override { return kStageNameInternal.rawData(); @@ -202,6 +204,10 @@ public: private: GetNextResult doGetNext() final; + // If buckets contained a mixed type schema along some path, we have to push down special + // predicates in order to ensure correctness. + bool _assumeNoMixedSchemaData = false; + BucketUnpacker _bucketUnpacker; int _bucketMaxSpanSeconds; diff --git a/src/mongo/db/views/SConscript b/src/mongo/db/views/SConscript index 5ca16d561ed..38d2204fce9 100644 --- a/src/mongo/db/views/SConscript +++ b/src/mongo/db/views/SConscript @@ -27,6 +27,7 @@ env.Library( ], LIBDEPS=[ '$BUILD_DIR/mongo/base', + '$BUILD_DIR/mongo/db/catalog/collection', '$BUILD_DIR/mongo/db/pipeline/aggregation', '$BUILD_DIR/mongo/db/query/collation/collator_factory_interface', '$BUILD_DIR/mongo/db/repl/repl_coordinator_interface', diff --git a/src/mongo/db/views/resolved_view.cpp b/src/mongo/db/views/resolved_view.cpp index 9c754c559e7..c0435b8e924 100644 --- a/src/mongo/db/views/resolved_view.cpp +++ b/src/mongo/db/views/resolved_view.cpp @@ -72,15 +72,30 @@ ResolvedView ResolvedView::fromBSON(const BSONObj& commandResponseObj) { collationSpec = collationElt.embeddedObject().getOwned(); } + + boost::optional<bool> mixedSchema = boost::none; + if (auto mixedSchemaElem = viewDef[kTimeseriesMayContainMixedData]) { + uassert(6067204, + str::stream() << "view definition must have " << kTimeseriesMayContainMixedData + << " of type bool or no such field", + mixedSchemaElem.type() == BSONType::Bool); + + mixedSchema = boost::optional<bool>(mixedSchemaElem.boolean()); + } + return {NamespaceString(viewDef["ns"].valueStringData()), std::move(pipeline), - std::move(collationSpec)}; + std::move(collationSpec), + std::move(mixedSchema)}; } void ResolvedView::serialize(BSONObjBuilder* builder) const { BSONObjBuilder subObj(builder->subobjStart("resolvedView")); subObj.append("ns", _namespace.ns()); subObj.append("pipeline", _pipeline); + // Only serialize if it doesn't contain mixed data. + if ((_timeseriesMayContainMixedData && !(*_timeseriesMayContainMixedData))) + subObj.append(kTimeseriesMayContainMixedData, *_timeseriesMayContainMixedData); if (!_defaultCollation.isEmpty()) { subObj.append("collation", _defaultCollation); } @@ -125,6 +140,21 @@ AggregateCommandRequest ResolvedView::asExpandedViewAggregation( } resolvedPipeline[1] = BSON(DocumentSourceInternalConvertBucketIndexStats::kStageName << builder.obj()); + } else if (resolvedPipeline.size() >= 1 && + resolvedPipeline[0][DocumentSourceInternalUnpackBucket::kStageNameInternal] && + serverGlobalParams.featureCompatibility.isGreaterThanOrEqualTo( + multiversion::FeatureCompatibilityVersion::kVersion_5_2)) { + auto unpackStage = resolvedPipeline[0]; + + BSONObjBuilder builder; + for (const auto& elem : + unpackStage[DocumentSourceInternalUnpackBucket::kStageNameInternal].Obj()) { + builder.append(elem); + } + builder.append(DocumentSourceInternalUnpackBucket::kAssumeNoMixedSchemaData, + ((_timeseriesMayContainMixedData && !(*_timeseriesMayContainMixedData)))); + resolvedPipeline[0] = + BSON(DocumentSourceInternalUnpackBucket::kStageNameInternal << builder.obj()); } AggregateCommandRequest expandedRequest{_namespace, resolvedPipeline}; diff --git a/src/mongo/db/views/resolved_view.h b/src/mongo/db/views/resolved_view.h index 19cfefe82f9..d19642c8dd5 100644 --- a/src/mongo/db/views/resolved_view.h +++ b/src/mongo/db/views/resolved_view.h @@ -46,10 +46,12 @@ class ResolvedView final : public ErrorExtraInfo { public: ResolvedView(const NamespaceString& collectionNs, std::vector<BSONObj> pipeline, - BSONObj defaultCollation) + BSONObj defaultCollation, + boost::optional<bool> timeseriesMayContainMixedData = boost::none) : _namespace(collectionNs), _pipeline(std::move(pipeline)), - _defaultCollation(std::move(defaultCollation)) {} + _defaultCollation(std::move(defaultCollation)), + _timeseriesMayContainMixedData(timeseriesMayContainMixedData) {} static ResolvedView fromBSON(const BSONObj& commandResponseObj); @@ -74,6 +76,7 @@ public: // ErrorExtraInfo API static constexpr auto code = ErrorCodes::CommandOnShardedViewNotSupportedOnMongod; + static constexpr StringData kTimeseriesMayContainMixedData = "timeseriesMayContainMixedData"_sd; void serialize(BSONObjBuilder* bob) const final; static std::shared_ptr<const ErrorExtraInfo> parse(const BSONObj&); @@ -88,6 +91,8 @@ private: // that operations on the view which do not specify a collation inherit the default. Operations // on the view which specify any other collation fail with a user error. BSONObj _defaultCollation; + + boost::optional<bool> _timeseriesMayContainMixedData; }; } // namespace mongo diff --git a/src/mongo/db/views/view_catalog.cpp b/src/mongo/db/views/view_catalog.cpp index 84e0f7b46a8..b043eb5a03d 100644 --- a/src/mongo/db/views/view_catalog.cpp +++ b/src/mongo/db/views/view_catalog.cpp @@ -740,6 +740,7 @@ StatusWith<ResolvedView> ViewCatalog::resolveView( std::vector<NamespaceString> dependencyChain{nss}; int depth = 0; + boost::optional<bool> mixedData = boost::none; for (; depth < ViewGraph::kMaxViewDepth; depth++) { auto view = _lookup(opCtx, *resolvedNss, ViewCatalogLookupBehavior::kValidateDurableViews); @@ -761,10 +762,25 @@ StatusWith<ResolvedView> ViewCatalog::resolveView( return StatusWith<ResolvedView>( {*resolvedNss, std::move(resolvedPipeline), - collation ? std::move(collation.get()) : CollationSpec::kSimpleSpec}); + collation ? std::move(collation.get()) : CollationSpec::kSimpleSpec, + mixedData}); } resolvedNss = &view->viewOn(); + + if (view->timeseries()) { + auto tsCollection = + CollectionCatalog::get(opCtx)->lookupCollectionByNamespace(opCtx, *resolvedNss); + tassert(6067201, + str::stream() << "expected time-series buckets collection " << *resolvedNss + << " to exist", + + tsCollection); + mixedData = tsCollection + ? tsCollection->getTimeseriesBucketsMayHaveMixedSchemaData() + : false; + } + dependencyChain.push_back(*resolvedNss); if (!collation) { if (timeSeriesCollator) { |