author     samontea <merciers.merciers@gmail.com>            2021-11-16 14:54:47 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2021-12-14 22:02:37 +0000
commit     8fb1d8039361bcbf3a0ab77127cc6712783485ca (patch)
tree       2681117e701eaffd05e48ea49f4f444044a320a5 /src
parent     56d055c507a94a70e31a014142934060b76710ed (diff)
download   mongo-8fb1d8039361bcbf3a0ab77127cc6712783485ca.tar.gz
SERVER-60672 Simpler pushdown when timeseries collection has no mixed-schema buckets
Diffstat (limited to 'src')
-rw-r--r--  src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp  63
-rw-r--r--  src/mongo/db/pipeline/document_source_internal_unpack_bucket.h     8
-rw-r--r--  src/mongo/db/views/SConscript                                       1
-rw-r--r--  src/mongo/db/views/resolved_view.cpp                               32
-rw-r--r--  src/mongo/db/views/resolved_view.h                                  9
-rw-r--r--  src/mongo/db/views/view_catalog.cpp                                18
6 files changed, 114 insertions, 17 deletions
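
For context, the change threads a new internal flag, assumeNoMixedSchemaData, from the view
catalog down into the $_internalUnpackBucket stage. A minimal, self-contained C++ sketch of
roughly how that stage spec serializes once the flag is set; the field names follow
serializeToArray() in the diff below, but the sample values and field order are illustrative
assumptions, not taken from any test:

    #include <iostream>

    int main() {
        // Roughly the shape produced by serializeToArray() when the stage was
        // constructed with assumeNoMixedSchemaData = true (values are invented).
        const char* spec = R"({
            "$_internalUnpackBucket": {
                "exclude": [],
                "timeField": "t",
                "metaField": "m",
                "bucketMaxSpanSeconds": 3600,
                "assumeNoMixedSchemaData": true
            }
        })";
        std::cout << spec << std::endl;  // what a shard would receive and re-parse
        return 0;
    }
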
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
index c6f353f3293..ed706683ba5 100644
--- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
@@ -220,8 +220,10 @@ bool fieldIsComputed(BucketSpec spec, std::string field) {
DocumentSourceInternalUnpackBucket::DocumentSourceInternalUnpackBucket(
const boost::intrusive_ptr<ExpressionContext>& expCtx,
BucketUnpacker bucketUnpacker,
- int bucketMaxSpanSeconds)
+ int bucketMaxSpanSeconds,
+ bool assumeNoMixedSchemaData)
: DocumentSource(kStageNameInternal, expCtx),
+ _assumeNoMixedSchemaData(assumeNoMixedSchemaData),
_bucketUnpacker(std::move(bucketUnpacker)),
_bucketMaxSpanSeconds{bucketMaxSpanSeconds} {}
@@ -240,6 +242,7 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createF
auto hasTimeField = false;
auto hasBucketMaxSpanSeconds = false;
auto bucketMaxSpanSeconds = 0;
+ auto assumeClean = false;
std::vector<std::string> computedMetaProjFields;
for (auto&& elem : specElem.embeddedObject()) {
auto fieldName = elem.fieldNameStringData();
@@ -268,6 +271,11 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createF
unpackerBehavior = fieldName == kInclude ? BucketUnpacker::Behavior::kInclude
: BucketUnpacker::Behavior::kExclude;
hasIncludeExclude = true;
+ } else if (fieldName == kAssumeNoMixedSchemaData) {
+ uassert(6067202,
+ str::stream() << "assumeClean field must be a bool, got: " << elem.type(),
+ elem.type() == BSONType::Bool);
+ assumeClean = elem.boolean();
} else if (fieldName == timeseries::kTimeFieldName) {
uassert(5346504,
str::stream() << "timeField field must be a string, got: " << elem.type(),
@@ -326,7 +334,10 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createF
hasBucketMaxSpanSeconds);
return make_intrusive<DocumentSourceInternalUnpackBucket>(
- expCtx, BucketUnpacker{std::move(bucketSpec), unpackerBehavior}, bucketMaxSpanSeconds);
+ expCtx,
+ BucketUnpacker{std::move(bucketSpec), unpackerBehavior},
+ bucketMaxSpanSeconds,
+ assumeClean);
}
boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createFromBsonExternal(
@@ -338,6 +349,7 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createF
BucketSpec bucketSpec;
auto hasTimeField = false;
+ auto assumeClean = false;
for (auto&& elem : specElem.embeddedObject()) {
auto fieldName = elem.fieldNameStringData();
// We only expose "timeField" and "metaField" as parameters in $_unpackBucket.
@@ -356,6 +368,11 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createF
str::stream() << "metaField field must be a single-element field path",
metaField.find('.') == std::string::npos);
bucketSpec.metaField = std::move(metaField);
+ } else if (fieldName == kAssumeNoMixedSchemaData) {
+ uassert(6067203,
+ str::stream() << "assumeClean field must be a bool, got: " << elem.type(),
+ elem.type() == BSONType::Bool);
+ assumeClean = elem.boolean();
} else {
uasserted(5612404,
str::stream() << "unrecognized parameter to $_unpackBucket: " << fieldName);
@@ -366,7 +383,10 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createF
hasTimeField);
return make_intrusive<DocumentSourceInternalUnpackBucket>(
- expCtx, BucketUnpacker{std::move(bucketSpec), BucketUnpacker::Behavior::kExclude}, 3600);
+ expCtx,
+ BucketUnpacker{std::move(bucketSpec), BucketUnpacker::Behavior::kExclude},
+ 3600,
+ assumeClean);
}
void DocumentSourceInternalUnpackBucket::serializeToArray(
@@ -394,6 +414,8 @@ void DocumentSourceInternalUnpackBucket::serializeToArray(
out.addField(timeseries::kMetaFieldName, Value{*spec.metaField});
}
out.addField(kBucketMaxSpanSeconds, Value{_bucketMaxSpanSeconds});
+ if (_assumeNoMixedSchemaData)
+ out.addField(kAssumeNoMixedSchemaData, Value(_assumeNoMixedSchemaData));
if (!spec.computedMetaProjFields.empty())
out.addField("computedMetaProjFields", Value{[&] {
@@ -545,12 +567,22 @@ std::pair<BSONObj, bool> DocumentSourceInternalUnpackBucket::extractOrBuildProje
* Creates a predicate that ensures that if there exists a subpath of matchExprPath such that the
* type of `control.min.subpath` is not the same as `control.max.subpath` then we will match that
* document.
+ *
+ * However, if the buckets collection has no mixed-schema buckets, this type-equality predicate
+ * is unnecessary. In that case this function returns an empty $or, which matches nothing (an
+ * always-false predicate) and so contributes nothing to the enclosing disjunction.
*/
std::unique_ptr<MatchExpression> createTypeEqualityPredicate(
- boost::intrusive_ptr<ExpressionContext> pExpCtx, const StringData& matchExprPath) {
+ boost::intrusive_ptr<ExpressionContext> pExpCtx,
+ const StringData& matchExprPath,
+ bool assumeNoMixedSchemaData) {
+
+ std::vector<std::unique_ptr<MatchExpression>> typeEqualityPredicates;
+
+ if (assumeNoMixedSchemaData)
+ return std::make_unique<OrMatchExpression>(std::move(typeEqualityPredicates));
+
FieldPath matchExprField(matchExprPath);
using namespace timeseries;
- std::vector<std::unique_ptr<MatchExpression>> typeEqualityPredicates;
// Assume that we're generating a predicate on "a.b"
for (size_t i = 0; i < matchExprField.getPathLength(); i++) {
@@ -595,7 +627,8 @@ std::unique_ptr<MatchExpression> createComparisonPredicate(
const BucketSpec& bucketSpec,
int bucketMaxSpanSeconds,
ExpressionContext::CollationMatchesDefault collationMatchesDefault,
- boost::intrusive_ptr<ExpressionContext> pExpCtx) {
+ boost::intrusive_ptr<ExpressionContext> pExpCtx,
+ bool assumeNoMixedSchemaData) {
using namespace timeseries;
const auto matchExprPath = matchExpr->path();
const auto matchExprData = matchExpr->getData();
@@ -684,7 +717,8 @@ std::unique_ptr<MatchExpression> createComparisonPredicate(
minPath, matchExprData),
MatchExprPredicate<InternalExprGTEMatchExpression>(
maxPath, matchExprData)),
- createTypeEqualityPredicate(pExpCtx, matchExprPath)));
+ createTypeEqualityPredicate(
+ pExpCtx, matchExprPath, assumeNoMixedSchemaData)));
case MatchExpression::GT:
// For $gt, make a $gt predicate against 'control.max'. In addition, if the comparison
@@ -706,7 +740,8 @@ std::unique_ptr<MatchExpression> createComparisonPredicate(
bucketMaxSpanSeconds)))
: std::make_unique<OrMatchExpression>(makeVector<std::unique_ptr<MatchExpression>>(
std::make_unique<InternalExprGTMatchExpression>(maxPath, matchExprData),
- createTypeEqualityPredicate(pExpCtx, matchExprPath)));
+ createTypeEqualityPredicate(
+ pExpCtx, matchExprPath, assumeNoMixedSchemaData)));
case MatchExpression::GTE:
// For $gte, make a $gte predicate against 'control.max'. In addition, if the comparison
@@ -728,7 +763,8 @@ std::unique_ptr<MatchExpression> createComparisonPredicate(
bucketMaxSpanSeconds)))
: std::make_unique<OrMatchExpression>(makeVector<std::unique_ptr<MatchExpression>>(
std::make_unique<InternalExprGTEMatchExpression>(maxPath, matchExprData),
- createTypeEqualityPredicate(pExpCtx, matchExprPath)));
+ createTypeEqualityPredicate(
+ pExpCtx, matchExprPath, assumeNoMixedSchemaData)));
case MatchExpression::LT:
// For $lt, make a $lt predicate against 'control.min'. In addition, if the comparison
@@ -748,7 +784,8 @@ std::unique_ptr<MatchExpression> createComparisonPredicate(
bucketMaxSpanSeconds)))
: std::make_unique<OrMatchExpression>(makeVector<std::unique_ptr<MatchExpression>>(
std::make_unique<InternalExprLTMatchExpression>(minPath, matchExprData),
- createTypeEqualityPredicate(pExpCtx, matchExprPath)));
+ createTypeEqualityPredicate(
+ pExpCtx, matchExprPath, assumeNoMixedSchemaData)));
case MatchExpression::LTE:
// For $lte, make a $lte predicate against 'control.min'. In addition, if the comparison
@@ -768,7 +805,8 @@ std::unique_ptr<MatchExpression> createComparisonPredicate(
bucketMaxSpanSeconds)))
: std::make_unique<OrMatchExpression>(makeVector<std::unique_ptr<MatchExpression>>(
std::make_unique<InternalExprLTEMatchExpression>(minPath, matchExprData),
- createTypeEqualityPredicate(pExpCtx, matchExprPath)));
+ createTypeEqualityPredicate(
+ pExpCtx, matchExprPath, assumeNoMixedSchemaData)));
default:
MONGO_UNREACHABLE_TASSERT(5348302);
@@ -797,7 +835,8 @@ DocumentSourceInternalUnpackBucket::createPredicatesOnBucketLevelField(
_bucketUnpacker.bucketSpec(),
_bucketMaxSpanSeconds,
pExpCtx->collationMatchesDefault,
- pExpCtx);
+ pExpCtx,
+ _assumeNoMixedSchemaData);
} else if (matchExpr->matchType() == MatchExpression::GEO) {
auto& geoExpr = static_cast<const GeoMatchExpression*>(matchExpr)->getGeoExpression();
if (geoExpr.getPred() == GeoExpression::WITHIN ||
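
The effect of the flag on createComparisonPredicate() is easiest to see in the shape of the
generated bucket-level predicate. Below is a standalone, simplified sketch of the $eq case using
invented string-building helpers rather than real MatchExpression classes: when mixed-schema
buckets must be assumed, the min/max comparison is OR-ed with the type-equality escape hatch;
when the collection is known clean, that disjunct collapses to an empty $or that matches nothing.

    #include <iostream>
    #include <string>

    // Stand-in for createTypeEqualityPredicate(): with no mixed-schema data the
    // escape-hatch disjunct becomes an empty $or, i.e. an always-false predicate.
    std::string typeEqualityPredicate(const std::string& path, bool assumeNoMixedSchemaData) {
        if (assumeNoMixedSchemaData)
            return "$alwaysFalse";  // contributes nothing to the enclosing $or
        return "typesDiffer(control.min." + path + ", control.max." + path + ")";
    }

    // Stand-in for the $eq branch of createComparisonPredicate().
    std::string eqPushdown(const std::string& path, const std::string& value, bool clean) {
        return "(control.min." + path + " <= " + value +
               " AND control.max." + path + " >= " + value + ") OR " +
               typeEqualityPredicate(path, clean);
    }

    int main() {
        std::cout << eqPushdown("a", "5", /*clean=*/false) << "\n";
        std::cout << eqPushdown("a", "5", /*clean=*/true) << "\n";
        return 0;
    }
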
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h
index 40e154c5478..c1b59078fae 100644
--- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h
@@ -43,6 +43,7 @@ public:
static constexpr StringData kStageNameExternal = "$_unpackBucket"_sd;
static constexpr StringData kInclude = "include"_sd;
static constexpr StringData kExclude = "exclude"_sd;
+ static constexpr StringData kAssumeNoMixedSchemaData = "assumeNoMixedSchemaData"_sd;
static constexpr StringData kBucketMaxSpanSeconds = "bucketMaxSpanSeconds"_sd;
static boost::intrusive_ptr<DocumentSource> createFromBsonInternal(
@@ -52,7 +53,8 @@ public:
DocumentSourceInternalUnpackBucket(const boost::intrusive_ptr<ExpressionContext>& expCtx,
BucketUnpacker bucketUnpacker,
- int bucketMaxSpanSeconds);
+ int bucketMaxSpanSeconds,
+ bool assumeNoMixedSchemaData = false);
const char* getSourceName() const override {
return kStageNameInternal.rawData();
@@ -202,6 +204,10 @@ public:
private:
GetNextResult doGetNext() final;
+ // If buckets may contain mixed-schema data along some path, we have to push down extra
+ // type-equality predicates to ensure correctness. When this flag is true, the buckets
+ // collection is known to have no such buckets, so those predicates can be omitted.
+ bool _assumeNoMixedSchemaData = false;
+
BucketUnpacker _bucketUnpacker;
int _bucketMaxSpanSeconds;
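
The new constructor parameter is defaulted to false so that pre-existing call sites keep
compiling unchanged. A trivial, self-contained illustration of that pattern; the class and member
names here are made up, not the real DocumentSourceInternalUnpackBucket:

    #include <iostream>

    struct Stage {
        // Trailing parameter defaulted, mirroring the pattern used for
        // assumeNoMixedSchemaData in the header above.
        explicit Stage(int bucketMaxSpanSeconds, bool assumeNoMixedSchemaData = false)
            : _maxSpan(bucketMaxSpanSeconds), _assumeClean(assumeNoMixedSchemaData) {}

        int _maxSpan;
        bool _assumeClean;
    };

    int main() {
        Stage oldCallSite{3600};        // pre-existing callers are unaffected
        Stage newCallSite{3600, true};  // new callers opt in explicitly
        std::cout << oldCallSite._assumeClean << " " << newCallSite._assumeClean << "\n";
        return 0;
    }
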
diff --git a/src/mongo/db/views/SConscript b/src/mongo/db/views/SConscript
index 5ca16d561ed..38d2204fce9 100644
--- a/src/mongo/db/views/SConscript
+++ b/src/mongo/db/views/SConscript
@@ -27,6 +27,7 @@ env.Library(
],
LIBDEPS=[
'$BUILD_DIR/mongo/base',
+ '$BUILD_DIR/mongo/db/catalog/collection',
'$BUILD_DIR/mongo/db/pipeline/aggregation',
'$BUILD_DIR/mongo/db/query/collation/collator_factory_interface',
'$BUILD_DIR/mongo/db/repl/repl_coordinator_interface',
diff --git a/src/mongo/db/views/resolved_view.cpp b/src/mongo/db/views/resolved_view.cpp
index 9c754c559e7..c0435b8e924 100644
--- a/src/mongo/db/views/resolved_view.cpp
+++ b/src/mongo/db/views/resolved_view.cpp
@@ -72,15 +72,30 @@ ResolvedView ResolvedView::fromBSON(const BSONObj& commandResponseObj) {
collationSpec = collationElt.embeddedObject().getOwned();
}
+
+ boost::optional<bool> mixedSchema = boost::none;
+ if (auto mixedSchemaElem = viewDef[kTimeseriesMayContainMixedData]) {
+ uassert(6067204,
+ str::stream() << "field " << kTimeseriesMayContainMixedData
+ << " in the view definition must be a bool when present",
+ mixedSchemaElem.type() == BSONType::Bool);
+
+ mixedSchema = mixedSchemaElem.boolean();
+ }
+
return {NamespaceString(viewDef["ns"].valueStringData()),
std::move(pipeline),
- std::move(collationSpec)};
+ std::move(collationSpec),
+ std::move(mixedSchema)};
}
void ResolvedView::serialize(BSONObjBuilder* builder) const {
BSONObjBuilder subObj(builder->subobjStart("resolvedView"));
subObj.append("ns", _namespace.ns());
subObj.append("pipeline", _pipeline);
+ // Only serialize the field when it is set and the collection is known to contain no
+ // mixed-schema data.
+ if (_timeseriesMayContainMixedData && !(*_timeseriesMayContainMixedData))
+ subObj.append(kTimeseriesMayContainMixedData, *_timeseriesMayContainMixedData);
if (!_defaultCollation.isEmpty()) {
subObj.append("collation", _defaultCollation);
}
@@ -125,6 +140,21 @@ AggregateCommandRequest ResolvedView::asExpandedViewAggregation(
}
resolvedPipeline[1] =
BSON(DocumentSourceInternalConvertBucketIndexStats::kStageName << builder.obj());
+ } else if (resolvedPipeline.size() >= 1 &&
+ resolvedPipeline[0][DocumentSourceInternalUnpackBucket::kStageNameInternal] &&
+ serverGlobalParams.featureCompatibility.isGreaterThanOrEqualTo(
+ multiversion::FeatureCompatibilityVersion::kVersion_5_2)) {
+ auto unpackStage = resolvedPipeline[0];
+
+ BSONObjBuilder builder;
+ for (const auto& elem :
+ unpackStage[DocumentSourceInternalUnpackBucket::kStageNameInternal].Obj()) {
+ builder.append(elem);
+ }
+ builder.append(DocumentSourceInternalUnpackBucket::kAssumeNoMixedSchemaData,
+ _timeseriesMayContainMixedData && !(*_timeseriesMayContainMixedData));
+ resolvedPipeline[0] =
+ BSON(DocumentSourceInternalUnpackBucket::kStageNameInternal << builder.obj());
}
AggregateCommandRequest expandedRequest{_namespace, resolvedPipeline};
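
Both new uses of _timeseriesMayContainMixedData in resolved_view.cpp rely on the same tri-state
rule: the flag is only propagated when the catalog has positively recorded that the buckets
collection is clean. A minimal sketch of that rule, using std::optional in place of
boost::optional so the example is self-contained:

    #include <iostream>
    #include <optional>

    // Mirrors the condition `_timeseriesMayContainMixedData && !*_timeseriesMayContainMixedData`
    // used in serialize() and asExpandedViewAggregation().
    bool assumeNoMixedSchemaData(std::optional<bool> timeseriesMayContainMixedData) {
        // unset -> unknown / not a time-series view: do not assume clean
        // true  -> buckets may contain mixed-schema data: do not assume clean
        // false -> recorded as clean: safe to assume clean
        return timeseriesMayContainMixedData.has_value() && !*timeseriesMayContainMixedData;
    }

    int main() {
        std::cout << assumeNoMixedSchemaData(std::nullopt) << " "   // 0
                  << assumeNoMixedSchemaData(true) << " "           // 0
                  << assumeNoMixedSchemaData(false) << "\n";        // 1
        return 0;
    }
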
diff --git a/src/mongo/db/views/resolved_view.h b/src/mongo/db/views/resolved_view.h
index 19cfefe82f9..d19642c8dd5 100644
--- a/src/mongo/db/views/resolved_view.h
+++ b/src/mongo/db/views/resolved_view.h
@@ -46,10 +46,12 @@ class ResolvedView final : public ErrorExtraInfo {
public:
ResolvedView(const NamespaceString& collectionNs,
std::vector<BSONObj> pipeline,
- BSONObj defaultCollation)
+ BSONObj defaultCollation,
+ boost::optional<bool> timeseriesMayContainMixedData = boost::none)
: _namespace(collectionNs),
_pipeline(std::move(pipeline)),
- _defaultCollation(std::move(defaultCollation)) {}
+ _defaultCollation(std::move(defaultCollation)),
+ _timeseriesMayContainMixedData(timeseriesMayContainMixedData) {}
static ResolvedView fromBSON(const BSONObj& commandResponseObj);
@@ -74,6 +76,7 @@ public:
// ErrorExtraInfo API
static constexpr auto code = ErrorCodes::CommandOnShardedViewNotSupportedOnMongod;
+ static constexpr StringData kTimeseriesMayContainMixedData = "timeseriesMayContainMixedData"_sd;
void serialize(BSONObjBuilder* bob) const final;
static std::shared_ptr<const ErrorExtraInfo> parse(const BSONObj&);
@@ -88,6 +91,8 @@ private:
// that operations on the view which do not specify a collation inherit the default. Operations
// on the view which specify any other collation fail with a user error.
BSONObj _defaultCollation;
+
+ boost::optional<bool> _timeseriesMayContainMixedData;
};
} // namespace mongo
diff --git a/src/mongo/db/views/view_catalog.cpp b/src/mongo/db/views/view_catalog.cpp
index 84e0f7b46a8..b043eb5a03d 100644
--- a/src/mongo/db/views/view_catalog.cpp
+++ b/src/mongo/db/views/view_catalog.cpp
@@ -740,6 +740,7 @@ StatusWith<ResolvedView> ViewCatalog::resolveView(
std::vector<NamespaceString> dependencyChain{nss};
int depth = 0;
+ boost::optional<bool> mixedData = boost::none;
for (; depth < ViewGraph::kMaxViewDepth; depth++) {
auto view =
_lookup(opCtx, *resolvedNss, ViewCatalogLookupBehavior::kValidateDurableViews);
@@ -761,10 +762,25 @@ StatusWith<ResolvedView> ViewCatalog::resolveView(
return StatusWith<ResolvedView>(
{*resolvedNss,
std::move(resolvedPipeline),
- collation ? std::move(collation.get()) : CollationSpec::kSimpleSpec});
+ collation ? std::move(collation.get()) : CollationSpec::kSimpleSpec,
+ mixedData});
}
resolvedNss = &view->viewOn();
+
+ if (view->timeseries()) {
+ auto tsCollection =
+ CollectionCatalog::get(opCtx)->lookupCollectionByNamespace(opCtx, *resolvedNss);
+ tassert(6067201,
+ str::stream() << "expected time-series buckets collection " << *resolvedNss
+ << " to exist",
+ tsCollection);
+ mixedData = tsCollection
+ ? tsCollection->getTimeseriesBucketsMayHaveMixedSchemaData()
+ : false;
+ }
+
dependencyChain.push_back(*resolvedNss);
if (!collation) {
if (timeSeriesCollator) {
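
Finally, a simplified model of the new branch in ViewCatalog::resolveView(): when the resolution
chain reaches a time-series view, the buckets collection's catalog metadata decides whether
mixed-schema buckets may exist, and that answer becomes timeseriesMayContainMixedData on the
ResolvedView. The types below are invented stand-ins, not the real Collection or
CollectionCatalog classes:

    #include <iostream>
    #include <optional>

    // Invented stand-in for the buckets collection's catalog metadata; the real
    // getter is Collection::getTimeseriesBucketsMayHaveMixedSchemaData().
    struct BucketsCollection {
        std::optional<bool> mayHaveMixedSchemaData;
    };

    std::optional<bool> resolveMixedDataFlag(bool viewIsTimeseries,
                                             const BucketsCollection* buckets) {
        if (!viewIsTimeseries)
            return std::nullopt;  // flag stays unset for ordinary views
        // The real code tasserts that the buckets collection exists and still
        // falls back to `false` defensively; modelled the same way here.
        if (!buckets)
            return false;
        return buckets->mayHaveMixedSchemaData;
    }

    int main() {
        BucketsCollection clean{false}, dirty{true};
        std::cout << resolveMixedDataFlag(true, &clean).value() << " "
                  << resolveMixedDataFlag(true, &dirty).value() << "\n";
        return 0;
    }
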