From c1eb9dc5a6b507cff928d73cb43bb6d2656a0abd Mon Sep 17 00:00:00 2001 From: Dan Larkin-York Date: Tue, 28 Sep 2021 14:21:23 +0000 Subject: SERVER-59933 Improve array handling for time-series metric indexes --- src/mongo/db/index/expression_keys_private.cpp | 40 ++++++++++++++++++-------- 1 file changed, 28 insertions(+), 12 deletions(-) (limited to 'src/mongo/db/index') diff --git a/src/mongo/db/index/expression_keys_private.cpp b/src/mongo/db/index/expression_keys_private.cpp index daf084b5422..26eb7d7b8f1 100644 --- a/src/mongo/db/index/expression_keys_private.cpp +++ b/src/mongo/db/index/expression_keys_private.cpp @@ -358,27 +358,43 @@ using std::vector; void ExpressionKeysPrivate::validateDocumentCommon(const CollectionPtr& collection, const BSONObj& obj, const BSONObj& keyPattern) { - // If we have a timeseries collection, check that indexed metric fields do not have array values + // If we have a timeseries collection, check that indexed metric fields do not have expanded + // array values if (feature_flags::gTimeseriesMetricIndexes.isEnabledAndIgnoreFCV() && collection->getTimeseriesOptions()) { + // Each user metric field will be included twice, as both control.min. and + // control.max., so we'll want to keep track that we've checked data. to avoid + // scanning it twice. + StringSet userFieldsChecked; + for (const auto& keyElem : keyPattern) { if (keyElem.isNumber()) { StringData field = keyElem.fieldName(); - StringData dataField; + StringData userField; if (field.startsWith(timeseries::kControlMaxFieldNamePrefix)) { - dataField = field.substr(timeseries::kControlMaxFieldNamePrefix.size()); + userField = field.substr(timeseries::kControlMaxFieldNamePrefix.size()); } else if (field.startsWith(timeseries::kControlMinFieldNamePrefix)) { - dataField = field.substr(timeseries::kControlMinFieldNamePrefix.size()); + userField = field.substr(timeseries::kControlMinFieldNamePrefix.size()); } - if (!dataField.empty()) { - // We are in fact dealing with a metric field. Go ahead and examine individual - // values to check for array values. - uassert(5930501, - str::stream() << "Indexed measurement field contains an array value: " - << redact(obj), - !timeseries::dotted_path_support::haveArrayAlongBucketDataPath( - obj, std::string(timeseries::kDataFieldNamePrefix) + dataField)); + if (!userField.empty() && !userFieldsChecked.contains(userField)) { + namespace tdps = timeseries::dotted_path_support; + // We are in fact dealing with a metric field. First let's check the min and max + // values to see if we can conclude that there are no arrays present in the + // data. + auto decision = tdps::fieldContainsArrayData(obj, userField); + if (decision != tdps::Decision::No) { + // Go ahead and look closer + uassert(5930501, + str::stream() + << "Indexed measurement field contains an array value: " + << redact(obj), + decision == tdps::Decision::Maybe && + !tdps::haveArrayAlongBucketDataPath( + obj, + std::string(timeseries::kDataFieldNamePrefix) + userField)); + } + userFieldsChecked.emplace(userField); } } } -- cgit v1.2.1