diff options
author | Dan Larkin-York <dan.larkin-york@mongodb.com> | 2021-09-28 14:21:23 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-09-28 14:52:34 +0000 |
commit | c1eb9dc5a6b507cff928d73cb43bb6d2656a0abd (patch) | |
tree | d2cdd38f250c975672f12a07c1b423951937dbba /src/mongo/db/index | |
parent | 28a41e017b9562a83dd448bff93c577ffc4e082c (diff) | |
download | mongo-c1eb9dc5a6b507cff928d73cb43bb6d2656a0abd.tar.gz |
SERVER-59933 Improve array handling for time-series metric indexes
Diffstat (limited to 'src/mongo/db/index')
-rw-r--r-- | src/mongo/db/index/expression_keys_private.cpp | 40 |
1 file changed, 28 insertions, 12 deletions
diff --git a/src/mongo/db/index/expression_keys_private.cpp b/src/mongo/db/index/expression_keys_private.cpp index daf084b5422..26eb7d7b8f1 100644 --- a/src/mongo/db/index/expression_keys_private.cpp +++ b/src/mongo/db/index/expression_keys_private.cpp @@ -358,27 +358,43 @@ using std::vector; void ExpressionKeysPrivate::validateDocumentCommon(const CollectionPtr& collection, const BSONObj& obj, const BSONObj& keyPattern) { - // If we have a timeseries collection, check that indexed metric fields do not have array values + // If we have a timeseries collection, check that indexed metric fields do not have expanded + // array values if (feature_flags::gTimeseriesMetricIndexes.isEnabledAndIgnoreFCV() && collection->getTimeseriesOptions()) { + // Each user metric field will be included twice, as both control.min.<field> and + // control.max.<field>, so we'll want to keep track that we've checked data.<field> to avoid + // scanning it twice. + StringSet userFieldsChecked; + for (const auto& keyElem : keyPattern) { if (keyElem.isNumber()) { StringData field = keyElem.fieldName(); - StringData dataField; + StringData userField; if (field.startsWith(timeseries::kControlMaxFieldNamePrefix)) { - dataField = field.substr(timeseries::kControlMaxFieldNamePrefix.size()); + userField = field.substr(timeseries::kControlMaxFieldNamePrefix.size()); } else if (field.startsWith(timeseries::kControlMinFieldNamePrefix)) { - dataField = field.substr(timeseries::kControlMinFieldNamePrefix.size()); + userField = field.substr(timeseries::kControlMinFieldNamePrefix.size()); } - if (!dataField.empty()) { - // We are in fact dealing with a metric field. Go ahead and examine individual - // values to check for array values. 
- uassert(5930501, - str::stream() << "Indexed measurement field contains an array value: " - << redact(obj), - !timeseries::dotted_path_support::haveArrayAlongBucketDataPath( - obj, std::string(timeseries::kDataFieldNamePrefix) + dataField)); + if (!userField.empty() && !userFieldsChecked.contains(userField)) { + namespace tdps = timeseries::dotted_path_support; + // We are in fact dealing with a metric field. First let's check the min and max + // values to see if we can conclude that there are no arrays present in the + // data. + auto decision = tdps::fieldContainsArrayData(obj, userField); + if (decision != tdps::Decision::No) { + // Go ahead and look closer + uassert(5930501, + str::stream() + << "Indexed measurement field contains an array value: " + << redact(obj), + decision == tdps::Decision::Maybe && + !tdps::haveArrayAlongBucketDataPath( + obj, + std::string(timeseries::kDataFieldNamePrefix) + userField)); + } + userFieldsChecked.emplace(userField); } } } |