summaryrefslogtreecommitdiff
path: root/src/mongo/db/timeseries
diff options
context:
space:
mode:
authorDan Larkin-York <dan.larkin-york@mongodb.com>2022-07-05 21:33:57 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-07-06 14:53:19 +0000
commit3cc20216a850af1d4bf63956740d73e8fc3779df (patch)
tree728200bda82321867cdc170c84b254278980b1b5 /src/mongo/db/timeseries
parent5451a539fd4e469c66b35a263a169aaa485604a5 (diff)
downloadmongo-3cc20216a850af1d4bf63956740d73e8fc3779df.tar.gz
SERVER-67622 Optimize timeseries_dotted_path_support functions and fix array handling
Diffstat (limited to 'src/mongo/db/timeseries')
-rw-r--r--src/mongo/db/timeseries/timeseries_dotted_path_support.cpp306
-rw-r--r--src/mongo/db/timeseries/timeseries_dotted_path_support_test.cpp20
2 files changed, 209 insertions, 117 deletions
diff --git a/src/mongo/db/timeseries/timeseries_dotted_path_support.cpp b/src/mongo/db/timeseries/timeseries_dotted_path_support.cpp
index b28e198429e..e36c674ad6f 100644
--- a/src/mongo/db/timeseries/timeseries_dotted_path_support.cpp
+++ b/src/mongo/db/timeseries/timeseries_dotted_path_support.cpp
@@ -66,78 +66,105 @@ void _handleElementForExtractAllElementsOnBucketPath(const BSONObj& obj,
BSONElementSet& elements,
bool expandArrayOnTrailingField,
BSONDepthIndex depth,
- MultikeyComponents* arrayComponents) {
- if (elem.eoo()) {
- size_t idx = path.find('.');
- if (idx != std::string::npos) {
- invariant(depth != std::numeric_limits<BSONDepthIndex>::max());
- StringData left = path.substr(0, idx);
- StringData next = path.substr(idx + 1, path.size());
+ MultikeyComponents* arrayComponents);
- BSONElement e = obj.getField(left);
-
- if (e.type() == Object) {
- BSONObj embedded = e.embeddedObject();
- _handleElementForExtractAllElementsOnBucketPath(embedded,
- embedded.getField(next),
- next,
- elements,
- expandArrayOnTrailingField,
- depth + 1,
- arrayComponents);
- } else if (e.type() == Array) {
- bool allDigits = false;
- if (next.size() > 0 && ctype::isDigit(next[0])) {
- unsigned temp = 1;
- while (temp < next.size() && ctype::isDigit(next[temp]))
- temp++;
- allDigits = temp == next.size() || next[temp] == '.';
- }
- if (allDigits) {
- BSONObj embedded = e.embeddedObject();
+void _handleIntermediateElementForExtractAllElementsOnBucketPath(
+ BSONElement elem,
+ StringData path,
+ BSONElementSet& elements,
+ bool expandArrayOnTrailingField,
+ BSONDepthIndex depth,
+ MultikeyComponents* arrayComponents) {
+ if (elem.type() == Object) {
+ BSONObj embedded = elem.embeddedObject();
+ _handleElementForExtractAllElementsOnBucketPath(embedded,
+ embedded.getField(path),
+ path,
+ elements,
+ expandArrayOnTrailingField,
+ depth + 1,
+ arrayComponents);
+ } else if (elem.type() == Array) {
+ bool allDigits = false;
+ if (path.size() > 0 && ctype::isDigit(path[0])) {
+ unsigned temp = 1;
+ while (temp < path.size() && ctype::isDigit(path[temp]))
+ temp++;
+ allDigits = temp == path.size() || path[temp] == '.';
+ }
+ if (allDigits) {
+ BSONObj embedded = elem.embeddedObject();
+ _handleElementForExtractAllElementsOnBucketPath(embedded,
+ embedded.getField(path),
+ path,
+ elements,
+ expandArrayOnTrailingField,
+ depth + 1,
+ arrayComponents);
+ } else {
+ BSONObjIterator i(elem.embeddedObject());
+ while (i.more()) {
+ BSONElement e2 = i.next();
+ if (e2.type() == Object || e2.type() == Array) {
+ BSONObj embedded = e2.embeddedObject();
_handleElementForExtractAllElementsOnBucketPath(embedded,
- embedded.getField(next),
- next,
+ embedded.getField(path),
+ path,
elements,
expandArrayOnTrailingField,
depth + 1,
arrayComponents);
- } else {
- BSONObjIterator i(e.embeddedObject());
- while (i.more()) {
- BSONElement e2 = i.next();
- if (e2.type() == Object || e2.type() == Array) {
- BSONObj embedded = e2.embeddedObject();
- _handleElementForExtractAllElementsOnBucketPath(
- embedded,
- embedded.getField(next),
- next,
- elements,
- expandArrayOnTrailingField,
- depth + 1,
- arrayComponents);
- }
- }
- if (arrayComponents) {
- arrayComponents->insert(depth);
- }
}
- } else {
- // do nothing: no match
- }
- }
- } else {
- if (elem.type() == Array && expandArrayOnTrailingField) {
- BSONObjIterator i(elem.embeddedObject());
- while (i.more()) {
- elements.insert(i.next());
}
if (arrayComponents) {
arrayComponents->insert(depth);
}
- } else {
- elements.insert(elem);
}
+ } else {
+ // do nothing: no match
+ }
+}
+
+void _handleTerminalElementForExtractAllElementsOnBucketPath(BSONElement elem,
+ BSONElementSet& elements,
+ bool expandArrayOnTrailingField,
+ BSONDepthIndex depth,
+ MultikeyComponents* arrayComponents) {
+ if (elem.type() == Array && expandArrayOnTrailingField) {
+ BSONObjIterator i(elem.embeddedObject());
+ while (i.more()) {
+ elements.insert(i.next());
+ }
+ if (arrayComponents) {
+ arrayComponents->insert(depth);
+ }
+ } else {
+ elements.insert(elem);
+ }
+}
+
+void _handleElementForExtractAllElementsOnBucketPath(const BSONObj& obj,
+ BSONElement elem,
+ StringData path,
+ BSONElementSet& elements,
+ bool expandArrayOnTrailingField,
+ BSONDepthIndex depth,
+ MultikeyComponents* arrayComponents) {
+ if (elem.eoo()) {
+ size_t idx = path.find('.');
+ if (idx != std::string::npos) {
+ invariant(depth != std::numeric_limits<BSONDepthIndex>::max());
+ StringData left = path.substr(0, idx);
+ StringData next = path.substr(idx + 1, path.size());
+
+ BSONElement e = obj.getField(left);
+
+ _handleIntermediateElementForExtractAllElementsOnBucketPath(
+ e, next, elements, expandArrayOnTrailingField, depth, arrayComponents);
+ }
+ } else {
+ _handleTerminalElementForExtractAllElementsOnBucketPath(
+ elem, elements, expandArrayOnTrailingField, depth, arrayComponents);
}
}
@@ -165,16 +192,21 @@ boost::optional<BSONColumn> _extractAllElementsAlongBucketPath(
depth + 1,
arrayComponents);
} else if (isCompressed && e.type() == BinData) {
- // Unbucketing magic happens here for nested measurement fields (i.e.
- // data.a.b) in compressed buckets.
+ // Unbucketing happens here for nested measurement fields (i.e. data.a.b) in
+ // compressed buckets. We know that 'e' corresponds to the top-level
+ // measurement field (i.e. data.a) and we need to iterate over each of the
+ // numerically-indexed entries (i.e. data.a.1, data.a.5, etc.) and do a
+ // field lookup to extract the actual field we want (i.e. data.a.1.b).
+ // Thanks to the bucket structure, we know there is no literal field with a
+ // dot at this depth (e.g. '1.b'), so we can skip that field lookup, but
+ // it's possible that the element e2 stores an object, array, or other type,
+ // and we need to figure out how to resolve the rest of the path based on
+ // the type.
BSONColumn storage{e};
for (const BSONElement& e2 : storage) {
if (!e2.eoo()) {
- BSONObj embedded =
- e2.isABSONObj() ? e2.embeddedObject() : BSONObj();
- _handleElementForExtractAllElementsOnBucketPath(
- embedded,
- embedded.getField(next),
+ _handleIntermediateElementForExtractAllElementsOnBucketPath(
+ e2,
next,
elements,
expandArrayOnTrailingField,
@@ -182,6 +214,8 @@ boost::optional<BSONColumn> _extractAllElementsAlongBucketPath(
arrayComponents);
}
}
+ // Need to pass along the column since it owns the memory referenced by the
+ // extracted elements.
return std::move(storage);
}
}
@@ -196,43 +230,48 @@ boost::optional<BSONColumn> _extractAllElementsAlongBucketPath(
depth + 1,
arrayComponents);
} else if (isCompressed && BinData == e.type()) {
- // Unbucketing magic happens here for top-level measurement fields (i.e. data.a)
- // in compressed buckets.
+ // Unbucketing happens here for top-level measurement fields (i.e. data.a) in
+ // compressed buckets. We know that 'e' corresponds to the top-level
+ // measurement field (i.e. data.a) and we need to iterate over each of the
+ // numerically-indexed entries (i.e. data.a.1, data.a.5, etc.) to extract
+ // the actual field we want.
invariant(depth == 1);
BSONColumn storage{e};
for (const BSONElement& e2 : storage) {
if (!e2.eoo()) {
BSONObj embedded = e2.isABSONObj() ? e2.embeddedObject() : BSONObj();
- _handleElementForExtractAllElementsOnBucketPath(
- embedded,
- e2,
- ""_sd,
- elements,
- expandArrayOnTrailingField,
- depth,
- arrayComponents);
+ _handleTerminalElementForExtractAllElementsOnBucketPath(
+ e2, elements, expandArrayOnTrailingField, depth, arrayComponents);
}
}
+ // Need to pass along the column since it owns the memory referenced by the
+ // extracted elements.
return std::move(storage);
}
}
break;
}
case 2: {
- // Unbucketing magic happens here for uncompressed buckets.
+ // Unbucketing happens here for uncompressed buckets. We know that 'obj' corresponds to
+ // the top-level measurement field (i.e. data.a) and we need to iterate over each of the
+ // numerically-indexed entries (i.e. data.a.1, data.a.5, etc.) to extract the actual
+ // field we want. If we are after a top-level field, then we already have the element we
+ // want in 'e'. If we are after a nested field, then we need to recurse.
invariant(!isCompressed);
for (const BSONElement& e : obj) {
- std::string subPath = e.fieldName();
- if (!path.empty()) {
- subPath.append("." + path);
+ if (path.empty()) {
+ // The top-level measurement field (i.e. data.a) is the indexed field we are
+ // trying to extract, so we can target it directly.
+ _handleTerminalElementForExtractAllElementsOnBucketPath(
+ e, elements, expandArrayOnTrailingField, depth, arrayComponents);
+ } else {
+ // We'll need to inspect at least one more field, but we can take advantage of
+ // the bucket structure here to skip checking for any field at this depth with a
+ // dot in the name (e.g. '1.b') and instead just look at the next level down
+ // (e.g. look within '1' for a field named 'b').
+ _handleIntermediateElementForExtractAllElementsOnBucketPath(
+ e, path, elements, expandArrayOnTrailingField, depth, arrayComponents);
}
- _handleElementForExtractAllElementsOnBucketPath(obj,
- obj.getField(subPath),
- subPath,
- elements,
- expandArrayOnTrailingField,
- depth,
- arrayComponents);
}
break;
}
@@ -247,6 +286,30 @@ bool _haveArrayAlongBucketDataPath(const BSONObj& obj, StringData path, BSONDept
bool _handleElementForHaveArrayAlongBucketDataPath(const BSONObj& obj,
BSONElement elem,
StringData path,
+ BSONDepthIndex depth);
+
+bool _handleIntermediateElementForHaveArrayAlongBucketDataPath(BSONElement elem,
+ StringData path,
+ BSONDepthIndex depth) {
+ if (elem.type() == Object) {
+ auto embedded = elem.embeddedObject();
+ return _handleElementForHaveArrayAlongBucketDataPath(
+ embedded, embedded.getField(path), path, depth + 1);
+ } else if (elem.type() == Array) {
+ return true;
+ }
+ // no match
+ return false;
+}
+
+bool _handleTerminalElementForHaveArrayAlongBucketDataPath(BSONElement elem) {
+ return (elem.type() == Array);
+}
+
+
+bool _handleElementForHaveArrayAlongBucketDataPath(const BSONObj& obj,
+ BSONElement elem,
+ StringData path,
BSONDepthIndex depth) {
if (elem.eoo()) {
size_t idx = path.find('.');
@@ -259,20 +322,10 @@ bool _handleElementForHaveArrayAlongBucketDataPath(const BSONObj& obj,
BSONElement e = obj.getField(left);
- if (e.type() == Object) {
- auto embedded = e.embeddedObject();
- return _handleElementForHaveArrayAlongBucketDataPath(
- embedded, embedded.getField(next), next, depth + 1);
- } else if (e.type() == Array) {
- return true;
- } else {
- // do nothing: no match
- }
+ return _handleIntermediateElementForHaveArrayAlongBucketDataPath(e, next, depth);
}
} else {
- if (elem.type() == Array) {
- return true;
- }
+ return _handleTerminalElementForHaveArrayAlongBucketDataPath(elem);
}
return false;
@@ -293,13 +346,21 @@ bool _haveArrayAlongBucketDataPath(const BSONObj& obj,
return _haveArrayAlongBucketDataPath(
e.embeddedObject(), next, isCompressed, depth + 1);
} else if (isCompressed && BinData == e.type()) {
- // Unbucketing magic happens here for nested measurement fields (i.e.
- // data.a.b) in compressed buckets.
+ // Unbucketing happens here for nested measurement fields (i.e. data.a.b) in
+ // compressed buckets. We know that 'e' corresponds to the top-level
+ // measurement field (i.e. data.a) and we need to iterate over each of the
+ // numerically-indexed entries (i.e. data.a.1, data.a.5, etc.) and do a
+ // field lookup to find the actual field we want (i.e. data.a.1.b).
+ // Thanks to the bucket structure, we know there is no literal field with a
+ // dot at this depth (e.g. '1.b'), so we can skip that field lookup, but
+ // it's possible that the element e2 stores an object, array, or other type,
+ // and we need to figure out how to resolve the rest of the path based on
+ // the type.
BSONColumn column{e};
for (const BSONElement& e2 : column) {
- BSONObj embedded = e2.isABSONObj() ? e2.embeddedObject() : BSONObj();
- const bool foundArray = _handleElementForHaveArrayAlongBucketDataPath(
- embedded, embedded.getField(next), next, depth);
+ const bool foundArray =
+ _handleIntermediateElementForHaveArrayAlongBucketDataPath(
+ e2, next, depth);
if (foundArray) {
return foundArray;
}
@@ -312,12 +373,15 @@ bool _haveArrayAlongBucketDataPath(const BSONObj& obj,
return _haveArrayAlongBucketDataPath(
e.embeddedObject(), StringData(), isCompressed, depth + 1);
} else if (BinData == e.type()) {
- // Unbucketing magic happens here for top-level measurement fields (i.e. data.a)
- // in compressed buckets.
+ // Unbucketing happens here for top-level measurement fields (i.e. data.a) in
+ // compressed buckets. We know that 'e' corresponds to the top-level
+ // measurement field (i.e. data.a) and we need to iterate over each of the
+ // numerically-indexed entries (i.e. data.a.1, data.a.5, etc.) to decide whether
+ // we have any arrays.
invariant(isCompressed && depth == 1);
BSONColumn column{e};
for (const BSONElement& e2 : column) {
- if (e2.type() == Array) {
+ if (_handleTerminalElementForHaveArrayAlongBucketDataPath(e2)) {
return true;
}
}
@@ -326,16 +390,28 @@ bool _haveArrayAlongBucketDataPath(const BSONObj& obj,
return false;
}
case 2: {
- // Unbucketing magic happens here for uncompressed buckets.
+ // Unbucketing happens here for uncompressed buckets. We know that 'obj' corresponds to
+ // the top-level measurement field (i.e. data.a) and we need to iterate over each of the
+ // numerically-indexed entries (i.e. data.a.1, data.a.5, etc.) to extract the actual
+ // field we want. If we are after a top-level field, then we already have the element we
+            // want in 'e'. If we are after a nested field, then we need to recurse.
invariant(!isCompressed);
for (const BSONElement& e : obj) {
- std::string subPath = e.fieldName();
- if (!path.empty()) {
- subPath.append("." + path);
+ bool foundArray = false;
+ if (path.empty()) {
+ // The top-level measurement field (i.e. data.a) is the field we are trying to
+ // check, so we can target it directly.
+ if (_handleTerminalElementForHaveArrayAlongBucketDataPath(e)) {
+ return true;
+ }
+ } else {
+ // We'll need to inspect at least one more field, but we can take advantage of
+ // the bucket structure here to skip checking for any field at this depth with a
+ // dot in the name (e.g. '1.b') and instead just look at the next level down
+ // (e.g. look within '1' for a field named 'b').
+ foundArray =
+ _handleIntermediateElementForHaveArrayAlongBucketDataPath(e, path, depth);
}
- BSONElement sub = obj.getField(subPath);
- const bool foundArray =
- _handleElementForHaveArrayAlongBucketDataPath(obj, sub, subPath, depth);
if (foundArray) {
return foundArray;
}
diff --git a/src/mongo/db/timeseries/timeseries_dotted_path_support_test.cpp b/src/mongo/db/timeseries/timeseries_dotted_path_support_test.cpp
index afabfc4e0a6..072deec9b77 100644
--- a/src/mongo/db/timeseries/timeseries_dotted_path_support_test.cpp
+++ b/src/mongo/db/timeseries/timeseries_dotted_path_support_test.cpp
@@ -100,6 +100,12 @@ TEST_F(TimeseriesDottedPathSupportTest, HaveArrayAlongBucketPath) {
b: true
}
}
+ },
+ i: {
+ "1": [
+ {a: true},
+ {a: false}
+ ]
}
}
})");
@@ -115,7 +121,7 @@ TEST_F(TimeseriesDottedPathSupportTest, HaveArrayAlongBucketPath) {
ASSERT_FALSE(
tdps::haveArrayAlongBucketDataPath(obj, "data.b")); // bucket expansion hides array
ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "data.c"));
- invariant(tdps::haveArrayAlongBucketDataPath(obj, "data.d"));
+ ASSERT_TRUE(tdps::haveArrayAlongBucketDataPath(obj, "data.d"));
ASSERT_TRUE(tdps::haveArrayAlongBucketDataPath(obj, "data.e"));
ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "data.f"));
ASSERT_TRUE(tdps::haveArrayAlongBucketDataPath(obj, "data.f.a"));
@@ -123,6 +129,8 @@ TEST_F(TimeseriesDottedPathSupportTest, HaveArrayAlongBucketPath) {
ASSERT_TRUE(tdps::haveArrayAlongBucketDataPath(obj, "data.g.a"));
ASSERT_TRUE(tdps::haveArrayAlongBucketDataPath(obj, "data.g.a.a"));
ASSERT_FALSE(tdps::haveArrayAlongBucketDataPath(obj, "data.h.a.b"));
+ ASSERT_TRUE(tdps::haveArrayAlongBucketDataPath(obj, "data.i"));
+ ASSERT_TRUE(tdps::haveArrayAlongBucketDataPath(obj, "data.i.a"));
});
}
@@ -365,6 +373,12 @@ TEST_F(TimeseriesDottedPathSupportTest, ExtractAllElementsAlongBucketPath) {
b: false
}
}
+ },
+ i: {
+ "1": [
+ {a: true},
+ {a: false}
+ ]
}
}
})");
@@ -379,7 +393,8 @@ TEST_F(TimeseriesDottedPathSupportTest, ExtractAllElementsAlongBucketPath) {
expected.emplace(el);
}
- ASSERT_EQ(actual.size(), expected.size());
+ ASSERT_EQ(actual.size(), expected.size())
+ << "Expected path '" << path << "' to yield " << expectedStorage << " from " << obj;
auto actualIt = actual.begin();
auto expectedIt = expected.begin();
@@ -408,6 +423,7 @@ TEST_F(TimeseriesDottedPathSupportTest, ExtractAllElementsAlongBucketPath) {
BSON_ARRAY(BSON("a" << BSON("b" << true)) << BSON("a" << BSON("b" << false))));
assertExtractionMatches("data.h.a"_sd, BSON_ARRAY(BSON("b" << true) << BSON("b" << false)));
assertExtractionMatches("data.h.a.b"_sd, BSON_ARRAY(true << false));
+ assertExtractionMatches("data.i.a"_sd, BSON_ARRAY(true << false));
});
}
} // namespace