summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author samontea <merciers.merciers@gmail.com> 2021-05-25 15:32:17 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-05-25 16:21:25 +0000
commit de5d5ec32afdfd58ff578929effae8eca0164d62 (patch)
tree b41b3457e81f6325223c0c9f934bd811c16f01cd /src
parent 27f800c38c785130818a7f64f7302e5e720f459e (diff)
download mongo-de5d5ec32afdfd58ff578929effae8eca0164d62.tar.gz
SERVER-57130 Traverse arrays while filling the Document cache
Diffstat (limited to 'src')
-rw-r--r-- src/mongo/db/exec/document_value/document.cpp 4
-rw-r--r-- src/mongo/db/exec/document_value/value.cpp 10
-rw-r--r-- src/mongo/db/exec/document_value/value.h 5
-rw-r--r-- src/mongo/db/pipeline/window_function/partition_iterator_test.cpp 33
4 files changed, 45 insertions, 7 deletions
diff --git a/src/mongo/db/exec/document_value/document.cpp b/src/mongo/db/exec/document_value/document.cpp
index 54f7f22c433..a4298f41f87 100644
--- a/src/mongo/db/exec/document_value/document.cpp
+++ b/src/mongo/db/exec/document_value/document.cpp
@@ -364,9 +364,7 @@ void DocumentStorage::reset(const BSONObj& bson, bool stripMetadata) {
void DocumentStorage::fillCache() const {
for (DocumentStorageIterator it = iterator(); !it.atEnd(); it.advance()) {
- // Retrieve the value and force it to be cached.
- if (it->val.getType() == BSONType::Object)
- it->val.getDocument().fillCache();
+ it->val.fillCache();
}
}
diff --git a/src/mongo/db/exec/document_value/value.cpp b/src/mongo/db/exec/document_value/value.cpp
index 29e5680a303..08409bd2461 100644
--- a/src/mongo/db/exec/document_value/value.cpp
+++ b/src/mongo/db/exec/document_value/value.cpp
@@ -1253,6 +1253,16 @@ ostream& operator<<(ostream& out, const Value& val) {
verify(false);
}
+void Value::fillCache() const {
+ if (isObject()) {
+ getDocument().fillCache();
+ } else if (isArray()) {
+ for (auto&& val : getArray()) {
+ val.fillCache();
+ }
+ }
+}
+
void Value::serializeForSorter(BufBuilder& buf) const {
buf.appendChar(getType());
switch (getType()) {
diff --git a/src/mongo/db/exec/document_value/value.h b/src/mongo/db/exec/document_value/value.h
index c89fedcf092..42c7b1c0ba9 100644
--- a/src/mongo/db/exec/document_value/value.h
+++ b/src/mongo/db/exec/document_value/value.h
@@ -343,6 +343,11 @@ public:
std::string toString() const;
friend std::ostream& operator<<(std::ostream& out, const Value& v);
+ /**
+ * Populates the internal cache by recursively walking the underlying BSON.
+ */
+ void fillCache() const;
+
void swap(Value& rhs) {
_storage.swap(rhs._storage);
}
diff --git a/src/mongo/db/pipeline/window_function/partition_iterator_test.cpp b/src/mongo/db/pipeline/window_function/partition_iterator_test.cpp
index de702eb112f..af917bd2275 100644
--- a/src/mongo/db/pipeline/window_function/partition_iterator_test.cpp
+++ b/src/mongo/db/pipeline/window_function/partition_iterator_test.cpp
@@ -520,15 +520,40 @@ TEST_F(PartitionIteratorTest, MemoryUsageAccountsForArraysInDocumentIteratorCach
// triple the size of the document. The reason for this is that 'largeStr' is cached twice; once
// for the 'arr' element and once for the nested 'subObj' element.
ASSERT_DOCUMENT_EQ(*_iter->current(), docs[0].getDocument());
- ASSERT_GT(_iter->getApproximateSize(), initialDocSize * 2);
- ASSERT_LT(_iter->getApproximateSize(), initialDocSize * 2 + 1024);
+ ASSERT_GT(_iter->getApproximateSize(), initialDocSize * 3);
+ ASSERT_LT(_iter->getApproximateSize(), initialDocSize * 3 + 1024);
+
+ // Pull in the second document. Both docs remain in the cache so the reported memory should
+ // include both.
+ advance();
+ ASSERT_DOCUMENT_EQ(*_iter->current(), docs[1].getDocument());
+ ASSERT_GT(_iter->getApproximateSize(), (initialDocSize * 3) * 2);
+ ASSERT_LT(_iter->getApproximateSize(), (initialDocSize * 3) * 2 + 1024);
+}
+
+TEST_F(PartitionIteratorTest, MemoryUsageAccountsForNestedArraysInDocumentIteratorCache) {
+ std::string largeStr(1024, 'x');
+ auto bsonDoc = BSON("arr" << BSON_ARRAY(BSON_ARRAY(BSON("subObj" << largeStr))));
+ const auto docs =
+ std::deque<DocumentSource::GetNextResult>{Document(bsonDoc), Document(bsonDoc)};
+ const auto mock = DocumentSourceMock::createForTest(docs, getExpCtx());
+
+ [[maybe_unused]] auto accessor = makeDefaultAccessor(mock, boost::none);
+ size_t initialDocSize = docs[0].getDocument().getApproximateSize();
+
+ // Pull in the first document, and verify the reported size of the iterator is roughly
+ // triple the size of the document. The reason for this is that 'largeStr' is cached twice; once
+ // for the 'arr' element and once for the nested 'subObj' element.
+ ASSERT_DOCUMENT_EQ(*_iter->current(), docs[0].getDocument());
+ ASSERT_GT(_iter->getApproximateSize(), initialDocSize * 3);
+ ASSERT_LT(_iter->getApproximateSize(), initialDocSize * 3 + 1024);
// Pull in the second document. Both docs remain in the cache so the reported memory should
// include both.
advance();
ASSERT_DOCUMENT_EQ(*_iter->current(), docs[1].getDocument());
- ASSERT_GT(_iter->getApproximateSize(), (initialDocSize * 2) * 2);
- ASSERT_LT(_iter->getApproximateSize(), (initialDocSize * 2) * 2 + 1024);
+ ASSERT_GT(_iter->getApproximateSize(), (initialDocSize * 3) * 2);
+ ASSERT_LT(_iter->getApproximateSize(), (initialDocSize * 3) * 2 + 1024);
}
TEST_F(PartitionIteratorTest, MemoryUsageAccountsForNestedObjInDocumentIteratorCache) {