diff options
author | samontea <merciers.merciers@gmail.com> | 2021-05-25 15:32:17 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-05-25 16:21:25 +0000 |
commit | de5d5ec32afdfd58ff578929effae8eca0164d62 (patch) | |
tree | b41b3457e81f6325223c0c9f934bd811c16f01cd /src/mongo | |
parent | 27f800c38c785130818a7f64f7302e5e720f459e (diff) | |
download | mongo-de5d5ec32afdfd58ff578929effae8eca0164d62.tar.gz |
SERVER-57130 Traverse arrays while filling the Document cache
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/db/exec/document_value/document.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/exec/document_value/value.cpp | 10 | ||||
-rw-r--r-- | src/mongo/db/exec/document_value/value.h | 5 | ||||
-rw-r--r-- | src/mongo/db/pipeline/window_function/partition_iterator_test.cpp | 33 |
4 files changed, 45 insertions, 7 deletions
diff --git a/src/mongo/db/exec/document_value/document.cpp b/src/mongo/db/exec/document_value/document.cpp index 54f7f22c433..a4298f41f87 100644 --- a/src/mongo/db/exec/document_value/document.cpp +++ b/src/mongo/db/exec/document_value/document.cpp @@ -364,9 +364,7 @@ void DocumentStorage::reset(const BSONObj& bson, bool stripMetadata) { void DocumentStorage::fillCache() const { for (DocumentStorageIterator it = iterator(); !it.atEnd(); it.advance()) { - // Retrieve the value and force it to be cached. - if (it->val.getType() == BSONType::Object) - it->val.getDocument().fillCache(); + it->val.fillCache(); } } diff --git a/src/mongo/db/exec/document_value/value.cpp b/src/mongo/db/exec/document_value/value.cpp index 29e5680a303..08409bd2461 100644 --- a/src/mongo/db/exec/document_value/value.cpp +++ b/src/mongo/db/exec/document_value/value.cpp @@ -1253,6 +1253,16 @@ ostream& operator<<(ostream& out, const Value& val) { verify(false); } +void Value::fillCache() const { + if (isObject()) { + getDocument().fillCache(); + } else if (isArray()) { + for (auto&& val : getArray()) { + val.fillCache(); + } + } +} + void Value::serializeForSorter(BufBuilder& buf) const { buf.appendChar(getType()); switch (getType()) { diff --git a/src/mongo/db/exec/document_value/value.h b/src/mongo/db/exec/document_value/value.h index c89fedcf092..42c7b1c0ba9 100644 --- a/src/mongo/db/exec/document_value/value.h +++ b/src/mongo/db/exec/document_value/value.h @@ -343,6 +343,11 @@ public: std::string toString() const; friend std::ostream& operator<<(std::ostream& out, const Value& v); + /** + * Populates the internal cache by recursively walking the underlying BSON. + */ + void fillCache() const; + void swap(Value& rhs) { _storage.swap(rhs._storage); } diff --git a/src/mongo/db/pipeline/window_function/partition_iterator_test.cpp b/src/mongo/db/pipeline/window_function/partition_iterator_test.cpp index de702eb112f..af917bd2275 100644 --- a/src/mongo/db/pipeline/window_function/partition_iterator_test.cpp +++ b/src/mongo/db/pipeline/window_function/partition_iterator_test.cpp @@ -520,15 +520,40 @@ TEST_F(PartitionIteratorTest, MemoryUsageAccountsForArraysInDocumentIteratorCach // triple the size of the document. The reason for this is that 'largeStr' is cached twice; once // for the 'arr' element and once for the nested 'subObj' element. ASSERT_DOCUMENT_EQ(*_iter->current(), docs[0].getDocument()); - ASSERT_GT(_iter->getApproximateSize(), initialDocSize * 2); - ASSERT_LT(_iter->getApproximateSize(), initialDocSize * 2 + 1024); + ASSERT_GT(_iter->getApproximateSize(), initialDocSize * 3); + ASSERT_LT(_iter->getApproximateSize(), initialDocSize * 3 + 1024); + + // Pull in the second document. Both docs remain in the cache so the reported memory should + // include both. + advance(); + ASSERT_DOCUMENT_EQ(*_iter->current(), docs[1].getDocument()); + ASSERT_GT(_iter->getApproximateSize(), (initialDocSize * 3) * 2); + ASSERT_LT(_iter->getApproximateSize(), (initialDocSize * 3) * 2 + 1024); +} + +TEST_F(PartitionIteratorTest, MemoryUsageAccountsForNestedArraysInDocumentIteratorCache) { + std::string largeStr(1024, 'x'); + auto bsonDoc = BSON("arr" << BSON_ARRAY(BSON_ARRAY(BSON("subObj" << largeStr)))); + const auto docs = + std::deque<DocumentSource::GetNextResult>{Document(bsonDoc), Document(bsonDoc)}; + const auto mock = DocumentSourceMock::createForTest(docs, getExpCtx()); + + [[maybe_unused]] auto accessor = makeDefaultAccessor(mock, boost::none); + size_t initialDocSize = docs[0].getDocument().getApproximateSize(); + + // Pull in the first document, and verify the reported size of the iterator is roughly + // triple the size of the document. The reason for this is that 'largeStr' is cached twice; once + // for the 'arr' element and once for the nested 'subObj' element. + ASSERT_DOCUMENT_EQ(*_iter->current(), docs[0].getDocument()); + ASSERT_GT(_iter->getApproximateSize(), initialDocSize * 3); + ASSERT_LT(_iter->getApproximateSize(), initialDocSize * 3 + 1024); // Pull in the second document. Both docs remain in the cache so the reported memory should // include both. advance(); ASSERT_DOCUMENT_EQ(*_iter->current(), docs[1].getDocument()); - ASSERT_GT(_iter->getApproximateSize(), (initialDocSize * 2) * 2); - ASSERT_LT(_iter->getApproximateSize(), (initialDocSize * 2) * 2 + 1024); + ASSERT_GT(_iter->getApproximateSize(), (initialDocSize * 3) * 2); + ASSERT_LT(_iter->getApproximateSize(), (initialDocSize * 3) * 2 + 1024); } TEST_F(PartitionIteratorTest, MemoryUsageAccountsForNestedObjInDocumentIteratorCache) { |