summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIrina Yatsenko <irina.yatsenko@mongodb.com>2022-09-23 15:58:23 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-09-23 17:00:51 +0000
commit02c78ac52fd67148b499b0cbf0f422a50bd92943 (patch)
treed74689c80944e87bf8c97bfc460a5d7d1cd0a327
parentd22d68c4c2fcdc76fcd0079a12e9f5e39b119381 (diff)
downloadmongo-02c78ac52fd67148b499b0cbf0f422a50bd92943.tar.gz
SERVER-68792 Refactor iterating over cell values into a cursor
-rw-r--r--src/mongo/db/exec/sbe/stages/column_scan.cpp104
-rw-r--r--src/mongo/db/exec/sbe/values/columnar.h50
-rw-r--r--src/mongo/db/storage/column_store.h153
3 files changed, 163 insertions, 144 deletions
diff --git a/src/mongo/db/exec/sbe/stages/column_scan.cpp b/src/mongo/db/exec/sbe/stages/column_scan.cpp
index 640894aa33b..0585296ed23 100644
--- a/src/mongo/db/exec/sbe/stages/column_scan.cpp
+++ b/src/mongo/db/exec/sbe/stages/column_scan.cpp
@@ -354,106 +354,22 @@ void ColumnScanStage::readParentsIntoObj(StringData path,
// The result of the filter predicate will be the same regardless of sparseness or sub objects,
// therefore we don't look at the parents and don't consult the row store.
-// (TODO SERVER-68792) Refactor the iteration over values into its own type.
bool ColumnScanStage::checkFilter(CellView cell, size_t filterIndex, const PathValue& path) {
auto splitCellView = SplitCellView::parse(cell);
- auto translatedCell = translateCell(path, splitCellView);
- if (!translatedCell.moreValues()) {
- return false;
- }
+ SplitCellView::CursorWithArrayDepth<value::ColumnStoreEncoder> cellCursor{
+ splitCellView.firstValuePtr, splitCellView.arrInfo, &_encoder};
- if (translatedCell.arrInfo.empty()) {
- // Have a single non-nested value -- evaluate the filter on it.
- // (TODO SERVER-68792) Could we avoid copying by using a non-owning accessor? Same question
- // for other locations in this function when the predicate is evaluated immediately after
- // setting the slot.
- auto [tag, val] = translatedCell.nextValue();
- _filterInputAccessors[filterIndex].reset(tag, val);
- return _bytecode.runPredicate(_filterExprsCode[filterIndex].get());
- } else {
- ArrInfoReader arrInfoReader{translatedCell.arrInfo};
- int depth = 0;
-
- // Avoid allocating memory for this stack except in the rare case of deeply nested
- // documents.
- std::stack<bool, absl::InlinedVector<bool, 64>> inArray;
- while (arrInfoReader.moreExplicitComponents()) {
- switch (arrInfoReader.takeNextChar()) {
- case '{': {
- // We consider as nested only the arrays that are elements of other arrays. When
- // there is an array of objects and some of the fields of these objects are
- // arrays, the latter aren't nested.
- inArray.push(false);
- break;
- }
- case '[': {
- // A '[' can be followed by a number if there are objects in the array, that
- // should be retrieved from other paths when reconstructing the record. We can
- // ignore them as they don't contribute to the values.
- (void)arrInfoReader.takeNumber();
- if (!inArray.empty() && inArray.top()) {
- depth++;
- }
- inArray.push(true);
- break;
- }
- case '+': {
- // Indicates elements in arrays that are objects that don't have the path. These
- // objects don't contribute to the cell's values, so we can ignore them.
- (void)arrInfoReader.takeNumber();
- break;
- }
- case '|': {
- auto repeats = arrInfoReader.takeNumber();
- for (size_t i = 0; i < repeats + 1; i++) {
- auto [tag, val] = translatedCell.nextValue();
- if (depth == 0) {
- _filterInputAccessors[filterIndex].reset(tag, val);
- if (_bytecode.runPredicate(_filterExprsCode[filterIndex].get())) {
- return true;
- }
- }
- }
- break;
- }
- case 'o': {
- // Indicates the start of a nested object inside the cell. We don't need to
- // track this info because the nested objects don't contribute to the values in
- // the cell.
- (void)arrInfoReader.takeNumber();
- break;
- }
- case ']': {
- invariant(inArray.size() > 0 && inArray.top());
- inArray.pop();
- if (inArray.size() > 0 && inArray.top()) {
- invariant(depth > 0);
- depth--;
- }
-
- // Closing an array implicitly closes all objects on the path between it and the
- // previous array.
- while (inArray.size() > 0 && !inArray.top()) {
- inArray.pop();
- }
- }
- }
- }
- if (depth == 0) {
- // For the remaining values the depth isn't going to change so we don't need to advance
- // the value iterator if the values are too deep.
- while (translatedCell.moreValues()) {
- auto [tag, val] = translatedCell.nextValue();
- _filterInputAccessors[filterIndex].reset(tag, val);
- if (_bytecode.runPredicate(_filterExprsCode[filterIndex].get())) {
- return true;
- }
- }
+ while (cellCursor.hasNext()) {
+ auto [val, depth] = cellCursor.nextValue();
+ if (depth > 0)
+ continue;
+
+ _filterInputAccessors[filterIndex].reset(val->first, val->second);
+ if (_bytecode.runPredicate(_filterExprsCode[filterIndex].get())) {
+ return true;
}
}
-
- // None of the values matched the filter.
return false;
}
diff --git a/src/mongo/db/exec/sbe/values/columnar.h b/src/mongo/db/exec/sbe/values/columnar.h
index 01aef2acbce..ec0fb5f07c8 100644
--- a/src/mongo/db/exec/sbe/values/columnar.h
+++ b/src/mongo/db/exec/sbe/values/columnar.h
@@ -39,56 +39,6 @@
*/
namespace mongo::sbe {
-/*
- * Array info reader based on a string representation of arrayInfo. Allows for reading/peeking of
- * individual components.
- */
-class ArrInfoReader {
-public:
- explicit ArrInfoReader(StringData arrInfoStr) : _arrInfo(arrInfoStr) {}
-
- char nextChar() const {
- if (_offsetInArrInfo == _arrInfo.size()) {
- // Reaching the end of the array info means an unlimited number of '|'s.
- return '|';
- }
- return _arrInfo[_offsetInArrInfo];
- }
-
- char takeNextChar() {
- if (_offsetInArrInfo == _arrInfo.size()) {
- // Reaching the end of the array info means an unlimited number of '|'s.
- return '|';
- }
- return _arrInfo[_offsetInArrInfo++];
- }
-
- size_t takeNumber() {
- return ColumnStore::readArrInfoNumber(_arrInfo, &_offsetInArrInfo);
- }
-
- bool empty() const {
- return _arrInfo.empty();
- }
-
- /*
- * Returns whether more explicit components are yet to be consumed. Since array info logically
- * ends with an infinite stream of |, this function indicates whether there are more components
- * which are physically present to be read, not including the infinite sequence of |.
- */
- bool moreExplicitComponents() const {
- return _offsetInArrInfo < _arrInfo.size();
- }
-
- StringData rawArrInfo() const {
- return _arrInfo;
- }
-
-private:
- StringData _arrInfo;
- size_t _offsetInArrInfo = 0;
-};
-
/**
* Represents a cell in a columnar index with ability to retrieve values in an SBE native format.
*/
diff --git a/src/mongo/db/storage/column_store.h b/src/mongo/db/storage/column_store.h
index 995b7e15755..9db8ca7d1bb 100644
--- a/src/mongo/db/storage/column_store.h
+++ b/src/mongo/db/storage/column_store.h
@@ -30,6 +30,7 @@
#pragma once
#include <boost/optional/optional.hpp>
+#include <stack>
#include "mongo/base/string_data.h"
#include "mongo/bson/bsonelement.h"
@@ -382,6 +383,56 @@ protected:
std::shared_ptr<Ident> _ident;
};
+/*
+ * Array info reader based on a string representation of arrayInfo. Allows for reading/peeking of
+ * individual components.
+ */
+class ArrInfoReader {
+public:
+ explicit ArrInfoReader(StringData arrInfoStr) : _arrInfo(arrInfoStr) {}
+
+ char nextChar() const {
+ if (_offsetInArrInfo == _arrInfo.size()) {
+ // Reaching the end of the array info means an unlimited number of '|'s.
+ return '|';
+ }
+ return _arrInfo[_offsetInArrInfo];
+ }
+
+ char takeNextChar() {
+ if (_offsetInArrInfo == _arrInfo.size()) {
+ // Reaching the end of the array info means an unlimited number of '|'s.
+ return '|';
+ }
+ return _arrInfo[_offsetInArrInfo++];
+ }
+
+ size_t takeNumber() {
+ return ColumnStore::readArrInfoNumber(_arrInfo, &_offsetInArrInfo);
+ }
+
+ bool empty() const {
+ return _arrInfo.empty();
+ }
+
+ /*
+ * Returns whether more explicit components are yet to be consumed. Since array info logically
+ * ends with an infinite stream of |, this function indicates whether there are more components
+ * which are physically present to be read, not including the infinite sequence of |.
+ */
+ bool moreExplicitComponents() const {
+ return _offsetInArrInfo < _arrInfo.size();
+ }
+
+ StringData rawArrInfo() const {
+ return _arrInfo;
+ }
+
+private:
+ StringData _arrInfo;
+ size_t _offsetInArrInfo = 0;
+};
+
struct SplitCellView {
StringData arrInfo; // rawData() is 1-past-end of range starting with firstValuePtr.
const char* firstValuePtr = nullptr;
@@ -423,6 +474,108 @@ struct SplitCellView {
return Cursor<ValueEncoder>{firstValuePtr, arrInfo.rawData(), valEncoder};
}
+ template <class ValueEncoder>
+ struct CursorWithArrayDepth {
+ CursorWithArrayDepth(const char* elemPtr,
+ const StringData& arrayInfo,
+ ValueEncoder* encoder)
+ : elemPtr(elemPtr),
+ end(arrayInfo.rawData()),
+ encoder(encoder),
+ arrInfoReader(arrayInfo) {}
+
+ bool hasNext() const {
+ return elemPtr < end;
+ }
+
+ using Out = typename std::remove_reference_t<ValueEncoder>::Out;
+ /**
+ * Returns the next value with its array-nestedness level.
+ */
+ std::pair<Out /*decoded value*/, int /*depth*/> nextValue() {
+ invariant(elemPtr < end, "The client should have checked 'hasNext()'");
+
+ // The expected most common case: implicit tail of values at the same depth.
+ if (!arrInfoReader.moreExplicitComponents()) {
+ return {decodeAndAdvance(elemPtr, *encoder), depth};
+ }
+
+ // The next expected most common case: a range of values at the same depth.
+ if (repeats > 0) {
+ repeats--;
+ return {decodeAndAdvance(elemPtr, *encoder), depth};
+ }
+
+ // An end of a range means we have to check for structural changes and update depth.
+ while (arrInfoReader.moreExplicitComponents()) {
+ switch (arrInfoReader.takeNextChar()) {
+ case '[': {
+ // A '[' can be followed by a number if there are objects in the array,
+ // that should be retrieved from other paths when reconstructing the
+ // record. We can ignore them as they don't contribute to the values.
+ (void)arrInfoReader.takeNumber();
+ if (!inArray.empty() && inArray.top()) {
+ depth++;
+ }
+ inArray.push(true);
+ break;
+ }
+ case '|': {
+ repeats = arrInfoReader.takeNumber();
+ return {decodeAndAdvance(elemPtr, *encoder), depth};
+ }
+ case '{': {
+ // We consider as nested only the arrays that are elements of other
+ // arrays. When there is an array of objects and some of the fields of
+ // these objects are arrays, the latter aren't nested.
+ inArray.push(false);
+ break;
+ }
+ case ']': {
+ invariant(inArray.size() > 0 && inArray.top());
+ inArray.pop();
+ if (inArray.size() > 0 && inArray.top()) {
+ invariant(depth > 0);
+ depth--;
+ }
+
+ // Closing an array implicitly closes all objects on the path between it
+ // and the previous array.
+ while (inArray.size() > 0 && !inArray.top()) {
+ inArray.pop();
+ }
+ break;
+ }
+ case '+': {
+ // Indicates elements in arrays that are objects that don't have the
+ // path. These objects don't contribute to the cell's values, so we can
+ // ignore them.
+ (void)arrInfoReader.takeNumber();
+ break;
+ }
+ case 'o': {
+ // Indicates the start of a nested object inside the cell. We don't need
+ // to track this info because the nested objects don't contribute to the
+ // values in the cell.
+ (void)arrInfoReader.takeNumber();
+ break;
+ }
+ }
+ }
+
+ // Start consuming the implicit tail range.
+ return {decodeAndAdvance(elemPtr, *encoder), depth};
+ }
+
+ const char* elemPtr;
+ const char* end;
+ ValueEncoder* encoder;
+ ArrInfoReader arrInfoReader;
+ int depth = 0;
+ std::stack<bool, absl::InlinedVector<bool, 64>> inArray;
+ size_t repeats = 0;
+ };
+
static SplitCellView parse(CellView cell) {
using Bytes = ColumnStore::Bytes;
using TinySize = ColumnStore::Bytes::TinySize;