author     Uladzimir Makouski <uladzimir.makouski@mongodb.com>   2022-08-05 10:39:49 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>      2022-08-05 13:51:17 +0000
commit     bc1652aba351de2c4596a63dfa3abe6849fd56f3 (patch)
tree       c619f0fdeb13406fa4feb0828b3f29deb0e45df2 /src/mongo/db
parent     bddf4693d7957adc09f0306dfd43508ba226d0c3 (diff)
Revert "SERVER-67336 Per-path filters"
This reverts commit ede6b2adff822dd767cef40c5e1351ee8ed27aaa.
Diffstat (limited to 'src/mongo/db')
-rw-r--r--  src/mongo/db/exec/sbe/stages/column_scan.cpp        283
-rw-r--r--  src/mongo/db/exec/sbe/stages/column_scan.h           74
-rw-r--r--  src/mongo/db/query/query_planner.cpp                  2
-rw-r--r--  src/mongo/db/query/query_planner_columnar_test.cpp   27
-rw-r--r--  src/mongo/db/query/sbe_stage_builder.cpp             70
5 files changed, 86 insertions, 370 deletions
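
For orientation before the per-file diffs: the revert's main API effect is to drop the per-path filter machinery from ColumnScanStage, including the 'includeInOutput' and 'filteredPaths' constructor parameters. A sketch of the post-revert constructor declaration, abridged from the column_scan.h diff below (not a complete header excerpt):

    // Post-revert constructor (abridged). The SERVER-67336 parameters
    // 'std::vector<bool> includeInOutput' and 'std::vector<PathFilter> filteredPaths'
    // are no longer present.
    ColumnScanStage(UUID collectionUuid,
                    StringData columnIndexName,
                    std::vector<std::string> paths,
                    boost::optional<value::SlotId> recordIdSlot,
                    boost::optional<value::SlotId> reconstructedRecordSlot,
                    value::SlotId rowStoreSlot,
                    std::unique_ptr<EExpression> rowStoreExpr,
                    PlanYieldPolicy* yieldPolicy,
                    PlanNodeId planNodeId,
                    bool participateInTrialRunTracking = true);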
diff --git a/src/mongo/db/exec/sbe/stages/column_scan.cpp b/src/mongo/db/exec/sbe/stages/column_scan.cpp
index 35e023bfac3..a003515427f 100644
--- a/src/mongo/db/exec/sbe/stages/column_scan.cpp
+++ b/src/mongo/db/exec/sbe/stages/column_scan.cpp
@@ -52,12 +52,10 @@ TranslatedCell translateCell(PathView path, const SplitCellView& splitCellView)
ColumnScanStage::ColumnScanStage(UUID collectionUuid,
StringData columnIndexName,
std::vector<std::string> paths,
- std::vector<bool> includeInOutput,
boost::optional<value::SlotId> recordIdSlot,
boost::optional<value::SlotId> reconstuctedRecordSlot,
value::SlotId rowStoreSlot,
std::unique_ptr<EExpression> rowStoreExpr,
- std::vector<PathFilter> filteredPaths,
PlanYieldPolicy* yieldPolicy,
PlanNodeId nodeId,
bool participateInTrialRunTracking)
@@ -65,39 +63,26 @@ ColumnScanStage::ColumnScanStage(UUID collectionUuid,
_collUuid(collectionUuid),
_columnIndexName(columnIndexName),
_paths(std::move(paths)),
- _includeInOutput(std::move(includeInOutput)),
_recordIdSlot(recordIdSlot),
_reconstructedRecordSlot(reconstuctedRecordSlot),
_rowStoreSlot(rowStoreSlot),
- _rowStoreExpr(std::move(rowStoreExpr)),
- _filteredPaths(std::move(filteredPaths)) {
- invariant(_filteredPaths.size() <= _paths.size(),
- "Filtered paths should be a subset of all paths");
- invariant(_paths.size() == _includeInOutput.size());
-}
+ _rowStoreExpr(std::move(rowStoreExpr)) {}
std::unique_ptr<PlanStage> ColumnScanStage::clone() const {
- std::vector<PathFilter> filteredPaths;
- for (const auto& fp : _filteredPaths) {
- filteredPaths.emplace_back(fp.pathIndex, fp.filterExpr->clone(), fp.inputSlotId);
- }
+ std::vector<std::unique_ptr<EExpression>> pathExprs;
return std::make_unique<ColumnScanStage>(_collUuid,
_columnIndexName,
_paths,
- _includeInOutput,
_recordIdSlot,
_reconstructedRecordSlot,
_rowStoreSlot,
_rowStoreExpr ? _rowStoreExpr->clone() : nullptr,
- std::move(filteredPaths),
_yieldPolicy,
_commonStats.nodeId,
_participateInTrialRunTracking);
}
void ColumnScanStage::prepare(CompileCtx& ctx) {
- ctx.root = this;
-
if (_reconstructedRecordSlot) {
_reconstructedRecordAccessor = std::make_unique<value::OwnedValueAccessor>();
}
@@ -107,19 +92,10 @@ void ColumnScanStage::prepare(CompileCtx& ctx) {
_rowStoreAccessor = std::make_unique<value::OwnedValueAccessor>();
if (_rowStoreExpr) {
+ ctx.root = this;
_rowStoreExprCode = _rowStoreExpr->compile(ctx);
}
- _filterInputAccessors.resize(_filteredPaths.size());
- for (size_t idx = 0; idx < _filterInputAccessors.size(); ++idx) {
- auto slot = _filteredPaths[idx].inputSlotId;
- auto [it, inserted] = _filterInputAccessorsMap.emplace(slot, &_filterInputAccessors[idx]);
- uassert(6610212, str::stream() << "duplicate slot: " << slot, inserted);
- }
- for (auto& filteredPath : _filteredPaths) {
- _filterExprsCode.emplace_back(filteredPath.filterExpr->compile(ctx));
- }
-
tassert(6610200, "'_coll' should not be initialized prior to 'acquireCollection()'", !_coll);
std::tie(_coll, _collName, _catalogEpoch) = acquireCollection(_opCtx, _collUuid);
@@ -144,23 +120,12 @@ value::SlotAccessor* ColumnScanStage::getAccessor(CompileCtx& ctx, value::SlotId
if (_rowStoreSlot == slot) {
return _rowStoreAccessor.get();
}
-
- if (auto it = _filterInputAccessorsMap.find(slot); it != _filterInputAccessorsMap.end()) {
- return it->second;
- }
-
return ctx.getAccessor(slot);
}
void ColumnScanStage::doSaveState(bool relinquishCursor) {
- if (_denseColumnCursor) {
- _denseColumnCursor->makeOwned();
- _denseColumnCursor->cursor().save();
- }
-
for (auto& cursor : _columnCursors) {
cursor.makeOwned();
- cursor.cursor().save();
}
if (_rowStoreCursor && relinquishCursor) {
@@ -171,6 +136,9 @@ void ColumnScanStage::doSaveState(bool relinquishCursor) {
_rowStoreCursor->setSaveStorageCursorOnDetachFromOperationContext(!relinquishCursor);
}
+ for (auto& cursor : _columnCursors) {
+ cursor.cursor().save();
+ }
for (auto& [path, cursor] : _parentPathCursors) {
cursor->cursor().saveUnpositioned();
}
@@ -202,9 +170,6 @@ void ColumnScanStage::doRestoreState(bool relinquishCursor) {
}
}
- if (_denseColumnCursor) {
- _denseColumnCursor->cursor().restore();
- }
for (auto& cursor : _columnCursors) {
cursor.cursor().restore();
}
@@ -217,9 +182,6 @@ void ColumnScanStage::doDetachFromOperationContext() {
if (_rowStoreCursor) {
_rowStoreCursor->detachFromOperationContext();
}
- if (_denseColumnCursor) {
- _denseColumnCursor->cursor().detachFromOperationContext();
- }
for (auto& cursor : _columnCursors) {
cursor.cursor().detachFromOperationContext();
}
@@ -232,9 +194,6 @@ void ColumnScanStage::doAttachToOperationContext(OperationContext* opCtx) {
if (_rowStoreCursor) {
_rowStoreCursor->reattachToOperationContext(opCtx);
}
- if (_denseColumnCursor) {
- _denseColumnCursor->cursor().reattachToOperationContext(opCtx);
- }
for (auto& cursor : _columnCursors) {
cursor.cursor().reattachToOperationContext(opCtx);
}
@@ -288,30 +247,24 @@ void ColumnScanStage::open(bool reOpen) {
auto iam = static_cast<ColumnStoreAccessMethod*>(entry->accessMethod());
- // The dense _recordId column is only needed if there are no filters (TODO SERVER-68377:
- // eventually we can avoid including this column for the cases where a known dense column
- // such as _id is being read anyway).
- if (_filteredPaths.empty()) {
- _denseColumnCursor = std::make_unique<ColumnCursor>(
- iam->storage()->newCursor(_opCtx, ColumnStore::kRowIdPath),
- _specificStats.cursorStats.emplace_back(ColumnStore::kRowIdPath.toString(),
- false /*includeInOutput*/));
- }
- for (size_t i = 0; i < _paths.size(); i++) {
- _columnCursors.emplace_back(
- iam->storage()->newCursor(_opCtx, _paths[i]),
- _specificStats.cursorStats.emplace_back(_paths[i], _includeInOutput[i]));
+ // Eventually we can avoid including this column for the cases where a known dense column (_id)
+ // is being read anyway.
+
+ // Add a stats struct that will be shared by overall ColumnScanStats and individual
+ // cursor.
+ _columnCursors.emplace_back(
+ iam->storage()->newCursor(_opCtx, ColumnStore::kRowIdPath),
+ _specificStats.cursorStats.emplace_back(ColumnStore::kRowIdPath.toString(), false));
+
+ for (auto&& path : _paths) {
+ _columnCursors.emplace_back(iam->storage()->newCursor(_opCtx, path),
+ _specificStats.cursorStats.emplace_back(path, true));
}
}
- // Set the cursors.
- if (_denseColumnCursor) {
- _denseColumnCursor->seekAtOrPast(RecordId());
- }
for (auto& columnCursor : _columnCursors) {
columnCursor.seekAtOrPast(RecordId());
}
- _recordId = _filteredPaths.empty() ? findMinRecordId() : findNextRecordIdForFilteredColumns();
_open = true;
}
@@ -370,152 +323,6 @@ void ColumnScanStage::readParentsIntoObj(StringData path,
}
}
-// The result of the filter predicate will be the same regardless of sparseness or sub objects,
-// therefore we don't look at the parents and don't consult the row store.
-//
-// (TODO SERVER-68285) Currently, the per-path predicates expect an object to run on, so we create
-// one. This is very inefficient (profiles show considerable time spent under Object::push_back) and
-// should be replaced with predicates that run directly on values. The fact that the implementation
-// of the filter depends on the implementation of the expressions passed to the stage indicates a
-// tight coupling. Unfortunately, this dependency can only be discovered at runtime.
-bool ColumnScanStage::checkFilter(CellView cell, size_t filterIndex, const PathValue& path) {
- auto [tag, val] = value::makeNewObject();
- value::ValueGuard materializedObjGuard(tag, val);
- auto& obj = *value::bitcastTo<value::Object*>(val);
-
- auto splitCellView = SplitCellView::parse(cell);
- auto translatedCell = translateCell(path, splitCellView);
- addCellToObject(translatedCell, obj);
-
- _filterInputAccessors[filterIndex].reset(true /*owned*/, tag, val);
- materializedObjGuard.reset();
- return _bytecode.runPredicate(_filterExprsCode[filterIndex].get());
-}
-
-RecordId ColumnScanStage::findNextRecordIdForFilteredColumns() {
- invariant(!_filteredPaths.empty());
-
- // Initialize 'targetRecordId' from the filtered cursor we are currently iterating.
- RecordId targetRecordId;
- {
- auto& cursor = cursorForFilteredPath(_filteredPaths[_nextUnmatched]);
- if (!cursor.lastCell()) {
- return RecordId(); // Have exhausted one of the columns.
- }
- targetRecordId = cursor.lastCell()->rid;
- }
-
- size_t matchedSinceAdvance = 0;
- // The loop will terminate because when 'matchedSinceAdvance' is reset the 'targetRecordId' is
- // guaranteed to advance. It will do no more than N 'next()' calls across all cursors, where N
- // is the number of records (might do fewer, if for some columns there are missing values). The
- // number of seeks and filter checks depends on the selectivity of the filters.
- while (matchedSinceAdvance < _filteredPaths.size()) {
- auto& cursor = cursorForFilteredPath(_filteredPaths[_nextUnmatched]);
-
- // Avoid seeking into the column that we started with.
- auto& result = cursor.lastCell();
- if (result && result->rid < targetRecordId) {
- result = cursor.seekAtOrPast(targetRecordId);
- }
- if (!result) {
- return RecordId();
- }
-
- if (result->rid > targetRecordId) {
- // The column skipped ahead - have to restart at this new record ID.
- matchedSinceAdvance = 0;
- targetRecordId = result->rid;
- }
-
- if (!checkFilter(result->value, _nextUnmatched, cursor.path())) {
- // Advance the column until find a match and restart at this new record ID.
- do {
- result = cursor.next();
- if (!result) {
- return RecordId();
- }
- } while (!checkFilter(result->value, _nextUnmatched, cursor.path()));
- matchedSinceAdvance = 0;
- invariant(result->rid > targetRecordId);
- targetRecordId = result->rid;
- }
- ++matchedSinceAdvance;
- _nextUnmatched = (_nextUnmatched + 1) % _filteredPaths.size();
- }
- invariant(!targetRecordId.isNull());
-
- // Ensure that _all_ cursors have caught up with the filtered record ID. Some of the cursors
- // might skip ahead, which would mean the column is missing a value for this 'recordId'.
- for (auto& cursor : _columnCursors) {
- const auto& result = cursor.lastCell();
- if (result && result->rid < targetRecordId) {
- cursor.seekAtOrPast(targetRecordId);
- }
- }
-
- return targetRecordId;
-}
-
-RecordId ColumnScanStage::findMinRecordId() const {
- if (_denseColumnCursor) {
- // The cursor of the dense column cannot be ahead of any other, so it's always at the
- // minimum.
- auto& result = _denseColumnCursor->lastCell();
- if (!result) {
- return RecordId();
- }
- return result->rid;
- }
-
- auto recordId = RecordId();
- for (const auto& cursor : _columnCursors) {
- const auto& result = cursor.lastCell();
- if (result && (recordId.isNull() || result->rid < recordId)) {
- recordId = result->rid;
- }
- }
- return recordId;
-}
-
-RecordId ColumnScanStage::advanceCursors() {
- if (!_filteredPaths.empty()) {
- // Nudge forward the "active" filtered cursor. The remaining ones will be synchronized by
- // 'findNextRecordIdForFilteredColumns()'.
- cursorForFilteredPath(_filteredPaths[_nextUnmatched]).next();
- return findNextRecordIdForFilteredColumns();
- }
-
- // In absence of filters all cursors iterate forward on their own. Some of the cursors might be
- // ahead of the current '_recordId' because there are gaps in their columns - don't move them
- // but only those that are at '_recordId' and therefore their values have been consumed. While
- // at it, compute the new min record ID.
- auto nextRecordId = RecordId();
- if (_denseColumnCursor) {
- invariant(_denseColumnCursor->lastCell()->rid == _recordId,
- "Dense cursor should always be at the current minimum record ID");
- auto cell = _denseColumnCursor->next();
- if (!cell) {
- return RecordId();
- }
- nextRecordId = cell->rid;
- }
- for (auto& cursor : _columnCursors) {
- auto& cell = cursor.lastCell();
- if (!cell) {
- continue; // this column has been exhausted
- }
- if (cell->rid == _recordId) {
- cell = cursor.next();
- }
- if (cell && (nextRecordId.isNull() || cell->rid < nextRecordId)) {
- invariant(!_denseColumnCursor, "Dense cursor should have the next lowest record ID");
- nextRecordId = cell->rid;
- }
- }
- return nextRecordId;
-}
-
PlanState ColumnScanStage::getNext() {
auto optTimer(getOptTimer(_opCtx));
@@ -525,32 +332,35 @@ PlanState ColumnScanStage::getNext() {
checkForInterrupt(_opCtx);
+ // Find minimum record ID of all column cursors.
+ _recordId = RecordId();
+ for (auto& cursor : _columnCursors) {
+ auto& result = cursor.lastCell();
+ if (result && (_recordId.isNull() || result->rid < _recordId)) {
+ _recordId = result->rid;
+ }
+ }
+
if (_recordId.isNull()) {
return trackPlanState(PlanState::IS_EOF);
}
- bool useRowStore = false;
-
auto [outTag, outVal] = value::makeNewObject();
auto& outObj = *value::bitcastTo<value::Object*>(outVal);
value::ValueGuard materializedObjGuard(outTag, outVal);
StringDataSet pathsRead;
+ bool useRowStore = false;
for (size_t i = 0; i < _columnCursors.size(); ++i) {
- if (!_includeInOutput[i]) {
- continue;
- }
- auto& cursor = _columnCursors[i];
- auto& lastCell = cursor.lastCell();
+ auto& lastCell = _columnCursors[i].lastCell();
+ const auto& path = _columnCursors[i].path();
boost::optional<SplitCellView> splitCellView;
if (lastCell && lastCell->rid == _recordId) {
splitCellView = SplitCellView::parse(lastCell->value);
}
- const auto& path = cursor.path();
-
- if (!useRowStore) {
+ if (_columnCursors[i].includeInOutput() && !useRowStore) {
if (splitCellView &&
(splitCellView->hasSubPaths || splitCellView->hasDuplicateFields)) {
useRowStore = true;
@@ -566,6 +376,10 @@ PlanState ColumnScanStage::getNext() {
}
}
}
+
+ if (splitCellView) {
+ _columnCursors[i].next();
+ }
}
if (useRowStore) {
@@ -609,8 +423,6 @@ PlanState ColumnScanStage::getNext() {
_tracker = nullptr;
uasserted(ErrorCodes::QueryTrialRunCompleted, "Trial run early exit in scan");
}
-
- _recordId = advanceCursors();
return trackPlanState(PlanState::ADVANCED);
}
@@ -695,31 +507,6 @@ std::vector<DebugPrinter::Block> ColumnScanStage::debugPrint() const {
}
ret.emplace_back(DebugPrinter::Block("`]"));
- // Print out per-path filters (if any).
- if (!_filteredPaths.empty()) {
- ret.emplace_back(DebugPrinter::Block("[`"));
- for (size_t idx = 0; idx < _filteredPaths.size(); ++idx) {
- if (idx) {
- ret.emplace_back(DebugPrinter::Block("`;"));
- }
-
- ret.emplace_back(str::stream()
- << "\"" << _paths[_filteredPaths[idx].pathIndex] << "\": ");
- DebugPrinter::addIdentifier(ret, _filteredPaths[idx].inputSlotId);
- ret.emplace_back(DebugPrinter::Block("`,"));
- DebugPrinter::addBlocks(ret, _filteredPaths[idx].filterExpr->debugPrint());
- }
- ret.emplace_back(DebugPrinter::Block("`]"));
- }
-
- if (_rowStoreExpr) {
- ret.emplace_back(DebugPrinter::Block("[`"));
- DebugPrinter::addIdentifier(ret, _rowStoreSlot);
- ret.emplace_back(DebugPrinter::Block("`,"));
- DebugPrinter::addBlocks(ret, _rowStoreExpr->debugPrint());
- ret.emplace_back(DebugPrinter::Block("`]"));
- }
-
ret.emplace_back("@\"`");
DebugPrinter::addIdentifier(ret, _collUuid.toString());
ret.emplace_back("`\"");
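
The column_scan.cpp changes above restore the pre-SERVER-67336 scan loop: with no per-path filters, getNext() simply picks the smallest record id reported by any column cursor and advances the cursors positioned at it. A minimal, self-contained sketch of that min-record-id step, using hypothetical stand-in types (FakeCursor, findMinRecordId) rather than the real ColumnCursor/RecordId classes:

    #include <cstdint>
    #include <iostream>
    #include <optional>
    #include <vector>

    struct FakeCursor {                            // hypothetical stand-in for ColumnCursor
        std::optional<std::int64_t> lastRecordId;  // empty once the column is exhausted
    };

    // Returns the smallest record id among non-exhausted cursors, or nullopt at EOF,
    // mirroring the loop the revert puts back at the top of getNext().
    std::optional<std::int64_t> findMinRecordId(const std::vector<FakeCursor>& cursors) {
        std::optional<std::int64_t> minRid;
        for (const auto& cursor : cursors) {
            if (cursor.lastRecordId && (!minRid || *cursor.lastRecordId < *minRid)) {
                minRid = cursor.lastRecordId;
            }
        }
        return minRid;
    }

    int main() {
        std::vector<FakeCursor> cursors{{7}, {std::nullopt}, {3}, {5}};
        if (auto rid = findMinRecordId(cursors)) {
            std::cout << "next record id: " << *rid << "\n";  // prints 3
        }
        return 0;
    }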
diff --git a/src/mongo/db/exec/sbe/stages/column_scan.h b/src/mongo/db/exec/sbe/stages/column_scan.h
index 7d30152f46d..94bc8fa4034 100644
--- a/src/mongo/db/exec/sbe/stages/column_scan.h
+++ b/src/mongo/db/exec/sbe/stages/column_scan.h
@@ -41,40 +41,24 @@ namespace sbe {
/**
* A stage that scans provided columnar index.
*
- * Currently the stage produces an object into the 'reconstructedRecordSlot' such that accessing any
- * of the given paths in it would be equivalent to accessing the paths in the corresponding object
- * from the associated row store. In the future the stage will be extended to produce separate
- * outputs for each path without materializing this intermediate object unless requested by the
- * client.
+ * Currently the stage produces an object into the 'recordSlot' such that accessing any of the given
+ * paths in it would be equivalent to accessing the paths in the corresponding object from the
+ * associated row store. In the future the stage will be extended to produce separate outputs for
+ * each path without materializing this intermediate object unless requested by the client.
*
* Debug string representation:
*
- * COLUMN_SCAN reconstructedRecordSlot|none recordIdSlot|none [path_1, ..., path_n]
- * [filter_path_1: filterSlot_1, filterExpr_1; ...]? [roStoreSlot, rowStoreExpr]?
- * collectionUuid indexName
+ * COLUMN_SCAN recordSlot|none recordIdSlot|none [path_1, ..., path_n] collectionUuid indexName
*/
class ColumnScanStage final : public PlanStage {
public:
- struct PathFilter {
- size_t pathIndex; // index into the paths array the stage will be using
- std::unique_ptr<EExpression> filterExpr;
- value::SlotId inputSlotId;
-
- PathFilter(size_t pathIndex,
- std::unique_ptr<EExpression> filterExpr,
- value::SlotId inputSlotId)
- : pathIndex(pathIndex), filterExpr(std::move(filterExpr)), inputSlotId(inputSlotId) {}
- };
-
ColumnScanStage(UUID collectionUuid,
StringData columnIndexName,
std::vector<std::string> paths,
- std::vector<bool> includeInOutput,
boost::optional<value::SlotId> recordIdSlot,
boost::optional<value::SlotId> reconstructedRecordSlot,
value::SlotId rowStoreSlot,
std::unique_ptr<EExpression> rowStoreExpr,
- std::vector<PathFilter> filteredPaths,
PlanYieldPolicy* yieldPolicy,
PlanNodeId planNodeId,
bool participateInTrialRunTracking = true);
@@ -170,9 +154,6 @@ private:
boost::optional<FullCellView>& lastCell() {
return _lastCell;
}
- const boost::optional<FullCellView>& lastCell() const {
- return _lastCell;
- }
size_t numNexts() const {
return _stats.numNexts;
@@ -203,21 +184,6 @@ private:
void readParentsIntoObj(StringData path, value::Object* out, StringDataSet* pathsReadSetOut);
- bool checkFilter(CellView cell, size_t filterIndex, const PathValue& path);
-
- // Finds the smallest record ID such that:
- // 1) it is greater or equal to the record ID of all filtered columns cursors prior to the call;
- // 2) the record with this ID passes the filters of all filtered columns.
- // Ensures that the cursors are set to this record ID unless it's missing in the column (which
- // is only possible for the non-filtered columns).
- RecordId findNextRecordIdForFilteredColumns();
-
- // Finds the lowest record ID across all cursors. Doesn't move any of the cursors.
- RecordId findMinRecordId() const;
-
- // Move cursors to the next record to be processed.
- RecordId advanceCursors();
-
// The columnar index this stage is scanning and the associated row store collection.
const UUID _collUuid;
const std::string _columnIndexName;
@@ -226,16 +192,13 @@ private:
boost::optional<uint64_t> _catalogEpoch; // and are not changed afterwards.
std::weak_ptr<const IndexCatalogEntry> _weakIndexCatalogEntry;
- // Paths to be read from the index. '_includeInOutput' defines which of the fields should be
- // included into the reconstructed record and the order of paths in '_paths' defines the
- // ordering of the fields. The two vectors should have the same size. NB: No paths is possible
- // when no filters are used and only constant computed columns are projected. In this case only
- // the dense record ID column will be read.
+ // Paths to be read from the index.
const std::vector<std::string> _paths;
- const std::vector<bool> _includeInOutput;
// The record id in the row store that is used to connect the per-path entries in the columnar
- // index and to retrieve the full record from the row store, if necessary.
+ // index and to retrieve the full record from the row store, if necessary. Because we put into
+ // the slot the address of record id, we must guarantee that its lifetime is as long as the
+ // stage's.
RecordId _recordId;
const boost::optional<value::SlotId> _recordIdSlot;
@@ -255,32 +218,17 @@ private:
const value::SlotId _rowStoreSlot;
const std::unique_ptr<EExpression> _rowStoreExpr;
- // Per path filters. The slots must be allocated by the client but downstream stages must not
- // read from them. Multiple filters form a conjunction where each branch of the AND only passes
- // when a value exists. Empty '_filteredPaths' means there are no filters.
- const std::vector<PathFilter> _filteredPaths;
- ColumnCursor& cursorForFilteredPath(const PathFilter& fp) {
- return _columnCursors[fp.pathIndex];
- }
- size_t _nextUnmatched = 0; // used when searching for the next matching record
-
std::unique_ptr<value::OwnedValueAccessor> _reconstructedRecordAccessor;
std::unique_ptr<value::OwnedValueAccessor> _recordIdAccessor;
std::unique_ptr<value::OwnedValueAccessor> _rowStoreAccessor;
- std::vector<value::OwnedValueAccessor> _filterInputAccessors;
- value::SlotAccessorMap _filterInputAccessorsMap;
vm::ByteCode _bytecode;
std::unique_ptr<vm::CodeFragment> _rowStoreExprCode;
- std::vector<std::unique_ptr<vm::CodeFragment>> _filterExprsCode;
- // Cursors to simultaneously read from the sections of the index for each path.
+ // Cursors to simultaneously read from the sections of the index for each path (and, possibly,
+ // auxiliary sections) and from the row store.
std::vector<ColumnCursor> _columnCursors;
StringMap<std::unique_ptr<ColumnCursor>> _parentPathCursors;
- // Dense column contains record ids for all records. It is necessary to support projection
- // semantics for missing values on paths.
- std::unique_ptr<ColumnCursor> _denseColumnCursor;
- // Cursor into the associated row store.
std::unique_ptr<SeekableRecordCursor> _rowStoreCursor;
bool _open{false};
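
The header comments removed above also documented the semantics being reverted: each per-path filter formed one branch of an implicit conjunction, and a branch passed only when the column actually had a value for the record. A toy, self-contained sketch of that rule (ToyPathFilter and passesAllFilters are hypothetical names, not the SBE expression machinery):

    #include <functional>
    #include <iostream>
    #include <optional>
    #include <string>
    #include <vector>

    // Hypothetical per-path filter: a predicate over the cell value for one path.
    struct ToyPathFilter {
        std::string path;
        std::function<bool(int)> pred;
    };

    // A record passes only if EVERY filtered path has a value AND that value matches;
    // a missing value fails its branch just like a non-matching one.
    bool passesAllFilters(const std::vector<ToyPathFilter>& filters,
                          const std::function<std::optional<int>(const std::string&)>& cellFor) {
        for (const auto& f : filters) {
            auto cell = cellFor(f.path);
            if (!cell || !f.pred(*cell)) {
                return false;
            }
        }
        return true;
    }

    int main() {
        std::vector<ToyPathFilter> filters{{"a", [](int v) { return v > 0; }}};
        auto record = [](const std::string& path) -> std::optional<int> {
            if (path == "a") {
                return 5;
            }
            return std::nullopt;
        };
        std::cout << passesAllFilters(filters, record) << "\n";  // prints 1
        return 0;
    }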
diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp
index 79ccf3ecfa0..9f33ae1c38b 100644
--- a/src/mongo/db/query/query_planner.cpp
+++ b/src/mongo/db/query/query_planner.cpp
@@ -323,7 +323,7 @@ StatusWith<std::unique_ptr<QuerySolution>> tryToBuildColumnScan(
// TODO SERVER-67140: Check if the columnar index actually provides the fields we need.
std::unique_ptr<MatchExpression> residualPredicate;
StringMap<std::unique_ptr<MatchExpression>> filterSplitByColumn;
- if (params.options) {
+ if (params.options & QueryPlannerParams::GENERATE_PER_COLUMN_FILTERS) {
std::tie(filterSplitByColumn, residualPredicate) =
expression::splitMatchExpressionForColumns(query.root());
} else {
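
This one-line query_planner.cpp change restores the explicit flag test: 'params.options' is a bitmask of planner options, so testing the whole mask for truthiness would split the filter per column whenever any option happened to be set, while the restored check fires only when the GENERATE_PER_COLUMN_FILTERS bit is present. A small self-contained sketch of the difference, using illustrative flag values rather than the real QueryPlannerParams constants:

    #include <cstdint>
    #include <iostream>

    // Hypothetical stand-ins for planner option bits (not the real values).
    constexpr std::uint32_t kIndexIntersection = 1u << 0;
    constexpr std::uint32_t kGeneratePerColumnFilters = 1u << 1;

    int main() {
        std::uint32_t options = kIndexIntersection;  // some unrelated option is set

        // Truthiness check: true for ANY nonzero option mask, even without the filter flag.
        bool wholeMask = static_cast<bool>(options);

        // Flag check (what the revert restores): true only when this specific bit is set.
        bool flagOnly = (options & kGeneratePerColumnFilters) != 0;

        std::cout << wholeMask << " " << flagOnly << "\n";  // prints "1 0"
        return 0;
    }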
diff --git a/src/mongo/db/query/query_planner_columnar_test.cpp b/src/mongo/db/query/query_planner_columnar_test.cpp
index 0f8c4048176..68f0d5a6c7f 100644
--- a/src/mongo/db/query/query_planner_columnar_test.cpp
+++ b/src/mongo/db/query/query_planner_columnar_test.cpp
@@ -809,4 +809,31 @@ TEST_F(QueryPlannerColumnarTest, SelectsFirstFromMultipleEligibleColumnStoreInde
}
})");
}
+
+TEST_F(QueryPlannerColumnarTest, FullPredicateOption) {
+ params.columnStoreIndexes.emplace_back(kIndexName);
+
+ // Filter that could be pushed down, but isn't due to the lack of the
+ // GENERATE_PER_COLUMN_FILTER flag.
+ auto predicate = fromjson(R"({
+ specialAddress: {$exists: true},
+ doNotContact: {$exists: true}
+ })");
+ runQuerySortProj(predicate, BSONObj(), BSON("a" << 1 << "_id" << 0));
+ assertSolutionExists(R"({
+ proj: {
+ spec: {a: 1, _id: 0},
+ node: {
+ column_scan: {
+ outputFields: ['a'],
+ matchFields: ['specialAddress', 'doNotContact'],
+ postAssemblyFilter: {
+ specialAddress: {$exists: true},
+ doNotContact: {$exists: true}
+ }
+ }
+ }
+ }
+ })");
+}
} // namespace mongo
diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp
index aedddb6ab02..e70f1e80cac 100644
--- a/src/mongo/db/query/sbe_stage_builder.cpp
+++ b/src/mongo/db/query/sbe_stage_builder.cpp
@@ -726,6 +726,8 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder
"'postAssemblyFilter' to be used instead.",
!csn->filter);
+ tassert(6610251, "Expected no filters by path", csn->filtersByPath.empty());
+
PlanStageSlots outputs;
auto reconstructedRecordSlot = _slotIdGenerator.generate();
@@ -766,63 +768,16 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder
auto abt = builder.generateABT();
auto rowStoreExpr = abt ? abtToExpr(*abt, slotMap) : emptyExpr->clone();
- // Get all the paths but make sure "_id" comes first (the order of paths given to the
- // column_scan stage defines the order of fields in the reconstructed record).
- std::vector<std::string> paths;
- paths.reserve(csn->allFields.size());
- if (csn->allFields.find("_id") != csn->allFields.end()) {
- paths.push_back("_id");
- }
- for (const auto& path : csn->allFields) {
- if (path != "_id") {
- paths.push_back(path);
- }
- }
-
- // Identify the filtered columns, if any, and create slots/expressions for them.
- std::vector<sbe::ColumnScanStage::PathFilter> filteredPaths;
- filteredPaths.reserve(csn->filtersByPath.size());
- for (size_t i = 0; i < paths.size(); i++) {
- auto itFilter = csn->filtersByPath.find(paths[i]);
- if (itFilter != csn->filtersByPath.end()) {
- auto filterInputSlot = _slotIdGenerator.generate();
-
- // TODO SERVER-68285: use native SBE expression instead of the classic matcher.
- auto expr = makeFunction("applyClassicMatcher",
- makeConstant(sbe::value::TypeTags::classicMatchExpresion,
- sbe::value::bitcastFrom<const MatchExpression*>(
- itFilter->second->shallowClone().release())),
- makeVariable(filterInputSlot));
-
- filteredPaths.emplace_back(i, std::move(expr), filterInputSlot);
- }
- }
-
- // Tag which of the paths should be included into the output.
- DepsTracker residual;
- if (csn->postAssemblyFilter) {
- csn->postAssemblyFilter->addDependencies(&residual);
- }
- std::vector<bool> includeInOutput(paths.size(), false);
- for (size_t i = 0; i < paths.size(); i++) {
- if (csn->outputFields.find(paths[i]) != csn->outputFields.end() ||
- residual.fields.find(paths[i]) != residual.fields.end()) {
- includeInOutput[i] = true;
- }
- }
-
- std::unique_ptr<sbe::PlanStage> stage =
- std::make_unique<sbe::ColumnScanStage>(getCurrentCollection(reqs)->uuid(),
- csn->indexEntry.catalogName,
- std::move(paths),
- std::move(includeInOutput),
- ridSlot,
- reconstructedRecordSlot,
- rowStoreSlot,
- std::move(rowStoreExpr),
- std::move(filteredPaths),
- _yieldPolicy,
- csn->nodeId());
+ std::unique_ptr<sbe::PlanStage> stage = std::make_unique<sbe::ColumnScanStage>(
+ getCurrentCollection(reqs)->uuid(),
+ csn->indexEntry.catalogName,
+ std::vector<std::string>{csn->allFields.begin(), csn->allFields.end()},
+ ridSlot,
+ reconstructedRecordSlot,
+ rowStoreSlot,
+ std::move(rowStoreExpr),
+ _yieldPolicy,
+ csn->nodeId());
// Generate post assembly filter.
if (csn->postAssemblyFilter) {
@@ -838,7 +793,6 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder
csn->nodeId());
stage = std::move(outputStage.stage);
}
-
return {std::move(stage), std::move(outputs)};
}
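
For reference, the stage-builder block removed above also contained the logic that put "_id" first among the scanned paths (path order determines field order in the reconstructed record) and the 'includeInOutput' tagging; post-revert the paths are passed straight from csn->allFields. A small self-contained sketch of the removed "_id first" ordering step, with std::set<std::string> and orderPathsIdFirst as hypothetical stand-ins for the planner's field-set type and an inline loop:

    #include <iostream>
    #include <set>
    #include <string>
    #include <vector>

    // Sketch of the removed ordering: "_id" is emitted first, then the remaining
    // fields in their set order. std::set stands in for the planner's field set.
    std::vector<std::string> orderPathsIdFirst(const std::set<std::string>& allFields) {
        std::vector<std::string> paths;
        paths.reserve(allFields.size());
        if (allFields.count("_id")) {
            paths.push_back("_id");
        }
        for (const auto& path : allFields) {
            if (path != "_id") {
                paths.push_back(path);
            }
        }
        return paths;
    }

    int main() {
        for (const auto& path : orderPathsIdFirst({"a", "_id", "b"})) {
            std::cout << path << " ";  // prints: _id a b
        }
        std::cout << "\n";
        return 0;
    }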