SERVER-66102 Change column scan stage to read from true column index

author: Ian Boros <ian.boros@mongodb.com> 2022-05-03 19:18:43 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-05-11 23:03:48 +0000
commit: 83ae5393284912aded0087fd2cdf79edff6f999f (patch)
tree: f5b57ad8688a00fccedfb77883bcbf5b0aeeb100 /src
parent: 51bff7a1d0145afebb57a04e82bd962985801f06 (diff)
download: mongo-83ae5393284912aded0087fd2cdf79edff6f999f.tar.gz
13 files changed, 370 insertions, 396 deletions
diff --git a/src/mongo/db/exec/fake_column_cursor.h b/src/mongo/db/exec/fake_column_cursor.h
deleted file mode 100644
index 7a99381cf9a..00000000000
--- a/src/mongo/db/exec/fake_column_cursor.h
+++ /dev/null
@@ -1,208 +0,0 @@
-/**
- *    Copyright (C) 2022-present MongoDB, Inc.
- *
- *    This program is free software: you can redistribute it and/or modify
- *    it under the terms of the Server Side Public License, version 1,
- *    as published by MongoDB, Inc.
- *
- *    This program is distributed in the hope that it will be useful,
- *    but WITHOUT ANY WARRANTY; without even the implied warranty of
- *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *    Server Side Public License for more details.
- *
- *    You should have received a copy of the Server Side Public License
- *    along with this program. If not, see
- *    <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- *    As a special exception, the copyright holders give permission to link the
- *    code of portions of this program with the OpenSSL library under certain
- *    conditions as described in each individual source file and distribute
- *    linked combinations including the program with the OpenSSL library. You
- *    must comply with the Server Side Public License in all respects for
- *    all of the code used other than as permitted herein. If you modify file(s)
- *    with this exception, you may extend this exception to your version of the
- *    file(s), but you are not obligated to do so. If you do not wish to do so,
- *    delete this exception statement from your version. If you delete this
- *    exception statement from all source files in the program, then also delete
- *    it in the license file.
- */
-
-#pragma once
-
-#include "mongo/db/exec/sbe/values/bson.h"
-#include "mongo/db/exec/sbe/values/value.h"
-#include "mongo/db/index/column_key_generator.h"
-#include "mongo/db/storage/column_store.h"
-
-namespace mongo::sbe {
-
-struct FakeCell {
-    FakeCell(RecordId ridIn,
-             std::string arrayInfoIn,
-             std::vector<value::TypeTags> tagsIn,
-             std::vector<value::Value> valuesIn,
-             bool hasDuplicateFieldsIn,
-             bool hasSubPathsIn,
-             bool isSparseIn,
-             bool hasDoubleNestedArraysIn)
-        : rid(ridIn),
-          arrayInfo(std::move(arrayInfoIn)),
-          tags(std::move(tagsIn)),
-          vals(std::move(valuesIn)),
-          hasDuplicateFields(hasDuplicateFieldsIn),
-          hasSubPaths(hasSubPathsIn),
-          isSparse(isSparseIn),
-          hasDoubleNestedArrays(hasDoubleNestedArraysIn) {}
-    ~FakeCell() {
-        for (size_t i = 0; i < tags.size(); ++i) {
-            value::releaseValue(tags[i], vals[i]);
-        }
-    }
-    FakeCell(const FakeCell&) = delete;
-    FakeCell(FakeCell&&) = default;
-
-    FakeCell& operator=(const FakeCell&) = delete;
-    FakeCell& operator=(FakeCell&&) = default;
-
-
-    // This really doesn't belong in the cell, but it's convenient to put here.
-    RecordId rid;
-
-    std::string arrayInfo;
-
-    // Owned here.
-    std::vector<value::TypeTags> tags;
-    std::vector<value::Value> vals;
-
-    bool hasDuplicateFields;
-    bool hasSubPaths;
-    bool isSparse;
-    bool hasDoubleNestedArrays;
-};
-
-/**
- * Mock cursor for a single path in a column index. We use this instead of mocking the
- * ColumnStore::Cursor class because that allows traversing through the entire index, which
- * requires knowledge of all fields present across all documents.
- */
-class FakeCursorForPath {
-public:
-    FakeCursorForPath(StringData path, std::unique_ptr<SeekableRecordCursor> cursor)
-        : _path(path.toString()), _cursor(std::move(cursor)) {}
-    boost::optional<FakeCell> next() {
-        if (_eof)
-            return {};
-        return nextUntilResultIsFound(nullptr);
-    }
-    boost::optional<FakeCell> seekAtOrPast(RecordId rid) {
-        auto result = _cursor->seekNear(rid);
-        if (!result) {
-            return {};
-        }
-
-        // seekNear() will return the key directionally _before_ the one requested if the requested
-        // key does not exist.
-        if (result->id < rid) {
-            return nextUntilResultIsFound(nullptr);
-        }
-
-        return nextUntilResultIsFound(&*result);
-    }
-    boost::optional<FakeCell> seekExact(RecordId rid) {
-        auto res = _cursor->seekExact(rid);
-        if (res) {
-            if (auto cell = extractCellForRecord(*res)) {
-                return cell;
-            }
-        }
-        return {};
-    }
-
-    void save() {
-        if (_eof) {
-            return saveUnpositioned();
-        }
-        _cursor->save();
-    }
-    void saveUnpositioned() {
-        _eof = true;
-        _cursor->saveUnpositioned();
-    }
-
-    void restore() {
-        _cursor->restore();
-    }
-
-    void detachFromOperationContext() {
-        _cursor->detachFromOperationContext();
-    }
-    void reattachToOperationContext(OperationContext* opCtx) {
-        _cursor->reattachToOperationContext(opCtx);
-    }
-
-    const PathValue& path() const {
-        return _path;
-    }
-
-private:
-    boost::optional<FakeCell> extractCellForRecord(Record record) {
-        boost::optional<FakeCell> ret;
-        BSONObj bsonObj(record.data.data());
-
-        column_keygen::visitCellsForInsert(
-            bsonObj, [&](PathView path, const column_keygen::UnencodedCellView& cellView) {
-                if (path != _path) {
-                    return;
-                }
-
-                std::vector<value::TypeTags> tags;
-                std::vector<value::Value> vals;
-
-                for (auto elem : cellView.vals) {
-                    auto [t, v] = bson::convertFrom<false>(elem);
-                    tags.push_back(t);
-                    vals.push_back(v);
-                }
-
-                ret.emplace(record.id,
-                            cellView.arrayInfo.toString(),
-                            std::move(tags),
-                            std::move(vals),
-                            cellView.hasDuplicateFields,
-                            cellView.hasSubPaths,
-                            cellView.isSparse,
-                            cellView.hasDoubleNestedArrays);
-            });
-        return ret;
-    }
-
-    /*
-     * Iterates the underlying cursor until a result is found which contains _path and can be
-     * returned.
-     *
-     * If 'startRecord' is not null, it is assumed that the cursor is positioned at 'startRecord'
-     * and 'startRecord' will be considered as a possible record to return data from.  Otherwise,
-     * the first record considered will be the next result from the cursor.
-     */
-    boost::optional<FakeCell> nextUntilResultIsFound(Record* startRecord) {
-        boost::optional<Record> currentRecord;
-        if (startRecord) {
-            currentRecord = *startRecord;
-        } else {
-            currentRecord = _cursor->next();
-        }
-
-        while (currentRecord) {
-            if (auto ret = extractCellForRecord(*currentRecord)) {
-                return ret;
-            }
-            currentRecord = _cursor->next();
-        }
-
-        return boost::none;
-    }
-    const PathValue _path;
-    bool _eof = false;
-    const std::unique_ptr<SeekableRecordCursor> _cursor;
-};
-}  // namespace mongo::sbe
diff --git a/src/mongo/db/exec/sbe/stages/column_scan.cpp b/src/mongo/db/exec/sbe/stages/column_scan.cpp
index ef70102a8c0..0cc99871da1 100644
--- a/src/mongo/db/exec/sbe/stages/column_scan.cpp
+++ b/src/mongo/db/exec/sbe/stages/column_scan.cpp
@@ -32,10 +32,23 @@
 
 #include "mongo/db/exec/sbe/expressions/expression.h"
 #include "mongo/db/exec/sbe/size_estimator.h"
+#include "mongo/db/exec/sbe/values/column_store_encoder.h"
 #include "mongo/db/exec/sbe/values/columnar.h"
+#include "mongo/db/index/columns_access_method.h"
 
 namespace mongo {
 namespace sbe {
+namespace {
+TranslatedCell translateCell(PathView path, const SplitCellView& splitCellView) {
+    value::ColumnStoreEncoder encoder;
+    SplitCellView::Cursor<value::ColumnStoreEncoder> cellCursor =
+        splitCellView.subcellValuesGenerator<value::ColumnStoreEncoder>(std::move(encoder));
+    return TranslatedCell{splitCellView.arrInfo, path, std::move(cellCursor)};
+}
+
+
+}  // namespace
+
 ColumnScanStage::ColumnScanStage(UUID collectionUuid,
                                  StringData columnIndexName,
                                  value::SlotVector fieldSlots,
@@ -84,7 +97,7 @@ void ColumnScanStage::prepare(CompileCtx& ctx) {
 
     for (size_t idx = 0; idx < _outputFields.size(); ++idx) {
         auto [it, inserted] = _outputFieldsMap.emplace(_fieldSlots[idx], &_outputFields[idx]);
-        uassert(6298601, str::stream() << "duplicate slot: " << _fieldSlots[idx], inserted);
+        uassert(6610212, str::stream() << "duplicate slot: " << _fieldSlots[idx], inserted);
     }
 
     if (_recordSlot) {
@@ -104,8 +117,16 @@ void ColumnScanStage::prepare(CompileCtx& ctx) {
         _pathExprsCode.emplace_back(expr->compile(ctx));
     }
 
-    tassert(6298602, "'_coll' should not be initialized prior to 'acquireCollection()'", !_coll);
+    tassert(6610200, "'_coll' should not be initialized prior to 'acquireCollection()'", !_coll);
     std::tie(_coll, _collName, _catalogEpoch) = acquireCollection(_opCtx, _collUuid);
+
+    auto indexCatalog = _coll->getIndexCatalog();
+    auto indexDesc = indexCatalog->findIndexByName(_opCtx, _columnIndexName);
+    tassert(6610201,
+            str::stream() << "could not find index named '" << _columnIndexName
+                          << "' in collection '" << _collName << "'",
+            indexDesc);
+    _weakIndexCatalogEntry = indexCatalog->getEntryShared(indexDesc);
 }
 
 value::SlotAccessor* ColumnScanStage::getAccessor(CompileCtx& ctx, value::SlotId slot) {
@@ -128,6 +149,10 @@ value::SlotAccessor* ColumnScanStage::getAccessor(CompileCtx& ctx, value::SlotId
 }
 
 void ColumnScanStage::doSaveState(bool relinquishCursor) {
+    for (auto& cursor : _columnCursors) {
+        cursor.makeOwned();
+    }
+
     if (_rowStoreCursor && relinquishCursor) {
         _rowStoreCursor->save();
     }
@@ -137,10 +162,10 @@ void ColumnScanStage::doSaveState(bool relinquishCursor) {
     }
 
     for (auto& cursor : _columnCursors) {
-        cursor.cursor->save();
+        cursor.cursor().save();
     }
     for (auto& [path, cursor] : _parentPathCursors) {
-        cursor->save();
+        cursor->saveUnpositioned();
     }
 
     _coll.reset();
@@ -155,9 +180,14 @@ void ColumnScanStage::doRestoreState(bool relinquishCursor) {
         return;
     }
 
-    tassert(6298603, "Catalog epoch should be initialized", _catalogEpoch);
+    tassert(6610202, "Catalog epoch should be initialized", _catalogEpoch);
     _coll = restoreCollection(_opCtx, *_collName, _collUuid, *_catalogEpoch);
 
+    auto indexCatalogEntry = _weakIndexCatalogEntry.lock();
+    uassert(ErrorCodes::QueryPlanKilled,
+            str::stream() << "query plan killed :: index '" << _columnIndexName << "' dropped",
+            indexCatalogEntry && !indexCatalogEntry->isDropped());
+
     if (_rowStoreCursor) {
         if (relinquishCursor) {
             const bool couldRestore = _rowStoreCursor->restore();
@@ -166,7 +196,7 @@ void ColumnScanStage::doRestoreState(bool relinquishCursor) {
     }
 
     for (auto& cursor : _columnCursors) {
-        cursor.cursor->restore();
+        cursor.cursor().restore();
     }
     for (auto& [path, cursor] : _parentPathCursors) {
         cursor->restore();
@@ -178,7 +208,7 @@ void ColumnScanStage::doDetachFromOperationContext() {
         _rowStoreCursor->detachFromOperationContext();
     }
     for (auto& cursor : _columnCursors) {
-        cursor.cursor->detachFromOperationContext();
+        cursor.cursor().detachFromOperationContext();
     }
     for (auto& [path, cursor] : _parentPathCursors) {
         cursor->detachFromOperationContext();
@@ -190,7 +220,7 @@ void ColumnScanStage::doAttachToOperationContext(OperationContext* opCtx) {
         _rowStoreCursor->reattachToOperationContext(opCtx);
     }
     for (auto& cursor : _columnCursors) {
-        cursor.cursor->reattachToOperationContext(opCtx);
+        cursor.cursor().reattachToOperationContext(opCtx);
     }
     for (auto& [path, cursor] : _parentPathCursors) {
         cursor->reattachToOperationContext(opCtx);
@@ -214,18 +244,18 @@ void ColumnScanStage::open(bool reOpen) {
     invariant(_opCtx);
 
     if (_open) {
-        tassert(6298604, "reopened ColumnScanStage but reOpen=false", reOpen);
-        tassert(6298605, "ColumnScanStage is open but _coll is not null", _coll);
-        tassert(6298606, "ColumnScanStage is open but don't have _rowStoreCursor", _rowStoreCursor);
+        tassert(6610203, "reopened ColumnScanStage but reOpen=false", reOpen);
+        tassert(6610204, "ColumnScanStage is open but _coll is not null", _coll);
+        tassert(6610205, "ColumnScanStage is open but don't have _rowStoreCursor", _rowStoreCursor);
     } else {
-        tassert(6298607, "first open to ColumnScanStage but reOpen=true", !reOpen);
+        tassert(6610206, "first open to ColumnScanStage but reOpen=true", !reOpen);
         if (!_coll) {
             // We're being opened after 'close()'. We need to re-acquire '_coll' in this case and
             // make some validity checks (the collection has not been dropped, renamed, etc.).
             tassert(
-                6298608, "ColumnScanStage is not open but have _rowStoreCursor", !_rowStoreCursor);
-            tassert(6298609, "Collection name should be initialized", _collName);
-            tassert(6298610, "Catalog epoch should be initialized", _catalogEpoch);
+                6610207, "ColumnScanStage is not open but have _rowStoreCursor", !_rowStoreCursor);
+            tassert(6610208, "Collection name should be initialized", _collName);
+            tassert(6610209, "Catalog epoch should be initialized", _catalogEpoch);
             _coll = restoreCollection(_opCtx, *_collName, _collUuid, *_catalogEpoch);
         }
     }
@@ -235,24 +265,29 @@ void ColumnScanStage::open(bool reOpen) {
     }
 
     if (_columnCursors.empty()) {
-        // We fake the special Row ID column by just using _id, for now.
-        _columnCursors.push_back(ColumnCursor{
-            std::make_unique<FakeCursorForPath>("_id", _coll->getCursor(_opCtx, true)),
-            boost::none,
-            false /* add to document */
-        });
+        auto entry = _weakIndexCatalogEntry.lock();
+        tassert(6610210,
+                str::stream() << "expected IndexCatalogEntry for index named: " << _columnIndexName,
+                static_cast<bool>(entry));
+
+        auto iam = static_cast<ColumnStoreAccessMethod*>(entry->accessMethod());
+
+        // Eventually we can not include this column for the cases where a known dense column (_id)
+        // is being read anyway.
+        _columnCursors.push_back(ColumnCursor(iam->storage()->newCursor(_opCtx, "\xFF"_sd),
+                                              false /* add to document */));
 
         for (auto&& path : _paths) {
-            _columnCursors.push_back(ColumnCursor{
-                std::make_unique<FakeCursorForPath>(path, _coll->getCursor(_opCtx, true)),
-                boost::none,
-                true /* add to document */
-            });
+            _columnCursors.push_back(
+                ColumnCursor(iam->storage()->newCursor(_opCtx, path), true /* add to document */));
         }
     }
 
+    for (auto& columnCursor : _columnCursors) {
+        columnCursor.seekAtOrPast(RecordId());
+    }
+
     _open = true;
-    _firstGetNext = true;
 }
 
 void ColumnScanStage::readParentsIntoObj(StringData path,
@@ -282,21 +317,28 @@ void ColumnScanStage::readParentsIntoObj(StringData path,
     // If we inserted a new entry, replace the null with an actual cursor.
     if (inserted) {
         invariant(it->second == nullptr);
-        it->second = std::make_unique<FakeCursorForPath>(*parent, _coll->getCursor(_opCtx, true));
+
+        auto entry = _weakIndexCatalogEntry.lock();
+        tassert(6610211,
+                str::stream() << "expected IndexCatalogEntry for index named: " << _columnIndexName,
+                static_cast<bool>(entry));
+        auto iam = static_cast<ColumnStoreAccessMethod*>(entry->accessMethod());
+        it->second = iam->storage()->newCursor(_opCtx, *parent);
     }
 
-    auto optCell = it->second->seekExact(_recordId);
-    pathsReadSetOut->insert(*parent);
+    boost::optional<SplitCellView> splitCellView;
+    if (auto optCell = it->second->seekExact(_recordId)) {
+        splitCellView = SplitCellView::parse(optCell->value);
+    }
 
-    if (!optCell || optCell->isSparse) {
+    pathsReadSetOut->insert(*parent);
+    if (!splitCellView || splitCellView->isSparse) {
         // We need this cell's parent too.
         readParentsIntoObj(*parent, outObj, pathsReadSetOut, false);
     }
 
-    if (optCell) {
-        auto translatedCell =
-            TranslatedCell{optCell->arrayInfo, *parent, optCell->tags, optCell->vals};
-
+    if (splitCellView) {
+        auto translatedCell = translateCell(*parent, *splitCellView);
         addCellToObject(translatedCell, *outObj);
     }
 }
@@ -311,17 +353,10 @@ PlanState ColumnScanStage::getNext() {
 
     checkForInterrupt(_opCtx);
 
-    if (_firstGetNext) {
-        for (size_t i = 0; i < _columnCursors.size(); ++i) {
-            _columnCursors[i].next();
-        }
-        _firstGetNext = false;
-    }
-
     // Find minimum record ID of all column cursors.
     _recordId = RecordId();
     for (auto& cursor : _columnCursors) {
-        auto& result = cursor.lastCell;
+        auto& result = cursor.lastCell();
         if (result && (_recordId.isNull() || result->rid < _recordId)) {
             _recordId = result->rid;
         }
@@ -338,38 +373,35 @@ PlanState ColumnScanStage::getNext() {
     StringDataSet parentPathsRead;
     bool useRowStore = false;
     for (size_t i = 0; i < _columnCursors.size(); ++i) {
-        auto& lastCell = _columnCursors[i].lastCell;
+        auto& lastCell = _columnCursors[i].lastCell();
+        const auto& path = _columnCursors[i].path();
 
-        const FakeCell* cellForRid =
-            (lastCell && lastCell->rid == _recordId) ? lastCell.get_ptr() : nullptr;
-        const auto& path = _columnCursors[i].cursor->path();
+        boost::optional<SplitCellView> splitCellView;
+        if (lastCell && lastCell->rid == _recordId) {
+            splitCellView = SplitCellView::parse(lastCell->value);
+        }
 
-        if (cellForRid && (cellForRid->hasSubPaths || cellForRid->hasDuplicateFields)) {
+        if (splitCellView && (splitCellView->hasSubPaths || splitCellView->hasDuplicateFields)) {
             useRowStore = true;
-        } else if (!useRowStore && _columnCursors[i].includeInOutput) {
-            if (!cellForRid || cellForRid->isSparse) {
+        } else if (!useRowStore && _columnCursors[i].includeInOutput()) {
+            if (!splitCellView || splitCellView->isSparse) {
                 // Must read in the parent information first.
                 readParentsIntoObj(path, &outObj, &parentPathsRead);
             }
 
-            if (cellForRid) {
-                auto translatedCell = TranslatedCell{cellForRid->arrayInfo,
-                                                     _columnCursors[i].path(),
-                                                     cellForRid->tags,
-                                                     cellForRid->vals};
+            if (splitCellView) {
+                auto translatedCell = translateCell(path, *splitCellView);
 
                 addCellToObject(translatedCell, outObj);
             }
         }
 
-        if (cellForRid) {
+        if (splitCellView) {
             _columnCursors[i].next();
         }
     }
 
     if (useRowStore) {
-        std::cout << "Using row store for rid " << _recordId << std::endl;
-
         // TODO: In some cases we can avoid calling seek() on the row store cursor, and instead do
         // a next() which should be much cheaper.
         auto record = _rowStoreCursor->seekExact(_recordId);
diff --git a/src/mongo/db/exec/sbe/stages/column_scan.h b/src/mongo/db/exec/sbe/stages/column_scan.h
index e555d10a114..d00d4641171 100644
--- a/src/mongo/db/exec/sbe/stages/column_scan.h
+++ b/src/mongo/db/exec/sbe/stages/column_scan.h
@@ -30,10 +30,10 @@
 #pragma once
 
 #include "mongo/config.h"
-#include "mongo/db/exec/fake_column_cursor.h"
 #include "mongo/db/exec/sbe/expressions/expression.h"
 #include "mongo/db/exec/sbe/stages/collection_helpers.h"
 #include "mongo/db/exec/sbe/stages/stages.h"
+#include "mongo/db/storage/column_store.h"
 
 namespace mongo {
 namespace sbe {
@@ -78,29 +78,72 @@ protected:
         TrialRunTracker* tracker, TrialRunTrackerAttachResultMask childrenAttachResult) override;
 
 private:
-    struct ColumnCursor {
-        std::unique_ptr<FakeCursorForPath> cursor;
-        boost::optional<FakeCell> lastCell;
-        bool includeInOutput = false;
+    class ColumnCursor {
+    public:
+        ColumnCursor(std::unique_ptr<ColumnStore::CursorForPath> curs, bool includeInResult)
+            : _cursor(std::move(curs)), _includeInOutput(includeInResult) {}
 
-        boost::optional<FakeCell>& next() {
+
+        boost::optional<FullCellView>& next() {
             // TODO For some reason the destructor of 'lastCell' is not called
             // on my local asan build unless we explicitly reset it. Maybe
             // the same compiler bug Nikita ran into?
-            lastCell.reset();
-            lastCell = cursor->next();
-            return lastCell;
+            _lastCell.reset();
+            _lastCell = _cursor->next();
+            clearOwned();
+            return _lastCell;
         }
 
-        boost::optional<FakeCell>& seekAtOrPast(RecordId id) {
-            lastCell.reset();
-            lastCell = cursor->seekAtOrPast(id);
-            return lastCell;
+        boost::optional<FullCellView>& seekAtOrPast(RecordId id) {
+            _lastCell.reset();
+            _lastCell = _cursor->seekAtOrPast(id);
+            clearOwned();
+            return _lastCell;
         }
 
         const PathValue& path() const {
-            return cursor->path();
+            return _cursor->path();
+        }
+
+        /*
+         * Copies any data owned by the storage engine into a locally owned buffer.
+         */
+        void makeOwned() {
+            if (_lastCell && _pathOwned.empty() && _cellOwned.empty()) {
+                _pathOwned.insert(
+                    _pathOwned.begin(), _lastCell->path.begin(), _lastCell->path.end());
+                _lastCell->path = StringData(_pathOwned);
+
+                _cellOwned.insert(
+                    _cellOwned.begin(), _lastCell->value.begin(), _lastCell->value.end());
+                _lastCell->value = StringData(_cellOwned.data(), _cellOwned.size());
+            }
+        }
+        ColumnStore::CursorForPath& cursor() {
+            return *_cursor;
+        }
+        bool includeInOutput() const {
+            return _includeInOutput;
+        }
+        boost::optional<FullCellView>& lastCell() {
+            return _lastCell;
         }
+
+    private:
+        void clearOwned() {
+            _pathOwned.clear();
+            _cellOwned.clear();
+        }
+
+        std::unique_ptr<ColumnStore::CursorForPath> _cursor;
+        bool _includeInOutput = false;
+
+        boost::optional<FullCellView> _lastCell;
+
+        // These members are used to store owned copies of the path and the cell data when preparing
+        // for yield.
+        std::string _pathOwned;
+        std::vector<char> _cellOwned;
     };
 
     void readParentsIntoObj(StringData path,
@@ -140,15 +183,15 @@ private:
 
     CollectionPtr _coll;
 
+    std::weak_ptr<const IndexCatalogEntry> _weakIndexCatalogEntry;
     std::unique_ptr<SeekableRecordCursor> _rowStoreCursor;
 
     std::vector<ColumnCursor> _columnCursors;
-    StringMap<std::unique_ptr<FakeCursorForPath>> _parentPathCursors;
+    StringMap<std::unique_ptr<ColumnStore::CursorForPath>> _parentPathCursors;
 
     RecordId _recordId;
 
     bool _open{false};
-    bool _firstGetNext{false};
 
     // If provided, used during a trial run to accumulate certain execution stats. Once the trial
     // run is complete, this pointer is reset to nullptr.
diff --git a/src/mongo/db/exec/sbe/values/columnar.cpp b/src/mongo/db/exec/sbe/values/columnar.cpp
index 5292ba54bea..7490d549803 100644
--- a/src/mongo/db/exec/sbe/values/columnar.cpp
+++ b/src/mongo/db/exec/sbe/values/columnar.cpp
@@ -97,9 +97,10 @@ private:
  * Tracks state necessary for reconstructing objects including the array info reader, the values
  * available for extraction, and the path.
  */
+template <class C>
 class AddToDocumentState {
 public:
-    AddToDocumentState(TranslatedCell& translatedCell, ArrInfoReader reader)
+    AddToDocumentState(C& translatedCell, ArrInfoReader reader)
         : cell(translatedCell), arrInfoReader(std::move(reader)) {}
 
     /*
@@ -146,7 +147,7 @@ public:
         return offsetInPath == cell.path.size();
     }
 
-    TranslatedCell& cell;
+    C& cell;
     ArrInfoReader arrInfoReader;
 
 private:
@@ -218,9 +219,10 @@ value::Array* findOrAddArrInArr(size_t idx, sbe::value::Array* arr) {
  * Adds the given tag,val SBE value to the 'out' object, assuming that there are no arrays along
  * the remaining path stored by 'state'.
  */
+template <class C>
 void addToObjectNoArrays(value::TypeTags tag,
                          value::Value val,
-                         AddToDocumentState& state,
+                         AddToDocumentState<C>& state,
                          value::Object& out,
                          size_t idx) {
     state.withNextPathComponent([&](StringData nextPathComponent) {
@@ -235,8 +237,11 @@ void addToObjectNoArrays(value::TypeTags tag,
     });
 }
 
-void addToObject(value::Object& obj, AddToDocumentState& state);
-void addToArray(value::Array& arr, AddToDocumentState& state) {
+template <class C>
+void addToObject(value::Object& obj, AddToDocumentState<C>& state);
+
+template <class C>
+void addToArray(value::Array& arr, AddToDocumentState<C>& state) {
     size_t index = state.arrInfoReader.takeNumber();
     auto ensureArraySize = [&]() {
         while (arr.size() <= index) {
@@ -310,7 +315,8 @@ void addToArray(value::Array& arr, AddToDocumentState& state) {
     }
 }
 
-void addToObject(value::Object& obj, AddToDocumentState& state) {
+template <class C>
+void addToObject(value::Object& obj, AddToDocumentState<C>& state) {
     state.withNextPathComponent([&](StringData field) {
         switch (state.arrInfoReader.takeNextChar()) {
             case '{': {
@@ -332,7 +338,8 @@ void addToObject(value::Object& obj, AddToDocumentState& state) {
     });
 }
 
-void addEmptyObjectIfNotPresent(AddToDocumentState& state, value::Object& out) {
+template <class C>
+void addEmptyObjectIfNotPresent(AddToDocumentState<C>& state, value::Object& out) {
     // Add an object to the path.
     state.withNextPathComponent([&](StringData nextPathComponent) {
         auto* innerObj = findOrAddObjInObj(nextPathComponent, &out);
@@ -342,8 +349,10 @@ void addEmptyObjectIfNotPresent(AddToDocumentState& state, value::Object& out) {
     });
 }
 }  // namespace
-void addCellToObject(TranslatedCell& cell, value::Object& out) {
-    AddToDocumentState state{cell, ArrInfoReader{cell.arrInfo}};
+
+template <class C>
+void addCellToObject(C& cell, value::Object& out) {
+    AddToDocumentState<C> state{cell, ArrInfoReader{cell.arrInfo}};
 
     if (cell.arrInfo.empty()) {
         if (cell.moreValues()) {
@@ -361,4 +370,7 @@ void addCellToObject(TranslatedCell& cell, value::Object& out) {
     addToObject(out, state);
     invariant(!state.arrInfoReader.moreExplicitComponents());
 }
+
+template void addCellToObject<TranslatedCell>(TranslatedCell& cell, value::Object& out);
+template void addCellToObject<MockTranslatedCell>(MockTranslatedCell& cell, value::Object& out);
 }  // namespace mongo::sbe
diff --git a/src/mongo/db/exec/sbe/values/columnar.h b/src/mongo/db/exec/sbe/values/columnar.h
index a6f8e4f651f..ec0fb5f07c8 100644
--- a/src/mongo/db/exec/sbe/values/columnar.h
+++ b/src/mongo/db/exec/sbe/values/columnar.h
@@ -31,6 +31,7 @@
 
 #include "mongo/config.h"
 #include "mongo/db/exec/sbe/expressions/expression.h"
+#include "mongo/db/exec/sbe/values/column_store_encoder.h"
 #include "mongo/db/storage/column_store.h"
 
 /**
@@ -43,7 +44,24 @@ namespace mongo::sbe {
  */
 struct TranslatedCell {
     StringData arrInfo;
+    PathView path;
+
+    SplitCellView::Cursor<value::ColumnStoreEncoder> cursor;
+
+    std::pair<value::TypeTags, value::Value> nextValue() {
+        auto next = cursor.nextValue();
+        invariant(next);
+        return *next;
+    }
 
+    bool moreValues() const {
+        return cursor.hasNext();
+    }
+};
+
+// For testing only.
+struct MockTranslatedCell {
+    StringData arrInfo;
     PathView path;
 
     std::vector<value::TypeTags> types;
@@ -64,10 +82,10 @@ struct TranslatedCell {
     }
 };
 
-
 /*
  * Adds translated cell to an object. This must not be called on an object
  * which has a structure that is incompatible with the structure described in the cell.
  */
-void addCellToObject(TranslatedCell& cell, value::Object& out);
+template <class T>
+void addCellToObject(T& cell, value::Object& out);
 }  // namespace mongo::sbe
diff --git a/src/mongo/db/exec/sbe/values/columnar_test.cpp b/src/mongo/db/exec/sbe/values/columnar_test.cpp
index 6870bc382f4..9dc9e7717d0 100644
--- a/src/mongo/db/exec/sbe/values/columnar_test.cpp
+++ b/src/mongo/db/exec/sbe/values/columnar_test.cpp
@@ -38,13 +38,13 @@
 #include "mongo/db/storage/column_store.h"
 
 namespace mongo::sbe {
-void makeObjFromColumns(std::vector<TranslatedCell>& cells, value::Object& out) {
+void makeObjFromColumns(std::vector<MockTranslatedCell>& cells, value::Object& out) {
     for (auto& cell : cells) {
         addCellToObject(cell, out);
     }
 }
 
-void compareMakeObjWithExpected(std::vector<TranslatedCell>& cells, const BSONObj& expected) {
+void compareMakeObjWithExpected(std::vector<MockTranslatedCell>& cells, const BSONObj& expected) {
     auto [expectedTag, expectedVal] = stage_builder::makeValue(expected);
     value::ValueGuard expectedGuard{expectedTag, expectedVal};
 
@@ -56,72 +56,73 @@ void compareMakeObjWithExpected(std::vector<TranslatedCell>& cells, const BSONOb
                                             expectedVal);
 }
 
-TranslatedCell makeCellOfIntegers(StringData path,
-                                  StringData arrInfo,
-                                  std::vector<unsigned int> vals) {
-    return TranslatedCell{arrInfo,
-                          path,
-                          std::vector<value::TypeTags>(vals.size(), value::TypeTags::NumberInt32),
-                          std::vector<value::Value>(vals.begin(), vals.end())};
+MockTranslatedCell makeCellOfIntegers(StringData path,
+                                      StringData arrInfo,
+                                      std::vector<unsigned int> vals) {
+    return MockTranslatedCell{
+        arrInfo,
+        path,
+        std::vector<value::TypeTags>(vals.size(), value::TypeTags::NumberInt32),
+        std::vector<value::Value>(vals.begin(), vals.end())};
 }
 
 TEST(ColumnarObjTest, MakeObjNoArrTest) {
-    std::vector<TranslatedCell> cells{makeCellOfIntegers("a.b", "", {32}),
-                                      makeCellOfIntegers("a.d", "", {36})};
+    std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a.b", "", {32}),
+                                          makeCellOfIntegers("a.d", "", {36})};
     compareMakeObjWithExpected(cells, fromjson("{a: {b: 32, d: 36}}"));
 }
 
 TEST(ColumnarObjTest, MakeObjArrOfScalarsTest) {
-    std::vector<TranslatedCell> cells{makeCellOfIntegers("a", "[", {32, 33, 34, 35, 36})};
+    std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a", "[", {32, 33, 34, 35, 36})};
     compareMakeObjWithExpected(cells, fromjson("{a: [32, 33, 34, 35, 36]}"));
 }
 
 TEST(ColumnarObjTest, MakeObjSingletonArrayOfObj) {
-    std::vector<TranslatedCell> cells{makeCellOfIntegers("a.b", "[", {32})};
+    std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a.b", "[", {32})};
     compareMakeObjWithExpected(cells, fromjson("{a: [{b:32}]}"));
 }
 
 TEST(ColumnarObjTest, MakeObjArrOfObjectsTest) {
-    std::vector<TranslatedCell> cells{makeCellOfIntegers("a.b", "[", {1, 2, 3})};
+    std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a.b", "[", {1, 2, 3})};
 
     compareMakeObjWithExpected(
         cells, BSON("a" << BSON_ARRAY(BSON("b" << 1) << BSON("b" << 2) << BSON("b" << 3))));
 }
 
 TEST(ColumnarObjTest, MakeObjBasicArrOfObjectsWithMultipleFields) {
-    std::vector<TranslatedCell> cells{makeCellOfIntegers("a.b", "[", {1, 2, 3}),
-                                      makeCellOfIntegers("a.c", "[", {101, 102, 103})};
+    std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a.b", "[", {1, 2, 3}),
+                                          makeCellOfIntegers("a.c", "[", {101, 102, 103})};
     compareMakeObjWithExpected(cells,
                                fromjson("{a: [{b:1, c:101}, {b:2, c: 102}, {b:3, c: 103}]}"));
 }
 
 TEST(ColumnarObjTest, MakeObjComplexLeafArray) {
-    std::vector<TranslatedCell> cells{makeCellOfIntegers("a", "", {}),
-                                      makeCellOfIntegers("a.b", "", {}),
-                                      makeCellOfIntegers("a.b.c", "{{[[|1][|]", {1, 2, 3, 4})};
+    std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a", "", {}),
+                                          makeCellOfIntegers("a.b", "", {}),
+                                          makeCellOfIntegers("a.b.c", "{{[[|1][|]", {1, 2, 3, 4})};
     compareMakeObjWithExpected(cells, fromjson("{a:{b:{c:[[1,2],[3],4]}}}"));
 }
 
 TEST(ColumnarObjTest, MakeObjArrayOfArrays) {
-    std::vector<TranslatedCell> cells{makeCellOfIntegers("a", "[[|1][[|]", {1, 2, 3, 4})};
+    std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a", "[[|1][[|]", {1, 2, 3, 4})};
     compareMakeObjWithExpected(cells, fromjson("{a: [[1,2], [[3], 4]]}"));
 }
 
 TEST(ColumnarObjTest, MakeObjArrayOfMixed) {
-    std::vector<TranslatedCell> cells{makeCellOfIntegers("a", "[|+2", {1, 4}),
-                                      makeCellOfIntegers("a.b", "[1", {2}),
-                                      makeCellOfIntegers("a.c", "[2", {3})};
+    std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a", "[|+2", {1, 4}),
+                                          makeCellOfIntegers("a.b", "[1", {2}),
+                                          makeCellOfIntegers("a.c", "[2", {3})};
     compareMakeObjWithExpected(cells, fromjson("{a: [1, {b:2}, {c: 3}, 4]}"));
 }
 
 TEST(ColumnarObjTest, MakeObjArrayOfMixed2) {
-    std::vector<TranslatedCell> cells{makeCellOfIntegers("a", "[|o", {1, 3}),
-                                      makeCellOfIntegers("a.b", "[1{[", {2})};
+    std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a", "[|o", {1, 3}),
+                                          makeCellOfIntegers("a.b", "[1{[", {2})};
     compareMakeObjWithExpected(cells, fromjson("{a:[1,{b:[2]},3]}"));
 }
 
 TEST(ColumnarObjTest, MakeObjTopLevelArrayOfMixed) {
-    std::vector<TranslatedCell> cells{
+    std::vector<MockTranslatedCell> cells{
         makeCellOfIntegers("a", "[o1|o5", {99}),
         makeCellOfIntegers("a.b", "[{[|3]+2{[[|]]{[", {101, 0, 101, 1, 2, 3, 4, 5, 6}),
         makeCellOfIntegers("a.c", "[1|+1", {0, 1}),
@@ -140,15 +141,16 @@ TEST(ColumnarObjTest, MakeObjTopLevelArrayOfMixed) {
 }
 
 TEST(ColumnarObjTest, MakeObjTopLevelObjWithMixedArrays) {
-    std::vector<TranslatedCell> cells{makeCellOfIntegers("a", "", {}),
-                                      makeCellOfIntegers("a.b", "{[[|1][o]", {1, 2, 2}),
-                                      makeCellOfIntegers("a.b.c", "{[1[{[[|1][|]", {1, 2, 99, 2}),
-                                      makeCellOfIntegers("a.x", "", {1})};
+    std::vector<MockTranslatedCell> cells{
+        makeCellOfIntegers("a", "", {}),
+        makeCellOfIntegers("a.b", "{[[|1][o]", {1, 2, 2}),
+        makeCellOfIntegers("a.b.c", "{[1[{[[|1][|]", {1, 2, 99, 2}),
+        makeCellOfIntegers("a.x", "", {1})};
     compareMakeObjWithExpected(cells, fromjson("{a:{b:[[1,2],[{c:[[1,2],[99],2]}],2], x:1}}"));
 }
 
 TEST(ColumnarObjTest, MakeObjTopLevelArrayWithSubArrays) {
-    std::vector<TranslatedCell> cells{
+    std::vector<MockTranslatedCell> cells{
         makeCellOfIntegers("a", "[[|1][o]", {1, 2, 2}),
         makeCellOfIntegers("a.b", "[1[{[[|1][o]", {1, 2, 2}),
         makeCellOfIntegers("a.b.c", "[1[{[1[{[[|1][|]", {1, 2, 3, 4})};
@@ -157,46 +159,46 @@ TEST(ColumnarObjTest, MakeObjTopLevelArrayWithSubArrays) {
 }
 
 TEST(ColumnarObjTest, MakeTopLevelArrayOfObjsSparse) {
-    std::vector<TranslatedCell> cells{makeCellOfIntegers("a", "[o1", {}),
-                                      makeCellOfIntegers("a.b", "[o", {1}),
-                                      makeCellOfIntegers("a.b.c", "[", {1})};
+    std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a", "[o1", {}),
+                                          makeCellOfIntegers("a.b", "[o", {1}),
+                                          makeCellOfIntegers("a.b.c", "[", {1})};
     compareMakeObjWithExpected(cells, fromjson("{a:[{b:{c:1}},{b:1}]}"));
 }
 
 TEST(ColumnarObjTest, MakeObjEmptyTopLevelField) {
-    std::vector<TranslatedCell> cells{makeCellOfIntegers("", "[", {1, 2, 3})};
+    std::vector<MockTranslatedCell> cells{makeCellOfIntegers("", "[", {1, 2, 3})};
     compareMakeObjWithExpected(cells, fromjson("{'': [1,2,3]}"));
 }
 
 TEST(ColumnarObjTest, MakeObjEmptyFieldWhichIsObject) {
-    std::vector<TranslatedCell> cells{makeCellOfIntegers("..a", "{{[", {1, 2, 3})};
+    std::vector<MockTranslatedCell> cells{makeCellOfIntegers("..a", "{{[", {1, 2, 3})};
     compareMakeObjWithExpected(cells, fromjson("{'': {'': {a: [1,2,3]}}}"));
 }
 
 TEST(ColumnarObjTest, MakeObjEmptyFieldWhichIsLeaf) {
-    std::vector<TranslatedCell> cells{makeCellOfIntegers("a.", "{[", {1, 2, 3})};
+    std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a.", "{[", {1, 2, 3})};
     compareMakeObjWithExpected(cells, fromjson("{a: {'': [1,2,3]}}"));
 }
 
 TEST(ColumnarObjTest, AddTopLevelNonLeafCellWithoutArrayInfoToObject) {
     // Cell with no array info or values indicates an object.
-    std::vector<TranslatedCell> cells{makeCellOfIntegers("a", "", {})};
+    std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a", "", {})};
     compareMakeObjWithExpected(cells, fromjson("{a: {}}"));
 }
 
 TEST(ColumnarObjTest, AddNonLeafCellWithoutArrayInfoToObject) {
     // Cell with no array info or values indicates an object.
-    std::vector<TranslatedCell> cells{makeCellOfIntegers("a.b", "", {})};
+    std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a.b", "", {})};
     compareMakeObjWithExpected(cells, fromjson("{a: {b: {}}}"));
 }
 
 TEST(ColumnarObjTest, AddTopLevelNonLeafCellWithArrayInfoToObject) {
-    std::vector<TranslatedCell> cells{makeCellOfIntegers("a", "[o1", {})};
+    std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a", "[o1", {})};
     compareMakeObjWithExpected(cells, fromjson("{a: [{}, {}]}"));
 }
 
 TEST(ColumnarObjTest, AddNonLeafCellWithArrayInfoToObject) {
-    std::vector<TranslatedCell> cells{makeCellOfIntegers("a.b", "{[o1", {})};
+    std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a.b", "{[o1", {})};
     compareMakeObjWithExpected(cells, fromjson("{a: {b: [{}, {}]}}"));
 }
 }  // namespace mongo::sbe
diff --git a/src/mongo/db/index/columns_access_method.cpp b/src/mongo/db/index/columns_access_method.cpp
index ce0e5e7225b..015886f4e18 100644
--- a/src/mongo/db/index/columns_access_method.cpp
+++ b/src/mongo/db/index/columns_access_method.cpp
@@ -151,12 +151,20 @@ Status ColumnStoreAccessMethod::BulkBuilder::commit(OperationContext* opCtx,
                                                     const RecordIdHandlerFn& onDuplicateRecord) {
     static constexpr size_t kBufferBlockSize = 1024;
     SharedBufferFragmentBuilder pooledBufferBuilder(kBufferBlockSize);
-    return _columnsAccess->insert(opCtx,
-                                  pooledBufferBuilder,
-                                  collection,
-                                  _deferredInserts,
-                                  InsertDeleteOptions{},
-                                  &_keysInserted);
+
+    WriteUnitOfWork wunit(opCtx);
+    auto status = _columnsAccess->insert(opCtx,
+                                         pooledBufferBuilder,
+                                         collection,
+                                         _deferredInserts,
+                                         InsertDeleteOptions{},
+                                         &_keysInserted);
+    if (!status.isOK()) {
+        return status;
+    }
+
+    wunit.commit();
+    return Status::OK();
 }
 
 void ColumnStoreAccessMethod::insertOne(OperationContext* opCtx,
diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp
index 9ca9ce66f56..fa810499cb4 100644
--- a/src/mongo/db/query/query_planner.cpp
+++ b/src/mongo/db/query/query_planner.cpp
@@ -218,9 +218,16 @@ void tryToAddColumnScan(const QueryPlannerParams& params,
     }
 
     // TODO SERVER-63123: Check if the columnar index actually provides the fields we need.
-    auto [filterSplitByColumn, residualPredicate] =
-        expression::splitMatchExpressionForColumns(query.root());
-    auto canPushFilters = filterSplitByColumn.size() > 0;
+    std::unique_ptr<MatchExpression> residualPredicate;
+    StringMap<std::unique_ptr<MatchExpression>> filterSplitByColumn;
+    if (params.options & QueryPlannerParams::GENERATE_PER_COLUMN_FILTERS) {
+        std::tie(filterSplitByColumn, residualPredicate) =
+            expression::splitMatchExpressionForColumns(query.root());
+    } else {
+        residualPredicate = query.root()->shallowClone();
+    }
+    const bool canPushFilters = filterSplitByColumn.size() > 0;
+
     auto columnScan = std::make_unique<ColumnIndexScanNode>(params.columnarIndexes.front(),
                                                             std::move(outputDeps.fields),
                                                             std::move(filterDeps.fields),
@@ -327,6 +334,9 @@ string optionString(size_t options) {
             case QueryPlannerParams::RETURN_OWNED_DATA:
                 ss << "RETURN_OWNED_DATA ";
                 break;
+            case QueryPlannerParams::GENERATE_PER_COLUMN_FILTERS:
+                ss << "GENERATE_PER_COLUMN_FILTERS ";
+                break;
             case QueryPlannerParams::DEFAULT:
                 MONGO_UNREACHABLE;
                 break;
diff --git a/src/mongo/db/query/query_planner_columnar_test.cpp b/src/mongo/db/query/query_planner_columnar_test.cpp
index 679f6374383..7698a24a08e 100644
--- a/src/mongo/db/query/query_planner_columnar_test.cpp
+++ b/src/mongo/db/query/query_planner_columnar_test.cpp
@@ -67,8 +67,10 @@ protected:
             kInternalQueryMaxNumberOfFieldsToChooseFilteredColumnScanDefault);
     }
 
-    void addColumnarIndex() {
+    void addColumnarIndexAndEnableFilterSplitting() {
         params.columnarIndexes.emplace_back(kIndexName);
+
+        params.options |= QueryPlannerParams::GENERATE_PER_COLUMN_FILTERS;
     }
 
     std::vector<std::unique_ptr<InnerPipelineStageInterface>> makeInnerPipelineStages(
@@ -82,7 +84,7 @@ protected:
 };
 
 TEST_F(QueryPlannerColumnarTest, InclusionProjectionUsesColumnarIndex) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
 
     runQuerySortProj(BSON("a" << BSON("$gt" << 3)), BSONObj(), BSON("a" << 1 << "_id" << 0));
 
@@ -99,7 +101,7 @@ TEST_F(QueryPlannerColumnarTest, InclusionProjectionUsesColumnarIndex) {
 }
 
 TEST_F(QueryPlannerColumnarTest, ExpressionProjectionUsesColumnarIndex) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
 
     runQuerySortProj(BSON("a" << BSON("$gt" << 3)), BSONObj(), fromjson(R"({
                          a: 1,
@@ -124,7 +126,7 @@ TEST_F(QueryPlannerColumnarTest, ExpressionProjectionUsesColumnarIndex) {
 }
 
 TEST_F(QueryPlannerColumnarTest, ImplicitlyIncludedIdIsIncludedInProjectedFields) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
 
     runQuerySortProj(BSON("a" << BSON("$gt" << 3)), BSONObj(), BSON("a" << 1));
 
@@ -144,7 +146,7 @@ TEST_F(QueryPlannerColumnarTest, ImplicitlyIncludedIdIsIncludedInProjectedFields
 }
 
 TEST_F(QueryPlannerColumnarTest, InclusionProjectionWithSortUsesColumnarIndexAndBlockingSort) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
 
     runQuerySortProj(BSONObj(), BSON("a" << 1), BSON("a" << 1 << "_id" << 0));
 
@@ -164,7 +166,7 @@ TEST_F(QueryPlannerColumnarTest, InclusionProjectionWithSortUsesColumnarIndexAnd
 }
 
 TEST_F(QueryPlannerColumnarTest, SortOnSeparateColumnAddsThatColumnToColumnScan) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
 
     runQuerySortProj(BSONObj(), BSON("b" << 1), BSON("a" << 1 << "_id" << 0));
 
@@ -184,7 +186,7 @@ TEST_F(QueryPlannerColumnarTest, SortOnSeparateColumnAddsThatColumnToColumnScan)
 }
 
 TEST_F(QueryPlannerColumnarTest, ExclusionProjectionDoesNotUseColumnarIndex) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
 
     runQuerySortProj(BSONObj(), BSONObj(), BSON("a" << 0 << "_id" << 0));
     assertNumSolutions(1U);
@@ -192,7 +194,7 @@ TEST_F(QueryPlannerColumnarTest, ExclusionProjectionDoesNotUseColumnarIndex) {
 }
 
 TEST_F(QueryPlannerColumnarTest, NoProjectionDoesNotUseColumnarIndex) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
 
     runQuerySortProj(BSON("a" << 1), BSONObj(), BSONObj());
     assertNumSolutions(1U);
@@ -200,7 +202,7 @@ TEST_F(QueryPlannerColumnarTest, NoProjectionDoesNotUseColumnarIndex) {
 }
 
 TEST_F(QueryPlannerColumnarTest, ProjectionWithTooManyFieldsDoesNotUseColumnarIndex) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
 
     internalQueryMaxNumberOfFieldsToChooseUnfilteredColumnScan.store(2);
     runQuerySortProj(BSONObj(), BSONObj(), BSON("a" << 1 << "b" << 1 << "c" << 1));
@@ -209,7 +211,7 @@ TEST_F(QueryPlannerColumnarTest, ProjectionWithTooManyFieldsDoesNotUseColumnarIn
 }
 
 TEST_F(QueryPlannerColumnarTest, ExpressionProjectionWithTooManyFieldsDoesnotUseColumnarIndex) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
 
     internalQueryMaxNumberOfFieldsToChooseUnfilteredColumnScan.store(2);
     // This will need 3 fields for the $concat, so should not be able to use a column scan.
@@ -221,7 +223,7 @@ TEST_F(QueryPlannerColumnarTest, ExpressionProjectionWithTooManyFieldsDoesnotUse
 
 // Test with a number of fields equal to the limit.
 TEST_F(QueryPlannerColumnarTest, ImplicitIdCountsTowardsFieldLimit) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
 
     internalQueryMaxNumberOfFieldsToChooseUnfilteredColumnScan.store(2);
     runQuerySortProj(
@@ -231,7 +233,7 @@ TEST_F(QueryPlannerColumnarTest, ImplicitIdCountsTowardsFieldLimit) {
 }
 
 TEST_F(QueryPlannerColumnarTest, ProjectionWithJustEnoughFieldsDoesUseColumnarIndex) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
 
     internalQueryMaxNumberOfFieldsToChooseUnfilteredColumnScan.store(2);
     // Without the '_id' this should be eligible.
@@ -242,7 +244,7 @@ TEST_F(QueryPlannerColumnarTest, ProjectionWithJustEnoughFieldsDoesUseColumnarIn
 }
 
 TEST_F(QueryPlannerColumnarTest, DottedProjectionTooManyFieldsDoesNotUseColumnarIndex) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
 
     internalQueryMaxNumberOfFieldsToChooseUnfilteredColumnScan.store(2);
     runQuerySortProj(BSONObj(), BSONObj(), BSON("a" << 1 << "b" << BSON("c" << 1 << "d" << 1)));
@@ -252,7 +254,7 @@ TEST_F(QueryPlannerColumnarTest, DottedProjectionTooManyFieldsDoesNotUseColumnar
 
 TEST_F(QueryPlannerColumnarTest,
        ProjectionWithTooManyFieldsDoesNotUseColumnarIndexUnsupportedPredicate) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
 
     internalQueryMaxNumberOfFieldsToChooseUnfilteredColumnScan.store(2);
     runQuerySortProj(BSON("unsupported" << BSON("$exists" << false)),
@@ -263,7 +265,7 @@ TEST_F(QueryPlannerColumnarTest,
 }
 
 TEST_F(QueryPlannerColumnarTest, StandardIndexPreferredOverColumnarIndex) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
     addIndex(BSON("a" << 1));
 
     runQuerySortProj(BSON("a" << 5), BSONObj(), BSON("a" << 1 << "_id" << 0));
@@ -273,7 +275,7 @@ TEST_F(QueryPlannerColumnarTest, StandardIndexPreferredOverColumnarIndex) {
 }
 
 TEST_F(QueryPlannerColumnarTest, IneligiblePredicateNeedsToBeAppliedAfterAssembly) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
 
     runQuerySortProj(BSON("a" << BSONNULL), BSONObj(), BSON("a" << 1 << "_id" << 0));
     assertNumSolutions(1U);
@@ -293,7 +295,7 @@ TEST_F(QueryPlannerColumnarTest, IneligiblePredicateNeedsToBeAppliedAfterAssembl
 }
 
 TEST_F(QueryPlannerColumnarTest, MultiplePredicatesAllowedWithColumnarIndex) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
 
     runQuerySortProj(BSON("a" << 2 << "b" << 3), BSONObj(), BSON("a" << 1 << "_id" << 0));
     assertNumSolutions(1U);
@@ -313,7 +315,7 @@ TEST_F(QueryPlannerColumnarTest, MultiplePredicatesAllowedWithColumnarIndex) {
 
 TEST_F(QueryPlannerColumnarTest,
        TooManyProjectedFieldsDisqualifiesColumnScanEvenWithEligiblePredicates) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
 
     internalQueryMaxNumberOfFieldsToChooseFilteredColumnScan.store(2);
     runQuerySortProj(BSON("a" << 2 << "b" << 3), BSONObj(), BSON("a" << 1 << "b" << 1 << "c" << 1));
@@ -322,7 +324,7 @@ TEST_F(QueryPlannerColumnarTest,
 }
 
 TEST_F(QueryPlannerColumnarTest, TooManyFilteredFieldsDisqualifiesColumnScan) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
 
     internalQueryMaxNumberOfFieldsToChooseFilteredColumnScan.store(2);
     runQuerySortProj(BSON("a" << 2 << "b" << 3 << "c" << 4),
@@ -333,7 +335,7 @@ TEST_F(QueryPlannerColumnarTest, TooManyFilteredFieldsDisqualifiesColumnScan) {
 }
 
 TEST_F(QueryPlannerColumnarTest, FilterDependingOnWholeDocumentDisqualifiesColumnScan) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
 
     // The projection only needs 1 field, but the match references '$$ROOT' so needs the whole
     // document.
@@ -345,7 +347,7 @@ TEST_F(QueryPlannerColumnarTest, FilterDependingOnWholeDocumentDisqualifiesColum
     assertSolutionExists(R"({proj: {spec: {b: 1, _id: 0}, node: {cscan: {dir: 1}}}})");
 }
 TEST_F(QueryPlannerColumnarTest, CombinationOfProjectedAndMatchedFieldsDisqualifiesColumnScan) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
 
     // Neither the match nor the project mentions 4 fields, but together they exceed the threshhold.
     internalQueryMaxNumberOfFieldsToChooseFilteredColumnScan.store(4);
@@ -357,7 +359,7 @@ TEST_F(QueryPlannerColumnarTest, CombinationOfProjectedAndMatchedFieldsDisqualif
 }
 
 TEST_F(QueryPlannerColumnarTest, NumberOfFieldsComputedUsingSetSize) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
 
     // If there are 3 fields referenced in the match and 3 in the projection, but they overlap, we
     // should be OK to use column scan.
@@ -380,7 +382,7 @@ TEST_F(QueryPlannerColumnarTest, NumberOfFieldsComputedUsingSetSize) {
     })");
 }
 TEST_F(QueryPlannerColumnarTest, ComplexPredicateSplitDemo) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
 
     auto complexPredicate = fromjson(R"({
         a: {$gte: 0, $lt: 10},
@@ -410,7 +412,7 @@ TEST_F(QueryPlannerColumnarTest, ComplexPredicateSplitDemo) {
 }
 
 TEST_F(QueryPlannerColumnarTest, ComplexPredicateSplitsIntoParts) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
 
     // Same predicate as above, except with exists: false, which disqualifies the whole thing.
     auto complexPredicate = fromjson(R"({
@@ -445,7 +447,7 @@ TEST_F(QueryPlannerColumnarTest, ComplexPredicateSplitsIntoParts) {
 }
 
 TEST_F(QueryPlannerColumnarTest, EmptyQueryPredicateIsEligible) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
 
     runQuerySortProj(BSONObj(), BSONObj(), BSON("a" << 1 << "_id" << 0));
     assertNumSolutions(1U);
@@ -458,7 +460,7 @@ TEST_F(QueryPlannerColumnarTest, EmptyQueryPredicateIsEligible) {
 }
 
 TEST_F(QueryPlannerColumnarTest, GroupTest) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
 
     auto pipeline = Pipeline::parse({fromjson("{$group: {_id: '$foo', s: {$sum: '$x'}}}")}, expCtx);
 
@@ -493,7 +495,7 @@ TEST_F(QueryPlannerColumnarTest, GroupTest) {
 }
 
 TEST_F(QueryPlannerColumnarTest, MatchGroupTest) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
 
     auto pipeline = Pipeline::parse({fromjson("{$group: {_id: '$foo', s: {$sum: '$x'}}}")}, expCtx);
 
@@ -524,7 +526,7 @@ TEST_F(QueryPlannerColumnarTest, MatchGroupTest) {
 }
 
 TEST_F(QueryPlannerColumnarTest, MatchGroupWithOverlappingFieldsTest) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
 
     auto pipeline = Pipeline::parse(
         {fromjson("{$group: {_id: '$foo', s: {$sum: '$x'}, name: {$first: '$name'}}}")}, expCtx);
@@ -557,7 +559,7 @@ TEST_F(QueryPlannerColumnarTest, MatchGroupWithOverlappingFieldsTest) {
 }
 
 TEST_F(QueryPlannerColumnarTest, ShardKeyFieldsIncluded) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
     params.options |= QueryPlannerParams::INCLUDE_SHARD_FILTER;
     params.shardKey = BSON("sk1" << 1 << "sk2.nested" << 1);
 
@@ -587,7 +589,7 @@ TEST_F(QueryPlannerColumnarTest, ShardKeyFieldsIncluded) {
 }
 
 TEST_F(QueryPlannerColumnarTest, ShardKeyFieldsCountTowardsFieldLimit) {
-    addColumnarIndex();
+    addColumnarIndexAndEnableFilterSplitting();
     params.options |= QueryPlannerParams::INCLUDE_SHARD_FILTER;
     params.shardKey = BSON("sk1" << 1 << "sk2.nested" << 1);
 
@@ -613,4 +615,28 @@ TEST_F(QueryPlannerColumnarTest, ShardKeyFieldsCountTowardsFieldLimit) {
         }
     })");
 }
+
+TEST_F(QueryPlannerColumnarTest, FullPredicateOption) {
+    params.columnarIndexes.emplace_back(kIndexName);
+
+    // Filter that could be pushed down, but isn't due to the lack of the
+    // GENERATE_PER_COLUMN_FILTER flag.
+    auto predicate = fromjson(R"({
+        specialAddress: {$exists: true},
+        doNotContact: {$exists: true}
+    })");
+    runQuerySortProj(predicate, BSONObj(), BSON("a" << 1 << "_id" << 0));
+    assertSolutionExists(R"({
+        proj: {
+            spec: {a: 1, _id: 0},
+            node: {
+                column_scan: {
+                    outputFields: ['a'],
+                    postAssemblyFilter: {
+                        specialAddress: {$exists: true},
+                        doNotContact: {$exists: true}
+                    },
+                    matchFields:
+                        ['specialAddress', 'doNotContact']}}}})");
+}
 }  // namespace mongo
diff --git a/src/mongo/db/query/query_planner_params.h b/src/mongo/db/query/query_planner_params.h
index 41d096d65d0..af643a632c8 100644
--- a/src/mongo/db/query/query_planner_params.h
+++ b/src/mongo/db/query/query_planner_params.h
@@ -139,6 +139,11 @@ struct QueryPlannerParams {
         // Ensure that any plan generated returns data that is "owned." That is, all BSONObjs are
         // in an "owned" state and are not pointing to data that belongs to the storage engine.
         RETURN_OWNED_DATA = 1 << 12,
+
+        // When generating column scan queries, splits match expressions so that the filters can be
+        // applied per-column. This is off by default, since the execution side doesn't support it
+        // yet.
+        GENERATE_PER_COLUMN_FILTERS = 1 << 13,
     };
 
     // See Options enum above.
diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp
index 5ec6d124c29..f1e1e3d6e74 100644
--- a/src/mongo/db/query/sbe_stage_builder.cpp
+++ b/src/mongo/db/query/sbe_stage_builder.cpp
@@ -870,6 +870,8 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder
             "'postAssemblyFilter' to be used instead.",
             !csn->filter);
 
+    tassert(6610251, "Expected no filters by path", csn->filtersByPath.empty());
+
     PlanStageSlots outputs;
 
     auto recordSlot = _slotIdGenerator.generate();
@@ -906,7 +908,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder
     slotMap[rootStr] = rowStoreSlot;
     auto abt = builder.generateABT();
     auto exprOut = abt ? abtToExpr(*abt, slotMap) : emptyExpr->clone();
-    auto stage = std::make_unique<sbe::ColumnScanStage>(
+    std::unique_ptr<sbe::PlanStage> stage = std::make_unique<sbe::ColumnScanStage>(
         getCurrentCollection(reqs)->uuid(),
         csn->indexEntry.catalogName,
         fieldSlotIds,
@@ -919,6 +921,22 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder
         _yieldPolicy,
         csn->nodeId());
 
+    // Generate post assembly filter.
+    if (csn->postAssemblyFilter) {
+        auto relevantSlots = sbe::makeSV(recordSlot);
+        if (ridSlot) {
+            relevantSlots.push_back(*ridSlot);
+        }
+        relevantSlots.insert(relevantSlots.end(), fieldSlotIds.begin(), fieldSlotIds.end());
+
+        auto [_, outputStage] = generateFilter(_state,
+                                               csn->postAssemblyFilter.get(),
+                                               {std::move(stage), std::move(relevantSlots)},
+                                               recordSlot,
+                                               csn->nodeId());
+        stage = std::move(outputStage.stage);
+    }
+
     return {std::move(stage), std::move(outputs)};
 }
 
diff --git a/src/mongo/db/storage/column_store.h b/src/mongo/db/storage/column_store.h
index 39d3dd384c9..52367496611 100644
--- a/src/mongo/db/storage/column_store.h
+++ b/src/mongo/db/storage/column_store.h
@@ -380,22 +380,29 @@ struct SplitCellView {
     bool hasDoubleNestedArrays = false;
 
     template <class ValueEncoder>
-    auto subcellValuesGenerator(ValueEncoder&& valEncoder) const {
-        struct Cursor {
-            using Out = typename std::remove_reference_t<ValueEncoder>::Out;
-            Out nextValue() {
-                if (elemPtr == end)
-                    return Out();
-
-                invariant(elemPtr < end);
-                return decodeAndAdvance(elemPtr, encoder);
-            }
+    struct Cursor {
+        using Out = typename std::remove_reference_t<ValueEncoder>::Out;
+        Out nextValue() {
+            if (elemPtr == end)
+                return Out();
+
+            invariant(elemPtr < end);
+            return decodeAndAdvance(elemPtr, encoder);
+        }
+        bool hasNext() const {
+            return elemPtr != end;
+        }
 
-            const char* elemPtr;
-            const char* end;
-            ValueEncoder encoder;
-        };
-        return Cursor{firstValuePtr, arrInfo.rawData(), std::forward<ValueEncoder>(valEncoder)};
+        const char* elemPtr;
+        const char* end;
+        ValueEncoder encoder;
+    };
+
+
+    template <class ValueEncoder>
+    auto subcellValuesGenerator(ValueEncoder&& valEncoder) const {
+        return Cursor<ValueEncoder>{
+            firstValuePtr, arrInfo.rawData(), std::forward<ValueEncoder>(valEncoder)};
     }
 
     static SplitCellView parse(CellView cell) {
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_column_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_column_store.cpp
index db5933605ab..9cd9acb83b9 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_column_store.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_column_store.cpp
@@ -127,7 +127,7 @@ std::string& WiredTigerColumnStore::makeKey(std::string& buffer, PathView path,
         // If we end up reserving more values, the above check should be changed.
         buffer += '\0';
     }
-    rid.withFormat([](RecordId::Null) { /* do nothing */ },
+    rid.withFormat([](RecordId::Null) { /* Do nothing. */ },
                    [&](int64_t num) {
                        num = endian::nativeToBig(num);
                        buffer.append(reinterpret_cast<const char*>(&num), sizeof(num));
@@ -234,6 +234,7 @@ void WiredTigerColumnStore::fullValidate(OperationContext* opCtx,
                                          int64_t* numKeysOut,
                                          IndexValidateResults* fullResults) const {
     // TODO SERVER-65484: Validation for column indexes.
+    // uasserted(ErrorCodes::NotImplemented, "WiredTigerColumnStore::fullValidate()");
     return;
 }
 
@@ -241,14 +242,17 @@ class WiredTigerColumnStore::Cursor final : public ColumnStore::Cursor,
                                             public WiredTigerIndexCursorGeneric {
 public:
     Cursor(OperationContext* opCtx, const WiredTigerColumnStore* idx)
-        : WiredTigerIndexCursorGeneric(opCtx, true /* forward */), _opCtx(opCtx), _idx(*idx) {
+        : WiredTigerIndexCursorGeneric(opCtx, true /* forward */), _idx(*idx) {
         _cursor.emplace(_idx.uri(), _idx._tableId, false, _opCtx);
     }
     boost::optional<FullCellView> next() override {
-        if (_eof)
+        if (_eof) {
             return {};
-        if (!_lastMoveSkippedKey)
+        }
+        if (!_lastMoveSkippedKey) {
             advanceWTCursor();
+        }
+
         return curr();
     }
     boost::optional<FullCellView> seekAtOrPast(PathView path, RecordId rid) override {
@@ -303,7 +307,6 @@ public:
 private:
     void resetCursor() {
         WiredTigerIndexCursorGeneric::resetCursor();
-        _eof = true;
     }
     bool seekWTCursor(bool exactOnly = false) {
         // Ensure an active transaction is open.
@@ -377,8 +380,6 @@ private:
     // false by any operation that moves the cursor, other than subsequent save/restore pairs.
     bool _lastMoveSkippedKey = false;
 
-    OperationContext* _opCtx;
-    boost::optional<WiredTigerCursor> _cursor;
     const WiredTigerColumnStore& _idx;  // not owned
 };
author	Ian Boros <ian.boros@mongodb.com>	2022-05-03 19:18:43 +0000
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2022-05-11 23:03:48 +0000
commit	83ae5393284912aded0087fd2cdf79edff6f999f (patch)
tree	f5b57ad8688a00fccedfb77883bcbf5b0aeeb100 /src
parent	51bff7a1d0145afebb57a04e82bd962985801f06 (diff)
download	mongo-83ae5393284912aded0087fd2cdf79edff6f999f.tar.gz