summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--jstests/core/columnstore_index_per_path_filters.js587
-rw-r--r--jstests/noPassthroughWithMongod/column_scan_explain.js158
-rw-r--r--src/mongo/db/exec/sbe/stages/column_scan.cpp400
-rw-r--r--src/mongo/db/exec/sbe/stages/column_scan.h74
-rw-r--r--src/mongo/db/exec/sbe/values/columnar.cpp50
-rw-r--r--src/mongo/db/exec/sbe/values/columnar.h50
-rw-r--r--src/mongo/db/matcher/expression_algo.cpp69
-rw-r--r--src/mongo/db/matcher/expression_algo_test.cpp240
-rw-r--r--src/mongo/db/query/query_planner.cpp8
-rw-r--r--src/mongo/db/query/query_planner_columnar_test.cpp51
-rw-r--r--src/mongo/db/query/sbe_stage_builder.cpp144
11 files changed, 1353 insertions, 478 deletions
diff --git a/jstests/core/columnstore_index_per_path_filters.js b/jstests/core/columnstore_index_per_path_filters.js
new file mode 100644
index 00000000000..27154426324
--- /dev/null
+++ b/jstests/core/columnstore_index_per_path_filters.js
@@ -0,0 +1,587 @@
+/**
+ * Testing of just the query layer's integration for columnar index when filters are used that
+ * might be pushed down into the column scan stage.
+ *
+ * @tags: [
+ * # columnstore indexes are new in 6.1.
+ * requires_fcv_61,
+ * # Runs explain on an aggregate command which is only compatible with readConcern local.
+ * assumes_read_concern_unchanged,
+ * # TODO SERVER-66925 We could potentially need to resume an index build in the event of a
+ * # stepdown, which is not yet implemented.
+ * does_not_support_stepdowns,
+ * uses_column_store_index,
+ * ]
+ */
+(function() {
+"use strict";
+
+load("jstests/aggregation/extras/utils.js"); // For "resultsEq."
+load("jstests/libs/analyze_plan.js"); // For "planHasStage."
+load("jstests/libs/sbe_explain_helpers.js"); // For getSbePlanStages.
+load("jstests/libs/sbe_util.js"); // For "checkSBEEnabled".
+
+const columnstoreEnabled =
+ checkSBEEnabled(db, ["featureFlagColumnstoreIndexes", "featureFlagSbeFull"]);
+if (!columnstoreEnabled) {
+ jsTestLog("Skipping columnstore index per-path filters test since the feature flag is not enabled.");
+ return;
+}
+
+const coll_filters = db.columnstore_index_per_path_filters;
+function runPerPathFiltersTest({docs, query, projection, expected, testDescription}) {
+ coll_filters.drop();
+ coll_filters.insert(docs);
+ assert.commandWorked(coll_filters.createIndex({"$**": "columnstore"}));
+
+ // Order of the elements within the result and 'expected' is not significant for 'resultsEq' but
+ // use small datasets to make the test failures more readable.
+ const explain = coll_filters.find(query, projection).explain();
+ const errMsg = " **TEST** " + testDescription + tojson(explain);
+ const actual = coll_filters.find(query, projection).toArray();
+ assert(resultsEq(actual, expected),
+ `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`);
+}
+// Sanity check that without filters, the cursors on other columns are advanced correctly.
+(function testPerPathFilters_SingleColumn_NoFilters() {
+ const docs = [
+ {n: 0, x: 42},
+ {n: 1, y: 42},
+ {n: 2, x: 42},
+ {n: 3},
+ {n: 4, x: 42, y: 42},
+ {n: 5, x: 42},
+ ];
+ runPerPathFiltersTest({
+ docs: docs,
+ query: {},
+ projection: {a: 1, _id: 0, n: 1, x: 1, y: 1},
+ expected: docs,
+ testDescription: "SingleColumn_NoFilters"
+ });
+})();
+
+// Checks matching on top-level path that contains scalars and arrays.
+(function testPerPathFilters_SingleColumn_TopLevelField_Match() {
+ const docs = [
+ {n: 0, x: 42}, // vals: 42, arrInfo: <empty>
+ {n: 1, x: [0, 42]}, // vals: [0,42], arrInfo: "["
+ {n: 2, x: [[0, 0, 0], 0, 42]}, // vals: [0,0,0,0,42], arrInfo: "[[|2]"
+ {n: 3, x: [{y: 0}, {y: 0}, 42]}, // vals: [42], arrInfo: "[o1"
+ {n: 4, x: [0, 0, {y: 0}, 42]}, // vals: [0,0,42], arrInfo: "[|1o"
+ {n: 5, x: [[0, 0], {y: 0}, [{y: 0}], 42]}, // vals: [0,0,42], arrInfo: "[[|1]o[o]"
+ ];
+ // Projecting "_id" out and instead projecting as the first (in alphabetical order) column a
+ // non-existing field helps to flush out bugs with incorrect lookup of the filtered columns
+ // among all columns involved in the query.
+ runPerPathFiltersTest({
+ docs: docs,
+ query: {x: 42},
+ projection: {a: 1, _id: 0, n: 1},
+ expected: [{n: 0}, {n: 1}, {n: 2}, {n: 3}, {n: 4}, {n: 5}],
+ testDescription: "SingleColumn_TopLevelField_Match"
+ });
+})();
+
+(function testPerPathFilters_SingleColumn_TopLevelField_NoMatch() {
+ const docs = [
+ {n: 0, x: {a: 0}}, // vals: <empty>, arrInfo: <empty>
+ {n: 1, x: [[42, 0]]}, // vals: [42, 0], arrInfo: "[["
+ {n: 2, x: [[42, 0, 0], 0, 0]}, // vals: [42,0,0,0,0], arrInfo: "[[|2]"
+ {n: 3, x: [{y: 0}, [42, 0]]}, // vals: [42,0], arrInfo: "[o["
+ {n: 4, x: [[42], {y: 0}]}, // vals: [42], arrInfo: "[[|]o"
+ {n: 5, x: [[42, 42], [42]]}, // vals: [42,42,42], arrInfo: "[[|1]["
+ {
+ n: 6,
+ x: [0, [42, [42, [42], 42], 42], [42]]
+ }, // vals: [0,42,42,42,42,42], arrInfo: "[|[|[|[|]|]|]["
+ ];
+ // Projecting "_id" out and instead projecting as the first (in alphabetical order) column a
+ // non-existing field helps to flush out bugs with incorrect lookup of the filtered columns
+ // among all columns involved in the query.
+ runPerPathFiltersTest({
+ docs: docs,
+ query: {x: 42},
+ projection: {a: 1, _id: 0, n: 1},
+ expected: [],
+ testDescription: "SingleColumn_TopLevelField_NoMatch"
+ });
+})();
+
+// Checks matching on sub-path that contains scalars and arrays.
+(function testPerPathFilters_SingleColumn_SubField_Match() {
+ const docs = [
+ {_id: 0, x: {y: {z: 42}}}, // vals: 42, arrInfo: <empty>
+ {_id: 1, x: {y: {z: [0, 42]}}}, // vals: [0,42], arrInfo: "{{["
+ {_id: 2, x: {y: {z: [[0, 0], 42]}}}, // vals: [0,0,42], arrInfo: "{{[[|1]"
+ {_id: 3, x: {y: [{z: 0}, {z: 42}]}}, // vals: [0,42], arrInfo: "{["
+ {_id: 4, x: {y: [{z: {a: 0}}, {z: [[0, 0], 42]}]}}, // vals: [0,0,42], arrInfo: "{[o{[[|1]"
+ {_id: 5, x: {y: [{a: 0}, {z: 0}, {a: 0}, {z: 42}]}}, // vals: [0,42], arrInfo: "{[1|+1"
+ {
+ _id: 6,
+ x: [{y: {z: 0}}, {y: {z: [[0, 0]]}}, {y: {z: 42}}]
+ }, // vals: [0,0,0,42], arrInfo: "[|{{[[|1]]"
+ {
+ _id: 7,
+ x: [{y: {z: {a: 0}}}, {y: 0}, {y: {z: [0, 42]}}]
+ }, // vals: [0,42], arrInfo: "[o+1{{["
+ {
+ _id: 8,
+ x: [{y: [{z: {a: 0}}, {z: [0]}, {a: 0}, {z: [0, 42]}]}]
+ }, // vals: [0,0,42], arrInfo: "[{[o{[|]+1{["
+ {_id: 9, x: [{y: [[{z: 0}], {z: 42}]}]}, // vals: [0,42], arrInfo: "[{[[|]"
+ {_id: 10, x: [[{y: [{z: 0}]}], {y: {z: [42]}}]}, // vals: [0,42], arrInfo: "[[{[|]]{{["
+ {_id: 11, x: [{y: {z: [0]}}, {y: {z: [42]}}]}, // vals: [0,42], arrInfo: "[{{[|]{{["
+ ];
+ let expected = [];
+ for (let i = 0; i < docs.length; i++) {
+ expected.push({_id: i});
+ }
+ runPerPathFiltersTest({
+ docs: docs,
+ query: {"x.y.z": 42},
+ projection: {_id: 1},
+ expected: expected,
+ testDescription: "SingleColumn_SubField_Match"
+ });
+})();
+
+// Checks matching on sub-path that contains scalars and arrays.
+(function testPerPathFilters_SingleColumn_SubField_NoMatch() {
+ const docs = [
+ {_id: 0, x: {y: {z: {a: 0}}}}, // vals: <empty>, arrInfo: <empty>
+ {_id: 1, x: {y: {z: [0, [42]]}}}, // vals: [0,42], arrInfo: "{{[|["
+ {_id: 2, x: {y: [{z: 0}, [{z: 42}]]}}, // vals: [0,42], arrInfo: "{[|["
+ {_id: 3, x: [{y: {z: 0}}, [{y: {z: 42}}]]}, // vals: [42], arrInfo: "[1["
+ ];
+ runPerPathFiltersTest({
+ docs: docs,
+ query: {"x.y.z": 42},
+ projection: {_id: 1},
+ expected: [],
+ testDescription: "SingleColumn_SubField_NoMatch"
+ });
+})();
+
+// Check matching of empty arrays which are treated as values.
+// NB: expressions for comparing to whole non-empty arrays aren't split into per-path filters.
+(function testPerPathFilters_EmptyArray() {
+ const docs = [
+ {_id: 0, x: {y: []}},
+ {_id: 1, x: {y: [0, []]}},
+ {_id: 2, x: [{y: 0}, {y: []}]},
+ {_id: 3, x: {y: [0, [0, []]]}},
+ ];
+ runPerPathFiltersTest({
+ docs: docs,
+ query: {"x.y": 42},
+ projection: {_id: 1},
+ expected: [], //[{_id: 0}, {_id: 1}, {_id: 2}],
+ testDescription: "SingleColumn_EmptyArray"
+ });
+})();
+
+// Check matching of empty objects which are treated as values.
+// NB: expressions for comparing to whole non-empty objects aren't split into per-path filters.
+(function testPerPathFilters_EmptyObject() {
+ const docs = [
+ {_id: 0, x: {y: {}}},
+ {_id: 1, x: {y: [0, {}]}},
+ {_id: 2, x: [{y: 0}, {y: {}}]},
+ {_id: 3, x: {y: [0, [0, {}]]}},
+ ];
+ runPerPathFiltersTest({
+ docs: docs,
+ query: {"x.y": {}},
+ projection: {_id: 1},
+ expected: [{_id: 0}, {_id: 1}, {_id: 2}],
+ testDescription: "SingleColumn_EmptyObject"
+ });
+})();
+
+// Check that a single filtered column correctly handles matching, no-matching and missing values
+// when moving the cursor.
+(function testPerPathFilters_SingleColumnMatchNoMatchMissingValues() {
+ const docs = [
+ {_id: 0, x: 42},
+ {_id: 1, x: 0},
+ {_id: 2, x: 42},
+ {_id: 3, no_x: 0},
+ {_id: 4, x: 42},
+ {_id: 5, x: 0},
+ {_id: 6, no_x: 0},
+ {_id: 7, x: 42},
+ {_id: 8, no_x: 0},
+ {_id: 9, x: 0},
+ {_id: 10, x: 42},
+ ];
+ runPerPathFiltersTest({
+ docs: docs,
+ query: {x: 42},
+ projection: {_id: 1},
+ expected: [{_id: 0}, {_id: 2}, {_id: 4}, {_id: 7}, {_id: 10}],
+ testDescription: "SingleColumnMatchNoMatchMissingValues"
+ });
+})();
+
+// Check zig-zagging of two filters. We cannot assert through a JS test that the cursors for the
+// filtered columns are advanced as described here in the comments, but the test attempts to
+// exercise various match/no-match/missing combinations of values across columns.
+(function testPerPathFilters_TwoColumns() {
+ const docs = [
+ {_id: 0, x: 0, y: 0}, // start by iterating x
+ {_id: 1, x: 42, y: 42}, // seek into y and match! - continue iterating on x
+ {_id: 2, x: 42, no_y: 0}, // seeking into y skips to n:3
+ {_id: 3, x: 42, y: 0}, // iterating on y
+ {_id: 4, x: 42, y: 42}, // seek into x and match! - continue iterating on y
+ {_id: 5, no_x: 0, y: 42}, // seek into x skips to n:6
+ {_id: 6, x: 42, y: 0}, // seek into y but no match - iterate on y
+ {_id: 7, x: 0, y: 42}, // seek into x but no match - iterate on x
+ {_id: 8, no_x: 0, no_y: 0}, // skipped by x
+ {_id: 9, x: 42, y: 42}, // seek into y and match!
+ ];
+ // Adding into the projection specification non-existent fields doesn't change the output but
+ // helps to flush out bugs with incorrect indexing of filtered paths among all others.
+ runPerPathFiltersTest({
+ docs: docs,
+ query: {x: 42, y: 42},
+ projection: {_id: 1, a: 1, xa: 1, xb: 1, ya: 1},
+ expected: [{_id: 1}, {_id: 4}, {_id: 9}],
+ testDescription: "TwoColumns"
+ });
+})();
+
+// Check zig-zagging of three filters.
+(function testPerPathFilters_ThreeColumns() {
+ const docs = [
+ {_id: 0, x: 0, y: 42, z: 42}, // x
+ {_id: 1, x: 42, y: 42, z: 0}, // x->y->z
+ {_id: 2, x: 0, y: 42, z: 42}, // z->x
+ {_id: 3, x: 0, y: 42, z: 42}, // x
+ {_id: 4, x: 42, no_y: 0, z: 42}, // x->y
+ {_id: 5, x: 42, y: 0, z: 42}, // y
+ {_id: 6, x: 42, y: 42, z: 42}, // match! ->y
+ {_id: 7, x: 42, y: 42, z: 42}, // match! ->y
+ {_id: 8, no_x: 0, y: 42, z: 42}, // y->z->x
+ {_id: 9, x: 42, y: 42, no_z: 0}, // x
+ {_id: 10, x: 42, y: 42, z: 42}, // match!
+ ];
+ // Adding into the projection specification non-existent fields doesn't change the output but
+ // helps to flush out bugs with incorrect indexing of filtered paths among all others.
+ runPerPathFiltersTest({
+ docs: docs,
+ query: {x: 42, y: 42, z: 42},
+ projection: {_id: 1, a: 1, b: 1, xa: 1, xb: 1, ya: 1, za: 1},
+ expected: [{_id: 6}, {_id: 7}, {_id: 10}],
+ testDescription: "ThreeColumns"
+ });
+})();
+
+// Check projection of filtered columns.
+(function testPerPathFilters_ProjectFilteredColumn() {
+ const docs = [
+ {_id: 0, x: {y: 42}},
+ {_id: 1, x: {y: 42, z: 0}},
+ {_id: 2, x: [0, {y: 42}, {y: 0}, {z: 0}]},
+ ];
+ runPerPathFiltersTest({
+ docs: docs,
+ query: {"x.y": 42},
+ projection: {_id: 1, "x.y": 1},
+ expected: [{_id: 0, x: {y: 42}}, {_id: 1, x: {y: 42}}, {_id: 2, x: [{y: 42}, {y: 0}, {}]}],
+ testDescription: "ProjectFilteredColumn"
+ });
+})();
+
+// Check correctness when have both per-path and residual filters.
+(function testPerPathFilters_PerPathAndResidualFilters() {
+ const docs = [
+ {_id: 0, x: 42, no_y: 0},
+ {_id: 1, x: 42, y: 0},
+ {_id: 2, x: 0, no_y: 0},
+ {_id: 3, x: 0, y: 0},
+ {_id: 4, no_x: 0, no_y: 0},
+ {_id: 5, x: 42, no_y: 0},
+ ];
+ runPerPathFiltersTest({
+ docs: docs,
+ query: {x: 42, y: {$exists: false}}, // {$exists: false} causes the residual filter
+ projection: {_id: 1},
+ expected: [{_id: 0}, {_id: 5}],
+ testDescription: "PerPathAndResidualFilters"
+ });
+})();
+
+// Check translation of MQL comparison match expressions.
+(function testPerPathFilters_SupportedMatchExpressions_Comparison() {
+ const docs = [
+ {_id: 0, x: NumberInt(5)},
+ {_id: 1, x: 0},
+ {_id: 2, x: null},
+ {_id: 3, no_x: 0},
+ {_id: 4, x: NumberInt(15)},
+ ];
+
+ coll_filters.drop();
+ coll_filters.insert(docs);
+ assert.commandWorked(coll_filters.createIndex({"$**": "columnstore"}));
+
+ let expected = [];
+ let actual = [];
+ let errMsg = "";
+
+ // MatchExpression::LT
+ actual = coll_filters.find({x: {$lt: 15}}, {_id: 1}).toArray();
+ expected = [{_id: 0}, {_id: 1}];
+ errMsg = "SupportedMatchExpressions: $lt";
+ assert(resultsEq(actual, expected),
+ `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`);
+
+ // MatchExpression::GT
+ actual = coll_filters.find({x: {$gt: 5}}, {_id: 1}).toArray();
+ expected = [{_id: 4}];
+ errMsg = "SupportedMatchExpressions: $gt";
+ assert(resultsEq(actual, expected),
+ `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`);
+
+ // MatchExpression::LTE
+ actual = coll_filters.find({x: {$lte: 15}}, {_id: 1}).toArray();
+ expected = [{_id: 0}, {_id: 1}, {_id: 4}];
+ errMsg = "SupportedMatchExpressions: $lte";
+ assert(resultsEq(actual, expected),
+ `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`);
+
+ // MatchExpression::GTE
+ actual = coll_filters.find({x: {$gte: 5}}, {_id: 1}).toArray();
+ expected = [{_id: 0}, {_id: 4}];
+ errMsg = "SupportedMatchExpressions: $gte";
+ assert(resultsEq(actual, expected),
+ `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`);
+})();
+
+// Check translation of MQL bitwise match expressions.
+(function testPerPathFilters_SupportedMatchExpressions_Bitwise() {
+ const docs = [
+ {_id: 0, x: NumberInt(1 + 0 * 2 + 1 * 4 + 0 * 8)},
+ {_id: 1, x: 0},
+ {_id: 2, x: null},
+ {_id: 3, no_x: 0},
+ {_id: 4, x: NumberInt(0 + 1 * 2 + 1 * 4 + 1 * 8)},
+ {_id: 5, x: NumberInt(0 + 1 * 2 + 1 * 4 + 0 * 8)},
+ ];
+
+ coll_filters.drop();
+ coll_filters.insert(docs);
+ assert.commandWorked(coll_filters.createIndex({"$**": "columnstore"}));
+
+ let expected = [];
+ let actual = [];
+ let errMsg = "";
+
+ // MatchExpression::BITS_ALL_SET
+ actual = coll_filters.find({x: {$bitsAllSet: [1, 2]}}, {_id: 1}).toArray();
+ expected = [{_id: 4}, {_id: 5}];
+ errMsg = "SupportedMatchExpressions: $bitsAllSet";
+ assert(resultsEq(actual, expected),
+ `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`);
+
+ // MatchExpression::BITS_ALL_CLEAR
+ actual = coll_filters.find({x: {$bitsAllClear: [1, 3]}}, {_id: 1}).toArray();
+ expected = [{_id: 0}, {_id: 1}];
+ errMsg = "SupportedMatchExpressions: $bitsAllClear";
+ assert(resultsEq(actual, expected),
+ `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`);
+
+ // MatchExpression::BITS_ANY_SET
+ actual = coll_filters.find({x: {$bitsAnySet: [0, 3]}}, {_id: 1}).toArray();
+ expected = [{_id: 0}, {_id: 4}];
+ errMsg = "SupportedMatchExpressions: $bitsAnySet";
+ assert(resultsEq(actual, expected),
+ `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`);
+
+ // MatchExpression::BITS_ANY_CLEAR
+ actual = coll_filters.find({x: {$bitsAnyClear: [2, 3]}}, {_id: 1}).toArray();
+ expected = [{_id: 0}, {_id: 1}, {_id: 5}];
+ errMsg = "SupportedMatchExpressions: $bitsAnyClear";
+ assert(resultsEq(actual, expected),
+ `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`);
+})();
+
+// Check translation of MQL $exists, $not and $ne match expressions.
+// NB: per SERVER-68743 the queries in this test won't be using per-path filters yet, but eventually
+// they should.
+(function testPerPathFilters_SupportedMatchExpressions_Not() {
+ const docs = [
+ {_id: 0, x: 42},
+ {_id: 1, x: 0},
+ {_id: 2, x: null},
+ {_id: 3, no_x: 0},
+ {_id: 4, x: []},
+ {_id: 5, x: {}},
+ ];
+
+ coll_filters.drop();
+ coll_filters.insert(docs);
+ assert.commandWorked(coll_filters.createIndex({"$**": "columnstore"}));
+
+ let expected = [];
+ let actual = [];
+ let errMsg = "";
+
+ actual = coll_filters.find({x: {$ne: null}}, {_id: 1}).toArray();
+ expected = [{_id: 0}, {_id: 1}, {_id: 4}, {_id: 5}];
+ errMsg = "SupportedMatchExpressions: $ne";
+ assert(resultsEq(actual, expected),
+ `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`);
+
+ actual = coll_filters.find({x: {$not: {$eq: null}}}, {_id: 1}).toArray();
+ expected = [{_id: 0}, {_id: 1}, {_id: 4}, {_id: 5}];
+ errMsg = "SupportedMatchExpressions: $not + $eq";
+ assert(resultsEq(actual, expected),
+ `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`);
+
+ actual = coll_filters.find({x: {$exists: true}}, {_id: 1}).toArray();
+ expected = [{_id: 0}, {_id: 1}, {_id: 2}, {_id: 4}, {_id: 5}];
+ errMsg = "SupportedMatchExpressions: $exists";
+ assert(resultsEq(actual, expected),
+ `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`);
+})();
+
+// Check translation of other MQL match expressions.
+(function testPerPathFilters_SupportedMatchExpressions_Oddballs() {
+ const docs = [
+ {_id: 0, x: NumberInt(7 * 3 + 4)},
+ {_id: 1, x: NumberInt(7 * 9 + 2)},
+ {_id: 2, x: "find me"},
+ {_id: 3, x: "find them"},
+ ];
+
+ coll_filters.drop();
+ coll_filters.insert(docs);
+ assert.commandWorked(coll_filters.createIndex({"$**": "columnstore"}));
+
+ let expected = [];
+ let actual = [];
+ let errMsg = "";
+
+ actual = coll_filters.find({x: {$mod: [7, 2]}}, {_id: 1}).toArray();
+ expected = [{_id: 1}];
+ errMsg = "SupportedMatchExpressions: $mod";
+ assert(resultsEq(actual, expected),
+ `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`);
+
+ actual = coll_filters.find({x: {$regex: /me/}}, {_id: 1}).toArray();
+ expected = [{_id: 2}];
+ errMsg = "SupportedMatchExpressions: $regex";
+ assert(resultsEq(actual, expected),
+ `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`);
+})();
+
+// Check degenerate case with no paths.
+(function testPerPathFilters_NoPathsProjected() {
+ const docs = [
+ {_id: 0, x: 42},
+ {_id: 1, x: 0},
+ {_id: 2, no_x: 42},
+ ];
+ runPerPathFiltersTest({
+ docs: docs,
+ query: {x: 42},
+ projection: {_id: 0, a: {$literal: 1}},
+ expected: [{a: 1}],
+ testDescription: "NoPathsProjected"
+ });
+ runPerPathFiltersTest({
+ docs: docs,
+ query: {},
+ projection: {_id: 0, a: {$literal: 1}},
+ expected: [{a: 1}, {a: 1}, {a: 1}],
+ testDescription: "NoPathsProjected (and no filter)"
+ });
+})();
+
+// While using columnar indexes doesn't guarantee a specific field ordering in the result objects,
+// we still try to provide a stable experience to the user, so we output "_id" first and other
+// fields in alphabetically ascending order.
+(function testPerPathFilters_FieldOrder() {
+ const docs = [{z: 42, a: 42, _id: 42, _a: 42}];
+
+ coll_filters.drop();
+ coll_filters.insert(docs);
+ assert.commandWorked(coll_filters.createIndex({"$**": "columnstore"}));
+
+ let res = tojson(coll_filters.find({}, {_a: 1, a: 1, z: 1}).toArray()[0]);
+ let expected = '{ "_id" : 42, "_a" : 42, "a" : 42, "z" : 42 }';
+ assert(res == expected, `actual: ${res} != expected: ${expected} in **TEST** field order 1`);
+
+ // Having a filter on a path that is also being projected, should not affect the order.
+ res = tojson(coll_filters.find({z: 42}, {_a: 1, a: 1, z: 1}).toArray()[0]);
+ expected = '{ "_id" : 42, "_a" : 42, "a" : 42, "z" : 42 }';
+ assert(res == expected, `actual: ${res} != expected: ${expected} in **TEST** field order 2`);
+
+ // Omitting the "_id" field should not affect the order.
+ res = tojson(coll_filters.find({a: 42}, {_id: 0, _a: 1, z: 1}).toArray()[0]);
+ expected = '{ "_a" : 42, "z" : 42 }';
+ assert(res == expected, `actual: ${res} != expected: ${expected} in **TEST** field order 3`);
+})();
+
+// Sanity test that per-column filtering is meeting the efficiency expectations.
+(function testPerPathFilters_ExecutionStats() {
+ coll_filters.drop();
+
+ const docsCount = 1000;
+ const expectedToMatchCount = 10;
+ for (let i = 0; i < docsCount; i++) {
+ coll_filters.insert({_id: i, x: i % 2, y: i % (docsCount / expectedToMatchCount)});
+ }
+ assert.commandWorked(coll_filters.createIndex({"$**": "columnstore"}));
+
+ assert.eq(coll_filters.find({x: 1, y: 1}, {_id: 1, x: 1}).toArray().length,
+ expectedToMatchCount);
+ const explain = coll_filters.find({x: 1, y: 1}, {_id: 1, x: 1}).explain("executionStats");
+
+ const columnScanStages = getSbePlanStages(explain, "COLUMN_SCAN");
+ assert.gt(columnScanStages.length, 0, `Could not find 'COLUMN_SCAN' stage: ${tojson(explain)}`);
+
+ if (columnScanStages.length > 1) {
+ // The test is being run in sharded environment and the state per shard would depend on
+ // how the data gets distributed.
+ jsTestLog("Skipping testPerPathFilters_ExecutionStats in sharded environment.");
+ return;
+ }
+
+ const columns = columnScanStages[0].columns;
+ assertArrayEq({
+ actual: columnScanStages[0].paths,
+ expected: ["_id", "x", "y"],
+ extraErrorMsg: 'Paths used by column scan stage'
+ });
+ assert.eq(Object.keys(columns).length, 3, `Used columns ${columns}`);
+
+ const _id = columns._id;
+ const x = columns.x;
+ const y = columns.y;
+
+ assert(_id.usedInOutput, "_id column should be used in output");
+ assert(x.usedInOutput, "x column should be used in output");
+ assert(!y.usedInOutput, "y column should be used only for filtering");
+
+ // When there are no missing fields, the number of "next" calls in zig-zag search algorithm is
+ // equal to the number of documents docsCount (NB: in non-filtered search the number of "next"
+ // calls is close to k*docsCount where k is the number of paths).
+ assert.eq(_id.numNexts, 0, 'Should not iterate on non-filtered column');
+ assert.eq(x.numNexts + y.numNexts, docsCount, 'Total count of "next" calls');
+
+ // The columns with higher selectivity should be preferred by the zig-zag search for driving the
+ // "next" calls. Due to the regularity of data in this test (if "y" matched "x" would match as
+ // well), after the first match "y" column completely takes over.
+ assert.gt(y.numNexts, 0.9 * docsCount, 'high selectivity should drive "next" calls');
+
+ // We seek into each column to set up the cursors, after that seeks into _id should only happen
+ // on full match, and seeks into x or y should only happen on partial matches.
+ assert.eq(_id.numSeeks, 1 + expectedToMatchCount, "Seeks into the _id column");
+ assert.lt(x.numSeeks + y.numSeeks,
+ 2 * expectedToMatchCount,
+ "Number of seeks in filtered columns should be small");
+})();
+})();
diff --git a/jstests/noPassthroughWithMongod/column_scan_explain.js b/jstests/noPassthroughWithMongod/column_scan_explain.js
index e137d714514..1f2a60e8253 100644
--- a/jstests/noPassthroughWithMongod/column_scan_explain.js
+++ b/jstests/noPassthroughWithMongod/column_scan_explain.js
@@ -4,6 +4,7 @@
(function() {
"use strict";
+load("jstests/aggregation/extras/utils.js"); // For assertArrayEq
load("jstests/libs/analyze_plan.js"); // For planHasStage.
load("jstests/libs/sbe_util.js"); // For checkSBEEnabled.
load("jstests/libs/sbe_explain_helpers.js"); // For getSbePlanStages.
@@ -34,35 +35,54 @@ assert.commandWorked(coll.insertMany(docs));
const columnScanStages = getSbePlanStages(explain, "COLUMN_SCAN");
assert.eq(columnScanStages.length, 1, `Could not find 'COLUMN_SCAN' stage: ${tojson(explain)}`);
+ const columnScan = columnScanStages[0];
+
+ assertArrayEq({
+ actual: columnScan.paths,
+ expected: ["_id", "x", "y.a"],
+ extraErrorMsg: 'Paths used by column scan stage'
+ });
// Verifying column fields.
- const columns = columnScanStages[0].columns;
- assert.eq(Object.keys(columns).length, 4, 'Number of columns should be 4.');
+ const columns = columnScan.columns;
+ assert.eq(
+ Object.keys(columns).length, 4, `Should access 4 columns but accessed: ${tojson(columns)}`);
+
+ // We seek into each column once, when setting up the cursors. The dense column is the first to
+ // hit EOF after iterating over all documents so other columns iterate at least one time less.
const expectedColumns = {
- "<<RowId Column>>": {"numNexts": 5, "numSeeks": 1, "usedInOutput": false},
- "_id": {"numNexts": 5, "numSeeks": 1, "usedInOutput": true},
- "x": {"numNexts": 5, "numSeeks": 1, "usedInOutput": true},
+ "<<RowId Column>>": {"numNexts": docs.length, "numSeeks": 1, "usedInOutput": false},
+ "_id": {"numNexts": docs.length - 1, "numSeeks": 1, "usedInOutput": true},
+ "x": {"numNexts": docs.length - 1, "numSeeks": 1, "usedInOutput": true},
"y.a": {"numNexts": 1, "numSeeks": 1, "usedInOutput": true}
};
for (const [columnName, expectedObj] of Object.entries(expectedColumns)) {
assert.eq(sortDoc(columns[columnName]),
sortDoc(expectedObj),
- `Mismatching entry for column ${columnName}`);
+ `Mismatching entry for column ${tojson(columnName)}`);
}
// Verifying parent column fields.
- const parentColumns = columnScanStages[0].parentColumns;
- assert.eq(Object.keys(parentColumns).length, 1, 'Number of parent columns should be 1.');
+ const parentColumns = columnScan.parentColumns;
+ assert.eq(Object.keys(parentColumns).length,
+ 1,
+ `Should access 1 parent column but accessed: ${tojson(parentColumns)}`);
// Expecting 4 lookups on the "y" parent column for the 3 docs which didn't have a "y.a"
- // value and 1 for an unsuccessful call to seek.
+ // value and 1 for an unsuccessful call to seek. We should not iterate over parent columns.
assert.eq(sortDoc(parentColumns.y),
{"numNexts": 0, "numSeeks": 4},
- 'Mismatching entry for parent column \'y\'');
-
- assert.eq(explain.executionStats.totalKeysExamined, 24, `Mismatch in totalKeysExamined.`);
- assert.eq(
- columnScanStages[0].numRowStoreFetches, 0, 'Number of row store fetches should be 0.');
- assert.eq(columnScanStages[0].nReturned, 5, 'Number returned should be 5.');
+ 'Mismatching entry for parent column "y"');
+
+ // 'totalKeysExamined' should be equal to the sum of "next" and "seek" calls across all columns.
+ assert.eq(explain.executionStats.totalKeysExamined,
+ columns["<<RowId Column>>"].numNexts + columns["<<RowId Column>>"].numSeeks +
+ columns["_id"].numNexts + columns["_id"].numSeeks + columns["x"].numNexts +
+ columns["x"].numSeeks + columns["y.a"].numNexts + columns["y.a"].numSeeks +
+ parentColumns["y"].numNexts + parentColumns["y"].numSeeks,
+ `Mismatch in totalKeysExamined.`);
+
+ assert.eq(columnScan.numRowStoreFetches, 0, 'Mismatch in numRowStoreFetches');
+ assert.eq(columnScan.nReturned, docs.length, 'nReturned: should return all docs');
}());
// Test the explain output for a scan on a nonexistent field.
@@ -71,29 +91,42 @@ assert.commandWorked(coll.insertMany(docs));
const columnScanStages = getSbePlanStages(explain, "COLUMN_SCAN");
assert.eq(columnScanStages.length, 1, `Could not find 'COLUMN_SCAN' stage: ${tojson(explain)}`);
+ const columnScan = columnScanStages[0];
+
+ assertArrayEq({
+ actual: columnScan.paths,
+ expected: ["_id", "z"],
+ extraErrorMsg: 'Paths used by column scan stage'
+ });
// Verifying column fields.
- const columns = columnScanStages[0].columns;
- assert.eq(Object.keys(columns).length, 3, 'Number of columns should be 3.');
+ const columns = columnScan.columns;
+ assert.eq(
+ Object.keys(columns).length, 3, `Should access 3 columns but accessed: ${tojson(columns)}`);
const expectedColumns = {
- "<<RowId Column>>": {"numNexts": 5, "numSeeks": 1, "usedInOutput": false},
- "_id": {"numNexts": 5, "numSeeks": 1, "usedInOutput": true},
+ "<<RowId Column>>": {"numNexts": docs.length, "numSeeks": 1, "usedInOutput": false},
+ "_id": {"numNexts": docs.length - 1, "numSeeks": 1, "usedInOutput": true},
"z": {"numNexts": 0, "numSeeks": 1, "usedInOutput": true},
};
for (const [columnName, expectedObj] of Object.entries(expectedColumns)) {
assert.eq(sortDoc(columns[columnName]),
sortDoc(expectedObj),
- `Mismatching entry for column ${columnName}`);
+ `Mismatching entry for column "${columnName}"`);
}
// Verifying parent column fields.
- const parentColumns = columnScanStages[0].parentColumns;
- assert.eq(parentColumns, {});
-
- assert.eq(explain.executionStats.totalKeysExamined, 13, `Mismatch in totalKeysExamined.`);
- assert.eq(
- columnScanStages[0].numRowStoreFetches, 0, 'Number of row store fetches should be 0.');
- assert.eq(columnScanStages[0].nReturned, 5, 'Number returned should be 5.');
+ const parentColumns = columnScan.parentColumns;
+ assert.eq(parentColumns, {}, "Should not access parent columns");
+
+ // 'totalKeysExamined' should be equal to the sum of "next" and "seek" calls across all columns.
+ assert.eq(explain.executionStats.totalKeysExamined,
+ columns["<<RowId Column>>"].numNexts + columns["<<RowId Column>>"].numSeeks +
+ columns["_id"].numNexts + columns["_id"].numSeeks + columns["z"].numNexts +
+ columns["z"].numSeeks,
+ `Mismatch in totalKeysExamined.`);
+
+ assert.eq(columnScan.numRowStoreFetches, 0, 'Mismatch in numRowStoreFetches');
+ assert.eq(columnScan.nReturned, docs.length, 'nReturned: should return all docs');
}());
// Test the explain output for a scan on a 2-level nested field.
@@ -102,14 +135,22 @@ assert.commandWorked(coll.insertMany(docs));
const columnScanStages = getSbePlanStages(explain, "COLUMN_SCAN");
assert.eq(columnScanStages.length, 1, `Could not find 'COLUMN_SCAN' stage: ${tojson(explain)}`);
+ const columnScan = columnScanStages[0];
+
+ assertArrayEq({
+ actual: columnScan.paths,
+ expected: ["_id", "y.b.c"],
+ extraErrorMsg: 'Paths used by column scan stage'
+ });
// Verifying column fields.
- const columns = columnScanStages[0].columns;
- assert.eq(Object.keys(columns).length, 3, 'Number of columns should be 3.');
+ const columns = columnScan.columns;
+ assert.eq(
+ Object.keys(columns).length, 3, `Should access 3 columns but accessed: ${tojson(columns)}`);
const expectedColumns = {
- "<<RowId Column>>": {"numNexts": 5, "numSeeks": 1, "usedInOutput": false},
- "_id": {"numNexts": 5, "numSeeks": 1, "usedInOutput": true},
- "y.b.c": {"numNexts": 2, "numSeeks": 1, "usedInOutput": true},
+ "<<RowId Column>>": {"numNexts": docs.length, "numSeeks": 1, "usedInOutput": false},
+ "_id": {"numNexts": docs.length - 1, "numSeeks": 1, "usedInOutput": true},
+ "y.b.c": {"numNexts": 1, "numSeeks": 1, "usedInOutput": true},
};
for (const [columnName, expectedObj] of Object.entries(expectedColumns)) {
assert.eq(sortDoc(columns[columnName]),
@@ -118,22 +159,57 @@ assert.commandWorked(coll.insertMany(docs));
}
// Verifying parent column fields.
- const parentColumns = columnScanStages[0].parentColumns;
- assert.eq(Object.keys(parentColumns).length, 2, 'Number of parent columns should be 2.');
+ const parentColumns = columnScan.parentColumns;
+ assert.eq(Object.keys(parentColumns).length,
+ 2,
+ `Should access 2 parent columns but accessed: ${tojson(parentColumns)}`);
// Expecting 3 lookups on the "y" parent column for the 2 docs which didn't have a "y.b"
- // value and 1 unsuccessful call to seek.
+ // value and 1 unsuccessful call to seek. We should not iterate over parent columns.
assert.eq(sortDoc(parentColumns.y),
{"numNexts": 0, "numSeeks": 3},
- 'Mismatching entry for parent column \'y\'');
+ 'Mismatching entry for parent column "y"');
// Expecting 4 lookups on the "y.b" parent column for the 3 docs that didn't have a "y.b.c"
// value and 1 unsuccessful call to seek.
assert.eq(sortDoc(parentColumns['y.b']),
{"numNexts": 0, "numSeeks": 4},
- 'Mismatching entry for parent column \'y.b\'');
+ 'Mismatching entry for parent column "y.b"');
+
+ // 'totalKeysExamined' should be equal to the sum of "next" and "seek" calls across all columns.
+ assert.eq(explain.executionStats.totalKeysExamined,
+ columns["<<RowId Column>>"].numNexts + columns["<<RowId Column>>"].numSeeks +
+ columns["_id"].numNexts + columns["_id"].numSeeks + columns["y.b.c"].numNexts +
+ columns["y.b.c"].numSeeks + parentColumns["y.b"].numNexts +
+ parentColumns["y.b"].numSeeks + parentColumns["y"].numNexts +
+ parentColumns["y"].numSeeks,
+ `Mismatch in totalKeysExamined.`);
+
+ assert.eq(columnScan.numRowStoreFetches, 0, 'Mismatch in numRowStoreFetches');
+ assert.eq(columnScan.nReturned, docs.length, 'nReturned: should return all docs');
+}());
- assert.eq(explain.executionStats.totalKeysExamined, 22, `Mismatch in totalKeysExamined.`);
- assert.eq(
- columnScanStages[0].numRowStoreFetches, 0, 'Number of row store fetches should be 0.');
- assert.eq(columnScanStages[0].nReturned, 5, 'Number returned should be 5.');
+// Test fallback to the row store.
+(function testWithFallbackToRowstore() {
+ const coll_rowstore = db.column_scan_explain_rowstore;
+ coll_rowstore.drop();
+ assert.commandWorked(coll_rowstore.createIndex({"$**": "columnstore"}));
+
+ const docs_rowstore = [
+ {_id: 0, x: {y: 42, z: 0}},
+ {_id: 1, x: {y: {z: 0}}}, // fallback
+ {_id: 2, x: [{y: 42}, 0]},
+ {_id: 3, x: [{y: 42}, {z: 0}]},
+ {_id: 4, x: [{y: [42, 43]}, {z: 0}]},
+ {_id: 5, x: [{y: [42, {z: 0}]}, {z: 0}]}, // fallback
+ {_id: 6, x: 42},
+ ];
+ coll_rowstore.insert(docs_rowstore);
+ const explain = coll_rowstore.find({}, {_id: 0, "x.y": 1}).explain("executionStats");
+
+ const columnScanStages = getSbePlanStages(explain, "COLUMN_SCAN");
+ assert.eq(columnScanStages.length, 1, `Could not find 'COLUMN_SCAN' stage: ${tojson(explain)}`);
+ const columnScan = columnScanStages[0];
+
+ assert.eq(columnScan.numRowStoreFetches, 2, 'Mismatch in numRowStoreFetches');
+ assert.eq(columnScan.nReturned, docs_rowstore.length, 'nReturned: should return all docs');
}());
}());
diff --git a/src/mongo/db/exec/sbe/stages/column_scan.cpp b/src/mongo/db/exec/sbe/stages/column_scan.cpp
index 91a6f02b43b..addf5fe25e3 100644
--- a/src/mongo/db/exec/sbe/stages/column_scan.cpp
+++ b/src/mongo/db/exec/sbe/stages/column_scan.cpp
@@ -52,10 +52,12 @@ TranslatedCell translateCell(PathView path, const SplitCellView& splitCellView)
ColumnScanStage::ColumnScanStage(UUID collectionUuid,
StringData columnIndexName,
std::vector<std::string> paths,
+ std::vector<bool> includeInOutput,
boost::optional<value::SlotId> recordIdSlot,
boost::optional<value::SlotId> reconstuctedRecordSlot,
value::SlotId rowStoreSlot,
std::unique_ptr<EExpression> rowStoreExpr,
+ std::vector<PathFilter> filteredPaths,
PlanYieldPolicy* yieldPolicy,
PlanNodeId nodeId,
bool participateInTrialRunTracking)
@@ -63,26 +65,39 @@ ColumnScanStage::ColumnScanStage(UUID collectionUuid,
_collUuid(collectionUuid),
_columnIndexName(columnIndexName),
_paths(std::move(paths)),
+ _includeInOutput(std::move(includeInOutput)),
_recordIdSlot(recordIdSlot),
_reconstructedRecordSlot(reconstuctedRecordSlot),
_rowStoreSlot(rowStoreSlot),
- _rowStoreExpr(std::move(rowStoreExpr)) {}
+ _rowStoreExpr(std::move(rowStoreExpr)),
+ _filteredPaths(std::move(filteredPaths)) {
+ invariant(_filteredPaths.size() <= _paths.size(),
+ "Filtered paths should be a subset of all paths");
+ invariant(_paths.size() == _includeInOutput.size());
+}
std::unique_ptr<PlanStage> ColumnScanStage::clone() const {
- std::vector<std::unique_ptr<EExpression>> pathExprs;
+ std::vector<PathFilter> filteredPaths;
+ for (const auto& fp : _filteredPaths) {
+ filteredPaths.emplace_back(fp.pathIndex, fp.filterExpr->clone(), fp.inputSlotId);
+ }
return std::make_unique<ColumnScanStage>(_collUuid,
_columnIndexName,
_paths,
+ _includeInOutput,
_recordIdSlot,
_reconstructedRecordSlot,
_rowStoreSlot,
_rowStoreExpr ? _rowStoreExpr->clone() : nullptr,
+ std::move(filteredPaths),
_yieldPolicy,
_commonStats.nodeId,
_participateInTrialRunTracking);
}
void ColumnScanStage::prepare(CompileCtx& ctx) {
+ ctx.root = this;
+
if (_reconstructedRecordSlot) {
_reconstructedRecordAccessor = std::make_unique<value::OwnedValueAccessor>();
}
@@ -92,10 +107,19 @@ void ColumnScanStage::prepare(CompileCtx& ctx) {
_rowStoreAccessor = std::make_unique<value::OwnedValueAccessor>();
if (_rowStoreExpr) {
- ctx.root = this;
_rowStoreExprCode = _rowStoreExpr->compile(ctx);
}
+ _filterInputAccessors.resize(_filteredPaths.size());
+ for (size_t idx = 0; idx < _filterInputAccessors.size(); ++idx) {
+ auto slot = _filteredPaths[idx].inputSlotId;
+ auto [it, inserted] = _filterInputAccessorsMap.emplace(slot, &_filterInputAccessors[idx]);
+ uassert(6610212, str::stream() << "duplicate slot: " << slot, inserted);
+ }
+ for (auto& filteredPath : _filteredPaths) {
+ _filterExprsCode.emplace_back(filteredPath.filterExpr->compile(ctx));
+ }
+
tassert(6610200, "'_coll' should not be initialized prior to 'acquireCollection()'", !_coll);
std::tie(_coll, _collName, _catalogEpoch) = acquireCollection(_opCtx, _collUuid);
@@ -120,12 +144,23 @@ value::SlotAccessor* ColumnScanStage::getAccessor(CompileCtx& ctx, value::SlotId
if (_rowStoreSlot == slot) {
return _rowStoreAccessor.get();
}
+
+ if (auto it = _filterInputAccessorsMap.find(slot); it != _filterInputAccessorsMap.end()) {
+ return it->second;
+ }
+
return ctx.getAccessor(slot);
}
void ColumnScanStage::doSaveState(bool relinquishCursor) {
+ if (_denseColumnCursor) {
+ _denseColumnCursor->makeOwned();
+ _denseColumnCursor->cursor().save();
+ }
+
for (auto& cursor : _columnCursors) {
cursor.makeOwned();
+ cursor.cursor().save();
}
if (_rowStoreCursor && relinquishCursor) {
@@ -136,9 +171,6 @@ void ColumnScanStage::doSaveState(bool relinquishCursor) {
_rowStoreCursor->setSaveStorageCursorOnDetachFromOperationContext(!relinquishCursor);
}
- for (auto& cursor : _columnCursors) {
- cursor.cursor().save();
- }
for (auto& [path, cursor] : _parentPathCursors) {
cursor->cursor().saveUnpositioned();
}
@@ -170,6 +202,9 @@ void ColumnScanStage::doRestoreState(bool relinquishCursor) {
}
}
+ if (_denseColumnCursor) {
+ _denseColumnCursor->cursor().restore();
+ }
for (auto& cursor : _columnCursors) {
cursor.cursor().restore();
}
@@ -182,6 +217,9 @@ void ColumnScanStage::doDetachFromOperationContext() {
if (_rowStoreCursor) {
_rowStoreCursor->detachFromOperationContext();
}
+ if (_denseColumnCursor) {
+ _denseColumnCursor->cursor().detachFromOperationContext();
+ }
for (auto& cursor : _columnCursors) {
cursor.cursor().detachFromOperationContext();
}
@@ -194,6 +232,9 @@ void ColumnScanStage::doAttachToOperationContext(OperationContext* opCtx) {
if (_rowStoreCursor) {
_rowStoreCursor->reattachToOperationContext(opCtx);
}
+ if (_denseColumnCursor) {
+ _denseColumnCursor->cursor().reattachToOperationContext(opCtx);
+ }
for (auto& cursor : _columnCursors) {
cursor.cursor().reattachToOperationContext(opCtx);
}
@@ -247,25 +288,22 @@ void ColumnScanStage::open(bool reOpen) {
auto iam = static_cast<ColumnStoreAccessMethod*>(entry->accessMethod());
- // Eventually we can not include this column for the cases where a known dense column (_id)
- // is being read anyway.
-
- // Add a stats struct that will be shared by overall ColumnScanStats and individual
- // cursor.
- _columnCursors.emplace_back(
- iam->storage()->newCursor(_opCtx, ColumnStore::kRowIdPath),
- _specificStats.cursorStats.emplace_back(ColumnStore::kRowIdPath.toString(), false));
-
- for (auto&& path : _paths) {
- _columnCursors.emplace_back(iam->storage()->newCursor(_opCtx, path),
- _specificStats.cursorStats.emplace_back(path, true));
+ // The dense _recordId column is only needed if there are no filters (TODO SERVER-68377:
+ // eventually we can avoid including this column for the cases where a known dense column
+ // such as _id is being read anyway).
+ if (_filteredPaths.empty()) {
+ _denseColumnCursor = std::make_unique<ColumnCursor>(
+ iam->storage()->newCursor(_opCtx, ColumnStore::kRowIdPath),
+ _specificStats.cursorStats.emplace_back(ColumnStore::kRowIdPath.toString(),
+ false /*includeInOutput*/));
+ }
+ for (size_t i = 0; i < _paths.size(); i++) {
+ _columnCursors.emplace_back(
+ iam->storage()->newCursor(_opCtx, _paths[i]),
+ _specificStats.cursorStats.emplace_back(_paths[i], _includeInOutput[i]));
}
}
-
- for (auto& columnCursor : _columnCursors) {
- columnCursor.seekAtOrPast(RecordId());
- }
-
+ _recordId = RecordId();
_open = true;
}
@@ -323,44 +361,284 @@ void ColumnScanStage::readParentsIntoObj(StringData path,
}
}
-PlanState ColumnScanStage::getNext() {
- auto optTimer(getOptTimer(_opCtx));
+// The result of the filter predicate will be the same regardless of sparseness or sub objects,
+// therefore we don't look at the parents and don't consult the row store.
+// (TODO SERVER-68792) Refactor the iteration over values into its own type.
+bool ColumnScanStage::checkFilter(CellView cell, size_t filterIndex, const PathValue& path) {
+ auto splitCellView = SplitCellView::parse(cell);
+ auto translatedCell = translateCell(path, splitCellView);
+
+ if (!translatedCell.moreValues()) {
+ return false;
+ }
+
+ if (translatedCell.arrInfo.empty()) {
+ // Have a single non-nested value -- evaluate the filter on it.
+ // (TODO SERVER-68792) Could we avoid copying by using a non-owning accessor? Same question
+ // for other locations in this function when the predicate is evaluated immediately after
+ // setting the slot.
+ auto [tag, val] = translatedCell.nextValue();
+ auto [tagCopy, valCopy] = sbe::value::copyValue(tag, val);
+ _filterInputAccessors[filterIndex].reset(true /*owned*/, tagCopy, valCopy);
+ return _bytecode.runPredicate(_filterExprsCode[filterIndex].get());
+ } else {
+ ArrInfoReader arrInfoReader{translatedCell.arrInfo};
+ int depth = 0;
+ // (TODO SERVER-68792) Would using a non-heap allocated structure here improve perf?
+ std::stack<bool> inArray;
+ while (arrInfoReader.moreExplicitComponents()) {
+ switch (arrInfoReader.takeNextChar()) {
+ case '{': {
+ // We consider as nested only the arrays that are elements of other arrays. When
+ // there is an array of objects and some of the fields of these objects are
+ // arrays, the latter aren't nested.
+ inArray.push(false);
+ break;
+ }
+ case '[': {
+ // A '[' can be followed by a number if there are objects in the array, that
+ // should be retrieved from other paths when reconstructing the record. We can
+ // ignore them as they don't contribute to the values.
+ (void)arrInfoReader.takeNumber();
+ if (!inArray.empty() && inArray.top()) {
+ depth++;
+ }
+ inArray.push(true);
+ break;
+ }
+ case '+': {
+ // Indicates elements in arrays that are objects that don't have the path. These
+ // objects don't contribute to the cell's values, so we can ignore them.
+ (void)arrInfoReader.takeNumber();
+ break;
+ }
+ case '|': {
+ auto repeats = arrInfoReader.takeNumber();
+ for (size_t i = 0; i < repeats + 1; i++) {
+ auto [tag, val] = translatedCell.nextValue();
+ if (depth == 0) {
+ auto [tagCopy, valCopy] = sbe::value::copyValue(tag, val);
+ _filterInputAccessors[filterIndex].reset(
+ true /*owned*/, tagCopy, valCopy);
+ if (_bytecode.runPredicate(_filterExprsCode[filterIndex].get())) {
+ return true;
+ }
+ }
+ }
+ break;
+ }
+ case 'o': {
+ // Indicates the start of a nested object inside the cell. We don't need to
+ // track this info because the nested objects don't contribute to the values in
+ // the cell.
+ (void)arrInfoReader.takeNumber();
+ break;
+ }
+ case ']': {
+ invariant(inArray.size() > 0 && inArray.top());
+ inArray.pop();
+ if (inArray.size() > 0 && inArray.top()) {
+ invariant(depth > 0);
+ depth--;
+ }
+
+ // Closing an array implicitly closes all objects on the path between it and the
+ // previous array.
+ while (inArray.size() > 0 && !inArray.top()) {
+ inArray.pop();
+ }
+ }
+ }
+ }
+ if (depth == 0) {
+ // For the remaining values the depth isn't going to change so we don't need to advance
+ // the value iterator if the values are too deep.
+ while (translatedCell.moreValues()) {
+ auto [tag, val] = translatedCell.nextValue();
+ auto [tagCopy, valCopy] = sbe::value::copyValue(tag, val);
+ _filterInputAccessors[filterIndex].reset(true /*owned*/, tagCopy, valCopy);
+ if (_bytecode.runPredicate(_filterExprsCode[filterIndex].get())) {
+ return true;
+ }
+ }
+ }
+ }
- // We are about to call next() on a storage cursor so do not bother saving our internal state in
- // case it yields as the state will be completely overwritten after the next() call.
- disableSlotAccess();
+ // None of the values matched the filter.
+ return false;
+}
- checkForInterrupt(_opCtx);
+RecordId ColumnScanStage::findNextRecordIdForFilteredColumns() {
+ invariant(!_filteredPaths.empty());
- // Find minimum record ID of all column cursors.
- _recordId = RecordId();
- for (auto& cursor : _columnCursors) {
+ // Initialize 'targetRecordId' from the filtered cursor we are currently iterating.
+ RecordId targetRecordId;
+ {
+ auto& cursor = cursorForFilteredPath(_filteredPaths[_nextUnmatched]);
+ if (!cursor.lastCell()) {
+ return RecordId(); // Have exhausted one of the columns.
+ }
+ targetRecordId = cursor.lastCell()->rid;
+ }
+
+ size_t matchedSinceAdvance = 0;
+ // The loop will terminate because when 'matchedSinceAdvance' is reset the 'targetRecordId'
+ // is guaranteed to advance. It will do no more than N 'next()' calls across all cursors,
+ // where N is the number of records (might do fewer, if for some columns there are missing
+ // values). The number of seeks and filter checks depends on the selectivity of the filters.
+ while (matchedSinceAdvance < _filteredPaths.size()) {
+ auto& cursor = cursorForFilteredPath(_filteredPaths[_nextUnmatched]);
+
+ // Avoid seeking into the column that we started with.
auto& result = cursor.lastCell();
- if (result && (_recordId.isNull() || result->rid < _recordId)) {
- _recordId = result->rid;
+ if (result && result->rid < targetRecordId) {
+ result = cursor.seekAtOrPast(targetRecordId);
+ }
+ if (!result) {
+ return RecordId();
+ }
+
+ if (result->rid > targetRecordId) {
+ // The column skipped ahead - have to restart at this new record ID.
+ matchedSinceAdvance = 0;
+ targetRecordId = result->rid;
+ }
+
+ if (!checkFilter(result->value, _nextUnmatched, cursor.path())) {
+ // Advance the column until find a match and restart at this new record ID.
+ do {
+ result = cursor.next();
+ if (!result) {
+ return RecordId();
+ }
+ } while (!checkFilter(result->value, _nextUnmatched, cursor.path()));
+ matchedSinceAdvance = 0;
+ invariant(result->rid > targetRecordId);
+ targetRecordId = result->rid;
+ }
+ ++matchedSinceAdvance;
+ _nextUnmatched = (_nextUnmatched + 1) % _filteredPaths.size();
+ }
+ invariant(!targetRecordId.isNull());
+
+    // Ensure that _all_ cursors have caught up with the filtered record ID. Some of the cursors
+ // might skip ahead, which would mean the column is missing a value for this 'recordId'.
+ for (auto& cursor : _columnCursors) {
+ const auto& result = cursor.lastCell();
+ if (result && result->rid < targetRecordId) {
+ cursor.seekAtOrPast(targetRecordId);
+ }
+ }
+
+ return targetRecordId;
+}
+
+RecordId ColumnScanStage::findMinRecordId() const {
+ if (_denseColumnCursor) {
+ // The cursor of the dense column cannot be ahead of any other, so it's always at the
+ // minimum.
+ auto& result = _denseColumnCursor->lastCell();
+ if (!result) {
+ return RecordId();
+ }
+ return result->rid;
+ }
+
+ auto recordId = RecordId();
+ for (const auto& cursor : _columnCursors) {
+ const auto& result = cursor.lastCell();
+ if (result && (recordId.isNull() || result->rid < recordId)) {
+ recordId = result->rid;
+ }
+ }
+ return recordId;
+}
+
+RecordId ColumnScanStage::advanceCursors() {
+ if (_recordId.isNull()) {
+ if (_denseColumnCursor) {
+ _denseColumnCursor->seekAtOrPast(RecordId());
+ }
+ for (auto& columnCursor : _columnCursors) {
+ columnCursor.seekAtOrPast(RecordId());
+ }
+ return _filteredPaths.empty() ? findMinRecordId() : findNextRecordIdForFilteredColumns();
+ }
+
+ if (!_filteredPaths.empty()) {
+ // Nudge forward the "active" filtered cursor. The remaining ones will be synchronized
+ // by 'findNextRecordIdForFilteredColumns()'.
+ cursorForFilteredPath(_filteredPaths[_nextUnmatched]).next();
+ return findNextRecordIdForFilteredColumns();
+ }
+
+ // In absence of filters all cursors iterate forward on their own. Some of the cursors might
+ // be ahead of the current '_recordId' because there are gaps in their columns - don't move
+ // them but only those that are at '_recordId' and therefore their values have been
+ // consumed. While at it, compute the new min record ID.
+ auto nextRecordId = RecordId();
+ if (_denseColumnCursor) {
+ invariant(_denseColumnCursor->lastCell()->rid == _recordId,
+ "Dense cursor should always be at the current minimum record ID");
+ auto cell = _denseColumnCursor->next();
+ if (!cell) {
+ return RecordId();
+ }
+ nextRecordId = cell->rid;
+ }
+ for (auto& cursor : _columnCursors) {
+ auto& cell = cursor.lastCell();
+ if (!cell) {
+ continue; // this column has been exhausted
+ }
+ if (cell->rid == _recordId) {
+ cell = cursor.next();
+ }
+ if (cell && (nextRecordId.isNull() || cell->rid < nextRecordId)) {
+ invariant(!_denseColumnCursor, "Dense cursor should have the next lowest record ID");
+ nextRecordId = cell->rid;
}
}
+ return nextRecordId;
+}
+
+PlanState ColumnScanStage::getNext() {
+ auto optTimer(getOptTimer(_opCtx));
+
+ // We are about to call next() on a storage cursor so do not bother saving our internal
+ // state in case it yields as the state will be completely overwritten after the next()
+ // call.
+ disableSlotAccess();
+ checkForInterrupt(_opCtx);
+
+ _recordId = advanceCursors();
if (_recordId.isNull()) {
return trackPlanState(PlanState::IS_EOF);
}
+ bool useRowStore = false;
+
auto [outTag, outVal] = value::makeNewObject();
auto& outObj = *value::bitcastTo<value::Object*>(outVal);
value::ValueGuard materializedObjGuard(outTag, outVal);
StringDataSet pathsRead;
- bool useRowStore = false;
for (size_t i = 0; i < _columnCursors.size(); ++i) {
- auto& lastCell = _columnCursors[i].lastCell();
- const auto& path = _columnCursors[i].path();
+ if (!_includeInOutput[i]) {
+ continue;
+ }
+ auto& cursor = _columnCursors[i];
+ auto& lastCell = cursor.lastCell();
boost::optional<SplitCellView> splitCellView;
if (lastCell && lastCell->rid == _recordId) {
splitCellView = SplitCellView::parse(lastCell->value);
}
- if (_columnCursors[i].includeInOutput() && !useRowStore) {
+ const auto& path = cursor.path();
+
+ if (!useRowStore) {
if (splitCellView &&
(splitCellView->hasSubPaths || splitCellView->hasDuplicateFields)) {
useRowStore = true;
@@ -376,16 +654,12 @@ PlanState ColumnScanStage::getNext() {
}
}
}
-
- if (splitCellView) {
- _columnCursors[i].next();
- }
}
if (useRowStore) {
++_specificStats.numRowStoreFetches;
- // TODO: In some cases we can avoid calling seek() on the row store cursor, and instead do
- // a next() which should be much cheaper.
+ // TODO: In some cases we can avoid calling seek() on the row store cursor, and instead
+ // do a next() which should be much cheaper.
auto record = _rowStoreCursor->seekExact(_recordId);
// If there's no record, the index is out of sync with the row store.
@@ -396,8 +670,8 @@ PlanState ColumnScanStage::getNext() {
value::bitcastFrom<const char*>(record->data.data()));
if (_reconstructedRecordAccessor) {
- // TODO: in absence of record expression set the reconstructed record to be the same as
- // the record, retrieved from the row store.
+ // TODO: in absence of record expression set the reconstructed record to be the same
+ // as the record, retrieved from the row store.
invariant(_rowStoreExpr);
auto [owned, tag, val] = _bytecode.run(_rowStoreExprCode.get());
_reconstructedRecordAccessor->reset(owned, tag, val);
@@ -416,13 +690,14 @@ PlanState ColumnScanStage::getNext() {
if (_tracker && _tracker->trackProgress<TrialRunTracker::kNumReads>(1)) {
// If we're collecting execution stats during multi-planning and reached the end of the
- // trial period because we've performed enough physical reads, bail out from the trial run
- // by raising a special exception to signal a runtime planner that this candidate plan has
- // completed its trial run early. Note that a trial period is executed only once per a
- // PlanStage tree, and once completed never run again on the same tree.
+ // trial period because we've performed enough physical reads, bail out from the trial
+ // run by raising a special exception to signal a runtime planner that this candidate
+ // plan has completed its trial run early. Note that a trial period is executed only
+ // once per a PlanStage tree, and once completed never run again on the same tree.
_tracker = nullptr;
uasserted(ErrorCodes::QueryTrialRunCompleted, "Trial run early exit in scan");
}
+
return trackPlanState(PlanState::ADVANCED);
}
@@ -507,6 +782,31 @@ std::vector<DebugPrinter::Block> ColumnScanStage::debugPrint() const {
}
ret.emplace_back(DebugPrinter::Block("`]"));
+ // Print out per-path filters (if any).
+ if (!_filteredPaths.empty()) {
+ ret.emplace_back(DebugPrinter::Block("[`"));
+ for (size_t idx = 0; idx < _filteredPaths.size(); ++idx) {
+ if (idx) {
+ ret.emplace_back(DebugPrinter::Block("`;"));
+ }
+
+ ret.emplace_back(str::stream()
+ << "\"" << _paths[_filteredPaths[idx].pathIndex] << "\": ");
+ DebugPrinter::addIdentifier(ret, _filteredPaths[idx].inputSlotId);
+ ret.emplace_back(DebugPrinter::Block("`,"));
+ DebugPrinter::addBlocks(ret, _filteredPaths[idx].filterExpr->debugPrint());
+ }
+ ret.emplace_back(DebugPrinter::Block("`]"));
+ }
+
+ if (_rowStoreExpr) {
+ ret.emplace_back(DebugPrinter::Block("[`"));
+ DebugPrinter::addIdentifier(ret, _rowStoreSlot);
+ ret.emplace_back(DebugPrinter::Block("`,"));
+ DebugPrinter::addBlocks(ret, _rowStoreExpr->debugPrint());
+ ret.emplace_back(DebugPrinter::Block("`]"));
+ }
+
ret.emplace_back("@\"`");
DebugPrinter::addIdentifier(ret, _collUuid.toString());
ret.emplace_back("`\"");
diff --git a/src/mongo/db/exec/sbe/stages/column_scan.h b/src/mongo/db/exec/sbe/stages/column_scan.h
index 94bc8fa4034..7d30152f46d 100644
--- a/src/mongo/db/exec/sbe/stages/column_scan.h
+++ b/src/mongo/db/exec/sbe/stages/column_scan.h
@@ -41,24 +41,40 @@ namespace sbe {
/**
* A stage that scans provided columnar index.
*
- * Currently the stage produces an object into the 'recordSlot' such that accessing any of the given
- * paths in it would be equivalent to accessing the paths in the corresponding object from the
- * associated row store. In the future the stage will be extended to produce separate outputs for
- * each path without materializing this intermediate object unless requested by the client.
+ * Currently the stage produces an object into the 'reconstructedRecordSlot' such that accessing any
+ * of the given paths in it would be equivalent to accessing the paths in the corresponding object
+ * from the associated row store. In the future the stage will be extended to produce separate
+ * outputs for each path without materializing this intermediate object unless requested by the
+ * client.
*
* Debug string representation:
*
- * COLUMN_SCAN recordSlot|none recordIdSlot|none [path_1, ..., path_n] collectionUuid indexName
+ * COLUMN_SCAN reconstructedRecordSlot|none recordIdSlot|none [path_1, ..., path_n]
+ *             [filter_path_1: filterSlot_1, filterExpr_1; ...]? [rowStoreSlot, rowStoreExpr]?
+ * collectionUuid indexName
*/
class ColumnScanStage final : public PlanStage {
public:
+ struct PathFilter {
+ size_t pathIndex; // index into the paths array the stage will be using
+ std::unique_ptr<EExpression> filterExpr;
+ value::SlotId inputSlotId;
+
+ PathFilter(size_t pathIndex,
+ std::unique_ptr<EExpression> filterExpr,
+ value::SlotId inputSlotId)
+ : pathIndex(pathIndex), filterExpr(std::move(filterExpr)), inputSlotId(inputSlotId) {}
+ };
+
ColumnScanStage(UUID collectionUuid,
StringData columnIndexName,
std::vector<std::string> paths,
+ std::vector<bool> includeInOutput,
boost::optional<value::SlotId> recordIdSlot,
boost::optional<value::SlotId> reconstructedRecordSlot,
value::SlotId rowStoreSlot,
std::unique_ptr<EExpression> rowStoreExpr,
+ std::vector<PathFilter> filteredPaths,
PlanYieldPolicy* yieldPolicy,
PlanNodeId planNodeId,
bool participateInTrialRunTracking = true);
@@ -154,6 +170,9 @@ private:
boost::optional<FullCellView>& lastCell() {
return _lastCell;
}
+ const boost::optional<FullCellView>& lastCell() const {
+ return _lastCell;
+ }
size_t numNexts() const {
return _stats.numNexts;
@@ -184,6 +203,21 @@ private:
void readParentsIntoObj(StringData path, value::Object* out, StringDataSet* pathsReadSetOut);
+ bool checkFilter(CellView cell, size_t filterIndex, const PathValue& path);
+
+ // Finds the smallest record ID such that:
+ // 1) it is greater or equal to the record ID of all filtered columns cursors prior to the call;
+ // 2) the record with this ID passes the filters of all filtered columns.
+ // Ensures that the cursors are set to this record ID unless it's missing in the column (which
+ // is only possible for the non-filtered columns).
+ RecordId findNextRecordIdForFilteredColumns();
+
+ // Finds the lowest record ID across all cursors. Doesn't move any of the cursors.
+ RecordId findMinRecordId() const;
+
+ // Move cursors to the next record to be processed.
+ RecordId advanceCursors();
+
// The columnar index this stage is scanning and the associated row store collection.
const UUID _collUuid;
const std::string _columnIndexName;
@@ -192,13 +226,16 @@ private:
boost::optional<uint64_t> _catalogEpoch; // and are not changed afterwards.
std::weak_ptr<const IndexCatalogEntry> _weakIndexCatalogEntry;
- // Paths to be read from the index.
+ // Paths to be read from the index. '_includeInOutput' defines which of the fields should be
+ // included into the reconstructed record and the order of paths in '_paths' defines the
+    // ordering of the fields. The two vectors should have the same size. NB: having no paths is
+ // when no filters are used and only constant computed columns are projected. In this case only
+ // the dense record ID column will be read.
const std::vector<std::string> _paths;
+ const std::vector<bool> _includeInOutput;
// The record id in the row store that is used to connect the per-path entries in the columnar
- // index and to retrieve the full record from the row store, if necessary. Because we put into
- // the slot the address of record id, we must guarantee that its lifetime is as long as the
- // stage's.
+ // index and to retrieve the full record from the row store, if necessary.
RecordId _recordId;
const boost::optional<value::SlotId> _recordIdSlot;
@@ -218,17 +255,32 @@ private:
const value::SlotId _rowStoreSlot;
const std::unique_ptr<EExpression> _rowStoreExpr;
+ // Per path filters. The slots must be allocated by the client but downstream stages must not
+ // read from them. Multiple filters form a conjunction where each branch of the AND only passes
+ // when a value exists. Empty '_filteredPaths' means there are no filters.
+ const std::vector<PathFilter> _filteredPaths;
+ ColumnCursor& cursorForFilteredPath(const PathFilter& fp) {
+ return _columnCursors[fp.pathIndex];
+ }
+ size_t _nextUnmatched = 0; // used when searching for the next matching record
+
std::unique_ptr<value::OwnedValueAccessor> _reconstructedRecordAccessor;
std::unique_ptr<value::OwnedValueAccessor> _recordIdAccessor;
std::unique_ptr<value::OwnedValueAccessor> _rowStoreAccessor;
+ std::vector<value::OwnedValueAccessor> _filterInputAccessors;
+ value::SlotAccessorMap _filterInputAccessorsMap;
vm::ByteCode _bytecode;
std::unique_ptr<vm::CodeFragment> _rowStoreExprCode;
+ std::vector<std::unique_ptr<vm::CodeFragment>> _filterExprsCode;
- // Cursors to simultaneously read from the sections of the index for each path (and, possibly,
- // auxiliary sections) and from the row store.
+ // Cursors to simultaneously read from the sections of the index for each path.
std::vector<ColumnCursor> _columnCursors;
StringMap<std::unique_ptr<ColumnCursor>> _parentPathCursors;
+ // Dense column contains record ids for all records. It is necessary to support projection
+ // semantics for missing values on paths.
+ std::unique_ptr<ColumnCursor> _denseColumnCursor;
+ // Cursor into the associated row store.
std::unique_ptr<SeekableRecordCursor> _rowStoreCursor;
bool _open{false};
diff --git a/src/mongo/db/exec/sbe/values/columnar.cpp b/src/mongo/db/exec/sbe/values/columnar.cpp
index c1bd51f6b69..c2722ba9437 100644
--- a/src/mongo/db/exec/sbe/values/columnar.cpp
+++ b/src/mongo/db/exec/sbe/values/columnar.cpp
@@ -44,56 +44,6 @@ constexpr value::TypeTags kPlaceHolderType = value::TypeTags::Null;
constexpr value::Value kPlaceHolderValue = 0;
/*
- * Array info reader based on a string representation of arrayInfo. Allows for reading/peeking of
- * individual components.
- */
-class ArrInfoReader {
-public:
- explicit ArrInfoReader(StringData arrInfoStr) : _arrInfo(arrInfoStr) {}
-
- char nextChar() const {
- if (_offsetInArrInfo == _arrInfo.size()) {
- // Reaching the end of the array info means an unlimited number of '|'s.
- return '|';
- }
- return _arrInfo[_offsetInArrInfo];
- }
-
- char takeNextChar() {
- if (_offsetInArrInfo == _arrInfo.size()) {
- // Reaching the end of the array info means an unlimited number of '|'s.
- return '|';
- }
- return _arrInfo[_offsetInArrInfo++];
- }
-
- size_t takeNumber() {
- return ColumnStore::readArrInfoNumber(_arrInfo, &_offsetInArrInfo);
- }
-
- bool empty() const {
- return _arrInfo.empty();
- }
-
- /*
- * Returns whether more explicit components are yet to be consumed. Since array info logically
- * ends with an infinite stream of |, this function indicates whether there are more components
- * which are physically present to be read, not including the infinite sequence of |.
- */
- bool moreExplicitComponents() const {
- return _offsetInArrInfo < _arrInfo.size();
- }
-
- StringData rawArrInfo() const {
- return _arrInfo;
- }
-
-private:
- StringData _arrInfo;
- size_t _offsetInArrInfo = 0;
-};
-
-/*
* Tracks state necessary for reconstructing objects including the array info reader, the values
* available for extraction, and the path.
*/
diff --git a/src/mongo/db/exec/sbe/values/columnar.h b/src/mongo/db/exec/sbe/values/columnar.h
index ec0fb5f07c8..01aef2acbce 100644
--- a/src/mongo/db/exec/sbe/values/columnar.h
+++ b/src/mongo/db/exec/sbe/values/columnar.h
@@ -39,6 +39,56 @@
*/
namespace mongo::sbe {
+/*
+ * Array info reader based on a string representation of arrayInfo. Allows for reading/peeking of
+ * individual components.
+ */
+class ArrInfoReader {
+public:
+ explicit ArrInfoReader(StringData arrInfoStr) : _arrInfo(arrInfoStr) {}
+
+ char nextChar() const {
+ if (_offsetInArrInfo == _arrInfo.size()) {
+ // Reaching the end of the array info means an unlimited number of '|'s.
+ return '|';
+ }
+ return _arrInfo[_offsetInArrInfo];
+ }
+
+ char takeNextChar() {
+ if (_offsetInArrInfo == _arrInfo.size()) {
+ // Reaching the end of the array info means an unlimited number of '|'s.
+ return '|';
+ }
+ return _arrInfo[_offsetInArrInfo++];
+ }
+
+ size_t takeNumber() {
+ return ColumnStore::readArrInfoNumber(_arrInfo, &_offsetInArrInfo);
+ }
+
+ bool empty() const {
+ return _arrInfo.empty();
+ }
+
+ /*
+ * Returns whether more explicit components are yet to be consumed. Since array info logically
+ * ends with an infinite stream of |, this function indicates whether there are more components
+ * which are physically present to be read, not including the infinite sequence of |.
+ */
+ bool moreExplicitComponents() const {
+ return _offsetInArrInfo < _arrInfo.size();
+ }
+
+ StringData rawArrInfo() const {
+ return _arrInfo;
+ }
+
+private:
+ StringData _arrInfo;
+ size_t _offsetInArrInfo = 0;
+};
+
/**
* Represents a cell in a columnar index with ability to retrieve values in an SBE native format.
*/
diff --git a/src/mongo/db/matcher/expression_algo.cpp b/src/mongo/db/matcher/expression_algo.cpp
index 3d08b62e037..90453c66db0 100644
--- a/src/mongo/db/matcher/expression_algo.cpp
+++ b/src/mongo/db/matcher/expression_algo.cpp
@@ -476,6 +476,11 @@ std::unique_ptr<MatchExpression> tryAddExpr(StringData path,
if (FieldRef(path).hasNumericPathComponents())
return me->shallowClone();
+ // (TODO SERVER-68743) addExpr will rightfully create AND for expressions on the same path, but
+ // we cannot translate AND into EExpressions yet.
+ if (out.find(path) != out.end())
+ return me->shallowClone();
+
addExpr(path, me->shallowClone(), out);
return nullptr;
}
@@ -497,7 +502,12 @@ std::unique_ptr<MatchExpression> splitMatchExpressionForColumns(
// One exception to the above: We can support EQ with empty objects and empty arrays since
// those are more obviously correct. Maybe could also support LT and LTE, but those don't
// seem as important so are left for future work.
- if (elem.type() == BSONType::Array || elem.type() == BSONType::Object) {
+ auto type = elem.type();
+ if (type == BSONType::MinKey || type == BSONType::MaxKey) {
+ // MinKey and MaxKey have special semantics for comparison to objects.
+ return false;
+ }
+ if (type == BSONType::Array || type == BSONType::Object) {
return isEQ && elem.Obj().isEmpty();
}
@@ -512,12 +522,15 @@ std::unique_ptr<MatchExpression> splitMatchExpressionForColumns(
case MatchExpression::BITS_ALL_SET:
case MatchExpression::BITS_ALL_CLEAR:
case MatchExpression::BITS_ANY_SET:
- case MatchExpression::BITS_ANY_CLEAR:
- case MatchExpression::EXISTS: {
+ case MatchExpression::BITS_ANY_CLEAR: {
// Note: {$exists: false} is represented as {$not: {$exists: true}}.
auto sub = checked_cast<const PathMatchExpression*>(me);
return tryAddExpr(sub->path(), me, out);
}
+ case MatchExpression::EXISTS: {
+ // (TODO SERVER-68743) need expr translation to enable pushing down $exists
+ return me->shallowClone();
+ }
case MatchExpression::LT:
case MatchExpression::GT:
@@ -530,28 +543,14 @@ std::unique_ptr<MatchExpression> splitMatchExpressionForColumns(
return tryAddExpr(sub->path(), me, out);
}
-
case MatchExpression::MATCH_IN: {
- auto sub = checked_cast<const InMatchExpression*>(me);
- // Note that $in treats regexes specially and stores them separately than the rest of
- // the 'equalities'. We actually don't need to look at them here since any regex should
- // be OK. A regex could only match a string, symbol, or other regex, any of which would
- // be present in the columnar storage.
- for (auto&& elem : sub->getEqualities()) {
- if (!canCompareWith(elem, true))
- return me->shallowClone();
- }
- return tryAddExpr(sub->path(), me, out);
+ // (TODO SERVER-68743) need expr translation to enable pushing down $in
+ return me->shallowClone();
}
case MatchExpression::TYPE_OPERATOR: {
- auto sub = checked_cast<const TypeMatchExpression*>(me);
- tassert(6430600,
- "Not expecting to find EOO in a $type expression",
- !sub->typeSet().hasType(BSONType::EOO));
- if (sub->typeSet().hasType(BSONType::Object) || sub->typeSet().hasType(BSONType::Array))
- return me->shallowClone();
- return tryAddExpr(sub->path(), me, out);
+ // (TODO SERVER-68743) need expr translation to enable pushing down $type
+ return me->shallowClone();
}
case MatchExpression::AND: {
@@ -570,34 +569,8 @@ std::unique_ptr<MatchExpression> splitMatchExpressionForColumns(
: std::make_unique<AndMatchExpression>(std::move(newChildren));
}
-
case MatchExpression::NOT: {
- // {$ne: null} pattern is known to be important in cases like those in SERVER-27646 and
- // SERVER-36465.
- auto notExpr = checked_cast<const NotMatchExpression*>(me);
- auto withinNot = notExpr->getChild(0);
-
- // Oddly, we parse {$ne: null} to a NOT -> EQ, but we parse {$not: {$eq: null}} into a
- // more complex NOT -> AND -> EQ. Let's support both.
- auto tryAddNENull = [&](const MatchExpression* negatedPred) {
- if (negatedPred->matchType() != MatchExpression::EQ) {
- return false;
- }
- auto eqPred = checked_cast<const EqualityMatchExpression*>(negatedPred);
- if (eqPred->getData().isNull()) {
- return tryAddExpr(eqPred->path(), me, out) == nullptr;
- }
- return false;
- };
- if (tryAddNENull(withinNot)) {
- // {$ne: null}. We had equality just under NOT.
- return nullptr;
- } else if (withinNot->matchType() == MatchExpression::AND &&
- withinNot->numChildren() == 1 && tryAddNENull(withinNot->getChild(0))) {
- // {$not: {$eq: null}}: NOT -> AND -> EQ.
- return nullptr;
- }
- // May be other cases, but left as future work.
+ // (TODO SERVER-68743) need expr translation to enable pushing down $not
return me->shallowClone();
}
diff --git a/src/mongo/db/matcher/expression_algo_test.cpp b/src/mongo/db/matcher/expression_algo_test.cpp
index 520b2b036b2..73aa479bd2a 100644
--- a/src/mongo/db/matcher/expression_algo_test.cpp
+++ b/src/mongo/db/matcher/expression_algo_test.cpp
@@ -1692,15 +1692,21 @@ TEST(SplitMatchExpressionForColumns, SplitsSafeEqualities) {
" kiwi: NumberDecimal('22'),"
" 'loggerhead shrike': {$minKey: 1},"
" mallard: {$maxKey: 1}}");
+ ParsedMatchExpression minMaxKeyComps(
+ "{'loggerhead shrike': {$minKey: 1},"
+ " mallard: {$maxKey: 1}}");
+
auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(mixedEquals.get());
- ASSERT_EQ(splitUp.size(), 12) << splitUp.size();
+ ASSERT_EQ(splitUp.size(), 12 - 2) << splitUp.size();
ASSERT(splitUp.contains("albatross"));
ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::EQ)
<< splitUp.at("albatross")->toString();
ASSERT(splitUp.contains("blackbird"));
ASSERT(splitUp.at("blackbird")->matchType() == MatchExpression::EQ)
<< splitUp.at("blackbird")->toString();
- ASSERT(residual == nullptr);
+
+ // Comparisons against MinKey and MaxKey cannot be pushed down.
+ assertMatchesEqual(minMaxKeyComps, residual);
}
}
@@ -1740,26 +1746,6 @@ TEST(SplitMatchExpressionForColumns, DoesNotSupportEqualsNull) {
}
}
-TEST(SplitMatchExpressionForColumns, DoesSupportNotEqualsNull) {
- {
- ParsedMatchExpression neNull("{a: {$ne: null}}");
- auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(neNull.get());
- ASSERT_EQ(splitUp.size(), 1) << splitUp.size();
- ASSERT(splitUp.contains("a"));
- ASSERT(splitUp.at("a")->matchType() == MatchExpression::NOT) << splitUp.at("a")->toString();
- ASSERT(residual == nullptr);
- }
- {
- ParsedMatchExpression notEqualsNull("{a: {$not: {$eq: null}}}");
- auto&& [splitUp, residual] =
- expression::splitMatchExpressionForColumns(notEqualsNull.get());
- ASSERT_EQ(splitUp.size(), 1) << splitUp.size();
- ASSERT(splitUp.contains("a"));
- ASSERT(splitUp.at("a")->matchType() == MatchExpression::NOT) << splitUp.at("a")->toString();
- ASSERT(residual == nullptr);
- }
-}
-
TEST(SplitMatchExpressionForColumns, DoesNotSupportCompoundEquals) {
{
ParsedMatchExpression implicitEqualsArray("{a: [1, 2]}");
@@ -1853,7 +1839,7 @@ TEST(SplitMatchExpressionForColumns, SupportsComparisonsLikeEqualities) {
"{"
" albatross: {$lt: 100},"
" blackbird: {$gt: 0},"
- " cowbird: {$gte: 0, $lte: 100}"
+ " cowbird: 50"
"}");
auto&& [splitUp, residual] =
expression::splitMatchExpressionForColumns(combinationPredicate.get());
@@ -1865,7 +1851,7 @@ TEST(SplitMatchExpressionForColumns, SupportsComparisonsLikeEqualities) {
ASSERT(splitUp.at("blackbird")->matchType() == MatchExpression::GT)
<< splitUp.at("blackbird")->toString();
ASSERT(splitUp.contains("cowbird"));
- ASSERT(splitUp.at("cowbird")->matchType() == MatchExpression::AND)
+ ASSERT(splitUp.at("cowbird")->matchType() == MatchExpression::EQ)
<< splitUp.at("cowbird")->toString();
ASSERT(residual == nullptr);
}
@@ -1958,60 +1944,6 @@ TEST(SplitMatchExpressionForColumns, SupportsTypeSpecificPredicates) {
ASSERT(residual == nullptr);
}
-TEST(SplitMatchExpressionForColumns, SupportsInWithRegexes) {
- {
- // First confirm a $in clause is supported without regexes.
- ParsedMatchExpression stringInClause("{albatross: {$in: ['big', 'ol', 'bird']}}");
- auto&& [splitUp, residual] =
- expression::splitMatchExpressionForColumns(stringInClause.get());
- ASSERT_EQ(splitUp.size(), 1) << splitUp.size();
- ASSERT(splitUp.contains("albatross"));
- ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::MATCH_IN)
- << splitUp.at("albatross")->toString();
- ASSERT(residual == nullptr);
- }
- {
- // Test that $in with regexes is supported also work.
- ParsedMatchExpression regexInClause("{albatross: {$in: [/big/, /bird/]}}");
- auto&& [splitUp, residual] =
- expression::splitMatchExpressionForColumns(regexInClause.get());
- ASSERT_EQ(splitUp.size(), 1) << splitUp.size();
- ASSERT(splitUp.contains("albatross"));
- ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::MATCH_IN)
- << splitUp.at("albatross")->toString();
- ASSERT(residual == nullptr);
- }
- {
- // Test that a mix of both is supported
- ParsedMatchExpression regexInClause("{albatross: {$in: [/big/, 'bird']}}");
- auto&& [splitUp, residual] =
- expression::splitMatchExpressionForColumns(regexInClause.get());
- ASSERT_EQ(splitUp.size(), 1) << splitUp.size();
- ASSERT(splitUp.contains("albatross"));
- ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::MATCH_IN)
- << splitUp.at("albatross")->toString();
- ASSERT(residual == nullptr);
- }
- {
- // Test that it is still disallowed if there's a disqualifying equality such as a null.
- ParsedMatchExpression regexInClause("{albatross: {$in: [/big/, null, 'bird']}}");
- auto&& [splitUp, residual] =
- expression::splitMatchExpressionForColumns(regexInClause.get());
- ASSERT_EQ(splitUp.size(), 0) << splitUp.size();
- assertMatchesEqual(regexInClause, residual);
- }
-}
-
-TEST(SplitMatchExpressionForColumns, SupportsExistsTrue) {
- ParsedMatchExpression existsPredicate("{albatross: {$exists: true}}");
- auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(existsPredicate.get());
- ASSERT_EQ(splitUp.size(), 1) << splitUp.size();
- ASSERT(splitUp.contains("albatross"));
- ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::EXISTS)
- << splitUp.at("albatross")->toString();
- ASSERT(residual == nullptr);
-}
-
TEST(SplitMatchExpressionForColumns, DoesNotSupportExistsFalse) {
ParsedMatchExpression existsPredicate("{albatross: {$exists: false}}");
auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(existsPredicate.get());
@@ -2019,59 +1951,6 @@ TEST(SplitMatchExpressionForColumns, DoesNotSupportExistsFalse) {
assertMatchesEqual(existsPredicate, residual);
}
-// $in constraints are similar to equality. Most of them should work, exceptions broken out in the
-// next test.
-TEST(SplitMatchExpressionForColumns, SupportsInPredicates) {
- {
- ParsedMatchExpression emptyIn("{albatross: {$in: []}}");
- auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(emptyIn.get());
- ASSERT_EQ(splitUp.size(), 1) << splitUp.size();
- ASSERT(splitUp.contains("albatross"));
- ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::MATCH_IN)
- << splitUp.at("albatross")->toString();
- ASSERT(residual == nullptr);
- }
- {
- ParsedMatchExpression singleElementIn("{albatross: {$in: [4]}}");
- auto&& [splitUp, residual] =
- expression::splitMatchExpressionForColumns(singleElementIn.get());
- ASSERT_EQ(splitUp.size(), 1) << splitUp.size();
- ASSERT(splitUp.contains("albatross"));
- ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::MATCH_IN)
- << splitUp.at("albatross")->toString();
- ASSERT(residual == nullptr);
- }
- {
- ParsedMatchExpression inWithEmptyArray("{albatross: {$in: [[]]}}");
- auto&& [splitUp, residual] =
- expression::splitMatchExpressionForColumns(inWithEmptyArray.get());
- ASSERT_EQ(splitUp.size(), 1) << splitUp.size();
- ASSERT(splitUp.contains("albatross"));
- ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::MATCH_IN)
- << splitUp.at("albatross")->toString();
- ASSERT(residual == nullptr);
- }
- {
- ParsedMatchExpression inWithEmptyObject("{albatross: {$in: [{}]}}");
- auto&& [splitUp, residual] =
- expression::splitMatchExpressionForColumns(inWithEmptyObject.get());
- ASSERT_GT(splitUp.size(), 0);
- ASSERT(splitUp.contains("albatross"));
- ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::MATCH_IN)
- << splitUp.at("albatross")->toString();
- ASSERT(residual == nullptr);
- }
- {
- ParsedMatchExpression mixedTypeIn("{albatross: {$in: [4, {}, [], 'string', /regex/]}}");
- auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(mixedTypeIn.get());
- ASSERT_EQ(splitUp.size(), 1) << splitUp.size();
- ASSERT(splitUp.contains("albatross"));
- ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::MATCH_IN)
- << splitUp.at("albatross")->toString();
- ASSERT(residual == nullptr);
- }
-}
-
// We can't support compound types, just like for equality.
TEST(SplitMatchExpressionForColumns, DoesNotSupportCertainInEdgeCases) {
{
@@ -2102,47 +1981,6 @@ TEST(SplitMatchExpressionForColumns, DoesNotSupportCertainInEdgeCases) {
}
}
-TEST(SplitMatchExpressionForColumns, SupportsTypePredicates) {
- {
- ParsedMatchExpression intFilter("{albatross: {$type: 'int'}}");
- auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(intFilter.get());
- ASSERT_GT(splitUp.size(), 0);
- ASSERT(splitUp.contains("albatross"));
- ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::TYPE_OPERATOR)
- << splitUp.at("albatross")->toString();
- ASSERT_EQ(splitUp.size(), 1) << splitUp.size();
- ASSERT(residual == nullptr);
- }
- {
- ParsedMatchExpression numberFilter("{albatross: {$type: 'number'}}");
- auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(numberFilter.get());
- ASSERT_GT(splitUp.size(), 0);
- ASSERT(splitUp.contains("albatross"));
- ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::TYPE_OPERATOR)
- << splitUp.at("albatross")->toString();
- ASSERT_EQ(splitUp.size(), 1) << splitUp.size();
- ASSERT(residual == nullptr);
- }
- {
- ParsedMatchExpression stringFilter("{albatross: {$type: 'string'}}");
- auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(stringFilter.get());
- ASSERT_EQ(splitUp.size(), 1) << splitUp.size();
- ASSERT(splitUp.contains("albatross"));
- ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::TYPE_OPERATOR)
- << splitUp.at("albatross")->toString();
- ASSERT(residual == nullptr);
- }
- {
- ParsedMatchExpression nullFilter("{albatross: {$type: 'null'}}");
- auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(nullFilter.get());
- ASSERT_EQ(splitUp.size(), 1) << splitUp.size();
- ASSERT(splitUp.contains("albatross"));
- ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::TYPE_OPERATOR)
- << splitUp.at("albatross")->toString();
- ASSERT(residual == nullptr);
- }
-}
-
TEST(SplitMatchExpressionForColumns, DoesNotSupportQueriesForTypeObject) {
ParsedMatchExpression objectFilter("{albatross: {$type: 'object'}}");
auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(objectFilter.get());
@@ -2173,44 +2011,16 @@ TEST(SplitMatchExpressionForColumns, DoesNotSupportNotQueries) {
}
}
-TEST(SplitMatchExpressionForColumns, CanCombinePredicates) {
- ParsedMatchExpression compoundFilter(
- "{"
- " albatross: {$gte: 100},"
- " albatross: {$mod: [2, 0]}"
- "}");
- auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(compoundFilter.get());
- ASSERT_EQ(splitUp.size(), 1) << splitUp.size();
- ASSERT(splitUp.contains("albatross"));
- ASSERT(splitUp["albatross"]->matchType() == MatchExpression::AND)
- << splitUp.at("albatross")->toString();
- ASSERT_EQ(splitUp.at("albatross")->numChildren(), 2) << splitUp.at("albatross")->toString();
- // Don't care about the order.
- auto andExpr = splitUp.at("albatross").get();
- auto firstChild = andExpr->getChild(0);
- if (firstChild->matchType() == MatchExpression::GTE) {
- ASSERT(firstChild->matchType() == MatchExpression::GTE) << firstChild->toString();
- ASSERT(andExpr->getChild(1)->matchType() == MatchExpression::MOD) << firstChild->toString();
- } else {
- ASSERT(firstChild->matchType() == MatchExpression::MOD) << firstChild->toString();
- ASSERT(andExpr->getChild(1)->matchType() == MatchExpression::GTE) << firstChild->toString();
- }
- ASSERT(residual == nullptr);
-}
-
TEST(SplitMatchExpressionForColumns, CanSplitPredicate) {
ParsedMatchExpression complexPredicate(R"({
- a: {$gte: 0, $lt: 10},
- "addresses.zip": {$in: ["12345", "01234"]},
+ a: {$gte: 0},
unsubscribed: false,
specialAddress: {$exists: false}
})");
auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(complexPredicate.get());
- ASSERT_EQ(splitUp.size(), 3) << splitUp.size();
+ ASSERT_EQ(splitUp.size(), 2) << splitUp.size();
ASSERT(splitUp.contains("a"));
- ASSERT(splitUp.at("a")->matchType() == MatchExpression::AND) << splitUp.at("a")->toString();
- ASSERT_EQ(splitUp.at("a")->numChildren(), 2) << splitUp.at("a")->toString();
- ASSERT(splitUp.contains("addresses.zip"));
+ ASSERT(splitUp.at("a")->matchType() == MatchExpression::GTE) << splitUp.at("a")->toString();
ASSERT(splitUp.contains("unsubscribed"));
ASSERT(!splitUp.contains("specialAddress"));
ParsedMatchExpression expectedResidual("{specialAddress: {$exists: false}}");
@@ -2224,16 +2034,14 @@ TEST(SplitMatchExpressionForColumns, SupportsDottedPaths) {
" \"blackbird.feet\": {$mod: [2, 0]},"
" \"blackbird.softwareUpdates\": {$bitsAllSet: 7},"
// Stress the path combination logic with some prefixes and suffixes to be sure.
- " blackbird: {$ne: null},"
- " bla: {$ne: null},"
- " blackbirds: {$exists: true},"
- " \"blackbird.feetsies\": {$ne: null},"
- " \"cowbird.beakLength\": {$gte: 24, $lt: 40},"
+ " bla: {$eq: \"whatever\"},"
+ " \"blackbird.feetsies\": {$eq: \"whatever\"},"
+ " \"cowbird.beakLength\": {$gte: 24},"
" \"cowbird.eggSet\": {$bitsAnySet: 7}"
"}");
auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(compoundFilter.get());
ASSERT(residual == nullptr);
- ASSERT_EQ(splitUp.size(), 9) << splitUp.size();
+ ASSERT_EQ(splitUp.size(), 7) << splitUp.size();
ASSERT(splitUp.contains("albatross"));
ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::REGEX)
<< splitUp.at("albatross")->toString();
@@ -2243,17 +2051,9 @@ TEST(SplitMatchExpressionForColumns, SupportsDottedPaths) {
ASSERT(splitUp.contains("blackbird.softwareUpdates"));
ASSERT(splitUp.at("blackbird.softwareUpdates")->matchType() == MatchExpression::BITS_ALL_SET)
<< splitUp.at("blackbird.softwareUpdates")->toString();
- ASSERT(splitUp.contains("blackbird"));
- ASSERT(splitUp.at("blackbird")->matchType() == MatchExpression::NOT)
- << splitUp.at("blackbird")->toString();
ASSERT(splitUp.contains("bla"));
- ASSERT(splitUp.contains("blackbirds"));
- ASSERT(splitUp.at("blackbirds")->matchType() == MatchExpression::EXISTS)
- << splitUp.at("blackbirds")->toString();
ASSERT(splitUp.contains("blackbird.feetsies"));
- ASSERT(splitUp.at("cowbird.beakLength")->matchType() == MatchExpression::AND)
- << splitUp.at("cowbird.beakLength")->toString();
- ASSERT_EQ(splitUp.at("cowbird.beakLength")->numChildren(), 2)
+ ASSERT(splitUp.at("cowbird.beakLength")->matchType() == MatchExpression::GTE)
<< splitUp.at("cowbird.beakLength")->toString();
ASSERT(splitUp.at("cowbird.eggSet")->matchType() == MatchExpression::BITS_ANY_SET)
<< splitUp.at("cowbird.eggSet")->toString();
@@ -2266,13 +2066,13 @@ TEST(SplitMatchExpressionForColumns, LeavesOriginalMatchExpressionFunctional) {
"{"
" albatross: {$lt: 100},"
" blackbird: {$gt: 0},"
- " cowbird: {$gte: 0, $lte: 100}"
+ " cowbird: {$gte: 0}"
"}");
auto&& [splitUp, residual] =
expression::splitMatchExpressionForColumns(combinationPredicate.get());
ASSERT_GT(splitUp.size(), 0);
ASSERT(residual == nullptr);
- // Won't bother asserting on the detaiils here - done above.
+ // Won't bother asserting on the details here - done above.
ASSERT(combinationPredicate.get()->matchesBSON(
BSON("albatross" << 45 << "blackbird" << 1 << "cowbird" << 2)));
}
diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp
index 9f33ae1c38b..059ba11244f 100644
--- a/src/mongo/db/query/query_planner.cpp
+++ b/src/mongo/db/query/query_planner.cpp
@@ -323,12 +323,8 @@ StatusWith<std::unique_ptr<QuerySolution>> tryToBuildColumnScan(
// TODO SERVER-67140: Check if the columnar index actually provides the fields we need.
std::unique_ptr<MatchExpression> residualPredicate;
StringMap<std::unique_ptr<MatchExpression>> filterSplitByColumn;
- if (params.options & QueryPlannerParams::GENERATE_PER_COLUMN_FILTERS) {
- std::tie(filterSplitByColumn, residualPredicate) =
- expression::splitMatchExpressionForColumns(query.root());
- } else {
- residualPredicate = query.root()->shallowClone();
- }
+ std::tie(filterSplitByColumn, residualPredicate) =
+ expression::splitMatchExpressionForColumns(query.root());
auto fieldLimitStatus =
checkFieldLimits(filterDeps.fields, outputDeps.fields, filterSplitByColumn);
diff --git a/src/mongo/db/query/query_planner_columnar_test.cpp b/src/mongo/db/query/query_planner_columnar_test.cpp
index 68f0d5a6c7f..914b9ae76f4 100644
--- a/src/mongo/db/query/query_planner_columnar_test.cpp
+++ b/src/mongo/db/query/query_planner_columnar_test.cpp
@@ -475,10 +475,9 @@ TEST_F(QueryPlannerColumnarTest, ComplexPredicateSplitDemo) {
addColumnStoreIndexAndEnableFilterSplitting();
auto complexPredicate = fromjson(R"({
- a: {$gte: 0, $lt: 10},
- "addresses.zip": {$in: ["12345", "01234"]},
- unsubscribed: false,
- specialAddress: {$exists: true}
+ a: {$gte: 0},
+ "addresses.zip": "12345",
+ unsubscribed: false
})");
runQuerySortProj(complexPredicate, BSONObj(), BSON("a" << 1 << "_id" << 0));
assertNumSolutions(1U);
@@ -488,13 +487,12 @@ TEST_F(QueryPlannerColumnarTest, ComplexPredicateSplitDemo) {
node: {
column_scan: {
filtersByPath: {
- a: {$and: [{a: {$gte: 0}}, {a: {$lt: 10}}]},
- 'addresses.zip': {'addresses.zip': {$in: ['12345', '01234']}},
- unsubscribed: {unsubscribed: {$eq: false}},
- specialAddress: {specialAddress: {$exists: true}}
+ a: {a: {$gte: 0}},
+ 'addresses.zip': {'addresses.zip': {$eq: '12345'}},
+ unsubscribed: {unsubscribed: {$eq: false}}
},
outputFields: ['a'],
- matchFields: ['a', 'addresses.zip', 'unsubscribed', 'specialAddress']
+ matchFields: ['a', 'addresses.zip', 'unsubscribed']
}
}
}
@@ -506,8 +504,8 @@ TEST_F(QueryPlannerColumnarTest, ComplexPredicateSplitsIntoParts) {
// Same predicate as above, except with exists: false, which disqualifies the whole thing.
auto complexPredicate = fromjson(R"({
- a: {$gte: 0, $lt: 10},
- "addresses.zip": {$in: ["12345", "01234"]},
+ a: {$gte: 0},
+ "addresses.zip": "12345",
unsubscribed: false,
specialAddress: {$exists: false},
doNotContact: {$exists: false}
@@ -519,8 +517,8 @@ TEST_F(QueryPlannerColumnarTest, ComplexPredicateSplitsIntoParts) {
node: {
column_scan: {
filtersByPath: {
- a: {a: {$gte: 0, $lt: 10}},
- "addresses.zip": {"addresses.zip": {$in: ['12345', '01234']}},
+ a: {a: {$gte: 0}},
+ 'addresses.zip': {'addresses.zip': {$eq: '12345'}},
unsubscribed: {unsubscribed: false}
},
outputFields: ['a'],
@@ -809,31 +807,4 @@ TEST_F(QueryPlannerColumnarTest, SelectsFirstFromMultipleEligibleColumnStoreInde
}
})");
}
-
-TEST_F(QueryPlannerColumnarTest, FullPredicateOption) {
- params.columnStoreIndexes.emplace_back(kIndexName);
-
- // Filter that could be pushed down, but isn't due to the lack of the
- // GENERATE_PER_COLUMN_FILTER flag.
- auto predicate = fromjson(R"({
- specialAddress: {$exists: true},
- doNotContact: {$exists: true}
- })");
- runQuerySortProj(predicate, BSONObj(), BSON("a" << 1 << "_id" << 0));
- assertSolutionExists(R"({
- proj: {
- spec: {a: 1, _id: 0},
- node: {
- column_scan: {
- outputFields: ['a'],
- matchFields: ['specialAddress', 'doNotContact'],
- postAssemblyFilter: {
- specialAddress: {$exists: true},
- doNotContact: {$exists: true}
- }
- }
- }
- }
- })");
-}
} // namespace mongo
diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp
index e70f1e80cac..1855bf1534e 100644
--- a/src/mongo/db/query/sbe_stage_builder.cpp
+++ b/src/mongo/db/query/sbe_stage_builder.cpp
@@ -58,6 +58,7 @@
#include "mongo/db/fts/fts_query_impl.h"
#include "mongo/db/fts/fts_spec.h"
#include "mongo/db/index/fts_access_method.h"
+#include "mongo/db/matcher/expression_leaf.h"
#include "mongo/db/pipeline/abt/field_map_builder.h"
#include "mongo/db/pipeline/expression.h"
#include "mongo/db/pipeline/expression_visitor.h"
@@ -713,7 +714,85 @@ std::unique_ptr<sbe::EExpression> abtToExpr(optimizer::ABT& abt, optimizer::Slot
optimizer::SBEExpressionLowering exprLower{env, slotMap};
return exprLower.optimize(abt);
}
+
+EvalExpr generatePerColumnFilterExpr(StageBuilderState& state,
+ const MatchExpression* me,
+ sbe::value::SlotId inputSlot) {
+ switch (me->matchType()) {
+ // These are always safe since they will never match documents missing their field, or where
+ // the element is an object or array.
+ case MatchExpression::REGEX:
+ return generateRegexExpr(
+ state, checked_cast<const RegexMatchExpression*>(me), inputSlot);
+ case MatchExpression::MOD:
+ return generateModExpr(state, checked_cast<const ModMatchExpression*>(me), inputSlot);
+ case MatchExpression::BITS_ALL_SET:
+ return generateBitTestExpr(state,
+ checked_cast<const BitTestMatchExpression*>(me),
+ sbe::BitTestBehavior::AllSet,
+ inputSlot);
+ case MatchExpression::BITS_ALL_CLEAR:
+ return generateBitTestExpr(state,
+ checked_cast<const BitTestMatchExpression*>(me),
+ sbe::BitTestBehavior::AllClear,
+ inputSlot);
+ case MatchExpression::BITS_ANY_SET:
+ return generateBitTestExpr(state,
+ checked_cast<const BitTestMatchExpression*>(me),
+ sbe::BitTestBehavior::AnySet,
+ inputSlot);
+ case MatchExpression::BITS_ANY_CLEAR:
+ return generateBitTestExpr(state,
+ checked_cast<const BitTestMatchExpression*>(me),
+ sbe::BitTestBehavior::AnyClear,
+ inputSlot);
+ case MatchExpression::EXISTS: {
+ uasserted(6733601, "(TODO SERVER-68743) need expr translation to enable $exists");
+ }
+ case MatchExpression::LT:
+ return generateComparisonExpr(state,
+ checked_cast<const ComparisonMatchExpression*>(me),
+ sbe::EPrimBinary::less,
+ inputSlot);
+ case MatchExpression::GT:
+ return generateComparisonExpr(state,
+ checked_cast<const ComparisonMatchExpression*>(me),
+ sbe::EPrimBinary::greater,
+ inputSlot);
+ case MatchExpression::EQ:
+ return generateComparisonExpr(state,
+ checked_cast<const ComparisonMatchExpression*>(me),
+ sbe::EPrimBinary::eq,
+ inputSlot);
+ case MatchExpression::LTE:
+ return generateComparisonExpr(state,
+ checked_cast<const ComparisonMatchExpression*>(me),
+ sbe::EPrimBinary::lessEq,
+ inputSlot);
+ case MatchExpression::GTE:
+ return generateComparisonExpr(state,
+ checked_cast<const ComparisonMatchExpression*>(me),
+ sbe::EPrimBinary::greaterEq,
+ inputSlot);
+ case MatchExpression::MATCH_IN: {
+ uasserted(6733602, "(TODO SERVER-68743) need expr translation to enable $in");
+ }
+ case MatchExpression::TYPE_OPERATOR: {
+ uasserted(6733603, "(TODO SERVER-68743) need expr translation to enable $type");
+ }
+ case MatchExpression::NOT: {
+ uasserted(6733604, "(TODO SERVER-68743) need expr translation to enable $not");
+ }
+
+ default:
+ uasserted(6733605,
+ std::string("Expression ") + me->serialize().toString() +
+ " should not be pushed down as a per-column filter");
+ }
+ return {};
+}
} // namespace
+
std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder::buildColumnScan(
const QuerySolutionNode* root, const PlanStageReqs& reqs) {
tassert(6312404,
@@ -726,8 +805,6 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder
"'postAssemblyFilter' to be used instead.",
!csn->filter);
- tassert(6610251, "Expected no filters by path", csn->filtersByPath.empty());
-
PlanStageSlots outputs;
auto reconstructedRecordSlot = _slotIdGenerator.generate();
@@ -768,16 +845,58 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder
auto abt = builder.generateABT();
auto rowStoreExpr = abt ? abtToExpr(*abt, slotMap) : emptyExpr->clone();
- std::unique_ptr<sbe::PlanStage> stage = std::make_unique<sbe::ColumnScanStage>(
- getCurrentCollection(reqs)->uuid(),
- csn->indexEntry.catalogName,
- std::vector<std::string>{csn->allFields.begin(), csn->allFields.end()},
- ridSlot,
- reconstructedRecordSlot,
- rowStoreSlot,
- std::move(rowStoreExpr),
- _yieldPolicy,
- csn->nodeId());
+ // Get all the paths but make sure "_id" comes first (the order of paths given to the
+ // column_scan stage defines the order of fields in the reconstructed record).
+ std::vector<std::string> paths;
+ paths.reserve(csn->allFields.size());
+ if (csn->allFields.find("_id") != csn->allFields.end()) {
+ paths.push_back("_id");
+ }
+ for (const auto& path : csn->allFields) {
+ if (path != "_id") {
+ paths.push_back(path);
+ }
+ }
+
+ // Identify the filtered columns, if any, and create slots/expressions for them.
+ std::vector<sbe::ColumnScanStage::PathFilter> filteredPaths;
+ filteredPaths.reserve(csn->filtersByPath.size());
+ for (size_t i = 0; i < paths.size(); i++) {
+ auto itFilter = csn->filtersByPath.find(paths[i]);
+ if (itFilter != csn->filtersByPath.end()) {
+ auto filterInputSlot = _slotIdGenerator.generate();
+
+ auto expr = generatePerColumnFilterExpr(_state, itFilter->second.get(), filterInputSlot)
+ .extractExpr();
+ filteredPaths.emplace_back(i, std::move(expr), filterInputSlot);
+ }
+ }
+
+ // Tag which of the paths should be included into the output.
+ DepsTracker residual;
+ if (csn->postAssemblyFilter) {
+ csn->postAssemblyFilter->addDependencies(&residual);
+ }
+ std::vector<bool> includeInOutput(paths.size(), false);
+ for (size_t i = 0; i < paths.size(); i++) {
+ if (csn->outputFields.find(paths[i]) != csn->outputFields.end() ||
+ residual.fields.find(paths[i]) != residual.fields.end()) {
+ includeInOutput[i] = true;
+ }
+ }
+
+ std::unique_ptr<sbe::PlanStage> stage =
+ std::make_unique<sbe::ColumnScanStage>(getCurrentCollection(reqs)->uuid(),
+ csn->indexEntry.catalogName,
+ std::move(paths),
+ std::move(includeInOutput),
+ ridSlot,
+ reconstructedRecordSlot,
+ rowStoreSlot,
+ std::move(rowStoreExpr),
+ std::move(filteredPaths),
+ _yieldPolicy,
+ csn->nodeId());
// Generate post assembly filter.
if (csn->postAssemblyFilter) {
@@ -793,6 +912,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder
csn->nodeId());
stage = std::move(outputStage.stage);
}
+
return {std::move(stage), std::move(outputs)};
}