summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--jstests/core/columnstore_index_per_path_filters.js329
-rw-r--r--jstests/noPassthroughWithMongod/column_scan_explain.js158
-rw-r--r--src/mongo/db/exec/sbe/stages/column_scan.cpp283
-rw-r--r--src/mongo/db/exec/sbe/stages/column_scan.h74
-rw-r--r--src/mongo/db/query/query_planner.cpp2
-rw-r--r--src/mongo/db/query/query_planner_columnar_test.cpp27
-rw-r--r--src/mongo/db/query/sbe_stage_builder.cpp70
7 files changed, 816 insertions, 127 deletions
diff --git a/jstests/core/columnstore_index_per_path_filters.js b/jstests/core/columnstore_index_per_path_filters.js
new file mode 100644
index 00000000000..c9dd442f098
--- /dev/null
+++ b/jstests/core/columnstore_index_per_path_filters.js
@@ -0,0 +1,329 @@
+/**
+ * Testing of just the query layer's integration for columnar index when filters are used that
+ * might be pushed down into the column scan stage.
+ *
+ * @tags: [
+ * # columnstore indexes are new in 6.1.
+ * requires_fcv_61,
+ * # Runs explain on an aggregate command which is only compatible with readConcern local.
+ * assumes_read_concern_unchanged,
+ * # TODO SERVER-66925 We could potentially need to resume an index build in the event of a
+ * # stepdown, which is not yet implemented.
+ * does_not_support_stepdowns,
+ * uses_column_store_index,
+ * ]
+ */
+(function() {
+"use strict";
+
+load("jstests/aggregation/extras/utils.js"); // For "resultsEq."
+load("jstests/libs/analyze_plan.js"); // For "planHasStage."
+load("jstests/libs/sbe_explain_helpers.js"); // For getSbePlanStages.
+load("jstests/libs/sbe_util.js");        // For "checkSBEEnabled."
+
+const columnstoreEnabled =
+ checkSBEEnabled(db, ["featureFlagColumnstoreIndexes", "featureFlagSbeFull"]);
+if (!columnstoreEnabled) {
+ jsTestLog("Skipping columnstore index validation test since the feature flag is not enabled.");
+ return;
+}
+
+const coll_filters = db.columnstore_index_per_path_filters;
+function runPerPathFiltersTest({docs, query, projection, expected, testDescription}) {
+ coll_filters.drop();
+ coll_filters.insert(docs);
+ assert.commandWorked(coll_filters.createIndex({"$**": "columnstore"}));
+
+ // Order of the elements within the result and 'expected' is not significant for 'resultsEq' but
+ // use small datasets to make the test failures more readable.
+ const explain = coll_filters.find(query, projection).explain();
+ const errMsg = " **TEST** " + testDescription + tojson(explain);
+ const actual = coll_filters.find(query, projection).toArray();
+ assert(resultsEq(actual, expected),
+ `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`);
+}
+// Sanity check that without filters, the cursors on other columns are advanced correctly.
+(function testPerPathFilters_SingleColumnScalarsAndArrays() {
+ const docs = [
+ {n: 0, x: 42},
+ {n: 1, y: 42},
+ {n: 2, x: 42},
+ {n: 3},
+ {n: 4, x: 42, y: 42},
+ {n: 5, x: 42},
+ ];
+ runPerPathFiltersTest({
+ docs: docs,
+ query: {},
+ projection: {a: 1, _id: 0, n: 1, x: 1, y: 1},
+ expected: docs,
+ testDescription: "SingleColumnScalarsAndArrays"
+ });
+})();
+
+// Checks matching on top-level path that contains scalars and arrays.
+(function testPerPathFilters_SingleColumnScalarsAndArrays() {
+ const docs = [
+ {n: 0, x: 42},
+ {n: 1, x: [[42]]},
+ {n: 2, x: [0, 42]},
+ {n: 3, x: [42, 0]},
+ {n: 4, x: [[42], 0]},
+ {n: 5, x: [{y: 0}, 42]},
+ {n: 6, x: [{y: 0}, [42]]},
+ {n: 7, x: [[0, 1], 42]},
+ {n: 8, x: [[0, 1], [42]]},
+ {n: 9, x: {a: 42}},
+ {n: 10, x: [{a: 42}]},
+ {n: 11, x: [{a: 0}, 42]},
+ ];
+ // Projecting "_id" out and instead projecting as the first (in alphabetical order) column a
+ // non-existing field helps to flush out bugs with incorrect lookup of the filtered columns
+ // among all columns involved in the query.
+ runPerPathFiltersTest({
+ docs: docs,
+ query: {x: 42},
+ projection: {a: 1, _id: 0, n: 1},
+ expected: [{n: 0}, {n: 2}, {n: 3}, {n: 5}, {n: 7}, {n: 11}],
+ testDescription: "SingleColumnScalarsAndArrays"
+ });
+})();
+
+// Checks matching on sub-path that contains scalars and arrays.
+(function testPerPathFilters_SingleColumnScalarsAndArrays_SubField() {
+ const docs = [
+ {_id: 0, x: {y: 42}},
+ {_id: 1, x: {y: [0, 42, {z: 1}]}},
+ {_id: 2, x: {y: 0}},
+ {_id: 3, x: [{y: 0}, {y: 42}]},
+ {_id: 4, x: [{y: 0}, {y: 1}]},
+ {_id: 5, x: [42, {y: 0}, {z: 42}]},
+ {_id: 6, x: 0},
+ {_id: 7, x: {z: 0}},
+ {_id: 8, x: {y: {z: 0}}},
+ {_id: 9, x: [{y: [0, 42]}, {y: 1}]},
+ {_id: 10, x: [{y: [[42], 0]}]},
+ {_id: 11, x: [[{y: 42}, 0], 1]}
+ ];
+ runPerPathFiltersTest({
+ docs: docs,
+ query: {"x.y": 42},
+ projection: {_id: 1},
+ expected: [{_id: 0}, {_id: 1}, {_id: 3}, {_id: 9}],
+ testDescription: "SingleColumnScalarsAndArrays_SubField"
+ });
+})();
+
+// Check that a single filtered column correctly handles matching, no-matching and missing values.
+(function testPerPathFilters_SingleColumnMatchNoMatchMissingValues() {
+ const docs = [
+ {_id: 0, x: 42},
+ {_id: 1, x: 0},
+ {_id: 2, x: 42},
+ {_id: 3, no_x: 0},
+ {_id: 4, x: 42},
+ {_id: 5, x: 0},
+ {_id: 6, no_x: 0},
+ {_id: 7, x: 42},
+ {_id: 8, no_x: 0},
+ {_id: 9, x: 0},
+ {_id: 10, x: 42},
+ ];
+ runPerPathFiltersTest({
+ docs: docs,
+ query: {x: 42},
+ projection: {_id: 1},
+ expected: [{_id: 0}, {_id: 2}, {_id: 4}, {_id: 7}, {_id: 10}],
+ testDescription: "SingleColumnMatchNoMatchMissingValues"
+ });
+})();
+
+// Check zig-zagging of two filters. We cannot assert through a JS test that the cursors for the
+// filtered columns are advanced as described here in the comments, but the test attempts to
+// exercise various match/no-match/missing combinations of values across columns.
+(function testPerPathFilters_TwoColumns() {
+ const docs = [
+ {_id: 0, x: 0, y: 0}, // start by iterating x
+ {_id: 1, x: 42, y: 42}, // seek into y and match! - continue iterating on x
+        {_id: 2, x: 42, no_y: 0},  // seeking into y skips to _id: 3
+ {_id: 3, x: 42, y: 0}, // iterating on y
+ {_id: 4, x: 42, y: 42}, // seek into x and match! - continue iterating on y
+        {_id: 5, no_x: 0, y: 42},  // seek into x skips to _id: 6
+ {_id: 6, x: 42, y: 0}, // seek into y but no match - iterate on y
+ {_id: 7, x: 0, y: 42}, // seek into x but no match - iterate on x
+ {_id: 8, no_x: 0, no_y: 0}, // skipped by x
+ {_id: 9, x: 42, y: 42}, // seek into y and match!
+ ];
+ // Adding into the projection specification non-existent fields doesn't change the output but
+ // helps to flush out bugs with incorrect indexing of filtered paths among all others.
+ runPerPathFiltersTest({
+ docs: docs,
+ query: {x: 42, y: 42},
+ projection: {_id: 1, a: 1, xa: 1, xb: 1, ya: 1},
+ expected: [{_id: 1}, {_id: 4}, {_id: 9}],
+ testDescription: "TwoColumns"
+ });
+})();
+
+// Check zig-zagging of three filters.
+(function testPerPathFilters_ThreeColumns() {
+ const docs = [
+ {_id: 0, x: 0, y: 42, z: 42}, // x
+ {_id: 1, x: 42, y: 42, z: 0}, // x->y->z
+ {_id: 2, x: 0, y: 42, z: 42}, // z->x
+ {_id: 3, x: 0, y: 42, z: 42}, // x
+ {_id: 4, x: 42, no_y: 0, z: 42}, // x->y
+ {_id: 5, x: 42, y: 0, z: 42}, // y
+ {_id: 6, x: 42, y: 42, z: 42}, // match! ->y
+ {_id: 7, x: 42, y: 42, z: 42}, // match! ->y
+ {_id: 8, no_x: 0, y: 42, z: 42}, // y->z->x
+ {_id: 9, x: 42, y: 42, no_z: 0}, // x
+ {_id: 10, x: 42, y: 42, z: 42}, // match!
+ ];
+ // Adding into the projection specification non-existent fields doesn't change the output but
+ // helps to flush out bugs with incorrect indexing of filtered paths among all others.
+ runPerPathFiltersTest({
+ docs: docs,
+ query: {x: 42, y: 42, z: 42},
+ projection: {_id: 1, a: 1, b: 1, xa: 1, xb: 1, ya: 1, za: 1},
+ expected: [{_id: 6}, {_id: 7}, {_id: 10}],
+ testDescription: "ThreeColumns"
+ });
+})();
+
+// Check projection of filtered columns.
+(function testPerPathFilters_ProjectFilteredColumn() {
+ const docs = [
+ {_id: 0, x: {y: 42}},
+ {_id: 1, x: {y: 42, z: 0}},
+ {_id: 2, x: [0, {y: 42}, {y: 0}, {z: 0}]},
+ ];
+ runPerPathFiltersTest({
+ docs: docs,
+ query: {"x.y": 42},
+ projection: {_id: 1, "x.y": 1},
+ expected: [{_id: 0, x: {y: 42}}, {_id: 1, x: {y: 42}}, {_id: 2, x: [{y: 42}, {y: 0}, {}]}],
+ testDescription: "ProjectFilteredColumn"
+ });
+})();
+
+// Check correctness when have both per-path and residual filters.
+(function testPerPathFilters_PerPathAndResidualFilters() {
+ const docs = [
+ {_id: 0, x: 42, no_y: 0},
+ {_id: 1, x: 42, y: 0},
+ {_id: 2, x: 0, no_y: 0},
+ {_id: 3, x: 0, y: 0},
+ {_id: 4, no_x: 0, no_y: 0},
+ {_id: 5, x: 42, no_y: 0},
+ ];
+ runPerPathFiltersTest({
+ docs: docs,
+ query: {x: 42, y: {$exists: false}}, // {$exists: false} causes the residual filter
+ projection: {_id: 1},
+ expected: [{_id: 0}, {_id: 5}],
+ testDescription: "PerPathAndResidualFilters"
+ });
+})();
+
+// Check degenerate case with no paths.
+(function testPerPathFilters_NoPathsProjected() {
+ const docs = [
+ {_id: 0, x: 42},
+ {_id: 1, x: 0},
+ {_id: 2, no_x: 42},
+ ];
+ runPerPathFiltersTest({
+ docs: docs,
+ query: {x: 42},
+ projection: {_id: 0, a: {$literal: 1}},
+ expected: [{a: 1}],
+ testDescription: "NoPathsProjected"
+ });
+ runPerPathFiltersTest({
+ docs: docs,
+ query: {},
+ projection: {_id: 0, a: {$literal: 1}},
+ expected: [{a: 1}, {a: 1}, {a: 1}],
+ testDescription: "NoPathsProjected (and no filter)"
+ });
+})();
+
+// While using columnar indexes doesn't guarantee a specific field ordering in the result objects,
+// we still try to provide a stable experience to the user, so we output "_id" first and other
+// fields in alphabetically ascending order.
+(function testPerPathFilters_FieldOrder() {
+ const docs = [{z: 42, a: 42, _id: 42, _a: 42}];
+
+ coll_filters.drop();
+ coll_filters.insert(docs);
+ assert.commandWorked(coll_filters.createIndex({"$**": "columnstore"}));
+
+ let res = tojson(coll_filters.find({}, {_a: 1, a: 1, z: 1}).toArray()[0]);
+ let expected = '{ "_id" : 42, "_a" : 42, "a" : 42, "z" : 42 }';
+ assert(res == expected, `actual: ${res} != expected: ${expected} in **TEST** field order 1`);
+
+ // Having a filter on a path that is also being projected, should not affect the order.
+ res = tojson(coll_filters.find({z: 42}, {_a: 1, a: 1, z: 1}).toArray()[0]);
+ expected = '{ "_id" : 42, "_a" : 42, "a" : 42, "z" : 42 }';
+ assert(res == expected, `actual: ${res} != expected: ${expected} in **TEST** field order 2`);
+
+ // Omitting the "_id" field should not affect the order.
+ res = tojson(coll_filters.find({a: 42}, {_id: 0, _a: 1, z: 1}).toArray()[0]);
+ expected = '{ "_a" : 42, "z" : 42 }';
+ assert(res == expected, `actual: ${res} != expected: ${expected} in **TEST** field order 3`);
+})();
+
+// Sanity test that per-column filtering is meeting the efficiency expectations.
+(function testPerPathFilters_FieldOrder() {
+ coll_filters.drop();
+
+ const docsCount = 1000;
+ const expectedToMatchCount = 10;
+ for (let i = 0; i < docsCount; i++) {
+ coll_filters.insert({_id: i, x: i % 2, y: i % (docsCount / expectedToMatchCount)});
+ }
+ assert.commandWorked(coll_filters.createIndex({"$**": "columnstore"}));
+
+ assert.eq(coll_filters.find({x: 1, y: 1}, {_id: 1, x: 1}).toArray().length,
+ expectedToMatchCount);
+ const explain = coll_filters.find({x: 1, y: 1}, {_id: 1, x: 1}).explain("executionStats");
+
+ const columnScanStages = getSbePlanStages(explain, "COLUMN_SCAN");
+ assert.eq(columnScanStages.length, 1, `Could not find 'COLUMN_SCAN' stage: ${tojson(explain)}`);
+
+ const columns = columnScanStages[0].columns;
+ assertArrayEq({
+ actual: columnScanStages[0].paths,
+ expected: ["_id", "x", "y"],
+ extraErrorMsg: 'Paths used by column scan stage'
+ });
+    assert.eq(Object.keys(columns).length, 3, `Used columns ${columns}`);
+
+ const _id = columns._id;
+ const x = columns.x;
+ const y = columns.y;
+
+ assert(_id.usedInOutput, "_id column should be used in output");
+ assert(x.usedInOutput, "x column should be used in output");
+ assert(!y.usedInOutput, "y column should be used only for filtering");
+
+ // When there are no missing fields, the number of "next" calls in zig-zag search algorithm is
+ // equal to the number of documents docsCount (NB: in non-filtered search the number of "next"
+ // calls is close to k*docsCount where k is the number of paths).
+ assert.eq(_id.numNexts, 0, 'Should not iterate on non-filtered column');
+ assert.eq(x.numNexts + y.numNexts, docsCount, 'Total count of "next" calls');
+
+ // The columns with higher selectivity should be preferred by the zig-zag search for driving the
+ // "next" calls. Due to the regularity of data in this test (if "y" matched "x" would match as
+ // well), after the first match "y" column completely takes over.
+ assert.gt(y.numNexts, 0.9 * docsCount, 'high selectivity should drive "next" calls');
+
+ // We seek into each column to set up the cursors, after that seeks into _id should only happen
+ // on full match, and seeks into x or y should only happen on partial matches.
+ assert.eq(_id.numSeeks, 1 + expectedToMatchCount, "Seeks into the _id column");
+ assert.lt(x.numSeeks + y.numSeeks,
+ 2 * expectedToMatchCount,
+ "Number of seeks in filtered columns should be small");
+})();
+})();
diff --git a/jstests/noPassthroughWithMongod/column_scan_explain.js b/jstests/noPassthroughWithMongod/column_scan_explain.js
index e137d714514..1f2a60e8253 100644
--- a/jstests/noPassthroughWithMongod/column_scan_explain.js
+++ b/jstests/noPassthroughWithMongod/column_scan_explain.js
@@ -4,6 +4,7 @@
(function() {
"use strict";
+load("jstests/aggregation/extras/utils.js"); // For assertArrayEq
load("jstests/libs/analyze_plan.js"); // For planHasStage.
load("jstests/libs/sbe_util.js"); // For checkSBEEnabled.
load("jstests/libs/sbe_explain_helpers.js"); // For getSbePlanStages.
@@ -34,35 +35,54 @@ assert.commandWorked(coll.insertMany(docs));
const columnScanStages = getSbePlanStages(explain, "COLUMN_SCAN");
assert.eq(columnScanStages.length, 1, `Could not find 'COLUMN_SCAN' stage: ${tojson(explain)}`);
+ const columnScan = columnScanStages[0];
+
+ assertArrayEq({
+ actual: columnScan.paths,
+ expected: ["_id", "x", "y.a"],
+ extraErrorMsg: 'Paths used by column scan stage'
+ });
// Verifying column fields.
- const columns = columnScanStages[0].columns;
- assert.eq(Object.keys(columns).length, 4, 'Number of columns should be 4.');
+ const columns = columnScan.columns;
+ assert.eq(
+ Object.keys(columns).length, 4, `Should access 4 columns but accessed: ${tojson(columns)}`);
+
+ // We seek into each column once, when setting up the cursors. The dense column is the first to
+ // hit EOF after iterating over all documents so other columns iterate at least one time less.
const expectedColumns = {
- "<<RowId Column>>": {"numNexts": 5, "numSeeks": 1, "usedInOutput": false},
- "_id": {"numNexts": 5, "numSeeks": 1, "usedInOutput": true},
- "x": {"numNexts": 5, "numSeeks": 1, "usedInOutput": true},
+ "<<RowId Column>>": {"numNexts": docs.length, "numSeeks": 1, "usedInOutput": false},
+ "_id": {"numNexts": docs.length - 1, "numSeeks": 1, "usedInOutput": true},
+ "x": {"numNexts": docs.length - 1, "numSeeks": 1, "usedInOutput": true},
"y.a": {"numNexts": 1, "numSeeks": 1, "usedInOutput": true}
};
for (const [columnName, expectedObj] of Object.entries(expectedColumns)) {
assert.eq(sortDoc(columns[columnName]),
sortDoc(expectedObj),
- `Mismatching entry for column ${columnName}`);
+ `Mismatching entry for column ${tojson(columnName)}`);
}
// Verifying parent column fields.
- const parentColumns = columnScanStages[0].parentColumns;
- assert.eq(Object.keys(parentColumns).length, 1, 'Number of parent columns should be 1.');
+ const parentColumns = columnScan.parentColumns;
+ assert.eq(Object.keys(parentColumns).length,
+ 1,
+ `Should access 1 parent column but accessed: ${tojson(parentColumns)}`);
// Expecting 4 lookups on the "y" parent column for the 3 docs which didn't have a "y.a"
- // value and 1 for an unsuccessful call to seek.
+ // value and 1 for an unsuccessful call to seek. We should not iterate over parent columns.
assert.eq(sortDoc(parentColumns.y),
{"numNexts": 0, "numSeeks": 4},
- 'Mismatching entry for parent column \'y\'');
-
- assert.eq(explain.executionStats.totalKeysExamined, 24, `Mismatch in totalKeysExamined.`);
- assert.eq(
- columnScanStages[0].numRowStoreFetches, 0, 'Number of row store fetches should be 0.');
- assert.eq(columnScanStages[0].nReturned, 5, 'Number returned should be 5.');
+ 'Mismatching entry for parent column "y"');
+
+ // 'totalKeysExamined' should be equal to the sum of "next" and "seek" calls across all columns.
+ assert.eq(explain.executionStats.totalKeysExamined,
+ columns["<<RowId Column>>"].numNexts + columns["<<RowId Column>>"].numSeeks +
+ columns["_id"].numNexts + columns["_id"].numSeeks + columns["x"].numNexts +
+ columns["x"].numSeeks + columns["y.a"].numNexts + columns["y.a"].numSeeks +
+ parentColumns["y"].numNexts + parentColumns["y"].numSeeks,
+ `Mismatch in totalKeysExamined.`);
+
+ assert.eq(columnScan.numRowStoreFetches, 0, 'Mismatch in numRowStoreFetches');
+ assert.eq(columnScan.nReturned, docs.length, 'nReturned: should return all docs');
}());
// Test the explain output for a scan on a nonexistent field.
@@ -71,29 +91,42 @@ assert.commandWorked(coll.insertMany(docs));
const columnScanStages = getSbePlanStages(explain, "COLUMN_SCAN");
assert.eq(columnScanStages.length, 1, `Could not find 'COLUMN_SCAN' stage: ${tojson(explain)}`);
+ const columnScan = columnScanStages[0];
+
+ assertArrayEq({
+ actual: columnScan.paths,
+ expected: ["_id", "z"],
+ extraErrorMsg: 'Paths used by column scan stage'
+ });
// Verifying column fields.
- const columns = columnScanStages[0].columns;
- assert.eq(Object.keys(columns).length, 3, 'Number of columns should be 3.');
+ const columns = columnScan.columns;
+ assert.eq(
+ Object.keys(columns).length, 3, `Should access 3 columns but accessed: ${tojson(columns)}`);
const expectedColumns = {
- "<<RowId Column>>": {"numNexts": 5, "numSeeks": 1, "usedInOutput": false},
- "_id": {"numNexts": 5, "numSeeks": 1, "usedInOutput": true},
+ "<<RowId Column>>": {"numNexts": docs.length, "numSeeks": 1, "usedInOutput": false},
+ "_id": {"numNexts": docs.length - 1, "numSeeks": 1, "usedInOutput": true},
"z": {"numNexts": 0, "numSeeks": 1, "usedInOutput": true},
};
for (const [columnName, expectedObj] of Object.entries(expectedColumns)) {
assert.eq(sortDoc(columns[columnName]),
sortDoc(expectedObj),
- `Mismatching entry for column ${columnName}`);
+ `Mismatching entry for column "${columnName}"`);
}
// Verifying parent column fields.
- const parentColumns = columnScanStages[0].parentColumns;
- assert.eq(parentColumns, {});
-
- assert.eq(explain.executionStats.totalKeysExamined, 13, `Mismatch in totalKeysExamined.`);
- assert.eq(
- columnScanStages[0].numRowStoreFetches, 0, 'Number of row store fetches should be 0.');
- assert.eq(columnScanStages[0].nReturned, 5, 'Number returned should be 5.');
+ const parentColumns = columnScan.parentColumns;
+ assert.eq(parentColumns, {}, "Should not access parent columns");
+
+ // 'totalKeysExamined' should be equal to the sum of "next" and "seek" calls across all columns.
+ assert.eq(explain.executionStats.totalKeysExamined,
+ columns["<<RowId Column>>"].numNexts + columns["<<RowId Column>>"].numSeeks +
+ columns["_id"].numNexts + columns["_id"].numSeeks + columns["z"].numNexts +
+ columns["z"].numSeeks,
+ `Mismatch in totalKeysExamined.`);
+
+ assert.eq(columnScan.numRowStoreFetches, 0, 'Mismatch in numRowStoreFetches');
+ assert.eq(columnScan.nReturned, docs.length, 'nReturned: should return all docs');
}());
// Test the explain output for a scan on a 2-level nested field.
@@ -102,14 +135,22 @@ assert.commandWorked(coll.insertMany(docs));
const columnScanStages = getSbePlanStages(explain, "COLUMN_SCAN");
assert.eq(columnScanStages.length, 1, `Could not find 'COLUMN_SCAN' stage: ${tojson(explain)}`);
+ const columnScan = columnScanStages[0];
+
+ assertArrayEq({
+ actual: columnScan.paths,
+ expected: ["_id", "y.b.c"],
+ extraErrorMsg: 'Paths used by column scan stage'
+ });
// Verifying column fields.
- const columns = columnScanStages[0].columns;
- assert.eq(Object.keys(columns).length, 3, 'Number of columns should be 3.');
+ const columns = columnScan.columns;
+ assert.eq(
+ Object.keys(columns).length, 3, `Should access 3 columns but accessed: ${tojson(columns)}`);
const expectedColumns = {
- "<<RowId Column>>": {"numNexts": 5, "numSeeks": 1, "usedInOutput": false},
- "_id": {"numNexts": 5, "numSeeks": 1, "usedInOutput": true},
- "y.b.c": {"numNexts": 2, "numSeeks": 1, "usedInOutput": true},
+ "<<RowId Column>>": {"numNexts": docs.length, "numSeeks": 1, "usedInOutput": false},
+ "_id": {"numNexts": docs.length - 1, "numSeeks": 1, "usedInOutput": true},
+ "y.b.c": {"numNexts": 1, "numSeeks": 1, "usedInOutput": true},
};
for (const [columnName, expectedObj] of Object.entries(expectedColumns)) {
assert.eq(sortDoc(columns[columnName]),
@@ -118,22 +159,57 @@ assert.commandWorked(coll.insertMany(docs));
}
// Verifying parent column fields.
- const parentColumns = columnScanStages[0].parentColumns;
- assert.eq(Object.keys(parentColumns).length, 2, 'Number of parent columns should be 2.');
+ const parentColumns = columnScan.parentColumns;
+ assert.eq(Object.keys(parentColumns).length,
+ 2,
+              `Should access 2 parent columns but accessed: ${tojson(parentColumns)}`);
// Expecting 3 lookups on the "y" parent column for the 2 docs which didn't have a "y.b"
- // value and 1 unsuccessful call to seek.
+ // value and 1 unsuccessful call to seek. We should not iterate over parent columns.
assert.eq(sortDoc(parentColumns.y),
{"numNexts": 0, "numSeeks": 3},
- 'Mismatching entry for parent column \'y\'');
+ 'Mismatching entry for parent column "y"');
// Expecting 4 lookups on the "y.b" parent column for the 3 docs that didn't have a "y.b.c"
// value and 1 unsuccessful call to seek.
assert.eq(sortDoc(parentColumns['y.b']),
{"numNexts": 0, "numSeeks": 4},
- 'Mismatching entry for parent column \'y.b\'');
+ 'Mismatching entry for parent column "y.b"');
+
+ // 'totalKeysExamined' should be equal to the sum of "next" and "seek" calls across all columns.
+ assert.eq(explain.executionStats.totalKeysExamined,
+ columns["<<RowId Column>>"].numNexts + columns["<<RowId Column>>"].numSeeks +
+ columns["_id"].numNexts + columns["_id"].numSeeks + columns["y.b.c"].numNexts +
+ columns["y.b.c"].numSeeks + parentColumns["y.b"].numNexts +
+ parentColumns["y.b"].numSeeks + parentColumns["y"].numNexts +
+ parentColumns["y"].numSeeks,
+ `Mismatch in totalKeysExamined.`);
+
+ assert.eq(columnScan.numRowStoreFetches, 0, 'Mismatch in numRowStoreFetches');
+ assert.eq(columnScan.nReturned, docs.length, 'nReturned: should return all docs');
+}());
- assert.eq(explain.executionStats.totalKeysExamined, 22, `Mismatch in totalKeysExamined.`);
- assert.eq(
- columnScanStages[0].numRowStoreFetches, 0, 'Number of row store fetches should be 0.');
- assert.eq(columnScanStages[0].nReturned, 5, 'Number returned should be 5.');
+// Test fallback to the row store.
+(function testWithFallbackToRowstore() {
+ const coll_rowstore = db.column_scan_explain_rowstore;
+ coll_rowstore.drop();
+ assert.commandWorked(coll_rowstore.createIndex({"$**": "columnstore"}));
+
+ const docs_rowstore = [
+ {_id: 0, x: {y: 42, z: 0}},
+ {_id: 1, x: {y: {z: 0}}}, // fallback
+ {_id: 2, x: [{y: 42}, 0]},
+ {_id: 3, x: [{y: 42}, {z: 0}]},
+ {_id: 4, x: [{y: [42, 43]}, {z: 0}]},
+ {_id: 5, x: [{y: [42, {z: 0}]}, {z: 0}]}, // fallback
+ {_id: 6, x: 42},
+ ];
+ coll_rowstore.insert(docs_rowstore);
+ const explain = coll_rowstore.find({}, {_id: 0, "x.y": 1}).explain("executionStats");
+
+ const columnScanStages = getSbePlanStages(explain, "COLUMN_SCAN");
+ assert.eq(columnScanStages.length, 1, `Could not find 'COLUMN_SCAN' stage: ${tojson(explain)}`);
+ const columnScan = columnScanStages[0];
+
+ assert.eq(columnScan.numRowStoreFetches, 2, 'Mismatch in numRowStoreFetches');
+ assert.eq(columnScan.nReturned, docs_rowstore.length, 'nReturned: should return all docs');
}());
}());
diff --git a/src/mongo/db/exec/sbe/stages/column_scan.cpp b/src/mongo/db/exec/sbe/stages/column_scan.cpp
index a003515427f..35e023bfac3 100644
--- a/src/mongo/db/exec/sbe/stages/column_scan.cpp
+++ b/src/mongo/db/exec/sbe/stages/column_scan.cpp
@@ -52,10 +52,12 @@ TranslatedCell translateCell(PathView path, const SplitCellView& splitCellView)
ColumnScanStage::ColumnScanStage(UUID collectionUuid,
StringData columnIndexName,
std::vector<std::string> paths,
+ std::vector<bool> includeInOutput,
boost::optional<value::SlotId> recordIdSlot,
boost::optional<value::SlotId> reconstuctedRecordSlot,
value::SlotId rowStoreSlot,
std::unique_ptr<EExpression> rowStoreExpr,
+ std::vector<PathFilter> filteredPaths,
PlanYieldPolicy* yieldPolicy,
PlanNodeId nodeId,
bool participateInTrialRunTracking)
@@ -63,26 +65,39 @@ ColumnScanStage::ColumnScanStage(UUID collectionUuid,
_collUuid(collectionUuid),
_columnIndexName(columnIndexName),
_paths(std::move(paths)),
+ _includeInOutput(std::move(includeInOutput)),
_recordIdSlot(recordIdSlot),
_reconstructedRecordSlot(reconstuctedRecordSlot),
_rowStoreSlot(rowStoreSlot),
- _rowStoreExpr(std::move(rowStoreExpr)) {}
+ _rowStoreExpr(std::move(rowStoreExpr)),
+ _filteredPaths(std::move(filteredPaths)) {
+ invariant(_filteredPaths.size() <= _paths.size(),
+ "Filtered paths should be a subset of all paths");
+ invariant(_paths.size() == _includeInOutput.size());
+}
std::unique_ptr<PlanStage> ColumnScanStage::clone() const {
- std::vector<std::unique_ptr<EExpression>> pathExprs;
+ std::vector<PathFilter> filteredPaths;
+ for (const auto& fp : _filteredPaths) {
+ filteredPaths.emplace_back(fp.pathIndex, fp.filterExpr->clone(), fp.inputSlotId);
+ }
return std::make_unique<ColumnScanStage>(_collUuid,
_columnIndexName,
_paths,
+ _includeInOutput,
_recordIdSlot,
_reconstructedRecordSlot,
_rowStoreSlot,
_rowStoreExpr ? _rowStoreExpr->clone() : nullptr,
+ std::move(filteredPaths),
_yieldPolicy,
_commonStats.nodeId,
_participateInTrialRunTracking);
}
void ColumnScanStage::prepare(CompileCtx& ctx) {
+ ctx.root = this;
+
if (_reconstructedRecordSlot) {
_reconstructedRecordAccessor = std::make_unique<value::OwnedValueAccessor>();
}
@@ -92,10 +107,19 @@ void ColumnScanStage::prepare(CompileCtx& ctx) {
_rowStoreAccessor = std::make_unique<value::OwnedValueAccessor>();
if (_rowStoreExpr) {
- ctx.root = this;
_rowStoreExprCode = _rowStoreExpr->compile(ctx);
}
+ _filterInputAccessors.resize(_filteredPaths.size());
+ for (size_t idx = 0; idx < _filterInputAccessors.size(); ++idx) {
+ auto slot = _filteredPaths[idx].inputSlotId;
+ auto [it, inserted] = _filterInputAccessorsMap.emplace(slot, &_filterInputAccessors[idx]);
+ uassert(6610212, str::stream() << "duplicate slot: " << slot, inserted);
+ }
+ for (auto& filteredPath : _filteredPaths) {
+ _filterExprsCode.emplace_back(filteredPath.filterExpr->compile(ctx));
+ }
+
tassert(6610200, "'_coll' should not be initialized prior to 'acquireCollection()'", !_coll);
std::tie(_coll, _collName, _catalogEpoch) = acquireCollection(_opCtx, _collUuid);
@@ -120,12 +144,23 @@ value::SlotAccessor* ColumnScanStage::getAccessor(CompileCtx& ctx, value::SlotId
if (_rowStoreSlot == slot) {
return _rowStoreAccessor.get();
}
+
+ if (auto it = _filterInputAccessorsMap.find(slot); it != _filterInputAccessorsMap.end()) {
+ return it->second;
+ }
+
return ctx.getAccessor(slot);
}
void ColumnScanStage::doSaveState(bool relinquishCursor) {
+ if (_denseColumnCursor) {
+ _denseColumnCursor->makeOwned();
+ _denseColumnCursor->cursor().save();
+ }
+
for (auto& cursor : _columnCursors) {
cursor.makeOwned();
+ cursor.cursor().save();
}
if (_rowStoreCursor && relinquishCursor) {
@@ -136,9 +171,6 @@ void ColumnScanStage::doSaveState(bool relinquishCursor) {
_rowStoreCursor->setSaveStorageCursorOnDetachFromOperationContext(!relinquishCursor);
}
- for (auto& cursor : _columnCursors) {
- cursor.cursor().save();
- }
for (auto& [path, cursor] : _parentPathCursors) {
cursor->cursor().saveUnpositioned();
}
@@ -170,6 +202,9 @@ void ColumnScanStage::doRestoreState(bool relinquishCursor) {
}
}
+ if (_denseColumnCursor) {
+ _denseColumnCursor->cursor().restore();
+ }
for (auto& cursor : _columnCursors) {
cursor.cursor().restore();
}
@@ -182,6 +217,9 @@ void ColumnScanStage::doDetachFromOperationContext() {
if (_rowStoreCursor) {
_rowStoreCursor->detachFromOperationContext();
}
+ if (_denseColumnCursor) {
+ _denseColumnCursor->cursor().detachFromOperationContext();
+ }
for (auto& cursor : _columnCursors) {
cursor.cursor().detachFromOperationContext();
}
@@ -194,6 +232,9 @@ void ColumnScanStage::doAttachToOperationContext(OperationContext* opCtx) {
if (_rowStoreCursor) {
_rowStoreCursor->reattachToOperationContext(opCtx);
}
+ if (_denseColumnCursor) {
+ _denseColumnCursor->cursor().reattachToOperationContext(opCtx);
+ }
for (auto& cursor : _columnCursors) {
cursor.cursor().reattachToOperationContext(opCtx);
}
@@ -247,24 +288,30 @@ void ColumnScanStage::open(bool reOpen) {
auto iam = static_cast<ColumnStoreAccessMethod*>(entry->accessMethod());
- // Eventually we can not include this column for the cases where a known dense column (_id)
- // is being read anyway.
-
- // Add a stats struct that will be shared by overall ColumnScanStats and individual
- // cursor.
- _columnCursors.emplace_back(
- iam->storage()->newCursor(_opCtx, ColumnStore::kRowIdPath),
- _specificStats.cursorStats.emplace_back(ColumnStore::kRowIdPath.toString(), false));
-
- for (auto&& path : _paths) {
- _columnCursors.emplace_back(iam->storage()->newCursor(_opCtx, path),
- _specificStats.cursorStats.emplace_back(path, true));
+ // The dense _recordId column is only needed if there are no filters (TODO SERVER-68377:
+ // eventually we can avoid including this column for the cases where a known dense column
+ // such as _id is being read anyway).
+ if (_filteredPaths.empty()) {
+ _denseColumnCursor = std::make_unique<ColumnCursor>(
+ iam->storage()->newCursor(_opCtx, ColumnStore::kRowIdPath),
+ _specificStats.cursorStats.emplace_back(ColumnStore::kRowIdPath.toString(),
+ false /*includeInOutput*/));
+ }
+ for (size_t i = 0; i < _paths.size(); i++) {
+ _columnCursors.emplace_back(
+ iam->storage()->newCursor(_opCtx, _paths[i]),
+ _specificStats.cursorStats.emplace_back(_paths[i], _includeInOutput[i]));
}
}
+ // Set the cursors.
+ if (_denseColumnCursor) {
+ _denseColumnCursor->seekAtOrPast(RecordId());
+ }
for (auto& columnCursor : _columnCursors) {
columnCursor.seekAtOrPast(RecordId());
}
+ _recordId = _filteredPaths.empty() ? findMinRecordId() : findNextRecordIdForFilteredColumns();
_open = true;
}
@@ -323,6 +370,152 @@ void ColumnScanStage::readParentsIntoObj(StringData path,
}
}
+// The result of the filter predicate will be the same regardless of sparseness or sub objects,
+// therefore we don't look at the parents and don't consult the row store.
+//
+// (TODO SERVER-68285) Currently, the per-path predicates expect an object to run on, so we create
+// one. This is very inefficient (profiles show considerable time spent under Object::push_back) and
+// should be replaced with predicates that run directly on values. The fact that the implementation
+// of the filter depends on the implementation of the expressions passed to the stage indicates a
+// tight coupling. Unfortunately, this dependency can only be discovered at runtime.
+bool ColumnScanStage::checkFilter(CellView cell, size_t filterIndex, const PathValue& path) {
+ auto [tag, val] = value::makeNewObject();
+ value::ValueGuard materializedObjGuard(tag, val);
+ auto& obj = *value::bitcastTo<value::Object*>(val);
+
+ auto splitCellView = SplitCellView::parse(cell);
+ auto translatedCell = translateCell(path, splitCellView);
+ addCellToObject(translatedCell, obj);
+
+ _filterInputAccessors[filterIndex].reset(true /*owned*/, tag, val);
+ materializedObjGuard.reset();
+ return _bytecode.runPredicate(_filterExprsCode[filterIndex].get());
+}
+
+RecordId ColumnScanStage::findNextRecordIdForFilteredColumns() {
+ invariant(!_filteredPaths.empty());
+
+ // Initialize 'targetRecordId' from the filtered cursor we are currently iterating.
+ RecordId targetRecordId;
+ {
+ auto& cursor = cursorForFilteredPath(_filteredPaths[_nextUnmatched]);
+ if (!cursor.lastCell()) {
+ return RecordId(); // Have exhausted one of the columns.
+ }
+ targetRecordId = cursor.lastCell()->rid;
+ }
+
+ size_t matchedSinceAdvance = 0;
+ // The loop will terminate because when 'matchedSinceAdvance' is reset the 'targetRecordId' is
+ // guaranteed to advance. It will do no more than N 'next()' calls across all cursors, where N
+ // is the number of records (might do fewer, if for some columns there are missing values). The
+ // number of seeks and filter checks depends on the selectivity of the filters.
+ while (matchedSinceAdvance < _filteredPaths.size()) {
+ auto& cursor = cursorForFilteredPath(_filteredPaths[_nextUnmatched]);
+
+ // Avoid seeking into the column that we started with.
+ auto& result = cursor.lastCell();
+ if (result && result->rid < targetRecordId) {
+ result = cursor.seekAtOrPast(targetRecordId);
+ }
+ if (!result) {
+ return RecordId();
+ }
+
+ if (result->rid > targetRecordId) {
+ // The column skipped ahead - have to restart at this new record ID.
+ matchedSinceAdvance = 0;
+ targetRecordId = result->rid;
+ }
+
+ if (!checkFilter(result->value, _nextUnmatched, cursor.path())) {
+ // Advance the column until it finds a match and restart at this new record ID.
+ do {
+ result = cursor.next();
+ if (!result) {
+ return RecordId();
+ }
+ } while (!checkFilter(result->value, _nextUnmatched, cursor.path()));
+ matchedSinceAdvance = 0;
+ invariant(result->rid > targetRecordId);
+ targetRecordId = result->rid;
+ }
+ ++matchedSinceAdvance;
+ _nextUnmatched = (_nextUnmatched + 1) % _filteredPaths.size();
+ }
+ invariant(!targetRecordId.isNull());
+
+ // Ensure that _all_ cursors have caught up with the filtered record ID. Some of the cursors
+ // might skip ahead, which would mean the column is missing a value for this 'recordId'.
+ for (auto& cursor : _columnCursors) {
+ const auto& result = cursor.lastCell();
+ if (result && result->rid < targetRecordId) {
+ cursor.seekAtOrPast(targetRecordId);
+ }
+ }
+
+ return targetRecordId;
+}
+
+RecordId ColumnScanStage::findMinRecordId() const {
+ if (_denseColumnCursor) {
+ // The cursor of the dense column cannot be ahead of any other, so it's always at the
+ // minimum.
+ auto& result = _denseColumnCursor->lastCell();
+ if (!result) {
+ return RecordId();
+ }
+ return result->rid;
+ }
+
+ auto recordId = RecordId();
+ for (const auto& cursor : _columnCursors) {
+ const auto& result = cursor.lastCell();
+ if (result && (recordId.isNull() || result->rid < recordId)) {
+ recordId = result->rid;
+ }
+ }
+ return recordId;
+}
+
+RecordId ColumnScanStage::advanceCursors() {
+ if (!_filteredPaths.empty()) {
+ // Nudge forward the "active" filtered cursor. The remaining ones will be synchronized by
+ // 'findNextRecordIdForFilteredColumns()'.
+ cursorForFilteredPath(_filteredPaths[_nextUnmatched]).next();
+ return findNextRecordIdForFilteredColumns();
+ }
+
+ // In the absence of filters, all cursors iterate forward on their own. Some cursors might be
+ // ahead of the current '_recordId' because there are gaps in their columns - don't move those;
+ // only advance the cursors positioned at '_recordId', whose values have been consumed. While
+ // at it, compute the new min record ID.
+ auto nextRecordId = RecordId();
+ if (_denseColumnCursor) {
+ invariant(_denseColumnCursor->lastCell()->rid == _recordId,
+ "Dense cursor should always be at the current minimum record ID");
+ auto cell = _denseColumnCursor->next();
+ if (!cell) {
+ return RecordId();
+ }
+ nextRecordId = cell->rid;
+ }
+ for (auto& cursor : _columnCursors) {
+ auto& cell = cursor.lastCell();
+ if (!cell) {
+ continue; // this column has been exhausted
+ }
+ if (cell->rid == _recordId) {
+ cell = cursor.next();
+ }
+ if (cell && (nextRecordId.isNull() || cell->rid < nextRecordId)) {
+ invariant(!_denseColumnCursor, "Dense cursor should have the next lowest record ID");
+ nextRecordId = cell->rid;
+ }
+ }
+ return nextRecordId;
+}
+
PlanState ColumnScanStage::getNext() {
auto optTimer(getOptTimer(_opCtx));
@@ -332,35 +525,32 @@ PlanState ColumnScanStage::getNext() {
checkForInterrupt(_opCtx);
- // Find minimum record ID of all column cursors.
- _recordId = RecordId();
- for (auto& cursor : _columnCursors) {
- auto& result = cursor.lastCell();
- if (result && (_recordId.isNull() || result->rid < _recordId)) {
- _recordId = result->rid;
- }
- }
-
if (_recordId.isNull()) {
return trackPlanState(PlanState::IS_EOF);
}
+ bool useRowStore = false;
+
auto [outTag, outVal] = value::makeNewObject();
auto& outObj = *value::bitcastTo<value::Object*>(outVal);
value::ValueGuard materializedObjGuard(outTag, outVal);
StringDataSet pathsRead;
- bool useRowStore = false;
for (size_t i = 0; i < _columnCursors.size(); ++i) {
- auto& lastCell = _columnCursors[i].lastCell();
- const auto& path = _columnCursors[i].path();
+ if (!_includeInOutput[i]) {
+ continue;
+ }
+ auto& cursor = _columnCursors[i];
+ auto& lastCell = cursor.lastCell();
boost::optional<SplitCellView> splitCellView;
if (lastCell && lastCell->rid == _recordId) {
splitCellView = SplitCellView::parse(lastCell->value);
}
- if (_columnCursors[i].includeInOutput() && !useRowStore) {
+ const auto& path = cursor.path();
+
+ if (!useRowStore) {
if (splitCellView &&
(splitCellView->hasSubPaths || splitCellView->hasDuplicateFields)) {
useRowStore = true;
@@ -376,10 +566,6 @@ PlanState ColumnScanStage::getNext() {
}
}
}
-
- if (splitCellView) {
- _columnCursors[i].next();
- }
}
if (useRowStore) {
@@ -423,6 +609,8 @@ PlanState ColumnScanStage::getNext() {
_tracker = nullptr;
uasserted(ErrorCodes::QueryTrialRunCompleted, "Trial run early exit in scan");
}
+
+ _recordId = advanceCursors();
return trackPlanState(PlanState::ADVANCED);
}
@@ -507,6 +695,31 @@ std::vector<DebugPrinter::Block> ColumnScanStage::debugPrint() const {
}
ret.emplace_back(DebugPrinter::Block("`]"));
+ // Print out per-path filters (if any).
+ if (!_filteredPaths.empty()) {
+ ret.emplace_back(DebugPrinter::Block("[`"));
+ for (size_t idx = 0; idx < _filteredPaths.size(); ++idx) {
+ if (idx) {
+ ret.emplace_back(DebugPrinter::Block("`;"));
+ }
+
+ ret.emplace_back(str::stream()
+ << "\"" << _paths[_filteredPaths[idx].pathIndex] << "\": ");
+ DebugPrinter::addIdentifier(ret, _filteredPaths[idx].inputSlotId);
+ ret.emplace_back(DebugPrinter::Block("`,"));
+ DebugPrinter::addBlocks(ret, _filteredPaths[idx].filterExpr->debugPrint());
+ }
+ ret.emplace_back(DebugPrinter::Block("`]"));
+ }
+
+ if (_rowStoreExpr) {
+ ret.emplace_back(DebugPrinter::Block("[`"));
+ DebugPrinter::addIdentifier(ret, _rowStoreSlot);
+ ret.emplace_back(DebugPrinter::Block("`,"));
+ DebugPrinter::addBlocks(ret, _rowStoreExpr->debugPrint());
+ ret.emplace_back(DebugPrinter::Block("`]"));
+ }
+
ret.emplace_back("@\"`");
DebugPrinter::addIdentifier(ret, _collUuid.toString());
ret.emplace_back("`\"");
diff --git a/src/mongo/db/exec/sbe/stages/column_scan.h b/src/mongo/db/exec/sbe/stages/column_scan.h
index 94bc8fa4034..7d30152f46d 100644
--- a/src/mongo/db/exec/sbe/stages/column_scan.h
+++ b/src/mongo/db/exec/sbe/stages/column_scan.h
@@ -41,24 +41,40 @@ namespace sbe {
/**
* A stage that scans provided columnar index.
*
- * Currently the stage produces an object into the 'recordSlot' such that accessing any of the given
- * paths in it would be equivalent to accessing the paths in the corresponding object from the
- * associated row store. In the future the stage will be extended to produce separate outputs for
- * each path without materializing this intermediate object unless requested by the client.
+ * Currently the stage produces an object into the 'reconstructedRecordSlot' such that accessing any
+ * of the given paths in it would be equivalent to accessing the paths in the corresponding object
+ * from the associated row store. In the future the stage will be extended to produce separate
+ * outputs for each path without materializing this intermediate object unless requested by the
+ * client.
*
* Debug string representation:
*
- * COLUMN_SCAN recordSlot|none recordIdSlot|none [path_1, ..., path_n] collectionUuid indexName
+ * COLUMN_SCAN reconstructedRecordSlot|none recordIdSlot|none [path_1, ..., path_n]
+ * [filter_path_1: filterSlot_1, filterExpr_1; ...]? [rowStoreSlot, rowStoreExpr]?
+ * collectionUuid indexName
*/
class ColumnScanStage final : public PlanStage {
public:
+ struct PathFilter {
+ size_t pathIndex; // index into the paths array the stage will be using
+ std::unique_ptr<EExpression> filterExpr;
+ value::SlotId inputSlotId;
+
+ PathFilter(size_t pathIndex,
+ std::unique_ptr<EExpression> filterExpr,
+ value::SlotId inputSlotId)
+ : pathIndex(pathIndex), filterExpr(std::move(filterExpr)), inputSlotId(inputSlotId) {}
+ };
+
ColumnScanStage(UUID collectionUuid,
StringData columnIndexName,
std::vector<std::string> paths,
+ std::vector<bool> includeInOutput,
boost::optional<value::SlotId> recordIdSlot,
boost::optional<value::SlotId> reconstructedRecordSlot,
value::SlotId rowStoreSlot,
std::unique_ptr<EExpression> rowStoreExpr,
+ std::vector<PathFilter> filteredPaths,
PlanYieldPolicy* yieldPolicy,
PlanNodeId planNodeId,
bool participateInTrialRunTracking = true);
@@ -154,6 +170,9 @@ private:
boost::optional<FullCellView>& lastCell() {
return _lastCell;
}
+ const boost::optional<FullCellView>& lastCell() const {
+ return _lastCell;
+ }
size_t numNexts() const {
return _stats.numNexts;
@@ -184,6 +203,21 @@ private:
void readParentsIntoObj(StringData path, value::Object* out, StringDataSet* pathsReadSetOut);
+ bool checkFilter(CellView cell, size_t filterIndex, const PathValue& path);
+
+ // Finds the smallest record ID such that:
+ // 1) it is greater or equal to the record ID of all filtered columns cursors prior to the call;
+ // 2) the record with this ID passes the filters of all filtered columns.
+ // Ensures that the cursors are set to this record ID unless it's missing in the column (which
+ // is only possible for the non-filtered columns).
+ RecordId findNextRecordIdForFilteredColumns();
+
+ // Finds the lowest record ID across all cursors. Doesn't move any of the cursors.
+ RecordId findMinRecordId() const;
+
+ // Move cursors to the next record to be processed.
+ RecordId advanceCursors();
+
// The columnar index this stage is scanning and the associated row store collection.
const UUID _collUuid;
const std::string _columnIndexName;
@@ -192,13 +226,16 @@ private:
boost::optional<uint64_t> _catalogEpoch; // and are not changed afterwards.
std::weak_ptr<const IndexCatalogEntry> _weakIndexCatalogEntry;
- // Paths to be read from the index.
+ // Paths to be read from the index. '_includeInOutput' defines which of the fields should be
+ // included into the reconstructed record and the order of paths in '_paths' defines the
+ // ordering of the fields. The two vectors should have the same size. NB: No paths is possible
+ // when no filters are used and only constant computed columns are projected. In this case only
+ // the dense record ID column will be read.
const std::vector<std::string> _paths;
+ const std::vector<bool> _includeInOutput;
// The record id in the row store that is used to connect the per-path entries in the columnar
- // index and to retrieve the full record from the row store, if necessary. Because we put into
- // the slot the address of record id, we must guarantee that its lifetime is as long as the
- // stage's.
+ // index and to retrieve the full record from the row store, if necessary.
RecordId _recordId;
const boost::optional<value::SlotId> _recordIdSlot;
@@ -218,17 +255,32 @@ private:
const value::SlotId _rowStoreSlot;
const std::unique_ptr<EExpression> _rowStoreExpr;
+ // Per path filters. The slots must be allocated by the client but downstream stages must not
+ // read from them. Multiple filters form a conjunction where each branch of the AND only passes
+ // when a value exists. Empty '_filteredPaths' means there are no filters.
+ const std::vector<PathFilter> _filteredPaths;
+ ColumnCursor& cursorForFilteredPath(const PathFilter& fp) {
+ return _columnCursors[fp.pathIndex];
+ }
+ size_t _nextUnmatched = 0; // used when searching for the next matching record
+
std::unique_ptr<value::OwnedValueAccessor> _reconstructedRecordAccessor;
std::unique_ptr<value::OwnedValueAccessor> _recordIdAccessor;
std::unique_ptr<value::OwnedValueAccessor> _rowStoreAccessor;
+ std::vector<value::OwnedValueAccessor> _filterInputAccessors;
+ value::SlotAccessorMap _filterInputAccessorsMap;
vm::ByteCode _bytecode;
std::unique_ptr<vm::CodeFragment> _rowStoreExprCode;
+ std::vector<std::unique_ptr<vm::CodeFragment>> _filterExprsCode;
- // Cursors to simultaneously read from the sections of the index for each path (and, possibly,
- // auxiliary sections) and from the row store.
+ // Cursors to simultaneously read from the sections of the index for each path.
std::vector<ColumnCursor> _columnCursors;
StringMap<std::unique_ptr<ColumnCursor>> _parentPathCursors;
+ // Dense column contains record ids for all records. It is necessary to support projection
+ // semantics for missing values on paths.
+ std::unique_ptr<ColumnCursor> _denseColumnCursor;
+ // Cursor into the associated row store.
std::unique_ptr<SeekableRecordCursor> _rowStoreCursor;
bool _open{false};
diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp
index 9f33ae1c38b..79ccf3ecfa0 100644
--- a/src/mongo/db/query/query_planner.cpp
+++ b/src/mongo/db/query/query_planner.cpp
@@ -323,7 +323,7 @@ StatusWith<std::unique_ptr<QuerySolution>> tryToBuildColumnScan(
// TODO SERVER-67140: Check if the columnar index actually provides the fields we need.
std::unique_ptr<MatchExpression> residualPredicate;
StringMap<std::unique_ptr<MatchExpression>> filterSplitByColumn;
- if (params.options & QueryPlannerParams::GENERATE_PER_COLUMN_FILTERS) {
+ if (params.options & QueryPlannerParams::GENERATE_PER_COLUMN_FILTERS) {
std::tie(filterSplitByColumn, residualPredicate) =
expression::splitMatchExpressionForColumns(query.root());
} else {
diff --git a/src/mongo/db/query/query_planner_columnar_test.cpp b/src/mongo/db/query/query_planner_columnar_test.cpp
index 68f0d5a6c7f..0f8c4048176 100644
--- a/src/mongo/db/query/query_planner_columnar_test.cpp
+++ b/src/mongo/db/query/query_planner_columnar_test.cpp
@@ -809,31 +809,4 @@ TEST_F(QueryPlannerColumnarTest, SelectsFirstFromMultipleEligibleColumnStoreInde
}
})");
}
-
-TEST_F(QueryPlannerColumnarTest, FullPredicateOption) {
- params.columnStoreIndexes.emplace_back(kIndexName);
-
- // Filter that could be pushed down, but isn't due to the lack of the
- // GENERATE_PER_COLUMN_FILTER flag.
- auto predicate = fromjson(R"({
- specialAddress: {$exists: true},
- doNotContact: {$exists: true}
- })");
- runQuerySortProj(predicate, BSONObj(), BSON("a" << 1 << "_id" << 0));
- assertSolutionExists(R"({
- proj: {
- spec: {a: 1, _id: 0},
- node: {
- column_scan: {
- outputFields: ['a'],
- matchFields: ['specialAddress', 'doNotContact'],
- postAssemblyFilter: {
- specialAddress: {$exists: true},
- doNotContact: {$exists: true}
- }
- }
- }
- }
- })");
-}
} // namespace mongo
diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp
index e70f1e80cac..aedddb6ab02 100644
--- a/src/mongo/db/query/sbe_stage_builder.cpp
+++ b/src/mongo/db/query/sbe_stage_builder.cpp
@@ -726,8 +726,6 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder
"'postAssemblyFilter' to be used instead.",
!csn->filter);
- tassert(6610251, "Expected no filters by path", csn->filtersByPath.empty());
-
PlanStageSlots outputs;
auto reconstructedRecordSlot = _slotIdGenerator.generate();
@@ -768,16 +766,63 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder
auto abt = builder.generateABT();
auto rowStoreExpr = abt ? abtToExpr(*abt, slotMap) : emptyExpr->clone();
- std::unique_ptr<sbe::PlanStage> stage = std::make_unique<sbe::ColumnScanStage>(
- getCurrentCollection(reqs)->uuid(),
- csn->indexEntry.catalogName,
- std::vector<std::string>{csn->allFields.begin(), csn->allFields.end()},
- ridSlot,
- reconstructedRecordSlot,
- rowStoreSlot,
- std::move(rowStoreExpr),
- _yieldPolicy,
- csn->nodeId());
+ // Get all the paths but make sure "_id" comes first (the order of paths given to the
+ // column_scan stage defines the order of fields in the reconstructed record).
+ std::vector<std::string> paths;
+ paths.reserve(csn->allFields.size());
+ if (csn->allFields.find("_id") != csn->allFields.end()) {
+ paths.push_back("_id");
+ }
+ for (const auto& path : csn->allFields) {
+ if (path != "_id") {
+ paths.push_back(path);
+ }
+ }
+
+ // Identify the filtered columns, if any, and create slots/expressions for them.
+ std::vector<sbe::ColumnScanStage::PathFilter> filteredPaths;
+ filteredPaths.reserve(csn->filtersByPath.size());
+ for (size_t i = 0; i < paths.size(); i++) {
+ auto itFilter = csn->filtersByPath.find(paths[i]);
+ if (itFilter != csn->filtersByPath.end()) {
+ auto filterInputSlot = _slotIdGenerator.generate();
+
+ // TODO SERVER-68285: use native SBE expression instead of the classic matcher.
+ auto expr = makeFunction("applyClassicMatcher",
+ makeConstant(sbe::value::TypeTags::classicMatchExpresion,
+ sbe::value::bitcastFrom<const MatchExpression*>(
+ itFilter->second->shallowClone().release())),
+ makeVariable(filterInputSlot));
+
+ filteredPaths.emplace_back(i, std::move(expr), filterInputSlot);
+ }
+ }
+
+ // Tag which of the paths should be included into the output.
+ DepsTracker residual;
+ if (csn->postAssemblyFilter) {
+ csn->postAssemblyFilter->addDependencies(&residual);
+ }
+ std::vector<bool> includeInOutput(paths.size(), false);
+ for (size_t i = 0; i < paths.size(); i++) {
+ if (csn->outputFields.find(paths[i]) != csn->outputFields.end() ||
+ residual.fields.find(paths[i]) != residual.fields.end()) {
+ includeInOutput[i] = true;
+ }
+ }
+
+ std::unique_ptr<sbe::PlanStage> stage =
+ std::make_unique<sbe::ColumnScanStage>(getCurrentCollection(reqs)->uuid(),
+ csn->indexEntry.catalogName,
+ std::move(paths),
+ std::move(includeInOutput),
+ ridSlot,
+ reconstructedRecordSlot,
+ rowStoreSlot,
+ std::move(rowStoreExpr),
+ std::move(filteredPaths),
+ _yieldPolicy,
+ csn->nodeId());
// Generate post assembly filter.
if (csn->postAssemblyFilter) {
@@ -793,6 +838,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder
csn->nodeId());
stage = std::move(outputStage.stage);
}
+
return {std::move(stage), std::move(outputs)};
}