diff options
-rw-r--r-- | jstests/core/columnstore_index_per_path_filters.js | 329 | ||||
-rw-r--r-- | jstests/noPassthroughWithMongod/column_scan_explain.js | 158 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/stages/column_scan.cpp | 283 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/stages/column_scan.h | 74 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_columnar_test.cpp | 27 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder.cpp | 70 |
7 files changed, 816 insertions, 127 deletions
diff --git a/jstests/core/columnstore_index_per_path_filters.js b/jstests/core/columnstore_index_per_path_filters.js new file mode 100644 index 00000000000..c9dd442f098 --- /dev/null +++ b/jstests/core/columnstore_index_per_path_filters.js @@ -0,0 +1,329 @@ +/** + * Testing of just the query layer's integration for columnar index when filters are used that + * might be pushed down into the column scan stage. + * + * @tags: [ + * # columnstore indexes are new in 6.1. + * requires_fcv_61, + * # Runs explain on an aggregate command which is only compatible with readConcern local. + * assumes_read_concern_unchanged, + * # TODO SERVER-66925 We could potentially need to resume an index build in the event of a + * # stepdown, which is not yet implemented. + * does_not_support_stepdowns, + * uses_column_store_index, + * ] + */ +(function() { +"use strict"; + +load("jstests/aggregation/extras/utils.js"); // For "resultsEq." +load("jstests/libs/analyze_plan.js"); // For "planHasStage." +load("jstests/libs/sbe_explain_helpers.js"); // For getSbePlanStages. +load("jstests/libs/sbe_util.js"); // For "checkSBEEnabled."" + +const columnstoreEnabled = + checkSBEEnabled(db, ["featureFlagColumnstoreIndexes", "featureFlagSbeFull"]); +if (!columnstoreEnabled) { + jsTestLog("Skipping columnstore index validation test since the feature flag is not enabled."); + return; +} + +const coll_filters = db.columnstore_index_per_path_filters; +function runPerPathFiltersTest({docs, query, projection, expected, testDescription}) { + coll_filters.drop(); + coll_filters.insert(docs); + assert.commandWorked(coll_filters.createIndex({"$**": "columnstore"})); + + // Order of the elements within the result and 'expected' is not significant for 'resultsEq' but + // use small datasets to make the test failures more readable. 
+ const explain = coll_filters.find(query, projection).explain(); + const errMsg = " **TEST** " + testDescription + tojson(explain); + const actual = coll_filters.find(query, projection).toArray(); + assert(resultsEq(actual, expected), + `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`); +} +// Sanity check that without filters, the cursors on other columns are advanced correctly. +(function testPerPathFilters_SingleColumnScalarsAndArrays() { + const docs = [ + {n: 0, x: 42}, + {n: 1, y: 42}, + {n: 2, x: 42}, + {n: 3}, + {n: 4, x: 42, y: 42}, + {n: 5, x: 42}, + ]; + runPerPathFiltersTest({ + docs: docs, + query: {}, + projection: {a: 1, _id: 0, n: 1, x: 1, y: 1}, + expected: docs, + testDescription: "SingleColumnScalarsAndArrays" + }); +})(); + +// Checks matching on top-level path that contains scalars and arrays. +(function testPerPathFilters_SingleColumnScalarsAndArrays() { + const docs = [ + {n: 0, x: 42}, + {n: 1, x: [[42]]}, + {n: 2, x: [0, 42]}, + {n: 3, x: [42, 0]}, + {n: 4, x: [[42], 0]}, + {n: 5, x: [{y: 0}, 42]}, + {n: 6, x: [{y: 0}, [42]]}, + {n: 7, x: [[0, 1], 42]}, + {n: 8, x: [[0, 1], [42]]}, + {n: 9, x: {a: 42}}, + {n: 10, x: [{a: 42}]}, + {n: 11, x: [{a: 0}, 42]}, + ]; + // Projecting "_id" out and instead projecting as the first (in alphabetical order) column a + // non-existing field helps to flush out bugs with incorrect lookup of the filtered columns + // among all columns involved in the query. + runPerPathFiltersTest({ + docs: docs, + query: {x: 42}, + projection: {a: 1, _id: 0, n: 1}, + expected: [{n: 0}, {n: 2}, {n: 3}, {n: 5}, {n: 7}, {n: 11}], + testDescription: "SingleColumnScalarsAndArrays" + }); +})(); + +// Checks matching on sub-path that contains scalars and arrays. 
+(function testPerPathFilters_SingleColumnScalarsAndArrays_SubField() { + const docs = [ + {_id: 0, x: {y: 42}}, + {_id: 1, x: {y: [0, 42, {z: 1}]}}, + {_id: 2, x: {y: 0}}, + {_id: 3, x: [{y: 0}, {y: 42}]}, + {_id: 4, x: [{y: 0}, {y: 1}]}, + {_id: 5, x: [42, {y: 0}, {z: 42}]}, + {_id: 6, x: 0}, + {_id: 7, x: {z: 0}}, + {_id: 8, x: {y: {z: 0}}}, + {_id: 9, x: [{y: [0, 42]}, {y: 1}]}, + {_id: 10, x: [{y: [[42], 0]}]}, + {_id: 11, x: [[{y: 42}, 0], 1]} + ]; + runPerPathFiltersTest({ + docs: docs, + query: {"x.y": 42}, + projection: {_id: 1}, + expected: [{_id: 0}, {_id: 1}, {_id: 3}, {_id: 9}], + testDescription: "SingleColumnScalarsAndArrays_SubField" + }); +})(); + +// Check that a single filtered column correctly handles matching, no-matching and missing values. +(function testPerPathFilters_SingleColumnMatchNoMatchMissingValues() { + const docs = [ + {_id: 0, x: 42}, + {_id: 1, x: 0}, + {_id: 2, x: 42}, + {_id: 3, no_x: 0}, + {_id: 4, x: 42}, + {_id: 5, x: 0}, + {_id: 6, no_x: 0}, + {_id: 7, x: 42}, + {_id: 8, no_x: 0}, + {_id: 9, x: 0}, + {_id: 10, x: 42}, + ]; + runPerPathFiltersTest({ + docs: docs, + query: {x: 42}, + projection: {_id: 1}, + expected: [{_id: 0}, {_id: 2}, {_id: 4}, {_id: 7}, {_id: 10}], + testDescription: "SingleColumnMatchNoMatchMissingValues" + }); +})(); + +// Check zig-zagging of two filters. We cannot assert through a JS test that the cursors for the +// filtered columns are advanced as described here in the comments, but the test attempts to +// exercise various match/no-match/missing combinations of values across columns. +(function testPerPathFilters_TwoColumns() { + const docs = [ + {_id: 0, x: 0, y: 0}, // start by iterating x + {_id: 1, x: 42, y: 42}, // seek into y and match! - continue iterating on x + {_id: 2, x: 42, no_y: 0}, // seeking into y skips to n:3 + {_id: 3, x: 42, y: 0}, // iterating on y + {_id: 4, x: 42, y: 42}, // seek into x and match! 
- continue iterating on y + {_id: 5, no_x: 0, y: 42}, // seek into x skips to n:6 + {_id: 6, x: 42, y: 0}, // seek into y but no match - iterate on y + {_id: 7, x: 0, y: 42}, // seek into x but no match - iterate on x + {_id: 8, no_x: 0, no_y: 0}, // skipped by x + {_id: 9, x: 42, y: 42}, // seek into y and match! + ]; + // Adding into the projection specification non-existent fields doesn't change the output but + // helps to flush out bugs with incorrect indexing of filtered paths among all others. + runPerPathFiltersTest({ + docs: docs, + query: {x: 42, y: 42}, + projection: {_id: 1, a: 1, xa: 1, xb: 1, ya: 1}, + expected: [{_id: 1}, {_id: 4}, {_id: 9}], + testDescription: "TwoColumns" + }); +})(); + +// Check zig-zagging of three filters. +(function testPerPathFilters_ThreeColumns() { + const docs = [ + {_id: 0, x: 0, y: 42, z: 42}, // x + {_id: 1, x: 42, y: 42, z: 0}, // x->y->z + {_id: 2, x: 0, y: 42, z: 42}, // z->x + {_id: 3, x: 0, y: 42, z: 42}, // x + {_id: 4, x: 42, no_y: 0, z: 42}, // x->y + {_id: 5, x: 42, y: 0, z: 42}, // y + {_id: 6, x: 42, y: 42, z: 42}, // match! ->y + {_id: 7, x: 42, y: 42, z: 42}, // match! ->y + {_id: 8, no_x: 0, y: 42, z: 42}, // y->z->x + {_id: 9, x: 42, y: 42, no_z: 0}, // x + {_id: 10, x: 42, y: 42, z: 42}, // match! + ]; + // Adding into the projection specification non-existent fields doesn't change the output but + // helps to flush out bugs with incorrect indexing of filtered paths among all others. + runPerPathFiltersTest({ + docs: docs, + query: {x: 42, y: 42, z: 42}, + projection: {_id: 1, a: 1, b: 1, xa: 1, xb: 1, ya: 1, za: 1}, + expected: [{_id: 6}, {_id: 7}, {_id: 10}], + testDescription: "ThreeColumns" + }); +})(); + +// Check projection of filtered columns. 
+(function testPerPathFilters_ProjectFilteredColumn() { + const docs = [ + {_id: 0, x: {y: 42}}, + {_id: 1, x: {y: 42, z: 0}}, + {_id: 2, x: [0, {y: 42}, {y: 0}, {z: 0}]}, + ]; + runPerPathFiltersTest({ + docs: docs, + query: {"x.y": 42}, + projection: {_id: 1, "x.y": 1}, + expected: [{_id: 0, x: {y: 42}}, {_id: 1, x: {y: 42}}, {_id: 2, x: [{y: 42}, {y: 0}, {}]}], + testDescription: "ProjectFilteredColumn" + }); +})(); + +// Check correctness when have both per-path and residual filters. +(function testPerPathFilters_PerPathAndResidualFilters() { + const docs = [ + {_id: 0, x: 42, no_y: 0}, + {_id: 1, x: 42, y: 0}, + {_id: 2, x: 0, no_y: 0}, + {_id: 3, x: 0, y: 0}, + {_id: 4, no_x: 0, no_y: 0}, + {_id: 5, x: 42, no_y: 0}, + ]; + runPerPathFiltersTest({ + docs: docs, + query: {x: 42, y: {$exists: false}}, // {$exists: false} causes the residual filter + projection: {_id: 1}, + expected: [{_id: 0}, {_id: 5}], + testDescription: "PerPathAndResidualFilters" + }); +})(); + +// Check degenerate case with no paths. +(function testPerPathFilters_NoPathsProjected() { + const docs = [ + {_id: 0, x: 42}, + {_id: 1, x: 0}, + {_id: 2, no_x: 42}, + ]; + runPerPathFiltersTest({ + docs: docs, + query: {x: 42}, + projection: {_id: 0, a: {$literal: 1}}, + expected: [{a: 1}], + testDescription: "NoPathsProjected" + }); + runPerPathFiltersTest({ + docs: docs, + query: {}, + projection: {_id: 0, a: {$literal: 1}}, + expected: [{a: 1}, {a: 1}, {a: 1}], + testDescription: "NoPathsProjected (and no filter)" + }); +})(); + +// While using columnar indexes doesn't guarantee a specific field ordering in the result objects, +// we still try to provide a stable experience to the user, so we output "_id" first and other +// fields in alphabetically ascending order. 
+(function testPerPathFilters_FieldOrder() { + const docs = [{z: 42, a: 42, _id: 42, _a: 42}]; + + coll_filters.drop(); + coll_filters.insert(docs); + assert.commandWorked(coll_filters.createIndex({"$**": "columnstore"})); + + let res = tojson(coll_filters.find({}, {_a: 1, a: 1, z: 1}).toArray()[0]); + let expected = '{ "_id" : 42, "_a" : 42, "a" : 42, "z" : 42 }'; + assert(res == expected, `actual: ${res} != expected: ${expected} in **TEST** field order 1`); + + // Having a filter on a path that is also being projected, should not affect the order. + res = tojson(coll_filters.find({z: 42}, {_a: 1, a: 1, z: 1}).toArray()[0]); + expected = '{ "_id" : 42, "_a" : 42, "a" : 42, "z" : 42 }'; + assert(res == expected, `actual: ${res} != expected: ${expected} in **TEST** field order 2`); + + // Omitting the "_id" field should not affect the order. + res = tojson(coll_filters.find({a: 42}, {_id: 0, _a: 1, z: 1}).toArray()[0]); + expected = '{ "_a" : 42, "z" : 42 }'; + assert(res == expected, `actual: ${res} != expected: ${expected} in **TEST** field order 3`); +})(); + +// Sanity test that per-column filtering is meeting the efficiency expectations. 
+(function testPerPathFilters_Efficiency() { + coll_filters.drop(); + + const docsCount = 1000; + const expectedToMatchCount = 10; + for (let i = 0; i < docsCount; i++) { + coll_filters.insert({_id: i, x: i % 2, y: i % (docsCount / expectedToMatchCount)}); + } + assert.commandWorked(coll_filters.createIndex({"$**": "columnstore"})); + + assert.eq(coll_filters.find({x: 1, y: 1}, {_id: 1, x: 1}).toArray().length, + expectedToMatchCount); + const explain = coll_filters.find({x: 1, y: 1}, {_id: 1, x: 1}).explain("executionStats"); + + const columnScanStages = getSbePlanStages(explain, "COLUMN_SCAN"); + assert.eq(columnScanStages.length, 1, `Could not find 'COLUMN_SCAN' stage: ${tojson(explain)}`); + + const columns = columnScanStages[0].columns; + assertArrayEq({ + actual: columnScanStages[0].paths, + expected: ["_id", "x", "y"], + extraErrorMsg: 'Paths used by column scan stage' + }); + assert.eq(Object.keys(columns).length, 3, `Used columns ${columns}`); + + const _id = columns._id; + const x = columns.x; + const y = columns.y; + + assert(_id.usedInOutput, "_id column should be used in output"); + assert(x.usedInOutput, "x column should be used in output"); + assert(!y.usedInOutput, "y column should be used only for filtering"); + + // When there are no missing fields, the number of "next" calls in zig-zag search algorithm is + // equal to the number of documents docsCount (NB: in non-filtered search the number of "next" + // calls is close to k*docsCount where k is the number of paths). + assert.eq(_id.numNexts, 0, 'Should not iterate on non-filtered column'); + assert.eq(x.numNexts + y.numNexts, docsCount, 'Total count of "next" calls'); + + // The columns with higher selectivity should be preferred by the zig-zag search for driving the + // "next" calls. Due to the regularity of data in this test (if "y" matched "x" would match as + // well), after the first match "y" column completely takes over. 
+ assert.gt(y.numNexts, 0.9 * docsCount, 'high selectivity should drive "next" calls'); + + // We seek into each column to set up the cursors, after that seeks into _id should only happen + // on full match, and seeks into x or y should only happen on partial matches. + assert.eq(_id.numSeeks, 1 + expectedToMatchCount, "Seeks into the _id column"); + assert.lt(x.numSeeks + y.numSeeks, + 2 * expectedToMatchCount, + "Number of seeks in filtered columns should be small"); +})(); +})(); diff --git a/jstests/noPassthroughWithMongod/column_scan_explain.js b/jstests/noPassthroughWithMongod/column_scan_explain.js index e137d714514..1f2a60e8253 100644 --- a/jstests/noPassthroughWithMongod/column_scan_explain.js +++ b/jstests/noPassthroughWithMongod/column_scan_explain.js @@ -4,6 +4,7 @@ (function() { "use strict"; +load("jstests/aggregation/extras/utils.js"); // For assertArrayEq load("jstests/libs/analyze_plan.js"); // For planHasStage. load("jstests/libs/sbe_util.js"); // For checkSBEEnabled. load("jstests/libs/sbe_explain_helpers.js"); // For getSbePlanStages. @@ -34,35 +35,54 @@ assert.commandWorked(coll.insertMany(docs)); const columnScanStages = getSbePlanStages(explain, "COLUMN_SCAN"); assert.eq(columnScanStages.length, 1, `Could not find 'COLUMN_SCAN' stage: ${tojson(explain)}`); + const columnScan = columnScanStages[0]; + + assertArrayEq({ + actual: columnScan.paths, + expected: ["_id", "x", "y.a"], + extraErrorMsg: 'Paths used by column scan stage' + }); // Verifying column fields. - const columns = columnScanStages[0].columns; - assert.eq(Object.keys(columns).length, 4, 'Number of columns should be 4.'); + const columns = columnScan.columns; + assert.eq( + Object.keys(columns).length, 4, `Should access 4 columns but accessed: ${tojson(columns)}`); + + // We seek into each column once, when setting up the cursors. The dense column is the first to + // hit EOF after iterating over all documents so other columns iterate at least one time less. 
const expectedColumns = { - "<<RowId Column>>": {"numNexts": 5, "numSeeks": 1, "usedInOutput": false}, - "_id": {"numNexts": 5, "numSeeks": 1, "usedInOutput": true}, - "x": {"numNexts": 5, "numSeeks": 1, "usedInOutput": true}, + "<<RowId Column>>": {"numNexts": docs.length, "numSeeks": 1, "usedInOutput": false}, + "_id": {"numNexts": docs.length - 1, "numSeeks": 1, "usedInOutput": true}, + "x": {"numNexts": docs.length - 1, "numSeeks": 1, "usedInOutput": true}, "y.a": {"numNexts": 1, "numSeeks": 1, "usedInOutput": true} }; for (const [columnName, expectedObj] of Object.entries(expectedColumns)) { assert.eq(sortDoc(columns[columnName]), sortDoc(expectedObj), - `Mismatching entry for column ${columnName}`); + `Mismatching entry for column ${tojson(columnName)}`); } // Verifying parent column fields. - const parentColumns = columnScanStages[0].parentColumns; - assert.eq(Object.keys(parentColumns).length, 1, 'Number of parent columns should be 1.'); + const parentColumns = columnScan.parentColumns; + assert.eq(Object.keys(parentColumns).length, + 1, + `Should access 1 parent column but accessed: ${tojson(parentColumns)}`); // Expecting 4 lookups on the "y" parent column for the 3 docs which didn't have a "y.a" - // value and 1 for an unsuccessful call to seek. + // value and 1 for an unsuccessful call to seek. We should not iterate over parent columns. assert.eq(sortDoc(parentColumns.y), {"numNexts": 0, "numSeeks": 4}, - 'Mismatching entry for parent column \'y\''); - - assert.eq(explain.executionStats.totalKeysExamined, 24, `Mismatch in totalKeysExamined.`); - assert.eq( - columnScanStages[0].numRowStoreFetches, 0, 'Number of row store fetches should be 0.'); - assert.eq(columnScanStages[0].nReturned, 5, 'Number returned should be 5.'); + 'Mismatching entry for parent column "y"'); + + // 'totalKeysExamined' should be equal to the sum of "next" and "seek" calls across all columns. 
+ assert.eq(explain.executionStats.totalKeysExamined, + columns["<<RowId Column>>"].numNexts + columns["<<RowId Column>>"].numSeeks + + columns["_id"].numNexts + columns["_id"].numSeeks + columns["x"].numNexts + + columns["x"].numSeeks + columns["y.a"].numNexts + columns["y.a"].numSeeks + + parentColumns["y"].numNexts + parentColumns["y"].numSeeks, + `Mismatch in totalKeysExamined.`); + + assert.eq(columnScan.numRowStoreFetches, 0, 'Mismatch in numRowStoreFetches'); + assert.eq(columnScan.nReturned, docs.length, 'nReturned: should return all docs'); }()); // Test the explain output for a scan on a nonexistent field. @@ -71,29 +91,42 @@ assert.commandWorked(coll.insertMany(docs)); const columnScanStages = getSbePlanStages(explain, "COLUMN_SCAN"); assert.eq(columnScanStages.length, 1, `Could not find 'COLUMN_SCAN' stage: ${tojson(explain)}`); + const columnScan = columnScanStages[0]; + + assertArrayEq({ + actual: columnScan.paths, + expected: ["_id", "z"], + extraErrorMsg: 'Paths used by column scan stage' + }); // Verifying column fields. - const columns = columnScanStages[0].columns; - assert.eq(Object.keys(columns).length, 3, 'Number of columns should be 3.'); + const columns = columnScan.columns; + assert.eq( + Object.keys(columns).length, 3, `Should access 3 columns but accessed: ${tojson(columns)}`); const expectedColumns = { - "<<RowId Column>>": {"numNexts": 5, "numSeeks": 1, "usedInOutput": false}, - "_id": {"numNexts": 5, "numSeeks": 1, "usedInOutput": true}, + "<<RowId Column>>": {"numNexts": docs.length, "numSeeks": 1, "usedInOutput": false}, + "_id": {"numNexts": docs.length - 1, "numSeeks": 1, "usedInOutput": true}, "z": {"numNexts": 0, "numSeeks": 1, "usedInOutput": true}, }; for (const [columnName, expectedObj] of Object.entries(expectedColumns)) { assert.eq(sortDoc(columns[columnName]), sortDoc(expectedObj), - `Mismatching entry for column ${columnName}`); + `Mismatching entry for column "${columnName}"`); } // Verifying parent column fields. 
- const parentColumns = columnScanStages[0].parentColumns; - assert.eq(parentColumns, {}); - - assert.eq(explain.executionStats.totalKeysExamined, 13, `Mismatch in totalKeysExamined.`); - assert.eq( - columnScanStages[0].numRowStoreFetches, 0, 'Number of row store fetches should be 0.'); - assert.eq(columnScanStages[0].nReturned, 5, 'Number returned should be 5.'); + const parentColumns = columnScan.parentColumns; + assert.eq(parentColumns, {}, "Should not access parent columns"); + + // 'totalKeysExamined' should be equal to the sum of "next" and "seek" calls across all columns. + assert.eq(explain.executionStats.totalKeysExamined, + columns["<<RowId Column>>"].numNexts + columns["<<RowId Column>>"].numSeeks + + columns["_id"].numNexts + columns["_id"].numSeeks + columns["z"].numNexts + + columns["z"].numSeeks, + `Mismatch in totalKeysExamined.`); + + assert.eq(columnScan.numRowStoreFetches, 0, 'Mismatch in numRowStoreFetches'); + assert.eq(columnScan.nReturned, docs.length, 'nReturned: should return all docs'); }()); // Test the explain output for a scan on a 2-level nested field. @@ -102,14 +135,22 @@ assert.commandWorked(coll.insertMany(docs)); const columnScanStages = getSbePlanStages(explain, "COLUMN_SCAN"); assert.eq(columnScanStages.length, 1, `Could not find 'COLUMN_SCAN' stage: ${tojson(explain)}`); + const columnScan = columnScanStages[0]; + + assertArrayEq({ + actual: columnScan.paths, + expected: ["_id", "y.b.c"], + extraErrorMsg: 'Paths used by column scan stage' + }); // Verifying column fields. 
- const columns = columnScanStages[0].columns; - assert.eq(Object.keys(columns).length, 3, 'Number of columns should be 3.'); + const columns = columnScan.columns; + assert.eq( + Object.keys(columns).length, 3, `Should access 3 columns but accessed: ${tojson(columns)}`); const expectedColumns = { - "<<RowId Column>>": {"numNexts": 5, "numSeeks": 1, "usedInOutput": false}, - "_id": {"numNexts": 5, "numSeeks": 1, "usedInOutput": true}, - "y.b.c": {"numNexts": 2, "numSeeks": 1, "usedInOutput": true}, + "<<RowId Column>>": {"numNexts": docs.length, "numSeeks": 1, "usedInOutput": false}, + "_id": {"numNexts": docs.length - 1, "numSeeks": 1, "usedInOutput": true}, + "y.b.c": {"numNexts": 1, "numSeeks": 1, "usedInOutput": true}, }; for (const [columnName, expectedObj] of Object.entries(expectedColumns)) { assert.eq(sortDoc(columns[columnName]), @@ -118,22 +159,57 @@ assert.commandWorked(coll.insertMany(docs)); } // Verifying parent column fields. - const parentColumns = columnScanStages[0].parentColumns; - assert.eq(Object.keys(parentColumns).length, 2, 'Number of parent columns should be 2.'); + const parentColumns = columnScan.parentColumns; + assert.eq(Object.keys(parentColumns).length, + 2, + `Should access 2 parent columns but accessed: ${tojson(parentColumns)}`); // Expecting 3 lookups on the "y" parent column for the 2 docs which didn't have a "y.b" - value and 1 unsuccessful call to seek. + value and 1 unsuccessful call to seek. We should not iterate over parent columns. assert.eq(sortDoc(parentColumns.y), {"numNexts": 0, "numSeeks": 3}, - 'Mismatching entry for parent column \'y\''); + 'Mismatching entry for parent column "y"'); // Expecting 4 lookups on the "y.b" parent column for the 3 docs that didn't have a "y.b.c" // value and 1 unsuccessful call to seek. 
assert.eq(sortDoc(parentColumns['y.b']), {"numNexts": 0, "numSeeks": 4}, - 'Mismatching entry for parent column \'y.b\''); + 'Mismatching entry for parent column "y.b"'); + + // 'totalKeysExamined' should be equal to the sum of "next" and "seek" calls across all columns. + assert.eq(explain.executionStats.totalKeysExamined, + columns["<<RowId Column>>"].numNexts + columns["<<RowId Column>>"].numSeeks + + columns["_id"].numNexts + columns["_id"].numSeeks + columns["y.b.c"].numNexts + + columns["y.b.c"].numSeeks + parentColumns["y.b"].numNexts + + parentColumns["y.b"].numSeeks + parentColumns["y"].numNexts + + parentColumns["y"].numSeeks, + `Mismatch in totalKeysExamined.`); + + assert.eq(columnScan.numRowStoreFetches, 0, 'Mismatch in numRowStoreFetches'); + assert.eq(columnScan.nReturned, docs.length, 'nReturned: should return all docs'); +}()); - assert.eq(explain.executionStats.totalKeysExamined, 22, `Mismatch in totalKeysExamined.`); - assert.eq( - columnScanStages[0].numRowStoreFetches, 0, 'Number of row store fetches should be 0.'); - assert.eq(columnScanStages[0].nReturned, 5, 'Number returned should be 5.'); +// Test fallback to the row store. 
+(function testWithFallbackToRowstore() { + const coll_rowstore = db.column_scan_explain_rowstore; + coll_rowstore.drop(); + assert.commandWorked(coll_rowstore.createIndex({"$**": "columnstore"})); + + const docs_rowstore = [ + {_id: 0, x: {y: 42, z: 0}}, + {_id: 1, x: {y: {z: 0}}}, // fallback + {_id: 2, x: [{y: 42}, 0]}, + {_id: 3, x: [{y: 42}, {z: 0}]}, + {_id: 4, x: [{y: [42, 43]}, {z: 0}]}, + {_id: 5, x: [{y: [42, {z: 0}]}, {z: 0}]}, // fallback + {_id: 6, x: 42}, + ]; + coll_rowstore.insert(docs_rowstore); + const explain = coll_rowstore.find({}, {_id: 0, "x.y": 1}).explain("executionStats"); + + const columnScanStages = getSbePlanStages(explain, "COLUMN_SCAN"); + assert.eq(columnScanStages.length, 1, `Could not find 'COLUMN_SCAN' stage: ${tojson(explain)}`); + const columnScan = columnScanStages[0]; + + assert.eq(columnScan.numRowStoreFetches, 2, 'Mismatch in numRowStoreFetches'); + assert.eq(columnScan.nReturned, docs_rowstore.length, 'nReturned: should return all docs'); }()); }()); diff --git a/src/mongo/db/exec/sbe/stages/column_scan.cpp b/src/mongo/db/exec/sbe/stages/column_scan.cpp index a003515427f..35e023bfac3 100644 --- a/src/mongo/db/exec/sbe/stages/column_scan.cpp +++ b/src/mongo/db/exec/sbe/stages/column_scan.cpp @@ -52,10 +52,12 @@ TranslatedCell translateCell(PathView path, const SplitCellView& splitCellView) ColumnScanStage::ColumnScanStage(UUID collectionUuid, StringData columnIndexName, std::vector<std::string> paths, + std::vector<bool> includeInOutput, boost::optional<value::SlotId> recordIdSlot, boost::optional<value::SlotId> reconstuctedRecordSlot, value::SlotId rowStoreSlot, std::unique_ptr<EExpression> rowStoreExpr, + std::vector<PathFilter> filteredPaths, PlanYieldPolicy* yieldPolicy, PlanNodeId nodeId, bool participateInTrialRunTracking) @@ -63,26 +65,39 @@ ColumnScanStage::ColumnScanStage(UUID collectionUuid, _collUuid(collectionUuid), _columnIndexName(columnIndexName), _paths(std::move(paths)), + 
_includeInOutput(std::move(includeInOutput)), _recordIdSlot(recordIdSlot), _reconstructedRecordSlot(reconstuctedRecordSlot), _rowStoreSlot(rowStoreSlot), - _rowStoreExpr(std::move(rowStoreExpr)) {} + _rowStoreExpr(std::move(rowStoreExpr)), + _filteredPaths(std::move(filteredPaths)) { + invariant(_filteredPaths.size() <= _paths.size(), + "Filtered paths should be a subset of all paths"); + invariant(_paths.size() == _includeInOutput.size()); +} std::unique_ptr<PlanStage> ColumnScanStage::clone() const { - std::vector<std::unique_ptr<EExpression>> pathExprs; + std::vector<PathFilter> filteredPaths; + for (const auto& fp : _filteredPaths) { + filteredPaths.emplace_back(fp.pathIndex, fp.filterExpr->clone(), fp.inputSlotId); + } return std::make_unique<ColumnScanStage>(_collUuid, _columnIndexName, _paths, + _includeInOutput, _recordIdSlot, _reconstructedRecordSlot, _rowStoreSlot, _rowStoreExpr ? _rowStoreExpr->clone() : nullptr, + std::move(filteredPaths), _yieldPolicy, _commonStats.nodeId, _participateInTrialRunTracking); } void ColumnScanStage::prepare(CompileCtx& ctx) { + ctx.root = this; + if (_reconstructedRecordSlot) { _reconstructedRecordAccessor = std::make_unique<value::OwnedValueAccessor>(); } @@ -92,10 +107,19 @@ void ColumnScanStage::prepare(CompileCtx& ctx) { _rowStoreAccessor = std::make_unique<value::OwnedValueAccessor>(); if (_rowStoreExpr) { - ctx.root = this; _rowStoreExprCode = _rowStoreExpr->compile(ctx); } + _filterInputAccessors.resize(_filteredPaths.size()); + for (size_t idx = 0; idx < _filterInputAccessors.size(); ++idx) { + auto slot = _filteredPaths[idx].inputSlotId; + auto [it, inserted] = _filterInputAccessorsMap.emplace(slot, &_filterInputAccessors[idx]); + uassert(6610212, str::stream() << "duplicate slot: " << slot, inserted); + } + for (auto& filteredPath : _filteredPaths) { + _filterExprsCode.emplace_back(filteredPath.filterExpr->compile(ctx)); + } + tassert(6610200, "'_coll' should not be initialized prior to 'acquireCollection()'", 
!_coll); std::tie(_coll, _collName, _catalogEpoch) = acquireCollection(_opCtx, _collUuid); @@ -120,12 +144,23 @@ value::SlotAccessor* ColumnScanStage::getAccessor(CompileCtx& ctx, value::SlotId if (_rowStoreSlot == slot) { return _rowStoreAccessor.get(); } + + if (auto it = _filterInputAccessorsMap.find(slot); it != _filterInputAccessorsMap.end()) { + return it->second; + } + return ctx.getAccessor(slot); } void ColumnScanStage::doSaveState(bool relinquishCursor) { + if (_denseColumnCursor) { + _denseColumnCursor->makeOwned(); + _denseColumnCursor->cursor().save(); + } + for (auto& cursor : _columnCursors) { cursor.makeOwned(); + cursor.cursor().save(); } if (_rowStoreCursor && relinquishCursor) { @@ -136,9 +171,6 @@ void ColumnScanStage::doSaveState(bool relinquishCursor) { _rowStoreCursor->setSaveStorageCursorOnDetachFromOperationContext(!relinquishCursor); } - for (auto& cursor : _columnCursors) { - cursor.cursor().save(); - } for (auto& [path, cursor] : _parentPathCursors) { cursor->cursor().saveUnpositioned(); } @@ -170,6 +202,9 @@ void ColumnScanStage::doRestoreState(bool relinquishCursor) { } } + if (_denseColumnCursor) { + _denseColumnCursor->cursor().restore(); + } for (auto& cursor : _columnCursors) { cursor.cursor().restore(); } @@ -182,6 +217,9 @@ void ColumnScanStage::doDetachFromOperationContext() { if (_rowStoreCursor) { _rowStoreCursor->detachFromOperationContext(); } + if (_denseColumnCursor) { + _denseColumnCursor->cursor().detachFromOperationContext(); + } for (auto& cursor : _columnCursors) { cursor.cursor().detachFromOperationContext(); } @@ -194,6 +232,9 @@ void ColumnScanStage::doAttachToOperationContext(OperationContext* opCtx) { if (_rowStoreCursor) { _rowStoreCursor->reattachToOperationContext(opCtx); } + if (_denseColumnCursor) { + _denseColumnCursor->cursor().reattachToOperationContext(opCtx); + } for (auto& cursor : _columnCursors) { cursor.cursor().reattachToOperationContext(opCtx); } @@ -247,24 +288,30 @@ void 
ColumnScanStage::open(bool reOpen) { auto iam = static_cast<ColumnStoreAccessMethod*>(entry->accessMethod()); - // Eventually we can not include this column for the cases where a known dense column (_id) - // is being read anyway. - - // Add a stats struct that will be shared by overall ColumnScanStats and individual - // cursor. - _columnCursors.emplace_back( - iam->storage()->newCursor(_opCtx, ColumnStore::kRowIdPath), - _specificStats.cursorStats.emplace_back(ColumnStore::kRowIdPath.toString(), false)); - - for (auto&& path : _paths) { - _columnCursors.emplace_back(iam->storage()->newCursor(_opCtx, path), - _specificStats.cursorStats.emplace_back(path, true)); + // The dense _recordId column is only needed if there are no filters (TODO SERVER-68377: + // eventually we can avoid including this column for the cases where a known dense column + // such as _id is being read anyway). + if (_filteredPaths.empty()) { + _denseColumnCursor = std::make_unique<ColumnCursor>( + iam->storage()->newCursor(_opCtx, ColumnStore::kRowIdPath), + _specificStats.cursorStats.emplace_back(ColumnStore::kRowIdPath.toString(), + false /*includeInOutput*/)); + } + for (size_t i = 0; i < _paths.size(); i++) { + _columnCursors.emplace_back( + iam->storage()->newCursor(_opCtx, _paths[i]), + _specificStats.cursorStats.emplace_back(_paths[i], _includeInOutput[i])); } } + // Set the cursors. + if (_denseColumnCursor) { + _denseColumnCursor->seekAtOrPast(RecordId()); + } for (auto& columnCursor : _columnCursors) { columnCursor.seekAtOrPast(RecordId()); } + _recordId = _filteredPaths.empty() ? findMinRecordId() : findNextRecordIdForFilteredColumns(); _open = true; } @@ -323,6 +370,152 @@ void ColumnScanStage::readParentsIntoObj(StringData path, } } +// The result of the filter predicate will be the same regardless of sparseness or sub objects, +// therefore we don't look at the parents and don't consult the row store. 
+// +// (TODO SERVER-68285) Currently, the per-path predicates expect an object to run on, so we create +// one. This is very inefficient (profiles show considerable time spent under Object::push_back) and +// should be replaced with predicates that run directly on values. The fact that the implementation +// of the filter depends on the implementation of the expressions passed to the stage indicates a +// tight coupling. Unfortunately, this dependency can only be discovered at runtime. +bool ColumnScanStage::checkFilter(CellView cell, size_t filterIndex, const PathValue& path) { + auto [tag, val] = value::makeNewObject(); + value::ValueGuard materializedObjGuard(tag, val); + auto& obj = *value::bitcastTo<value::Object*>(val); + + auto splitCellView = SplitCellView::parse(cell); + auto translatedCell = translateCell(path, splitCellView); + addCellToObject(translatedCell, obj); + + _filterInputAccessors[filterIndex].reset(true /*owned*/, tag, val); + materializedObjGuard.reset(); + return _bytecode.runPredicate(_filterExprsCode[filterIndex].get()); +} + +RecordId ColumnScanStage::findNextRecordIdForFilteredColumns() { + invariant(!_filteredPaths.empty()); + + // Initialize 'targetRecordId' from the filtered cursor we are currently iterating. + RecordId targetRecordId; + { + auto& cursor = cursorForFilteredPath(_filteredPaths[_nextUnmatched]); + if (!cursor.lastCell()) { + return RecordId(); // Have exhausted one of the columns. + } + targetRecordId = cursor.lastCell()->rid; + } + + size_t matchedSinceAdvance = 0; + // The loop will terminate because when 'matchedSinceAdvance' is reset the 'targetRecordId' is + // guaranteed to advance. It will do no more than N 'next()' calls across all cursors, where N + // is the number of records (might do fewer, if for some columns there are missing values). The + // number of seeks and filter checks depends on the selectivity of the filters. 
// Ensure that _all_ cursors have caught up with the filtered record ID. Some of the cursors
// In the absence of filters all cursors iterate forward on their own. Some of the cursors might be
- _recordId = RecordId(); - for (auto& cursor : _columnCursors) { - auto& result = cursor.lastCell(); - if (result && (_recordId.isNull() || result->rid < _recordId)) { - _recordId = result->rid; - } - } - if (_recordId.isNull()) { return trackPlanState(PlanState::IS_EOF); } + bool useRowStore = false; + auto [outTag, outVal] = value::makeNewObject(); auto& outObj = *value::bitcastTo<value::Object*>(outVal); value::ValueGuard materializedObjGuard(outTag, outVal); StringDataSet pathsRead; - bool useRowStore = false; for (size_t i = 0; i < _columnCursors.size(); ++i) { - auto& lastCell = _columnCursors[i].lastCell(); - const auto& path = _columnCursors[i].path(); + if (!_includeInOutput[i]) { + continue; + } + auto& cursor = _columnCursors[i]; + auto& lastCell = cursor.lastCell(); boost::optional<SplitCellView> splitCellView; if (lastCell && lastCell->rid == _recordId) { splitCellView = SplitCellView::parse(lastCell->value); } - if (_columnCursors[i].includeInOutput() && !useRowStore) { + const auto& path = cursor.path(); + + if (!useRowStore) { if (splitCellView && (splitCellView->hasSubPaths || splitCellView->hasDuplicateFields)) { useRowStore = true; @@ -376,10 +566,6 @@ PlanState ColumnScanStage::getNext() { } } } - - if (splitCellView) { - _columnCursors[i].next(); - } } if (useRowStore) { @@ -423,6 +609,8 @@ PlanState ColumnScanStage::getNext() { _tracker = nullptr; uasserted(ErrorCodes::QueryTrialRunCompleted, "Trial run early exit in scan"); } + + _recordId = advanceCursors(); return trackPlanState(PlanState::ADVANCED); } @@ -507,6 +695,31 @@ std::vector<DebugPrinter::Block> ColumnScanStage::debugPrint() const { } ret.emplace_back(DebugPrinter::Block("`]")); + // Print out per-path filters (if any). 
+ if (!_filteredPaths.empty()) { + ret.emplace_back(DebugPrinter::Block("[`")); + for (size_t idx = 0; idx < _filteredPaths.size(); ++idx) { + if (idx) { + ret.emplace_back(DebugPrinter::Block("`;")); + } + + ret.emplace_back(str::stream() + << "\"" << _paths[_filteredPaths[idx].pathIndex] << "\": "); + DebugPrinter::addIdentifier(ret, _filteredPaths[idx].inputSlotId); + ret.emplace_back(DebugPrinter::Block("`,")); + DebugPrinter::addBlocks(ret, _filteredPaths[idx].filterExpr->debugPrint()); + } + ret.emplace_back(DebugPrinter::Block("`]")); + } + + if (_rowStoreExpr) { + ret.emplace_back(DebugPrinter::Block("[`")); + DebugPrinter::addIdentifier(ret, _rowStoreSlot); + ret.emplace_back(DebugPrinter::Block("`,")); + DebugPrinter::addBlocks(ret, _rowStoreExpr->debugPrint()); + ret.emplace_back(DebugPrinter::Block("`]")); + } + ret.emplace_back("@\"`"); DebugPrinter::addIdentifier(ret, _collUuid.toString()); ret.emplace_back("`\""); diff --git a/src/mongo/db/exec/sbe/stages/column_scan.h b/src/mongo/db/exec/sbe/stages/column_scan.h index 94bc8fa4034..7d30152f46d 100644 --- a/src/mongo/db/exec/sbe/stages/column_scan.h +++ b/src/mongo/db/exec/sbe/stages/column_scan.h @@ -41,24 +41,40 @@ namespace sbe { /** * A stage that scans provided columnar index. * - * Currently the stage produces an object into the 'recordSlot' such that accessing any of the given - * paths in it would be equivalent to accessing the paths in the corresponding object from the - * associated row store. In the future the stage will be extended to produce separate outputs for - * each path without materializing this intermediate object unless requested by the client. + * Currently the stage produces an object into the 'reconstructedRecordSlot' such that accessing any + * of the given paths in it would be equivalent to accessing the paths in the corresponding object + * from the associated row store. 
 * [filter_path_1: filterSlot_1, filterExpr_1; ...]? [rowStoreSlot, rowStoreExpr]?
// ordering of the fields. The two vectors should have the same size. NB: No paths is possible
Multiple filters form a conjunction where each branch of the AND only passes + // when a value exists. Empty '_filteredPaths' means there are no filters. + const std::vector<PathFilter> _filteredPaths; + ColumnCursor& cursorForFilteredPath(const PathFilter& fp) { + return _columnCursors[fp.pathIndex]; + } + size_t _nextUnmatched = 0; // used when searching for the next matching record + std::unique_ptr<value::OwnedValueAccessor> _reconstructedRecordAccessor; std::unique_ptr<value::OwnedValueAccessor> _recordIdAccessor; std::unique_ptr<value::OwnedValueAccessor> _rowStoreAccessor; + std::vector<value::OwnedValueAccessor> _filterInputAccessors; + value::SlotAccessorMap _filterInputAccessorsMap; vm::ByteCode _bytecode; std::unique_ptr<vm::CodeFragment> _rowStoreExprCode; + std::vector<std::unique_ptr<vm::CodeFragment>> _filterExprsCode; - // Cursors to simultaneously read from the sections of the index for each path (and, possibly, - // auxiliary sections) and from the row store. + // Cursors to simultaneously read from the sections of the index for each path. std::vector<ColumnCursor> _columnCursors; StringMap<std::unique_ptr<ColumnCursor>> _parentPathCursors; + // Dense column contains record ids for all records. It is necessary to support projection + // semantics for missing values on paths. + std::unique_ptr<ColumnCursor> _denseColumnCursor; + // Cursor into the associated row store. std::unique_ptr<SeekableRecordCursor> _rowStoreCursor; bool _open{false}; diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp index 9f33ae1c38b..79ccf3ecfa0 100644 --- a/src/mongo/db/query/query_planner.cpp +++ b/src/mongo/db/query/query_planner.cpp @@ -323,7 +323,7 @@ StatusWith<std::unique_ptr<QuerySolution>> tryToBuildColumnScan( // TODO SERVER-67140: Check if the columnar index actually provides the fields we need. 
+ if (params.options & QueryPlannerParams::GENERATE_PER_COLUMN_FILTERS) {
std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder auto abt = builder.generateABT(); auto rowStoreExpr = abt ? abtToExpr(*abt, slotMap) : emptyExpr->clone(); - std::unique_ptr<sbe::PlanStage> stage = std::make_unique<sbe::ColumnScanStage>( - getCurrentCollection(reqs)->uuid(), - csn->indexEntry.catalogName, - std::vector<std::string>{csn->allFields.begin(), csn->allFields.end()}, - ridSlot, - reconstructedRecordSlot, - rowStoreSlot, - std::move(rowStoreExpr), - _yieldPolicy, - csn->nodeId()); + // Get all the paths but make sure "_id" comes first (the order of paths given to the + // column_scan stage defines the order of fields in the reconstructed record). + std::vector<std::string> paths; + paths.reserve(csn->allFields.size()); + if (csn->allFields.find("_id") != csn->allFields.end()) { + paths.push_back("_id"); + } + for (const auto& path : csn->allFields) { + if (path != "_id") { + paths.push_back(path); + } + } + + // Identify the filtered columns, if any, and create slots/expressions for them. + std::vector<sbe::ColumnScanStage::PathFilter> filteredPaths; + filteredPaths.reserve(csn->filtersByPath.size()); + for (size_t i = 0; i < paths.size(); i++) { + auto itFilter = csn->filtersByPath.find(paths[i]); + if (itFilter != csn->filtersByPath.end()) { + auto filterInputSlot = _slotIdGenerator.generate(); + + // TODO SERVER-68285: use native SBE expression instead of the classic matcher. + auto expr = makeFunction("applyClassicMatcher", + makeConstant(sbe::value::TypeTags::classicMatchExpresion, + sbe::value::bitcastFrom<const MatchExpression*>( + itFilter->second->shallowClone().release())), + makeVariable(filterInputSlot)); + + filteredPaths.emplace_back(i, std::move(expr), filterInputSlot); + } + } + + // Tag which of the paths should be included into the output. 
+ DepsTracker residual; + if (csn->postAssemblyFilter) { + csn->postAssemblyFilter->addDependencies(&residual); + } + std::vector<bool> includeInOutput(paths.size(), false); + for (size_t i = 0; i < paths.size(); i++) { + if (csn->outputFields.find(paths[i]) != csn->outputFields.end() || + residual.fields.find(paths[i]) != residual.fields.end()) { + includeInOutput[i] = true; + } + } + + std::unique_ptr<sbe::PlanStage> stage = + std::make_unique<sbe::ColumnScanStage>(getCurrentCollection(reqs)->uuid(), + csn->indexEntry.catalogName, + std::move(paths), + std::move(includeInOutput), + ridSlot, + reconstructedRecordSlot, + rowStoreSlot, + std::move(rowStoreExpr), + std::move(filteredPaths), + _yieldPolicy, + csn->nodeId()); // Generate post assembly filter. if (csn->postAssemblyFilter) { @@ -793,6 +838,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder csn->nodeId()); stage = std::move(outputStage.stage); } + return {std::move(stage), std::move(outputs)}; } |