diff options
-rw-r--r-- | jstests/core/columnstore_index_per_path_filters.js | 587 | ||||
-rw-r--r-- | jstests/noPassthroughWithMongod/column_scan_explain.js | 158 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/stages/column_scan.cpp | 400 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/stages/column_scan.h | 74 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/values/columnar.cpp | 50 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/values/columnar.h | 50 | ||||
-rw-r--r-- | src/mongo/db/matcher/expression_algo.cpp | 69 | ||||
-rw-r--r-- | src/mongo/db/matcher/expression_algo_test.cpp | 240 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_columnar_test.cpp | 51 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder.cpp | 144 |
11 files changed, 1353 insertions, 478 deletions
diff --git a/jstests/core/columnstore_index_per_path_filters.js b/jstests/core/columnstore_index_per_path_filters.js new file mode 100644 index 00000000000..27154426324 --- /dev/null +++ b/jstests/core/columnstore_index_per_path_filters.js @@ -0,0 +1,587 @@ +/** + * Testing of just the query layer's integration for columnar index when filters are used that + * might be pushed down into the column scan stage. + * + * @tags: [ + * # columnstore indexes are new in 6.1. + * requires_fcv_61, + * # Runs explain on an aggregate command which is only compatible with readConcern local. + * assumes_read_concern_unchanged, + * # TODO SERVER-66925 We could potentially need to resume an index build in the event of a + * # stepdown, which is not yet implemented. + * does_not_support_stepdowns, + * uses_column_store_index, + * ] + */ +(function() { +"use strict"; + +load("jstests/aggregation/extras/utils.js"); // For "resultsEq." +load("jstests/libs/analyze_plan.js"); // For "planHasStage." +load("jstests/libs/sbe_explain_helpers.js"); // For getSbePlanStages. +load("jstests/libs/sbe_util.js"); // For "checkSBEEnabled."" + +const columnstoreEnabled = + checkSBEEnabled(db, ["featureFlagColumnstoreIndexes", "featureFlagSbeFull"]); +if (!columnstoreEnabled) { + jsTestLog("Skipping columnstore index validation test since the feature flag is not enabled."); + return; +} + +const coll_filters = db.columnstore_index_per_path_filters; +function runPerPathFiltersTest({docs, query, projection, expected, testDescription}) { + coll_filters.drop(); + coll_filters.insert(docs); + assert.commandWorked(coll_filters.createIndex({"$**": "columnstore"})); + + // Order of the elements within the result and 'expected' is not significant for 'resultsEq' but + // use small datasets to make the test failures more readable. 
+ const explain = coll_filters.find(query, projection).explain(); + const errMsg = " **TEST** " + testDescription + tojson(explain); + const actual = coll_filters.find(query, projection).toArray(); + assert(resultsEq(actual, expected), + `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`); +} +// Sanity check that without filters, the cursors on other columns are advanced correctly. +(function testPerPathFilters_SingleColumn_NoFilters() { + const docs = [ + {n: 0, x: 42}, + {n: 1, y: 42}, + {n: 2, x: 42}, + {n: 3}, + {n: 4, x: 42, y: 42}, + {n: 5, x: 42}, + ]; + runPerPathFiltersTest({ + docs: docs, + query: {}, + projection: {a: 1, _id: 0, n: 1, x: 1, y: 1}, + expected: docs, + testDescription: "SingleColumn_NoFilters" + }); +})(); + +// Checks matching on top-level path that contains scalars and arrays. +(function testPerPathFilters_SingleColumn_TopLevelField_Match() { + const docs = [ + {n: 0, x: 42}, // vals: 42, arrInfo: <empty> + {n: 1, x: [0, 42]}, // vals: [0,42], arrInfo: "[" + {n: 2, x: [[0, 0, 0], 0, 42]}, // vals: [0,0,0,0,42], arrInfo: "[[|2]" + {n: 3, x: [{y: 0}, {y: 0}, 42]}, // vals: [42], arrInfo: "[o1" + {n: 4, x: [0, 0, {y: 0}, 42]}, // vals: [0,0,42], arrInfo: "[|1o" + {n: 5, x: [[0, 0], {y: 0}, [{y: 0}], 42]}, // vals: [0,0,42], arrInfo: "[[|1]o[o]" + ]; + // Projecting "_id" out and instead projecting as the first (in alphabetical order) column a + // non-existing field helps to flush out bugs with incorrect lookup of the filtered columns + // among all columns involved in the query. 
+ runPerPathFiltersTest({ + docs: docs, + query: {x: 42}, + projection: {a: 1, _id: 0, n: 1}, + expected: [{n: 0}, {n: 1}, {n: 2}, {n: 3}, {n: 4}, {n: 5}], + testDescription: "SingleColumn_TopLevelField_Match" + }); +})(); + +(function testPerPathFilters_SingleColumn_TopLevelField_NoMatch() { + const docs = [ + {n: 0, x: {a: 0}}, // vals: <empty>, arrInfo: <empty> + {n: 1, x: [[42, 0]]}, // vals: [42, 0], arrInfo: "[[" + {n: 2, x: [[42, 0, 0], 0, 0]}, // vals: [42,0,0,0,0], arrInfo: "[[|2]" + {n: 3, x: [{y: 0}, [42, 0]]}, // vals: [42,0], arrInfo: "[o[" + {n: 4, x: [[42], {y: 0}]}, // vals: [42], arrInfo: "[[|]o" + {n: 5, x: [[42, 42], [42]]}, // vals: [42,42,42], arrInfo: "[[|1][" + { + n: 6, + x: [0, [42, [42, [42], 42], 42], [42]] + }, // vals: [0,42,42,42,42,42], arrInfo: "[|[|[|[|]|]|][" + ]; + // Projecting "_id" out and instead projecting as the first (in alphabetical order) column a + // non-existing field helps to flush out bugs with incorrect lookup of the filtered columns + // among all columns involved in the query. + runPerPathFiltersTest({ + docs: docs, + query: {x: 42}, + projection: {a: 1, _id: 0, n: 1}, + expected: [], + testDescription: "SingleColumn_TopLevelField_NoMatch" + }); +})(); + +// Checks matching on sub-path that contains scalars and arrays. 
+(function testPerPathFilters_SingleColumn_SubField_Match() { + const docs = [ + {_id: 0, x: {y: {z: 42}}}, // vals: 42, arrInfo: <empty> + {_id: 1, x: {y: {z: [0, 42]}}}, // vals: [0,42], arrInfo: "{{[" + {_id: 2, x: {y: {z: [[0, 0], 42]}}}, // vals: [0,0,42], arrInfo: "{{[[|1]" + {_id: 3, x: {y: [{z: 0}, {z: 42}]}}, // vals: [0,42], arrInfo: "{[" + {_id: 4, x: {y: [{z: {a: 0}}, {z: [[0, 0], 42]}]}}, // vals: [0,0,42], arrInfo: "{[o{[[|1]" + {_id: 5, x: {y: [{a: 0}, {z: 0}, {a: 0}, {z: 42}]}}, // vals: [0,42], arrInfo: "{[1|+1" + { + _id: 6, + x: [{y: {z: 0}}, {y: {z: [[0, 0]]}}, {y: {z: 42}}] + }, // vals: [0,0,0,42], arrInfo: "[|{{[[|1]]" + { + _id: 7, + x: [{y: {z: {a: 0}}}, {y: 0}, {y: {z: [0, 42]}}] + }, // vals: [0,42], arrInfo: "[o+1{{[" + { + _id: 8, + x: [{y: [{z: {a: 0}}, {z: [0]}, {a: 0}, {z: [0, 42]}]}] + }, // vals: [0,0,42], arrInfo: "[{[o{[|]+1{[" + {_id: 9, x: [{y: [[{z: 0}], {z: 42}]}]}, // vals: [0,42], arrInfo: "[{[[|]" + {_id: 10, x: [[{y: [{z: 0}]}], {y: {z: [42]}}]}, // vals: [0,42], arrInfo: "[[{[|]]{{[" + {_id: 11, x: [{y: {z: [0]}}, {y: {z: [42]}}]}, // vals: [0,42], arrInfo: "[{{[|]{{[" + ]; + let expected = []; + for (let i = 0; i < docs.length; i++) { + expected.push({_id: i}); + } + runPerPathFiltersTest({ + docs: docs, + query: {"x.y.z": 42}, + projection: {_id: 1}, + expected: expected, + testDescription: "SingleColumn_SubField_Match" + }); +})(); + +// Checks matching on sub-path that contains scalars and arrays. 
+(function testPerPathFilters_SingleColumn_SubField_NoMatch() { + const docs = [ + {_id: 0, x: {y: {z: {a: 0}}}}, // vals: <empty>, arrInfo: <empty> + {_id: 1, x: {y: {z: [0, [42]]}}}, // vals: [0,42], arrInfo: "{{[|[" + {_id: 2, x: {y: [{z: 0}, [{z: 42}]]}}, // vals: [0,42], arrInfo: "{[|[" + {_id: 3, x: [{y: {z: 0}}, [{y: {z: 42}}]]}, // vals: [42], arrInfo: "[1[" + ]; + runPerPathFiltersTest({ + docs: docs, + query: {"x.y.z": 42}, + projection: {_id: 1}, + expected: [], + testDescription: "SingleColumn_SubField_NoMatch" + }); +})(); + +// Check matching of empty arrays which are treated as values. +// NB: expressions for comparing to whole non-empty arrays aren't split into per-path filters. +(function testPerPathFilters_EmptyArray() { + const docs = [ + {_id: 0, x: {y: []}}, + {_id: 1, x: {y: [0, []]}}, + {_id: 2, x: [{y: 0}, {y: []}]}, + {_id: 3, x: {y: [0, [0, []]]}}, + ]; + runPerPathFiltersTest({ + docs: docs, + query: {"x.y": 42}, + projection: {_id: 1}, + expected: [], //[{_id: 0}, {_id: 1}, {_id: 2}], + testDescription: "SingleColumn_EmptyArray" + }); +})(); + +// Check matching of empty objects which are treated as values. +// NB: expressions for comparing to whole non-empty objects aren't split into per-path filters. +(function testPerPathFilters_EmptyObject() { + const docs = [ + {_id: 0, x: {y: {}}}, + {_id: 1, x: {y: [0, {}]}}, + {_id: 2, x: [{y: 0}, {y: {}}]}, + {_id: 3, x: {y: [0, [0, {}]]}}, + ]; + runPerPathFiltersTest({ + docs: docs, + query: {"x.y": {}}, + projection: {_id: 1}, + expected: [{_id: 0}, {_id: 1}, {_id: 2}], + testDescription: "SingleColumn_EmptyObject" + }); +})(); + +// Check that a single filtered column correctly handles matching, no-matching and missing values +// when moving the cursor. 
+(function testPerPathFilters_SingleColumnMatchNoMatchMissingValues() { + const docs = [ + {_id: 0, x: 42}, + {_id: 1, x: 0}, + {_id: 2, x: 42}, + {_id: 3, no_x: 0}, + {_id: 4, x: 42}, + {_id: 5, x: 0}, + {_id: 6, no_x: 0}, + {_id: 7, x: 42}, + {_id: 8, no_x: 0}, + {_id: 9, x: 0}, + {_id: 10, x: 42}, + ]; + runPerPathFiltersTest({ + docs: docs, + query: {x: 42}, + projection: {_id: 1}, + expected: [{_id: 0}, {_id: 2}, {_id: 4}, {_id: 7}, {_id: 10}], + testDescription: "SingleColumnMatchNoMatchMissingValues" + }); +})(); + +// Check zig-zagging of two filters. We cannot assert through a JS test that the cursors for the +// filtered columns are advanced as described here in the comments, but the test attempts to +// exercise various match/no-match/missing combinations of values across columns. +(function testPerPathFilters_TwoColumns() { + const docs = [ + {_id: 0, x: 0, y: 0}, // start by iterating x + {_id: 1, x: 42, y: 42}, // seek into y and match! - continue iterating on x + {_id: 2, x: 42, no_y: 0}, // seeking into y skips to n:3 + {_id: 3, x: 42, y: 0}, // iterating on y + {_id: 4, x: 42, y: 42}, // seek into x and match! - continue iterating on y + {_id: 5, no_x: 0, y: 42}, // seek into x skips to n:6 + {_id: 6, x: 42, y: 0}, // seek into y but no match - iterate on y + {_id: 7, x: 0, y: 42}, // seek into x but no match - iterate on x + {_id: 8, no_x: 0, no_y: 0}, // skipped by x + {_id: 9, x: 42, y: 42}, // seek into y and match! + ]; + // Adding into the projection specification non-existent fields doesn't change the output but + // helps to flush out bugs with incorrect indexing of filtered paths among all others. + runPerPathFiltersTest({ + docs: docs, + query: {x: 42, y: 42}, + projection: {_id: 1, a: 1, xa: 1, xb: 1, ya: 1}, + expected: [{_id: 1}, {_id: 4}, {_id: 9}], + testDescription: "TwoColumns" + }); +})(); + +// Check zig-zagging of three filters. 
+(function testPerPathFilters_ThreeColumns() { + const docs = [ + {_id: 0, x: 0, y: 42, z: 42}, // x + {_id: 1, x: 42, y: 42, z: 0}, // x->y->z + {_id: 2, x: 0, y: 42, z: 42}, // z->x + {_id: 3, x: 0, y: 42, z: 42}, // x + {_id: 4, x: 42, no_y: 0, z: 42}, // x->y + {_id: 5, x: 42, y: 0, z: 42}, // y + {_id: 6, x: 42, y: 42, z: 42}, // match! ->y + {_id: 7, x: 42, y: 42, z: 42}, // match! ->y + {_id: 8, no_x: 0, y: 42, z: 42}, // y->z->x + {_id: 9, x: 42, y: 42, no_z: 0}, // x + {_id: 10, x: 42, y: 42, z: 42}, // match! + ]; + // Adding into the projection specification non-existent fields doesn't change the output but + // helps to flush out bugs with incorrect indexing of filtered paths among all others. + runPerPathFiltersTest({ + docs: docs, + query: {x: 42, y: 42, z: 42}, + projection: {_id: 1, a: 1, b: 1, xa: 1, xb: 1, ya: 1, za: 1}, + expected: [{_id: 6}, {_id: 7}, {_id: 10}], + testDescription: "ThreeColumns" + }); +})(); + +// Check projection of filtered columns. +(function testPerPathFilters_ProjectFilteredColumn() { + const docs = [ + {_id: 0, x: {y: 42}}, + {_id: 1, x: {y: 42, z: 0}}, + {_id: 2, x: [0, {y: 42}, {y: 0}, {z: 0}]}, + ]; + runPerPathFiltersTest({ + docs: docs, + query: {"x.y": 42}, + projection: {_id: 1, "x.y": 1}, + expected: [{_id: 0, x: {y: 42}}, {_id: 1, x: {y: 42}}, {_id: 2, x: [{y: 42}, {y: 0}, {}]}], + testDescription: "ProjectFilteredColumn" + }); +})(); + +// Check correctness when have both per-path and residual filters. 
+(function testPerPathFilters_PerPathAndResidualFilters() { + const docs = [ + {_id: 0, x: 42, no_y: 0}, + {_id: 1, x: 42, y: 0}, + {_id: 2, x: 0, no_y: 0}, + {_id: 3, x: 0, y: 0}, + {_id: 4, no_x: 0, no_y: 0}, + {_id: 5, x: 42, no_y: 0}, + ]; + runPerPathFiltersTest({ + docs: docs, + query: {x: 42, y: {$exists: false}}, // {$exists: false} causes the residual filter + projection: {_id: 1}, + expected: [{_id: 0}, {_id: 5}], + testDescription: "PerPathAndResidualFilters" + }); +})(); + +// Check translation of MQL comparison match expressions. +(function testPerPathFilters_SupportedMatchExpressions_Comparison() { + const docs = [ + {_id: 0, x: NumberInt(5)}, + {_id: 1, x: 0}, + {_id: 2, x: null}, + {_id: 3, no_x: 0}, + {_id: 4, x: NumberInt(15)}, + ]; + + coll_filters.drop(); + coll_filters.insert(docs); + assert.commandWorked(coll_filters.createIndex({"$**": "columnstore"})); + + let expected = []; + let actual = []; + let errMsg = ""; + + // MatchExpression::LT + actual = coll_filters.find({x: {$lt: 15}}, {_id: 1}).toArray(); + expected = [{_id: 0}, {_id: 1}]; + errMsg = "SupportedMatchExpressions: $lt"; + assert(resultsEq(actual, expected), + `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`); + + // MatchExpression::GT + actual = coll_filters.find({x: {$gt: 5}}, {_id: 1}).toArray(); + expected = [{_id: 4}]; + errMsg = "SupportedMatchExpressions: $gt"; + assert(resultsEq(actual, expected), + `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`); + + // MatchExpression::LTE + actual = coll_filters.find({x: {$lte: 15}}, {_id: 1}).toArray(); + expected = [{_id: 0}, {_id: 1}, {_id: 4}]; + errMsg = "SupportedMatchExpressions: $lte"; + assert(resultsEq(actual, expected), + `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`); + + // MatchExpression::GTE + actual = coll_filters.find({x: {$gte: 5}}, {_id: 1}).toArray(); + expected = [{_id: 0}, {_id: 4}]; + errMsg = "SupportedMatchExpressions: $gte"; + assert(resultsEq(actual, 
expected), + `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`); +})(); + +// Check translation of MQL bitwise match expressions. +(function testPerPathFilters_SupportedMatchExpressions_Bitwise() { + const docs = [ + {_id: 0, x: NumberInt(1 + 0 * 2 + 1 * 4 + 0 * 8)}, + {_id: 1, x: 0}, + {_id: 2, x: null}, + {_id: 3, no_x: 0}, + {_id: 4, x: NumberInt(0 + 1 * 2 + 1 * 4 + 1 * 8)}, + {_id: 5, x: NumberInt(0 + 1 * 2 + 1 * 4 + 0 * 8)}, + ]; + + coll_filters.drop(); + coll_filters.insert(docs); + assert.commandWorked(coll_filters.createIndex({"$**": "columnstore"})); + + let expected = []; + let actual = []; + let errMsg = ""; + + // MatchExpression::BITS_ALL_SET + actual = coll_filters.find({x: {$bitsAllSet: [1, 2]}}, {_id: 1}).toArray(); + expected = [{_id: 4}, {_id: 5}]; + errMsg = "SupportedMatchExpressions: $bitsAllSet"; + assert(resultsEq(actual, expected), + `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`); + + // MatchExpression::BITS_ALL_CLEAR + actual = coll_filters.find({x: {$bitsAllClear: [1, 3]}}, {_id: 1}).toArray(); + expected = [{_id: 0}, {_id: 1}]; + errMsg = "SupportedMatchExpressions: $bitsAllClear"; + assert(resultsEq(actual, expected), + `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`); + + // MatchExpression::BITS_ANY_SET + actual = coll_filters.find({x: {$bitsAnySet: [0, 3]}}, {_id: 1}).toArray(); + expected = [{_id: 0}, {_id: 4}]; + errMsg = "SupportedMatchExpressions: $bitsAnySet"; + assert(resultsEq(actual, expected), + `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`); + + // MatchExpression::BITS_ANY_CLEAR + actual = coll_filters.find({x: {$bitsAnyClear: [2, 3]}}, {_id: 1}).toArray(); + expected = [{_id: 0}, {_id: 1}, {_id: 5}]; + errMsg = "SupportedMatchExpressions: $bitsAnyClear"; + assert(resultsEq(actual, expected), + `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`); +})(); + +// Check translation of MQL $exists, $not and $ne match expressions. 
+// NB: per SERVER-68743 the queries in this test won't be using per-path filters yet, but eventually +// they should. +(function testPerPathFilters_SupportedMatchExpressions_Not() { + const docs = [ + {_id: 0, x: 42}, + {_id: 1, x: 0}, + {_id: 2, x: null}, + {_id: 3, no_x: 0}, + {_id: 4, x: []}, + {_id: 5, x: {}}, + ]; + + coll_filters.drop(); + coll_filters.insert(docs); + assert.commandWorked(coll_filters.createIndex({"$**": "columnstore"})); + + let expected = []; + let actual = []; + let errMsg = ""; + + actual = coll_filters.find({x: {$ne: null}}, {_id: 1}).toArray(); + expected = [{_id: 0}, {_id: 1}, {_id: 4}, {_id: 5}]; + errMsg = "SupportedMatchExpressions: $ne"; + assert(resultsEq(actual, expected), + `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`); + + actual = coll_filters.find({x: {$not: {$eq: null}}}, {_id: 1}).toArray(); + expected = [{_id: 0}, {_id: 1}, {_id: 4}, {_id: 5}]; + errMsg = "SupportedMatchExpressions: $not + $eq"; + assert(resultsEq(actual, expected), + `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`); + + actual = coll_filters.find({x: {$exists: true}}, {_id: 1}).toArray(); + expected = [{_id: 0}, {_id: 1}, {_id: 2}, {_id: 4}, {_id: 5}]; + errMsg = "SupportedMatchExpressions: $exists"; + assert(resultsEq(actual, expected), + `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`); +})(); + +// Check translation of other MQL match expressions. 
+(function testPerPathFilters_SupportedMatchExpressions_Oddballs() { + const docs = [ + {_id: 0, x: NumberInt(7 * 3 + 4)}, + {_id: 1, x: NumberInt(7 * 9 + 2)}, + {_id: 2, x: "find me"}, + {_id: 3, x: "find them"}, + ]; + + coll_filters.drop(); + coll_filters.insert(docs); + assert.commandWorked(coll_filters.createIndex({"$**": "columnstore"})); + + let expected = []; + let actual = []; + let errMsg = ""; + + actual = coll_filters.find({x: {$mod: [7, 2]}}, {_id: 1}).toArray(); + expected = [{_id: 1}]; + errMsg = "SupportedMatchExpressions: $mod"; + assert(resultsEq(actual, expected), + `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`); + + actual = coll_filters.find({x: {$regex: /me/}}, {_id: 1}).toArray(); + expected = [{_id: 2}]; + errMsg = "SupportedMatchExpressions: $regex"; + assert(resultsEq(actual, expected), + `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`); +})(); + +// Check degenerate case with no paths. +(function testPerPathFilters_NoPathsProjected() { + const docs = [ + {_id: 0, x: 42}, + {_id: 1, x: 0}, + {_id: 2, no_x: 42}, + ]; + runPerPathFiltersTest({ + docs: docs, + query: {x: 42}, + projection: {_id: 0, a: {$literal: 1}}, + expected: [{a: 1}], + testDescription: "NoPathsProjected" + }); + runPerPathFiltersTest({ + docs: docs, + query: {}, + projection: {_id: 0, a: {$literal: 1}}, + expected: [{a: 1}, {a: 1}, {a: 1}], + testDescription: "NoPathsProjected (and no filter)" + }); +})(); + +// While using columnar indexes doesn't guarantee a specific field ordering in the result objects, +// we still try to provide a stable experience to the user, so we output "_id" first and other +// fields in alphabetically ascending order. 
+(function testPerPathFilters_FieldOrder() { + const docs = [{z: 42, a: 42, _id: 42, _a: 42}]; + + coll_filters.drop(); + coll_filters.insert(docs); + assert.commandWorked(coll_filters.createIndex({"$**": "columnstore"})); + + let res = tojson(coll_filters.find({}, {_a: 1, a: 1, z: 1}).toArray()[0]); + let expected = '{ "_id" : 42, "_a" : 42, "a" : 42, "z" : 42 }'; + assert(res == expected, `actual: ${res} != expected: ${expected} in **TEST** field order 1`); + + // Having a filter on a path that is also being projected, should not affect the order. + res = tojson(coll_filters.find({z: 42}, {_a: 1, a: 1, z: 1}).toArray()[0]); + expected = '{ "_id" : 42, "_a" : 42, "a" : 42, "z" : 42 }'; + assert(res == expected, `actual: ${res} != expected: ${expected} in **TEST** field order 2`); + + // Omitting the "_id" field should not affect the order. + res = tojson(coll_filters.find({a: 42}, {_id: 0, _a: 1, z: 1}).toArray()[0]); + expected = '{ "_a" : 42, "z" : 42 }'; + assert(res == expected, `actual: ${res} != expected: ${expected} in **TEST** field order 3`); +})(); + +// Sanity test that per-column filtering is meeting the efficiency expectations. 
+(function testPerPathFilters_ExecutionStats() { + coll_filters.drop(); + + const docsCount = 1000; + const expectedToMatchCount = 10; + for (let i = 0; i < docsCount; i++) { + coll_filters.insert({_id: i, x: i % 2, y: i % (docsCount / expectedToMatchCount)}); + } + assert.commandWorked(coll_filters.createIndex({"$**": "columnstore"})); + + assert.eq(coll_filters.find({x: 1, y: 1}, {_id: 1, x: 1}).toArray().length, + expectedToMatchCount); + const explain = coll_filters.find({x: 1, y: 1}, {_id: 1, x: 1}).explain("executionStats"); + + const columnScanStages = getSbePlanStages(explain, "COLUMN_SCAN"); + assert.gt(columnScanStages.length, 0, `Could not find 'COLUMN_SCAN' stage: ${tojson(explain)}`); + + if (columnScanStages.length > 1) { + // The test is being run in sharded environment and the state per shard would depend on + // how the data gets distributed. + jsTestLog("Skipping testPerPathFilters_ExecutionStats in sharded environment."); + return; + } + + const columns = columnScanStages[0].columns; + assertArrayEq({ + actual: columnScanStages[0].paths, + expected: ["_id", "x", "y"], + extraErrorMsg: 'Paths used by column scan stage' + }); + assert.eq(Object.keys(columns).length, 3, `Used columns ${columns}`); + + const _id = columns._id; + const x = columns.x; + const y = columns.y; + + assert(_id.usedInOutput, "_id column should be used in output"); + assert(x.usedInOutput, "x column should be used in output"); + assert(!y.usedInOutput, "y column should be used only for filtering"); + + // When there are no missing fields, the number of "next" calls in zig-zag search algorithm is + // equal to the number of documents docsCount (NB: in non-filtered search the number of "next" + // calls is close to k*docsCount where k is the number of paths). 
+ assert.eq(_id.numNexts, 0, 'Should not iterate on non-filtered column'); + assert.eq(x.numNexts + y.numNexts, docsCount, 'Total count of "next" calls'); + + // The columns with higher selectivity should be preferred by the zig-zag search for driving the + // "next" calls. Due to the regularity of data in this test (if "y" matched "x" would match as + // well), after the first match "y" column completely takes over. + assert.gt(y.numNexts, 0.9 * docsCount, 'high selectivity should drive "next" calls'); + + // We seek into each column to set up the cursors, after that seeks into _id should only happen + // on full match, and seeks into x or y should only happen on partial matches. + assert.eq(_id.numSeeks, 1 + expectedToMatchCount, "Seeks into the _id column"); + assert.lt(x.numSeeks + y.numSeeks, + 2 * expectedToMatchCount, + "Number of seeks in filtered columns should be small"); +})(); +})(); diff --git a/jstests/noPassthroughWithMongod/column_scan_explain.js b/jstests/noPassthroughWithMongod/column_scan_explain.js index e137d714514..1f2a60e8253 100644 --- a/jstests/noPassthroughWithMongod/column_scan_explain.js +++ b/jstests/noPassthroughWithMongod/column_scan_explain.js @@ -4,6 +4,7 @@ (function() { "use strict"; +load("jstests/aggregation/extras/utils.js"); // For assertArrayEq load("jstests/libs/analyze_plan.js"); // For planHasStage. load("jstests/libs/sbe_util.js"); // For checkSBEEnabled. load("jstests/libs/sbe_explain_helpers.js"); // For getSbePlanStages. @@ -34,35 +35,54 @@ assert.commandWorked(coll.insertMany(docs)); const columnScanStages = getSbePlanStages(explain, "COLUMN_SCAN"); assert.eq(columnScanStages.length, 1, `Could not find 'COLUMN_SCAN' stage: ${tojson(explain)}`); + const columnScan = columnScanStages[0]; + + assertArrayEq({ + actual: columnScan.paths, + expected: ["_id", "x", "y.a"], + extraErrorMsg: 'Paths used by column scan stage' + }); // Verifying column fields. 
- const columns = columnScanStages[0].columns; - assert.eq(Object.keys(columns).length, 4, 'Number of columns should be 4.'); + const columns = columnScan.columns; + assert.eq( + Object.keys(columns).length, 4, `Should access 4 columns but accessed: ${tojson(columns)}`); + + // We seek into each column once, when setting up the cursors. The dense column is the first to + // hit EOF after iterating over all documents so other columns iterate at least one time less. const expectedColumns = { - "<<RowId Column>>": {"numNexts": 5, "numSeeks": 1, "usedInOutput": false}, - "_id": {"numNexts": 5, "numSeeks": 1, "usedInOutput": true}, - "x": {"numNexts": 5, "numSeeks": 1, "usedInOutput": true}, + "<<RowId Column>>": {"numNexts": docs.length, "numSeeks": 1, "usedInOutput": false}, + "_id": {"numNexts": docs.length - 1, "numSeeks": 1, "usedInOutput": true}, + "x": {"numNexts": docs.length - 1, "numSeeks": 1, "usedInOutput": true}, "y.a": {"numNexts": 1, "numSeeks": 1, "usedInOutput": true} }; for (const [columnName, expectedObj] of Object.entries(expectedColumns)) { assert.eq(sortDoc(columns[columnName]), sortDoc(expectedObj), - `Mismatching entry for column ${columnName}`); + `Mismatching entry for column ${tojson(columnName)}`); } // Verifying parent column fields. - const parentColumns = columnScanStages[0].parentColumns; - assert.eq(Object.keys(parentColumns).length, 1, 'Number of parent columns should be 1.'); + const parentColumns = columnScan.parentColumns; + assert.eq(Object.keys(parentColumns).length, + 1, + `Should access 1 parent column but accessed: ${tojson(parentColumns)}`); // Expecting 4 lookups on the "y" parent column for the 3 docs which didn't have a "y.a" - // value and 1 for an unsuccessful call to seek. + // value and 1 for an unsuccessful call to seek. We should not iterate over parent columns. 
assert.eq(sortDoc(parentColumns.y), {"numNexts": 0, "numSeeks": 4}, - 'Mismatching entry for parent column \'y\''); - - assert.eq(explain.executionStats.totalKeysExamined, 24, `Mismatch in totalKeysExamined.`); - assert.eq( - columnScanStages[0].numRowStoreFetches, 0, 'Number of row store fetches should be 0.'); - assert.eq(columnScanStages[0].nReturned, 5, 'Number returned should be 5.'); + 'Mismatching entry for parent column "y"'); + + // 'totalKeysExamined' should be equal to the sum of "next" and "seek" calls across all columns. + assert.eq(explain.executionStats.totalKeysExamined, + columns["<<RowId Column>>"].numNexts + columns["<<RowId Column>>"].numSeeks + + columns["_id"].numNexts + columns["_id"].numSeeks + columns["x"].numNexts + + columns["x"].numSeeks + columns["y.a"].numNexts + columns["y.a"].numSeeks + + parentColumns["y"].numNexts + parentColumns["y"].numSeeks, + `Mismatch in totalKeysExamined.`); + + assert.eq(columnScan.numRowStoreFetches, 0, 'Mismatch in numRowStoreFetches'); + assert.eq(columnScan.nReturned, docs.length, 'nReturned: should return all docs'); }()); // Test the explain output for a scan on a nonexistent field. @@ -71,29 +91,42 @@ assert.commandWorked(coll.insertMany(docs)); const columnScanStages = getSbePlanStages(explain, "COLUMN_SCAN"); assert.eq(columnScanStages.length, 1, `Could not find 'COLUMN_SCAN' stage: ${tojson(explain)}`); + const columnScan = columnScanStages[0]; + + assertArrayEq({ + actual: columnScan.paths, + expected: ["_id", "z"], + extraErrorMsg: 'Paths used by column scan stage' + }); // Verifying column fields. 
- const columns = columnScanStages[0].columns; - assert.eq(Object.keys(columns).length, 3, 'Number of columns should be 3.'); + const columns = columnScan.columns; + assert.eq( + Object.keys(columns).length, 3, `Should access 3 columns but accessed: ${tojson(columns)}`); const expectedColumns = { - "<<RowId Column>>": {"numNexts": 5, "numSeeks": 1, "usedInOutput": false}, - "_id": {"numNexts": 5, "numSeeks": 1, "usedInOutput": true}, + "<<RowId Column>>": {"numNexts": docs.length, "numSeeks": 1, "usedInOutput": false}, + "_id": {"numNexts": docs.length - 1, "numSeeks": 1, "usedInOutput": true}, "z": {"numNexts": 0, "numSeeks": 1, "usedInOutput": true}, }; for (const [columnName, expectedObj] of Object.entries(expectedColumns)) { assert.eq(sortDoc(columns[columnName]), sortDoc(expectedObj), - `Mismatching entry for column ${columnName}`); + `Mismatching entry for column "${columnName}"`); } // Verifying parent column fields. - const parentColumns = columnScanStages[0].parentColumns; - assert.eq(parentColumns, {}); - - assert.eq(explain.executionStats.totalKeysExamined, 13, `Mismatch in totalKeysExamined.`); - assert.eq( - columnScanStages[0].numRowStoreFetches, 0, 'Number of row store fetches should be 0.'); - assert.eq(columnScanStages[0].nReturned, 5, 'Number returned should be 5.'); + const parentColumns = columnScan.parentColumns; + assert.eq(parentColumns, {}, "Should not access parent columns"); + + // 'totalKeysExamined' should be equal to the sum of "next" and "seek" calls across all columns. 
+ assert.eq(explain.executionStats.totalKeysExamined, + columns["<<RowId Column>>"].numNexts + columns["<<RowId Column>>"].numSeeks + + columns["_id"].numNexts + columns["_id"].numSeeks + columns["z"].numNexts + + columns["z"].numSeeks, + `Mismatch in totalKeysExamined.`); + + assert.eq(columnScan.numRowStoreFetches, 0, 'Mismatch in numRowStoreFetches'); + assert.eq(columnScan.nReturned, docs.length, 'nReturned: should return all docs'); }()); // Test the explain output for a scan on a 2-level nested field. @@ -102,14 +135,22 @@ assert.commandWorked(coll.insertMany(docs)); const columnScanStages = getSbePlanStages(explain, "COLUMN_SCAN"); assert.eq(columnScanStages.length, 1, `Could not find 'COLUMN_SCAN' stage: ${tojson(explain)}`); + const columnScan = columnScanStages[0]; + + assertArrayEq({ + actual: columnScan.paths, + expected: ["_id", "y.b.c"], + extraErrorMsg: 'Paths used by column scan stage' + }); // Verifying column fields. - const columns = columnScanStages[0].columns; - assert.eq(Object.keys(columns).length, 3, 'Number of columns should be 3.'); + const columns = columnScan.columns; + assert.eq( + Object.keys(columns).length, 3, `Should access 3 columns but accessed: ${tojson(columns)}`); const expectedColumns = { - "<<RowId Column>>": {"numNexts": 5, "numSeeks": 1, "usedInOutput": false}, - "_id": {"numNexts": 5, "numSeeks": 1, "usedInOutput": true}, - "y.b.c": {"numNexts": 2, "numSeeks": 1, "usedInOutput": true}, + "<<RowId Column>>": {"numNexts": docs.length, "numSeeks": 1, "usedInOutput": false}, + "_id": {"numNexts": docs.length - 1, "numSeeks": 1, "usedInOutput": true}, + "y.b.c": {"numNexts": 1, "numSeeks": 1, "usedInOutput": true}, }; for (const [columnName, expectedObj] of Object.entries(expectedColumns)) { assert.eq(sortDoc(columns[columnName]), @@ -118,22 +159,57 @@ assert.commandWorked(coll.insertMany(docs)); } // Verifying parent column fields. 
- const parentColumns = columnScanStages[0].parentColumns; - assert.eq(Object.keys(parentColumns).length, 2, 'Number of parent columns should be 2.'); + const parentColumns = columnScan.parentColumns; + assert.eq(Object.keys(parentColumns).length, + 2, + `Should access 2 parent columns but accessed: ${tojson(parentColumns)}`); // Expecting 3 lookups on the "y" parent column for the 2 docs which didn't have a "y.b" - value and 1 unsuccessful call to seek. + value and 1 unsuccessful call to seek. We should not iterate over parent columns. assert.eq(sortDoc(parentColumns.y), {"numNexts": 0, "numSeeks": 3}, - 'Mismatching entry for parent column \'y\''); + 'Mismatching entry for parent column "y"'); // Expecting 4 lookups on the "y.b" parent column for the 3 docs that didn't have a "y.b.c" // value and 1 unsuccessful call to seek. assert.eq(sortDoc(parentColumns['y.b']), {"numNexts": 0, "numSeeks": 4}, - 'Mismatching entry for parent column \'y.b\''); + 'Mismatching entry for parent column "y.b"'); + + // 'totalKeysExamined' should be equal to the sum of "next" and "seek" calls across all columns. 
+ assert.eq(explain.executionStats.totalKeysExamined, + columns["<<RowId Column>>"].numNexts + columns["<<RowId Column>>"].numSeeks + + columns["_id"].numNexts + columns["_id"].numSeeks + columns["y.b.c"].numNexts + + columns["y.b.c"].numSeeks + parentColumns["y.b"].numNexts + + parentColumns["y.b"].numSeeks + parentColumns["y"].numNexts + + parentColumns["y"].numSeeks, + `Mismatch in totalKeysExamined.`); + + assert.eq(columnScan.numRowStoreFetches, 0, 'Mismatch in numRowStoreFetches'); + assert.eq(columnScan.nReturned, docs.length, 'nReturned: should return all docs'); +}()); - assert.eq(explain.executionStats.totalKeysExamined, 22, `Mismatch in totalKeysExamined.`); - assert.eq( - columnScanStages[0].numRowStoreFetches, 0, 'Number of row store fetches should be 0.'); - assert.eq(columnScanStages[0].nReturned, 5, 'Number returned should be 5.'); +// Test fallback to the row store. +(function testWithFallbackToRowstore() { + const coll_rowstore = db.column_scan_explain_rowstore; + coll_rowstore.drop(); + assert.commandWorked(coll_rowstore.createIndex({"$**": "columnstore"})); + + const docs_rowstore = [ + {_id: 0, x: {y: 42, z: 0}}, + {_id: 1, x: {y: {z: 0}}}, // fallback + {_id: 2, x: [{y: 42}, 0]}, + {_id: 3, x: [{y: 42}, {z: 0}]}, + {_id: 4, x: [{y: [42, 43]}, {z: 0}]}, + {_id: 5, x: [{y: [42, {z: 0}]}, {z: 0}]}, // fallback + {_id: 6, x: 42}, + ]; + coll_rowstore.insert(docs_rowstore); + const explain = coll_rowstore.find({}, {_id: 0, "x.y": 1}).explain("executionStats"); + + const columnScanStages = getSbePlanStages(explain, "COLUMN_SCAN"); + assert.eq(columnScanStages.length, 1, `Could not find 'COLUMN_SCAN' stage: ${tojson(explain)}`); + const columnScan = columnScanStages[0]; + + assert.eq(columnScan.numRowStoreFetches, 2, 'Mismatch in numRowStoreFetches'); + assert.eq(columnScan.nReturned, docs_rowstore.length, 'nReturned: should return all docs'); }()); }()); diff --git a/src/mongo/db/exec/sbe/stages/column_scan.cpp 
b/src/mongo/db/exec/sbe/stages/column_scan.cpp index 91a6f02b43b..addf5fe25e3 100644 --- a/src/mongo/db/exec/sbe/stages/column_scan.cpp +++ b/src/mongo/db/exec/sbe/stages/column_scan.cpp @@ -52,10 +52,12 @@ TranslatedCell translateCell(PathView path, const SplitCellView& splitCellView) ColumnScanStage::ColumnScanStage(UUID collectionUuid, StringData columnIndexName, std::vector<std::string> paths, + std::vector<bool> includeInOutput, boost::optional<value::SlotId> recordIdSlot, boost::optional<value::SlotId> reconstuctedRecordSlot, value::SlotId rowStoreSlot, std::unique_ptr<EExpression> rowStoreExpr, + std::vector<PathFilter> filteredPaths, PlanYieldPolicy* yieldPolicy, PlanNodeId nodeId, bool participateInTrialRunTracking) @@ -63,26 +65,39 @@ ColumnScanStage::ColumnScanStage(UUID collectionUuid, _collUuid(collectionUuid), _columnIndexName(columnIndexName), _paths(std::move(paths)), + _includeInOutput(std::move(includeInOutput)), _recordIdSlot(recordIdSlot), _reconstructedRecordSlot(reconstuctedRecordSlot), _rowStoreSlot(rowStoreSlot), - _rowStoreExpr(std::move(rowStoreExpr)) {} + _rowStoreExpr(std::move(rowStoreExpr)), + _filteredPaths(std::move(filteredPaths)) { + invariant(_filteredPaths.size() <= _paths.size(), + "Filtered paths should be a subset of all paths"); + invariant(_paths.size() == _includeInOutput.size()); +} std::unique_ptr<PlanStage> ColumnScanStage::clone() const { - std::vector<std::unique_ptr<EExpression>> pathExprs; + std::vector<PathFilter> filteredPaths; + for (const auto& fp : _filteredPaths) { + filteredPaths.emplace_back(fp.pathIndex, fp.filterExpr->clone(), fp.inputSlotId); + } return std::make_unique<ColumnScanStage>(_collUuid, _columnIndexName, _paths, + _includeInOutput, _recordIdSlot, _reconstructedRecordSlot, _rowStoreSlot, _rowStoreExpr ? 
_rowStoreExpr->clone() : nullptr, + std::move(filteredPaths), _yieldPolicy, _commonStats.nodeId, _participateInTrialRunTracking); } void ColumnScanStage::prepare(CompileCtx& ctx) { + ctx.root = this; + if (_reconstructedRecordSlot) { _reconstructedRecordAccessor = std::make_unique<value::OwnedValueAccessor>(); } @@ -92,10 +107,19 @@ void ColumnScanStage::prepare(CompileCtx& ctx) { _rowStoreAccessor = std::make_unique<value::OwnedValueAccessor>(); if (_rowStoreExpr) { - ctx.root = this; _rowStoreExprCode = _rowStoreExpr->compile(ctx); } + _filterInputAccessors.resize(_filteredPaths.size()); + for (size_t idx = 0; idx < _filterInputAccessors.size(); ++idx) { + auto slot = _filteredPaths[idx].inputSlotId; + auto [it, inserted] = _filterInputAccessorsMap.emplace(slot, &_filterInputAccessors[idx]); + uassert(6610212, str::stream() << "duplicate slot: " << slot, inserted); + } + for (auto& filteredPath : _filteredPaths) { + _filterExprsCode.emplace_back(filteredPath.filterExpr->compile(ctx)); + } + tassert(6610200, "'_coll' should not be initialized prior to 'acquireCollection()'", !_coll); std::tie(_coll, _collName, _catalogEpoch) = acquireCollection(_opCtx, _collUuid); @@ -120,12 +144,23 @@ value::SlotAccessor* ColumnScanStage::getAccessor(CompileCtx& ctx, value::SlotId if (_rowStoreSlot == slot) { return _rowStoreAccessor.get(); } + + if (auto it = _filterInputAccessorsMap.find(slot); it != _filterInputAccessorsMap.end()) { + return it->second; + } + return ctx.getAccessor(slot); } void ColumnScanStage::doSaveState(bool relinquishCursor) { + if (_denseColumnCursor) { + _denseColumnCursor->makeOwned(); + _denseColumnCursor->cursor().save(); + } + for (auto& cursor : _columnCursors) { cursor.makeOwned(); + cursor.cursor().save(); } if (_rowStoreCursor && relinquishCursor) { @@ -136,9 +171,6 @@ void ColumnScanStage::doSaveState(bool relinquishCursor) { _rowStoreCursor->setSaveStorageCursorOnDetachFromOperationContext(!relinquishCursor); } - for (auto& cursor : 
_columnCursors) { - cursor.cursor().save(); - } for (auto& [path, cursor] : _parentPathCursors) { cursor->cursor().saveUnpositioned(); } @@ -170,6 +202,9 @@ void ColumnScanStage::doRestoreState(bool relinquishCursor) { } } + if (_denseColumnCursor) { + _denseColumnCursor->cursor().restore(); + } for (auto& cursor : _columnCursors) { cursor.cursor().restore(); } @@ -182,6 +217,9 @@ void ColumnScanStage::doDetachFromOperationContext() { if (_rowStoreCursor) { _rowStoreCursor->detachFromOperationContext(); } + if (_denseColumnCursor) { + _denseColumnCursor->cursor().detachFromOperationContext(); + } for (auto& cursor : _columnCursors) { cursor.cursor().detachFromOperationContext(); } @@ -194,6 +232,9 @@ void ColumnScanStage::doAttachToOperationContext(OperationContext* opCtx) { if (_rowStoreCursor) { _rowStoreCursor->reattachToOperationContext(opCtx); } + if (_denseColumnCursor) { + _denseColumnCursor->cursor().reattachToOperationContext(opCtx); + } for (auto& cursor : _columnCursors) { cursor.cursor().reattachToOperationContext(opCtx); } @@ -247,25 +288,22 @@ void ColumnScanStage::open(bool reOpen) { auto iam = static_cast<ColumnStoreAccessMethod*>(entry->accessMethod()); - // Eventually we can not include this column for the cases where a known dense column (_id) - // is being read anyway. - - // Add a stats struct that will be shared by overall ColumnScanStats and individual - // cursor. - _columnCursors.emplace_back( - iam->storage()->newCursor(_opCtx, ColumnStore::kRowIdPath), - _specificStats.cursorStats.emplace_back(ColumnStore::kRowIdPath.toString(), false)); - - for (auto&& path : _paths) { - _columnCursors.emplace_back(iam->storage()->newCursor(_opCtx, path), - _specificStats.cursorStats.emplace_back(path, true)); + // The dense _recordId column is only needed if there are no filters (TODO SERVER-68377: + // eventually we can avoid including this column for the cases where a known dense column + // such as _id is being read anyway). 
+ if (_filteredPaths.empty()) { + _denseColumnCursor = std::make_unique<ColumnCursor>( + iam->storage()->newCursor(_opCtx, ColumnStore::kRowIdPath), + _specificStats.cursorStats.emplace_back(ColumnStore::kRowIdPath.toString(), + false /*includeInOutput*/)); + } + for (size_t i = 0; i < _paths.size(); i++) { + _columnCursors.emplace_back( + iam->storage()->newCursor(_opCtx, _paths[i]), + _specificStats.cursorStats.emplace_back(_paths[i], _includeInOutput[i])); } } - - for (auto& columnCursor : _columnCursors) { - columnCursor.seekAtOrPast(RecordId()); - } - + _recordId = RecordId(); _open = true; } @@ -323,44 +361,284 @@ void ColumnScanStage::readParentsIntoObj(StringData path, } } -PlanState ColumnScanStage::getNext() { - auto optTimer(getOptTimer(_opCtx)); +// The result of the filter predicate will be the same regardless of sparseness or sub objects, +// therefore we don't look at the parents and don't consult the row store. +// (TODO SERVER-68792) Refactor the iteration over values into its own type. +bool ColumnScanStage::checkFilter(CellView cell, size_t filterIndex, const PathValue& path) { + auto splitCellView = SplitCellView::parse(cell); + auto translatedCell = translateCell(path, splitCellView); + + if (!translatedCell.moreValues()) { + return false; + } + + if (translatedCell.arrInfo.empty()) { + // Have a single non-nested value -- evaluate the filter on it. + // (TODO SERVER-68792) Could we avoid copying by using a non-owning accessor? Same question + // for other locations in this function when the predicate is evaluated immediately after + // setting the slot. 
+ auto [tag, val] = translatedCell.nextValue(); + auto [tagCopy, valCopy] = sbe::value::copyValue(tag, val); + _filterInputAccessors[filterIndex].reset(true /*owned*/, tagCopy, valCopy); + return _bytecode.runPredicate(_filterExprsCode[filterIndex].get()); + } else { + ArrInfoReader arrInfoReader{translatedCell.arrInfo}; + int depth = 0; + // (TODO SERVER-68792) Would using a non-heap allocated structure here improve perf? + std::stack<bool> inArray; + while (arrInfoReader.moreExplicitComponents()) { + switch (arrInfoReader.takeNextChar()) { + case '{': { + // We consider as nested only the arrays that are elements of other arrays. When + // there is an array of objects and some of the fields of these objects are + // arrays, the latter aren't nested. + inArray.push(false); + break; + } + case '[': { + // A '[' can be followed by a number if there are objects in the array, that + // should be retrieved from other paths when reconstructing the record. We can + // ignore them as they don't contribute to the values. + (void)arrInfoReader.takeNumber(); + if (!inArray.empty() && inArray.top()) { + depth++; + } + inArray.push(true); + break; + } + case '+': { + // Indicates elements in arrays that are objects that don't have the path. These + // objects don't contribute to the cell's values, so we can ignore them. + (void)arrInfoReader.takeNumber(); + break; + } + case '|': { + auto repeats = arrInfoReader.takeNumber(); + for (size_t i = 0; i < repeats + 1; i++) { + auto [tag, val] = translatedCell.nextValue(); + if (depth == 0) { + auto [tagCopy, valCopy] = sbe::value::copyValue(tag, val); + _filterInputAccessors[filterIndex].reset( + true /*owned*/, tagCopy, valCopy); + if (_bytecode.runPredicate(_filterExprsCode[filterIndex].get())) { + return true; + } + } + } + break; + } + case 'o': { + // Indicates the start of a nested object inside the cell. We don't need to + // track this info because the nested objects don't contribute to the values in + // the cell. 
+ (void)arrInfoReader.takeNumber(); + break; + } + case ']': { + invariant(inArray.size() > 0 && inArray.top()); + inArray.pop(); + if (inArray.size() > 0 && inArray.top()) { + invariant(depth > 0); + depth--; + } + + // Closing an array implicitly closes all objects on the path between it and the + // previous array. + while (inArray.size() > 0 && !inArray.top()) { + inArray.pop(); + } + } + } + } + if (depth == 0) { + // For the remaining values the depth isn't going to change so we don't need to advance + // the value iterator if the values are too deep. + while (translatedCell.moreValues()) { + auto [tag, val] = translatedCell.nextValue(); + auto [tagCopy, valCopy] = sbe::value::copyValue(tag, val); + _filterInputAccessors[filterIndex].reset(true /*owned*/, tagCopy, valCopy); + if (_bytecode.runPredicate(_filterExprsCode[filterIndex].get())) { + return true; + } + } + } + } - // We are about to call next() on a storage cursor so do not bother saving our internal state in - // case it yields as the state will be completely overwritten after the next() call. - disableSlotAccess(); + // None of the values matched the filter. + return false; +} - checkForInterrupt(_opCtx); +RecordId ColumnScanStage::findNextRecordIdForFilteredColumns() { + invariant(!_filteredPaths.empty()); - // Find minimum record ID of all column cursors. - _recordId = RecordId(); - for (auto& cursor : _columnCursors) { + // Initialize 'targetRecordId' from the filtered cursor we are currently iterating. + RecordId targetRecordId; + { + auto& cursor = cursorForFilteredPath(_filteredPaths[_nextUnmatched]); + if (!cursor.lastCell()) { + return RecordId(); // Have exhausted one of the columns. + } + targetRecordId = cursor.lastCell()->rid; + } + + size_t matchedSinceAdvance = 0; + // The loop will terminate because when 'matchedSinceAdvance' is reset the 'targetRecordId' + // is guaranteed to advance. 
It will do no more than N 'next()' calls across all cursors, + // where N is the number of records (might do fewer, if for some columns there are missing + // values). The number of seeks and filter checks depends on the selectivity of the filters. + while (matchedSinceAdvance < _filteredPaths.size()) { + auto& cursor = cursorForFilteredPath(_filteredPaths[_nextUnmatched]); + + // Avoid seeking into the column that we started with. auto& result = cursor.lastCell(); - if (result && (_recordId.isNull() || result->rid < _recordId)) { - _recordId = result->rid; + if (result && result->rid < targetRecordId) { + result = cursor.seekAtOrPast(targetRecordId); + } + if (!result) { + return RecordId(); + } + + if (result->rid > targetRecordId) { + // The column skipped ahead - have to restart at this new record ID. + matchedSinceAdvance = 0; + targetRecordId = result->rid; + } + + if (!checkFilter(result->value, _nextUnmatched, cursor.path())) { + // Advance the column until find a match and restart at this new record ID. + do { + result = cursor.next(); + if (!result) { + return RecordId(); + } + } while (!checkFilter(result->value, _nextUnmatched, cursor.path())); + matchedSinceAdvance = 0; + invariant(result->rid > targetRecordId); + targetRecordId = result->rid; + } + ++matchedSinceAdvance; + _nextUnmatched = (_nextUnmatched + 1) % _filteredPaths.size(); + } + invariant(!targetRecordId.isNull()); + + // Ensure that _all_ cursors have caugth up with the filtered record ID. Some of the cursors + // might skip ahead, which would mean the column is missing a value for this 'recordId'. + for (auto& cursor : _columnCursors) { + const auto& result = cursor.lastCell(); + if (result && result->rid < targetRecordId) { + cursor.seekAtOrPast(targetRecordId); + } + } + + return targetRecordId; +} + +RecordId ColumnScanStage::findMinRecordId() const { + if (_denseColumnCursor) { + // The cursor of the dense column cannot be ahead of any other, so it's always at the + // minimum. 
+ auto& result = _denseColumnCursor->lastCell(); + if (!result) { + return RecordId(); + } + return result->rid; + } + + auto recordId = RecordId(); + for (const auto& cursor : _columnCursors) { + const auto& result = cursor.lastCell(); + if (result && (recordId.isNull() || result->rid < recordId)) { + recordId = result->rid; + } + } + return recordId; +} + +RecordId ColumnScanStage::advanceCursors() { + if (_recordId.isNull()) { + if (_denseColumnCursor) { + _denseColumnCursor->seekAtOrPast(RecordId()); + } + for (auto& columnCursor : _columnCursors) { + columnCursor.seekAtOrPast(RecordId()); + } + return _filteredPaths.empty() ? findMinRecordId() : findNextRecordIdForFilteredColumns(); + } + + if (!_filteredPaths.empty()) { + // Nudge forward the "active" filtered cursor. The remaining ones will be synchronized + // by 'findNextRecordIdForFilteredColumns()'. + cursorForFilteredPath(_filteredPaths[_nextUnmatched]).next(); + return findNextRecordIdForFilteredColumns(); + } + + // In absence of filters all cursors iterate forward on their own. Some of the cursors might + // be ahead of the current '_recordId' because there are gaps in their columns - don't move + // them but only those that are at '_recordId' and therefore their values have been + // consumed. While at it, compute the new min record ID. 
+ auto nextRecordId = RecordId(); + if (_denseColumnCursor) { + invariant(_denseColumnCursor->lastCell()->rid == _recordId, + "Dense cursor should always be at the current minimum record ID"); + auto cell = _denseColumnCursor->next(); + if (!cell) { + return RecordId(); + } + nextRecordId = cell->rid; + } + for (auto& cursor : _columnCursors) { + auto& cell = cursor.lastCell(); + if (!cell) { + continue; // this column has been exhausted + } + if (cell->rid == _recordId) { + cell = cursor.next(); + } + if (cell && (nextRecordId.isNull() || cell->rid < nextRecordId)) { + invariant(!_denseColumnCursor, "Dense cursor should have the next lowest record ID"); + nextRecordId = cell->rid; } } + return nextRecordId; +} + +PlanState ColumnScanStage::getNext() { + auto optTimer(getOptTimer(_opCtx)); + + // We are about to call next() on a storage cursor so do not bother saving our internal + // state in case it yields as the state will be completely overwritten after the next() + // call. + disableSlotAccess(); + checkForInterrupt(_opCtx); + + _recordId = advanceCursors(); if (_recordId.isNull()) { return trackPlanState(PlanState::IS_EOF); } + bool useRowStore = false; + auto [outTag, outVal] = value::makeNewObject(); auto& outObj = *value::bitcastTo<value::Object*>(outVal); value::ValueGuard materializedObjGuard(outTag, outVal); StringDataSet pathsRead; - bool useRowStore = false; for (size_t i = 0; i < _columnCursors.size(); ++i) { - auto& lastCell = _columnCursors[i].lastCell(); - const auto& path = _columnCursors[i].path(); + if (!_includeInOutput[i]) { + continue; + } + auto& cursor = _columnCursors[i]; + auto& lastCell = cursor.lastCell(); boost::optional<SplitCellView> splitCellView; if (lastCell && lastCell->rid == _recordId) { splitCellView = SplitCellView::parse(lastCell->value); } - if (_columnCursors[i].includeInOutput() && !useRowStore) { + const auto& path = cursor.path(); + + if (!useRowStore) { if (splitCellView && (splitCellView->hasSubPaths || 
splitCellView->hasDuplicateFields)) { useRowStore = true; @@ -376,16 +654,12 @@ PlanState ColumnScanStage::getNext() { } } } - - if (splitCellView) { - _columnCursors[i].next(); - } } if (useRowStore) { ++_specificStats.numRowStoreFetches; - // TODO: In some cases we can avoid calling seek() on the row store cursor, and instead do - // a next() which should be much cheaper. + // TODO: In some cases we can avoid calling seek() on the row store cursor, and instead + // do a next() which should be much cheaper. auto record = _rowStoreCursor->seekExact(_recordId); // If there's no record, the index is out of sync with the row store. @@ -396,8 +670,8 @@ PlanState ColumnScanStage::getNext() { value::bitcastFrom<const char*>(record->data.data())); if (_reconstructedRecordAccessor) { - // TODO: in absence of record expression set the reconstructed record to be the same as - // the record, retrieved from the row store. + // TODO: in absence of record expression set the reconstructed record to be the same + // as the record, retrieved from the row store. invariant(_rowStoreExpr); auto [owned, tag, val] = _bytecode.run(_rowStoreExprCode.get()); _reconstructedRecordAccessor->reset(owned, tag, val); @@ -416,13 +690,14 @@ PlanState ColumnScanStage::getNext() { if (_tracker && _tracker->trackProgress<TrialRunTracker::kNumReads>(1)) { // If we're collecting execution stats during multi-planning and reached the end of the - // trial period because we've performed enough physical reads, bail out from the trial run - // by raising a special exception to signal a runtime planner that this candidate plan has - // completed its trial run early. Note that a trial period is executed only once per a - // PlanStage tree, and once completed never run again on the same tree. + // trial period because we've performed enough physical reads, bail out from the trial + // run by raising a special exception to signal a runtime planner that this candidate + // plan has completed its trial run early. 
Note that a trial period is executed only + // once per a PlanStage tree, and once completed never run again on the same tree. _tracker = nullptr; uasserted(ErrorCodes::QueryTrialRunCompleted, "Trial run early exit in scan"); } + return trackPlanState(PlanState::ADVANCED); } @@ -507,6 +782,31 @@ std::vector<DebugPrinter::Block> ColumnScanStage::debugPrint() const { } ret.emplace_back(DebugPrinter::Block("`]")); + // Print out per-path filters (if any). + if (!_filteredPaths.empty()) { + ret.emplace_back(DebugPrinter::Block("[`")); + for (size_t idx = 0; idx < _filteredPaths.size(); ++idx) { + if (idx) { + ret.emplace_back(DebugPrinter::Block("`;")); + } + + ret.emplace_back(str::stream() + << "\"" << _paths[_filteredPaths[idx].pathIndex] << "\": "); + DebugPrinter::addIdentifier(ret, _filteredPaths[idx].inputSlotId); + ret.emplace_back(DebugPrinter::Block("`,")); + DebugPrinter::addBlocks(ret, _filteredPaths[idx].filterExpr->debugPrint()); + } + ret.emplace_back(DebugPrinter::Block("`]")); + } + + if (_rowStoreExpr) { + ret.emplace_back(DebugPrinter::Block("[`")); + DebugPrinter::addIdentifier(ret, _rowStoreSlot); + ret.emplace_back(DebugPrinter::Block("`,")); + DebugPrinter::addBlocks(ret, _rowStoreExpr->debugPrint()); + ret.emplace_back(DebugPrinter::Block("`]")); + } + ret.emplace_back("@\"`"); DebugPrinter::addIdentifier(ret, _collUuid.toString()); ret.emplace_back("`\""); diff --git a/src/mongo/db/exec/sbe/stages/column_scan.h b/src/mongo/db/exec/sbe/stages/column_scan.h index 94bc8fa4034..7d30152f46d 100644 --- a/src/mongo/db/exec/sbe/stages/column_scan.h +++ b/src/mongo/db/exec/sbe/stages/column_scan.h @@ -41,24 +41,40 @@ namespace sbe { /** * A stage that scans provided columnar index. * - * Currently the stage produces an object into the 'recordSlot' such that accessing any of the given - * paths in it would be equivalent to accessing the paths in the corresponding object from the - * associated row store. 
In the future the stage will be extended to produce separate outputs for - * each path without materializing this intermediate object unless requested by the client. + * Currently the stage produces an object into the 'reconstructedRecordSlot' such that accessing any + * of the given paths in it would be equivalent to accessing the paths in the corresponding object + * from the associated row store. In the future the stage will be extended to produce separate + * outputs for each path without materializing this intermediate object unless requested by the + * client. * * Debug string representation: * - * COLUMN_SCAN recordSlot|none recordIdSlot|none [path_1, ..., path_n] collectionUuid indexName + * COLUMN_SCAN reconstructedRecordSlot|none recordIdSlot|none [path_1, ..., path_n] + * [filter_path_1: filterSlot_1, filterExpr_1; ...]? [roStoreSlot, rowStoreExpr]? + * collectionUuid indexName */ class ColumnScanStage final : public PlanStage { public: + struct PathFilter { + size_t pathIndex; // index into the paths array the stage will be using + std::unique_ptr<EExpression> filterExpr; + value::SlotId inputSlotId; + + PathFilter(size_t pathIndex, + std::unique_ptr<EExpression> filterExpr, + value::SlotId inputSlotId) + : pathIndex(pathIndex), filterExpr(std::move(filterExpr)), inputSlotId(inputSlotId) {} + }; + ColumnScanStage(UUID collectionUuid, StringData columnIndexName, std::vector<std::string> paths, + std::vector<bool> includeInOutput, boost::optional<value::SlotId> recordIdSlot, boost::optional<value::SlotId> reconstructedRecordSlot, value::SlotId rowStoreSlot, std::unique_ptr<EExpression> rowStoreExpr, + std::vector<PathFilter> filteredPaths, PlanYieldPolicy* yieldPolicy, PlanNodeId planNodeId, bool participateInTrialRunTracking = true); @@ -154,6 +170,9 @@ private: boost::optional<FullCellView>& lastCell() { return _lastCell; } + const boost::optional<FullCellView>& lastCell() const { + return _lastCell; + } size_t numNexts() const { return _stats.numNexts; 
@@ -184,6 +203,21 @@ private: void readParentsIntoObj(StringData path, value::Object* out, StringDataSet* pathsReadSetOut); + bool checkFilter(CellView cell, size_t filterIndex, const PathValue& path); + + // Finds the smallest record ID such that: + // 1) it is greater or equal to the record ID of all filtered columns cursors prior to the call; + // 2) the record with this ID passes the filters of all filtered columns. + // Ensures that the cursors are set to this record ID unless it's missing in the column (which + // is only possible for the non-filtered columns). + RecordId findNextRecordIdForFilteredColumns(); + + // Finds the lowest record ID across all cursors. Doesn't move any of the cursors. + RecordId findMinRecordId() const; + + // Move cursors to the next record to be processed. + RecordId advanceCursors(); + // The columnar index this stage is scanning and the associated row store collection. const UUID _collUuid; const std::string _columnIndexName; @@ -192,13 +226,16 @@ private: boost::optional<uint64_t> _catalogEpoch; // and are not changed afterwards. std::weak_ptr<const IndexCatalogEntry> _weakIndexCatalogEntry; - // Paths to be read from the index. + // Paths to be read from the index. '_includeInOutput' defines which of the fields should be + // included into the reconstructed record and the order of paths in '_paths' defines the + // orderding of the fields. The two vectors should have the same size. NB: No paths is possible + // when no filters are used and only constant computed columns are projected. In this case only + // the dense record ID column will be read. const std::vector<std::string> _paths; + const std::vector<bool> _includeInOutput; // The record id in the row store that is used to connect the per-path entries in the columnar - // index and to retrieve the full record from the row store, if necessary. Because we put into - // the slot the address of record id, we must guarantee that its lifetime is as long as the - // stage's. 
+ // index and to retrieve the full record from the row store, if necessary. RecordId _recordId; const boost::optional<value::SlotId> _recordIdSlot; @@ -218,17 +255,32 @@ private: const value::SlotId _rowStoreSlot; const std::unique_ptr<EExpression> _rowStoreExpr; + // Per path filters. The slots must be allocated by the client but downstream stages must not + // read from them. Multiple filters form a conjunction where each branch of the AND only passes + // when a value exists. Empty '_filteredPaths' means there are no filters. + const std::vector<PathFilter> _filteredPaths; + ColumnCursor& cursorForFilteredPath(const PathFilter& fp) { + return _columnCursors[fp.pathIndex]; + } + size_t _nextUnmatched = 0; // used when searching for the next matching record + std::unique_ptr<value::OwnedValueAccessor> _reconstructedRecordAccessor; std::unique_ptr<value::OwnedValueAccessor> _recordIdAccessor; std::unique_ptr<value::OwnedValueAccessor> _rowStoreAccessor; + std::vector<value::OwnedValueAccessor> _filterInputAccessors; + value::SlotAccessorMap _filterInputAccessorsMap; vm::ByteCode _bytecode; std::unique_ptr<vm::CodeFragment> _rowStoreExprCode; + std::vector<std::unique_ptr<vm::CodeFragment>> _filterExprsCode; - // Cursors to simultaneously read from the sections of the index for each path (and, possibly, - // auxiliary sections) and from the row store. + // Cursors to simultaneously read from the sections of the index for each path. std::vector<ColumnCursor> _columnCursors; StringMap<std::unique_ptr<ColumnCursor>> _parentPathCursors; + // Dense column contains record ids for all records. It is necessary to support projection + // semantics for missing values on paths. + std::unique_ptr<ColumnCursor> _denseColumnCursor; + // Cursor into the associated row store. 
std::unique_ptr<SeekableRecordCursor> _rowStoreCursor; bool _open{false}; diff --git a/src/mongo/db/exec/sbe/values/columnar.cpp b/src/mongo/db/exec/sbe/values/columnar.cpp index c1bd51f6b69..c2722ba9437 100644 --- a/src/mongo/db/exec/sbe/values/columnar.cpp +++ b/src/mongo/db/exec/sbe/values/columnar.cpp @@ -44,56 +44,6 @@ constexpr value::TypeTags kPlaceHolderType = value::TypeTags::Null; constexpr value::Value kPlaceHolderValue = 0; /* - * Array info reader based on a string representation of arrayInfo. Allows for reading/peeking of - * individual components. - */ -class ArrInfoReader { -public: - explicit ArrInfoReader(StringData arrInfoStr) : _arrInfo(arrInfoStr) {} - - char nextChar() const { - if (_offsetInArrInfo == _arrInfo.size()) { - // Reaching the end of the array info means an unlimited number of '|'s. - return '|'; - } - return _arrInfo[_offsetInArrInfo]; - } - - char takeNextChar() { - if (_offsetInArrInfo == _arrInfo.size()) { - // Reaching the end of the array info means an unlimited number of '|'s. - return '|'; - } - return _arrInfo[_offsetInArrInfo++]; - } - - size_t takeNumber() { - return ColumnStore::readArrInfoNumber(_arrInfo, &_offsetInArrInfo); - } - - bool empty() const { - return _arrInfo.empty(); - } - - /* - * Returns whether more explicit components are yet to be consumed. Since array info logically - * ends with an infinite stream of |, this function indicates whether there are more components - * which are physically present to be read, not including the infinite sequence of |. - */ - bool moreExplicitComponents() const { - return _offsetInArrInfo < _arrInfo.size(); - } - - StringData rawArrInfo() const { - return _arrInfo; - } - -private: - StringData _arrInfo; - size_t _offsetInArrInfo = 0; -}; - -/* * Tracks state necessary for reconstructing objects including the array info reader, the values * available for extraction, and the path. 
*/ diff --git a/src/mongo/db/exec/sbe/values/columnar.h b/src/mongo/db/exec/sbe/values/columnar.h index ec0fb5f07c8..01aef2acbce 100644 --- a/src/mongo/db/exec/sbe/values/columnar.h +++ b/src/mongo/db/exec/sbe/values/columnar.h @@ -39,6 +39,56 @@ */ namespace mongo::sbe { +/* + * Array info reader based on a string representation of arrayInfo. Allows for reading/peeking of + * individual components. + */ +class ArrInfoReader { +public: + explicit ArrInfoReader(StringData arrInfoStr) : _arrInfo(arrInfoStr) {} + + char nextChar() const { + if (_offsetInArrInfo == _arrInfo.size()) { + // Reaching the end of the array info means an unlimited number of '|'s. + return '|'; + } + return _arrInfo[_offsetInArrInfo]; + } + + char takeNextChar() { + if (_offsetInArrInfo == _arrInfo.size()) { + // Reaching the end of the array info means an unlimited number of '|'s. + return '|'; + } + return _arrInfo[_offsetInArrInfo++]; + } + + size_t takeNumber() { + return ColumnStore::readArrInfoNumber(_arrInfo, &_offsetInArrInfo); + } + + bool empty() const { + return _arrInfo.empty(); + } + + /* + * Returns whether more explicit components are yet to be consumed. Since array info logically + * ends with an infinite stream of |, this function indicates whether there are more components + * which are physically present to be read, not including the infinite sequence of |. + */ + bool moreExplicitComponents() const { + return _offsetInArrInfo < _arrInfo.size(); + } + + StringData rawArrInfo() const { + return _arrInfo; + } + +private: + StringData _arrInfo; + size_t _offsetInArrInfo = 0; +}; + /** * Represents a cell in a columnar index with ability to retrieve values in an SBE native format. 
*/ diff --git a/src/mongo/db/matcher/expression_algo.cpp b/src/mongo/db/matcher/expression_algo.cpp index 3d08b62e037..90453c66db0 100644 --- a/src/mongo/db/matcher/expression_algo.cpp +++ b/src/mongo/db/matcher/expression_algo.cpp @@ -476,6 +476,11 @@ std::unique_ptr<MatchExpression> tryAddExpr(StringData path, if (FieldRef(path).hasNumericPathComponents()) return me->shallowClone(); + // (TODO SERVER-68743) addExpr will rightfully create AND for expressions on the same path, but + // we cannot translate AND into EExpressions yet. + if (out.find(path) != out.end()) + return me->shallowClone(); + addExpr(path, me->shallowClone(), out); return nullptr; } @@ -497,7 +502,12 @@ std::unique_ptr<MatchExpression> splitMatchExpressionForColumns( // One exception to the above: We can support EQ with empty objects and empty arrays since // those are more obviously correct. Maybe could also support LT and LTE, but those don't // seem as important so are left for future work. - if (elem.type() == BSONType::Array || elem.type() == BSONType::Object) { + auto type = elem.type(); + if (type == BSONType::MinKey || type == BSONType::MaxKey) { + // MinKey and MaxKey have special semantics for comparison to objects. + return false; + } + if (type == BSONType::Array || type == BSONType::Object) { return isEQ && elem.Obj().isEmpty(); } @@ -512,12 +522,15 @@ std::unique_ptr<MatchExpression> splitMatchExpressionForColumns( case MatchExpression::BITS_ALL_SET: case MatchExpression::BITS_ALL_CLEAR: case MatchExpression::BITS_ANY_SET: - case MatchExpression::BITS_ANY_CLEAR: - case MatchExpression::EXISTS: { + case MatchExpression::BITS_ANY_CLEAR: { // Note: {$exists: false} is represented as {$not: {$exists: true}}. 
auto sub = checked_cast<const PathMatchExpression*>(me); return tryAddExpr(sub->path(), me, out); } + case MatchExpression::EXISTS: { + // (TODO SERVER-68743) need expr translation to enable pushing down $exists + return me->shallowClone(); + } case MatchExpression::LT: case MatchExpression::GT: @@ -530,28 +543,14 @@ std::unique_ptr<MatchExpression> splitMatchExpressionForColumns( return tryAddExpr(sub->path(), me, out); } - case MatchExpression::MATCH_IN: { - auto sub = checked_cast<const InMatchExpression*>(me); - // Note that $in treats regexes specially and stores them separately than the rest of - // the 'equalities'. We actually don't need to look at them here since any regex should - // be OK. A regex could only match a string, symbol, or other regex, any of which would - // be present in the columnar storage. - for (auto&& elem : sub->getEqualities()) { - if (!canCompareWith(elem, true)) - return me->shallowClone(); - } - return tryAddExpr(sub->path(), me, out); + // (TODO SERVER-68743) need expr translation to enable pushing down $in + return me->shallowClone(); } case MatchExpression::TYPE_OPERATOR: { - auto sub = checked_cast<const TypeMatchExpression*>(me); - tassert(6430600, - "Not expecting to find EOO in a $type expression", - !sub->typeSet().hasType(BSONType::EOO)); - if (sub->typeSet().hasType(BSONType::Object) || sub->typeSet().hasType(BSONType::Array)) - return me->shallowClone(); - return tryAddExpr(sub->path(), me, out); + // (TODO SERVER-68743) need expr translation to enable pushing down $type + return me->shallowClone(); } case MatchExpression::AND: { @@ -570,34 +569,8 @@ std::unique_ptr<MatchExpression> splitMatchExpressionForColumns( : std::make_unique<AndMatchExpression>(std::move(newChildren)); } - case MatchExpression::NOT: { - // {$ne: null} pattern is known to be important in cases like those in SERVER-27646 and - // SERVER-36465. 
- auto notExpr = checked_cast<const NotMatchExpression*>(me); - auto withinNot = notExpr->getChild(0); - - // Oddly, we parse {$ne: null} to a NOT -> EQ, but we parse {$not: {$eq: null}} into a - // more complex NOT -> AND -> EQ. Let's support both. - auto tryAddNENull = [&](const MatchExpression* negatedPred) { - if (negatedPred->matchType() != MatchExpression::EQ) { - return false; - } - auto eqPred = checked_cast<const EqualityMatchExpression*>(negatedPred); - if (eqPred->getData().isNull()) { - return tryAddExpr(eqPred->path(), me, out) == nullptr; - } - return false; - }; - if (tryAddNENull(withinNot)) { - // {$ne: null}. We had equality just under NOT. - return nullptr; - } else if (withinNot->matchType() == MatchExpression::AND && - withinNot->numChildren() == 1 && tryAddNENull(withinNot->getChild(0))) { - // {$not: {$eq: null}}: NOT -> AND -> EQ. - return nullptr; - } - // May be other cases, but left as future work. + // (TODO SERVER-68743) need expr translation to enable pushing down $not return me->shallowClone(); } diff --git a/src/mongo/db/matcher/expression_algo_test.cpp b/src/mongo/db/matcher/expression_algo_test.cpp index 520b2b036b2..73aa479bd2a 100644 --- a/src/mongo/db/matcher/expression_algo_test.cpp +++ b/src/mongo/db/matcher/expression_algo_test.cpp @@ -1692,15 +1692,21 @@ TEST(SplitMatchExpressionForColumns, SplitsSafeEqualities) { " kiwi: NumberDecimal('22')," " 'loggerhead shrike': {$minKey: 1}," " mallard: {$maxKey: 1}}"); + ParsedMatchExpression minMaxKeyComps( + "{'loggerhead shrike': {$minKey: 1}," + " mallard: {$maxKey: 1}}"); + auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(mixedEquals.get()); - ASSERT_EQ(splitUp.size(), 12) << splitUp.size(); + ASSERT_EQ(splitUp.size(), 12 - 2) << splitUp.size(); ASSERT(splitUp.contains("albatross")); ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::EQ) << splitUp.at("albatross")->toString(); ASSERT(splitUp.contains("blackbird")); 
ASSERT(splitUp.at("blackbird")->matchType() == MatchExpression::EQ) << splitUp.at("blackbird")->toString(); - ASSERT(residual == nullptr); + + // Comparison against Min- and MaxKey cannot be pushed down. + assertMatchesEqual(minMaxKeyComps, residual); } } @@ -1740,26 +1746,6 @@ TEST(SplitMatchExpressionForColumns, DoesNotSupportEqualsNull) { } } -TEST(SplitMatchExpressionForColumns, DoesSupportNotEqualsNull) { - { - ParsedMatchExpression neNull("{a: {$ne: null}}"); - auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(neNull.get()); - ASSERT_EQ(splitUp.size(), 1) << splitUp.size(); - ASSERT(splitUp.contains("a")); - ASSERT(splitUp.at("a")->matchType() == MatchExpression::NOT) << splitUp.at("a")->toString(); - ASSERT(residual == nullptr); - } - { - ParsedMatchExpression notEqualsNull("{a: {$not: {$eq: null}}}"); - auto&& [splitUp, residual] = - expression::splitMatchExpressionForColumns(notEqualsNull.get()); - ASSERT_EQ(splitUp.size(), 1) << splitUp.size(); - ASSERT(splitUp.contains("a")); - ASSERT(splitUp.at("a")->matchType() == MatchExpression::NOT) << splitUp.at("a")->toString(); - ASSERT(residual == nullptr); - } -} - TEST(SplitMatchExpressionForColumns, DoesNotSupportCompoundEquals) { { ParsedMatchExpression implicitEqualsArray("{a: [1, 2]}"); @@ -1853,7 +1839,7 @@ TEST(SplitMatchExpressionForColumns, SupportsComparisonsLikeEqualities) { "{" " albatross: {$lt: 100}," " blackbird: {$gt: 0}," - " cowbird: {$gte: 0, $lte: 100}" + " cowbird: 50" "}"); auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(combinationPredicate.get()); @@ -1865,7 +1851,7 @@ TEST(SplitMatchExpressionForColumns, SupportsComparisonsLikeEqualities) { ASSERT(splitUp.at("blackbird")->matchType() == MatchExpression::GT) << splitUp.at("blackbird")->toString(); ASSERT(splitUp.contains("cowbird")); - ASSERT(splitUp.at("cowbird")->matchType() == MatchExpression::AND) + ASSERT(splitUp.at("cowbird")->matchType() == MatchExpression::EQ) << 
splitUp.at("cowbird")->toString(); ASSERT(residual == nullptr); } @@ -1958,60 +1944,6 @@ TEST(SplitMatchExpressionForColumns, SupportsTypeSpecificPredicates) { ASSERT(residual == nullptr); } -TEST(SplitMatchExpressionForColumns, SupportsInWithRegexes) { - { - // First confirm a $in clause is supported without regexes. - ParsedMatchExpression stringInClause("{albatross: {$in: ['big', 'ol', 'bird']}}"); - auto&& [splitUp, residual] = - expression::splitMatchExpressionForColumns(stringInClause.get()); - ASSERT_EQ(splitUp.size(), 1) << splitUp.size(); - ASSERT(splitUp.contains("albatross")); - ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::MATCH_IN) - << splitUp.at("albatross")->toString(); - ASSERT(residual == nullptr); - } - { - // Test that $in with regexes is supported also work. - ParsedMatchExpression regexInClause("{albatross: {$in: [/big/, /bird/]}}"); - auto&& [splitUp, residual] = - expression::splitMatchExpressionForColumns(regexInClause.get()); - ASSERT_EQ(splitUp.size(), 1) << splitUp.size(); - ASSERT(splitUp.contains("albatross")); - ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::MATCH_IN) - << splitUp.at("albatross")->toString(); - ASSERT(residual == nullptr); - } - { - // Test that a mix of both is supported - ParsedMatchExpression regexInClause("{albatross: {$in: [/big/, 'bird']}}"); - auto&& [splitUp, residual] = - expression::splitMatchExpressionForColumns(regexInClause.get()); - ASSERT_EQ(splitUp.size(), 1) << splitUp.size(); - ASSERT(splitUp.contains("albatross")); - ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::MATCH_IN) - << splitUp.at("albatross")->toString(); - ASSERT(residual == nullptr); - } - { - // Test that it is still disallowed if there's a disqualifying equality such as a null. 
- ParsedMatchExpression regexInClause("{albatross: {$in: [/big/, null, 'bird']}}"); - auto&& [splitUp, residual] = - expression::splitMatchExpressionForColumns(regexInClause.get()); - ASSERT_EQ(splitUp.size(), 0) << splitUp.size(); - assertMatchesEqual(regexInClause, residual); - } -} - -TEST(SplitMatchExpressionForColumns, SupportsExistsTrue) { - ParsedMatchExpression existsPredicate("{albatross: {$exists: true}}"); - auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(existsPredicate.get()); - ASSERT_EQ(splitUp.size(), 1) << splitUp.size(); - ASSERT(splitUp.contains("albatross")); - ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::EXISTS) - << splitUp.at("albatross")->toString(); - ASSERT(residual == nullptr); -} - TEST(SplitMatchExpressionForColumns, DoesNotSupportExistsFalse) { ParsedMatchExpression existsPredicate("{albatross: {$exists: false}}"); auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(existsPredicate.get()); @@ -2019,59 +1951,6 @@ TEST(SplitMatchExpressionForColumns, DoesNotSupportExistsFalse) { assertMatchesEqual(existsPredicate, residual); } -// $in constraints are similar to equality. Most of them should work, exceptions broken out in the -// next test. 
-TEST(SplitMatchExpressionForColumns, SupportsInPredicates) { - { - ParsedMatchExpression emptyIn("{albatross: {$in: []}}"); - auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(emptyIn.get()); - ASSERT_EQ(splitUp.size(), 1) << splitUp.size(); - ASSERT(splitUp.contains("albatross")); - ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::MATCH_IN) - << splitUp.at("albatross")->toString(); - ASSERT(residual == nullptr); - } - { - ParsedMatchExpression singleElementIn("{albatross: {$in: [4]}}"); - auto&& [splitUp, residual] = - expression::splitMatchExpressionForColumns(singleElementIn.get()); - ASSERT_EQ(splitUp.size(), 1) << splitUp.size(); - ASSERT(splitUp.contains("albatross")); - ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::MATCH_IN) - << splitUp.at("albatross")->toString(); - ASSERT(residual == nullptr); - } - { - ParsedMatchExpression inWithEmptyArray("{albatross: {$in: [[]]}}"); - auto&& [splitUp, residual] = - expression::splitMatchExpressionForColumns(inWithEmptyArray.get()); - ASSERT_EQ(splitUp.size(), 1) << splitUp.size(); - ASSERT(splitUp.contains("albatross")); - ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::MATCH_IN) - << splitUp.at("albatross")->toString(); - ASSERT(residual == nullptr); - } - { - ParsedMatchExpression inWithEmptyObject("{albatross: {$in: [{}]}}"); - auto&& [splitUp, residual] = - expression::splitMatchExpressionForColumns(inWithEmptyObject.get()); - ASSERT_GT(splitUp.size(), 0); - ASSERT(splitUp.contains("albatross")); - ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::MATCH_IN) - << splitUp.at("albatross")->toString(); - ASSERT(residual == nullptr); - } - { - ParsedMatchExpression mixedTypeIn("{albatross: {$in: [4, {}, [], 'string', /regex/]}}"); - auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(mixedTypeIn.get()); - ASSERT_EQ(splitUp.size(), 1) << splitUp.size(); - ASSERT(splitUp.contains("albatross")); - 
ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::MATCH_IN) - << splitUp.at("albatross")->toString(); - ASSERT(residual == nullptr); - } -} - // We can't support compound types, just like for equality. TEST(SplitMatchExpressionForColumns, DoesNotSupportCertainInEdgeCases) { { @@ -2102,47 +1981,6 @@ TEST(SplitMatchExpressionForColumns, DoesNotSupportCertainInEdgeCases) { } } -TEST(SplitMatchExpressionForColumns, SupportsTypePredicates) { - { - ParsedMatchExpression intFilter("{albatross: {$type: 'int'}}"); - auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(intFilter.get()); - ASSERT_GT(splitUp.size(), 0); - ASSERT(splitUp.contains("albatross")); - ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::TYPE_OPERATOR) - << splitUp.at("albatross")->toString(); - ASSERT_EQ(splitUp.size(), 1) << splitUp.size(); - ASSERT(residual == nullptr); - } - { - ParsedMatchExpression numberFilter("{albatross: {$type: 'number'}}"); - auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(numberFilter.get()); - ASSERT_GT(splitUp.size(), 0); - ASSERT(splitUp.contains("albatross")); - ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::TYPE_OPERATOR) - << splitUp.at("albatross")->toString(); - ASSERT_EQ(splitUp.size(), 1) << splitUp.size(); - ASSERT(residual == nullptr); - } - { - ParsedMatchExpression stringFilter("{albatross: {$type: 'string'}}"); - auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(stringFilter.get()); - ASSERT_EQ(splitUp.size(), 1) << splitUp.size(); - ASSERT(splitUp.contains("albatross")); - ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::TYPE_OPERATOR) - << splitUp.at("albatross")->toString(); - ASSERT(residual == nullptr); - } - { - ParsedMatchExpression nullFilter("{albatross: {$type: 'null'}}"); - auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(nullFilter.get()); - ASSERT_EQ(splitUp.size(), 1) << splitUp.size(); - 
ASSERT(splitUp.contains("albatross")); - ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::TYPE_OPERATOR) - << splitUp.at("albatross")->toString(); - ASSERT(residual == nullptr); - } -} - TEST(SplitMatchExpressionForColumns, DoesNotSupportQueriesForTypeObject) { ParsedMatchExpression objectFilter("{albatross: {$type: 'object'}}"); auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(objectFilter.get()); @@ -2173,44 +2011,16 @@ TEST(SplitMatchExpressionForColumns, DoesNotSupportNotQueries) { } } -TEST(SplitMatchExpressionForColumns, CanCombinePredicates) { - ParsedMatchExpression compoundFilter( - "{" - " albatross: {$gte: 100}," - " albatross: {$mod: [2, 0]}" - "}"); - auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(compoundFilter.get()); - ASSERT_EQ(splitUp.size(), 1) << splitUp.size(); - ASSERT(splitUp.contains("albatross")); - ASSERT(splitUp["albatross"]->matchType() == MatchExpression::AND) - << splitUp.at("albatross")->toString(); - ASSERT_EQ(splitUp.at("albatross")->numChildren(), 2) << splitUp.at("albatross")->toString(); - // Don't care about the order. 
- auto andExpr = splitUp.at("albatross").get(); - auto firstChild = andExpr->getChild(0); - if (firstChild->matchType() == MatchExpression::GTE) { - ASSERT(firstChild->matchType() == MatchExpression::GTE) << firstChild->toString(); - ASSERT(andExpr->getChild(1)->matchType() == MatchExpression::MOD) << firstChild->toString(); - } else { - ASSERT(firstChild->matchType() == MatchExpression::MOD) << firstChild->toString(); - ASSERT(andExpr->getChild(1)->matchType() == MatchExpression::GTE) << firstChild->toString(); - } - ASSERT(residual == nullptr); -} - TEST(SplitMatchExpressionForColumns, CanSplitPredicate) { ParsedMatchExpression complexPredicate(R"({ - a: {$gte: 0, $lt: 10}, - "addresses.zip": {$in: ["12345", "01234"]}, + a: {$gte: 0}, unsubscribed: false, specialAddress: {$exists: false} })"); auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(complexPredicate.get()); - ASSERT_EQ(splitUp.size(), 3) << splitUp.size(); + ASSERT_EQ(splitUp.size(), 2) << splitUp.size(); ASSERT(splitUp.contains("a")); - ASSERT(splitUp.at("a")->matchType() == MatchExpression::AND) << splitUp.at("a")->toString(); - ASSERT_EQ(splitUp.at("a")->numChildren(), 2) << splitUp.at("a")->toString(); - ASSERT(splitUp.contains("addresses.zip")); + ASSERT(splitUp.at("a")->matchType() == MatchExpression::GTE) << splitUp.at("a")->toString(); ASSERT(splitUp.contains("unsubscribed")); ASSERT(!splitUp.contains("specialAddress")); ParsedMatchExpression expectedResidual("{specialAddress: {$exists: false}}"); @@ -2224,16 +2034,14 @@ TEST(SplitMatchExpressionForColumns, SupportsDottedPaths) { " \"blackbird.feet\": {$mod: [2, 0]}," " \"blackbird.softwareUpdates\": {$bitsAllSet: 7}," // Stress the path combination logic with some prefixes and suffixes to be sure. 
- " blackbird: {$ne: null}," - " bla: {$ne: null}," - " blackbirds: {$exists: true}," - " \"blackbird.feetsies\": {$ne: null}," - " \"cowbird.beakLength\": {$gte: 24, $lt: 40}," + " bla: {$eq: \"whatever\"}," + " \"blackbird.feetsies\": {$eq: \"whatever\"}," + " \"cowbird.beakLength\": {$gte: 24}," " \"cowbird.eggSet\": {$bitsAnySet: 7}" "}"); auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(compoundFilter.get()); ASSERT(residual == nullptr); - ASSERT_EQ(splitUp.size(), 9) << splitUp.size(); + ASSERT_EQ(splitUp.size(), 7) << splitUp.size(); ASSERT(splitUp.contains("albatross")); ASSERT(splitUp.at("albatross")->matchType() == MatchExpression::REGEX) << splitUp.at("albatross")->toString(); @@ -2243,17 +2051,9 @@ TEST(SplitMatchExpressionForColumns, SupportsDottedPaths) { ASSERT(splitUp.contains("blackbird.softwareUpdates")); ASSERT(splitUp.at("blackbird.softwareUpdates")->matchType() == MatchExpression::BITS_ALL_SET) << splitUp.at("blackbird.softwareUpdates")->toString(); - ASSERT(splitUp.contains("blackbird")); - ASSERT(splitUp.at("blackbird")->matchType() == MatchExpression::NOT) - << splitUp.at("blackbird")->toString(); ASSERT(splitUp.contains("bla")); - ASSERT(splitUp.contains("blackbirds")); - ASSERT(splitUp.at("blackbirds")->matchType() == MatchExpression::EXISTS) - << splitUp.at("blackbirds")->toString(); ASSERT(splitUp.contains("blackbird.feetsies")); - ASSERT(splitUp.at("cowbird.beakLength")->matchType() == MatchExpression::AND) - << splitUp.at("cowbird.beakLength")->toString(); - ASSERT_EQ(splitUp.at("cowbird.beakLength")->numChildren(), 2) + ASSERT(splitUp.at("cowbird.beakLength")->matchType() == MatchExpression::GTE) << splitUp.at("cowbird.beakLength")->toString(); ASSERT(splitUp.at("cowbird.eggSet")->matchType() == MatchExpression::BITS_ANY_SET) << splitUp.at("cowbird.eggSet")->toString(); @@ -2266,13 +2066,13 @@ TEST(SplitMatchExpressionForColumns, LeavesOriginalMatchExpressionFunctional) { "{" " albatross: {$lt: 100}," " 
blackbird: {$gt: 0}," - " cowbird: {$gte: 0, $lte: 100}" + " cowbird: {$gte: 0}" "}"); auto&& [splitUp, residual] = expression::splitMatchExpressionForColumns(combinationPredicate.get()); ASSERT_GT(splitUp.size(), 0); ASSERT(residual == nullptr); - // Won't bother asserting on the detaiils here - done above. + // Won't bother asserting on the details here - done above. ASSERT(combinationPredicate.get()->matchesBSON( BSON("albatross" << 45 << "blackbird" << 1 << "cowbird" << 2))); } diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp index 9f33ae1c38b..059ba11244f 100644 --- a/src/mongo/db/query/query_planner.cpp +++ b/src/mongo/db/query/query_planner.cpp @@ -323,12 +323,8 @@ StatusWith<std::unique_ptr<QuerySolution>> tryToBuildColumnScan( // TODO SERVER-67140: Check if the columnar index actually provides the fields we need. std::unique_ptr<MatchExpression> residualPredicate; StringMap<std::unique_ptr<MatchExpression>> filterSplitByColumn; - if (params.options & QueryPlannerParams::GENERATE_PER_COLUMN_FILTERS) { - std::tie(filterSplitByColumn, residualPredicate) = - expression::splitMatchExpressionForColumns(query.root()); - } else { - residualPredicate = query.root()->shallowClone(); - } + std::tie(filterSplitByColumn, residualPredicate) = + expression::splitMatchExpressionForColumns(query.root()); auto fieldLimitStatus = checkFieldLimits(filterDeps.fields, outputDeps.fields, filterSplitByColumn); diff --git a/src/mongo/db/query/query_planner_columnar_test.cpp b/src/mongo/db/query/query_planner_columnar_test.cpp index 68f0d5a6c7f..914b9ae76f4 100644 --- a/src/mongo/db/query/query_planner_columnar_test.cpp +++ b/src/mongo/db/query/query_planner_columnar_test.cpp @@ -475,10 +475,9 @@ TEST_F(QueryPlannerColumnarTest, ComplexPredicateSplitDemo) { addColumnStoreIndexAndEnableFilterSplitting(); auto complexPredicate = fromjson(R"({ - a: {$gte: 0, $lt: 10}, - "addresses.zip": {$in: ["12345", "01234"]}, - unsubscribed: false, - 
specialAddress: {$exists: true} + a: {$gte: 0}, + "addresses.zip": "12345", + unsubscribed: false })"); runQuerySortProj(complexPredicate, BSONObj(), BSON("a" << 1 << "_id" << 0)); assertNumSolutions(1U); @@ -488,13 +487,12 @@ TEST_F(QueryPlannerColumnarTest, ComplexPredicateSplitDemo) { node: { column_scan: { filtersByPath: { - a: {$and: [{a: {$gte: 0}}, {a: {$lt: 10}}]}, - 'addresses.zip': {'addresses.zip': {$in: ['12345', '01234']}}, - unsubscribed: {unsubscribed: {$eq: false}}, - specialAddress: {specialAddress: {$exists: true}} + a: {a: {$gte: 0}}, + 'addresses.zip': {'addresses.zip': {$eq: '12345'}}, + unsubscribed: {unsubscribed: {$eq: false}} }, outputFields: ['a'], - matchFields: ['a', 'addresses.zip', 'unsubscribed', 'specialAddress'] + matchFields: ['a', 'addresses.zip', 'unsubscribed'] } } } @@ -506,8 +504,8 @@ TEST_F(QueryPlannerColumnarTest, ComplexPredicateSplitsIntoParts) { // Same predicate as above, except with exists: false, which disqualifies the whole thing. auto complexPredicate = fromjson(R"({ - a: {$gte: 0, $lt: 10}, - "addresses.zip": {$in: ["12345", "01234"]}, + a: {$gte: 0}, + "addresses.zip": "12345", unsubscribed: false, specialAddress: {$exists: false}, doNotContact: {$exists: false} @@ -519,8 +517,8 @@ TEST_F(QueryPlannerColumnarTest, ComplexPredicateSplitsIntoParts) { node: { column_scan: { filtersByPath: { - a: {a: {$gte: 0, $lt: 10}}, - "addresses.zip": {"addresses.zip": {$in: ['12345', '01234']}}, + a: {a: {$gte: 0}}, + 'addresses.zip': {'addresses.zip': {$eq: '12345'}}, unsubscribed: {unsubscribed: false} }, outputFields: ['a'], @@ -809,31 +807,4 @@ TEST_F(QueryPlannerColumnarTest, SelectsFirstFromMultipleEligibleColumnStoreInde } })"); } - -TEST_F(QueryPlannerColumnarTest, FullPredicateOption) { - params.columnStoreIndexes.emplace_back(kIndexName); - - // Filter that could be pushed down, but isn't due to the lack of the - // GENERATE_PER_COLUMN_FILTER flag. 
- auto predicate = fromjson(R"({ - specialAddress: {$exists: true}, - doNotContact: {$exists: true} - })"); - runQuerySortProj(predicate, BSONObj(), BSON("a" << 1 << "_id" << 0)); - assertSolutionExists(R"({ - proj: { - spec: {a: 1, _id: 0}, - node: { - column_scan: { - outputFields: ['a'], - matchFields: ['specialAddress', 'doNotContact'], - postAssemblyFilter: { - specialAddress: {$exists: true}, - doNotContact: {$exists: true} - } - } - } - } - })"); -} } // namespace mongo diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp index e70f1e80cac..1855bf1534e 100644 --- a/src/mongo/db/query/sbe_stage_builder.cpp +++ b/src/mongo/db/query/sbe_stage_builder.cpp @@ -58,6 +58,7 @@ #include "mongo/db/fts/fts_query_impl.h" #include "mongo/db/fts/fts_spec.h" #include "mongo/db/index/fts_access_method.h" +#include "mongo/db/matcher/expression_leaf.h" #include "mongo/db/pipeline/abt/field_map_builder.h" #include "mongo/db/pipeline/expression.h" #include "mongo/db/pipeline/expression_visitor.h" @@ -713,7 +714,85 @@ std::unique_ptr<sbe::EExpression> abtToExpr(optimizer::ABT& abt, optimizer::Slot optimizer::SBEExpressionLowering exprLower{env, slotMap}; return exprLower.optimize(abt); } + +EvalExpr generatePerColumnFilterExpr(StageBuilderState& state, + const MatchExpression* me, + sbe::value::SlotId inputSlot) { + switch (me->matchType()) { + // These are always safe since they will never match documents missing their field, or where + // the element is an object or array. 
+ case MatchExpression::REGEX: + return generateRegexExpr( + state, checked_cast<const RegexMatchExpression*>(me), inputSlot); + case MatchExpression::MOD: + return generateModExpr(state, checked_cast<const ModMatchExpression*>(me), inputSlot); + case MatchExpression::BITS_ALL_SET: + return generateBitTestExpr(state, + checked_cast<const BitTestMatchExpression*>(me), + sbe::BitTestBehavior::AllSet, + inputSlot); + case MatchExpression::BITS_ALL_CLEAR: + return generateBitTestExpr(state, + checked_cast<const BitTestMatchExpression*>(me), + sbe::BitTestBehavior::AllClear, + inputSlot); + case MatchExpression::BITS_ANY_SET: + return generateBitTestExpr(state, + checked_cast<const BitTestMatchExpression*>(me), + sbe::BitTestBehavior::AnySet, + inputSlot); + case MatchExpression::BITS_ANY_CLEAR: + return generateBitTestExpr(state, + checked_cast<const BitTestMatchExpression*>(me), + sbe::BitTestBehavior::AnyClear, + inputSlot); + case MatchExpression::EXISTS: { + uasserted(6733601, "(TODO SERVER-68743) need expr translation to enable $exists"); + } + case MatchExpression::LT: + return generateComparisonExpr(state, + checked_cast<const ComparisonMatchExpression*>(me), + sbe::EPrimBinary::less, + inputSlot); + case MatchExpression::GT: + return generateComparisonExpr(state, + checked_cast<const ComparisonMatchExpression*>(me), + sbe::EPrimBinary::greater, + inputSlot); + case MatchExpression::EQ: + return generateComparisonExpr(state, + checked_cast<const ComparisonMatchExpression*>(me), + sbe::EPrimBinary::eq, + inputSlot); + case MatchExpression::LTE: + return generateComparisonExpr(state, + checked_cast<const ComparisonMatchExpression*>(me), + sbe::EPrimBinary::lessEq, + inputSlot); + case MatchExpression::GTE: + return generateComparisonExpr(state, + checked_cast<const ComparisonMatchExpression*>(me), + sbe::EPrimBinary::greaterEq, + inputSlot); + case MatchExpression::MATCH_IN: { + uasserted(6733602, "(TODO SERVER-68743) need expr translation to enable $in"); + } + 
case MatchExpression::TYPE_OPERATOR: { + uasserted(6733603, "(TODO SERVER-68743) need expr translation to enable $type"); + } + case MatchExpression::NOT: { + uasserted(6733604, "(TODO SERVER-68743) need expr translation to enable $not"); + } + + default: + uasserted(6733605, + std::string("Expression ") + me->serialize().toString() + + " should not be pushed down as a per-column filter"); + } + return {}; +} } // namespace + std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder::buildColumnScan( const QuerySolutionNode* root, const PlanStageReqs& reqs) { tassert(6312404, @@ -726,8 +805,6 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder "'postAssemblyFilter' to be used instead.", !csn->filter); - tassert(6610251, "Expected no filters by path", csn->filtersByPath.empty()); - PlanStageSlots outputs; auto reconstructedRecordSlot = _slotIdGenerator.generate(); @@ -768,16 +845,58 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder auto abt = builder.generateABT(); auto rowStoreExpr = abt ? abtToExpr(*abt, slotMap) : emptyExpr->clone(); - std::unique_ptr<sbe::PlanStage> stage = std::make_unique<sbe::ColumnScanStage>( - getCurrentCollection(reqs)->uuid(), - csn->indexEntry.catalogName, - std::vector<std::string>{csn->allFields.begin(), csn->allFields.end()}, - ridSlot, - reconstructedRecordSlot, - rowStoreSlot, - std::move(rowStoreExpr), - _yieldPolicy, - csn->nodeId()); + // Get all the paths but make sure "_id" comes first (the order of paths given to the + // column_scan stage defines the order of fields in the reconstructed record). + std::vector<std::string> paths; + paths.reserve(csn->allFields.size()); + if (csn->allFields.find("_id") != csn->allFields.end()) { + paths.push_back("_id"); + } + for (const auto& path : csn->allFields) { + if (path != "_id") { + paths.push_back(path); + } + } + + // Identify the filtered columns, if any, and create slots/expressions for them. 
+ std::vector<sbe::ColumnScanStage::PathFilter> filteredPaths; + filteredPaths.reserve(csn->filtersByPath.size()); + for (size_t i = 0; i < paths.size(); i++) { + auto itFilter = csn->filtersByPath.find(paths[i]); + if (itFilter != csn->filtersByPath.end()) { + auto filterInputSlot = _slotIdGenerator.generate(); + + auto expr = generatePerColumnFilterExpr(_state, itFilter->second.get(), filterInputSlot) + .extractExpr(); + filteredPaths.emplace_back(i, std::move(expr), filterInputSlot); + } + } + + // Tag which of the paths should be included into the output. + DepsTracker residual; + if (csn->postAssemblyFilter) { + csn->postAssemblyFilter->addDependencies(&residual); + } + std::vector<bool> includeInOutput(paths.size(), false); + for (size_t i = 0; i < paths.size(); i++) { + if (csn->outputFields.find(paths[i]) != csn->outputFields.end() || + residual.fields.find(paths[i]) != residual.fields.end()) { + includeInOutput[i] = true; + } + } + + std::unique_ptr<sbe::PlanStage> stage = + std::make_unique<sbe::ColumnScanStage>(getCurrentCollection(reqs)->uuid(), + csn->indexEntry.catalogName, + std::move(paths), + std::move(includeInOutput), + ridSlot, + reconstructedRecordSlot, + rowStoreSlot, + std::move(rowStoreExpr), + std::move(filteredPaths), + _yieldPolicy, + csn->nodeId()); // Generate post assembly filter. if (csn->postAssemblyFilter) { @@ -793,6 +912,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder csn->nodeId()); stage = std::move(outputStage.stage); } + return {std::move(stage), std::move(outputs)}; } |