diff options
author | Irina Yatsenko <irina.yatsenko@mongodb.com> | 2022-08-04 22:50:44 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-08-04 23:58:18 +0000 |
commit | ede6b2adff822dd767cef40c5e1351ee8ed27aaa (patch) | |
tree | 2bffb9eb6be4bcf887914ed5af0c5972a4a75708 /jstests/core | |
parent | 441237232383286003f6cf231889fffe93225317 (diff) | |
download | mongo-ede6b2adff822dd767cef40c5e1351ee8ed27aaa.tar.gz |
SERVER-67336 Per-path filters
Diffstat (limited to 'jstests/core')
-rw-r--r-- | jstests/core/columnstore_index_per_path_filters.js | 329 |
1 files changed, 329 insertions, 0 deletions
diff --git a/jstests/core/columnstore_index_per_path_filters.js b/jstests/core/columnstore_index_per_path_filters.js new file mode 100644 index 00000000000..c9dd442f098 --- /dev/null +++ b/jstests/core/columnstore_index_per_path_filters.js @@ -0,0 +1,329 @@ +/** + * Testing of just the query layer's integration for columnar index when filters are used that + * might be pushed down into the column scan stage. + * + * @tags: [ + * # columnstore indexes are new in 6.1. + * requires_fcv_61, + * # Runs explain on an aggregate command which is only compatible with readConcern local. + * assumes_read_concern_unchanged, + * # TODO SERVER-66925 We could potentially need to resume an index build in the event of a + * # stepdown, which is not yet implemented. + * does_not_support_stepdowns, + * uses_column_store_index, + * ] + */ +(function() { +"use strict"; + +load("jstests/aggregation/extras/utils.js"); // For "resultsEq." +load("jstests/libs/analyze_plan.js"); // For "planHasStage." +load("jstests/libs/sbe_explain_helpers.js"); // For getSbePlanStages. +load("jstests/libs/sbe_util.js"); // For "checkSBEEnabled."" + +const columnstoreEnabled = + checkSBEEnabled(db, ["featureFlagColumnstoreIndexes", "featureFlagSbeFull"]); +if (!columnstoreEnabled) { + jsTestLog("Skipping columnstore index validation test since the feature flag is not enabled."); + return; +} + +const coll_filters = db.columnstore_index_per_path_filters; +function runPerPathFiltersTest({docs, query, projection, expected, testDescription}) { + coll_filters.drop(); + coll_filters.insert(docs); + assert.commandWorked(coll_filters.createIndex({"$**": "columnstore"})); + + // Order of the elements within the result and 'expected' is not significant for 'resultsEq' but + // use small datasets to make the test failures more readable. + const explain = coll_filters.find(query, projection).explain(); + const errMsg = " **TEST** " + testDescription + tojson(explain); + const actual = coll_filters.find(query, projection).toArray(); + assert(resultsEq(actual, expected), + `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`); +} +// Sanity check that without filters, the cursors on other columns are advanced correctly. +(function testPerPathFilters_SingleColumnScalarsAndArrays() { + const docs = [ + {n: 0, x: 42}, + {n: 1, y: 42}, + {n: 2, x: 42}, + {n: 3}, + {n: 4, x: 42, y: 42}, + {n: 5, x: 42}, + ]; + runPerPathFiltersTest({ + docs: docs, + query: {}, + projection: {a: 1, _id: 0, n: 1, x: 1, y: 1}, + expected: docs, + testDescription: "SingleColumnScalarsAndArrays" + }); +})(); + +// Checks matching on top-level path that contains scalars and arrays. +(function testPerPathFilters_SingleColumnScalarsAndArrays() { + const docs = [ + {n: 0, x: 42}, + {n: 1, x: [[42]]}, + {n: 2, x: [0, 42]}, + {n: 3, x: [42, 0]}, + {n: 4, x: [[42], 0]}, + {n: 5, x: [{y: 0}, 42]}, + {n: 6, x: [{y: 0}, [42]]}, + {n: 7, x: [[0, 1], 42]}, + {n: 8, x: [[0, 1], [42]]}, + {n: 9, x: {a: 42}}, + {n: 10, x: [{a: 42}]}, + {n: 11, x: [{a: 0}, 42]}, + ]; + // Projecting "_id" out and instead projecting as the first (in alphabetical order) column a + // non-existing field helps to flush out bugs with incorrect lookup of the filtered columns + // among all columns involved in the query. + runPerPathFiltersTest({ + docs: docs, + query: {x: 42}, + projection: {a: 1, _id: 0, n: 1}, + expected: [{n: 0}, {n: 2}, {n: 3}, {n: 5}, {n: 7}, {n: 11}], + testDescription: "SingleColumnScalarsAndArrays" + }); +})(); + +// Checks matching on sub-path that contains scalars and arrays. +(function testPerPathFilters_SingleColumnScalarsAndArrays_SubField() { + const docs = [ + {_id: 0, x: {y: 42}}, + {_id: 1, x: {y: [0, 42, {z: 1}]}}, + {_id: 2, x: {y: 0}}, + {_id: 3, x: [{y: 0}, {y: 42}]}, + {_id: 4, x: [{y: 0}, {y: 1}]}, + {_id: 5, x: [42, {y: 0}, {z: 42}]}, + {_id: 6, x: 0}, + {_id: 7, x: {z: 0}}, + {_id: 8, x: {y: {z: 0}}}, + {_id: 9, x: [{y: [0, 42]}, {y: 1}]}, + {_id: 10, x: [{y: [[42], 0]}]}, + {_id: 11, x: [[{y: 42}, 0], 1]} + ]; + runPerPathFiltersTest({ + docs: docs, + query: {"x.y": 42}, + projection: {_id: 1}, + expected: [{_id: 0}, {_id: 1}, {_id: 3}, {_id: 9}], + testDescription: "SingleColumnScalarsAndArrays_SubField" + }); +})(); + +// Check that a single filtered column correctly handles matching, no-matching and missing values. +(function testPerPathFilters_SingleColumnMatchNoMatchMissingValues() { + const docs = [ + {_id: 0, x: 42}, + {_id: 1, x: 0}, + {_id: 2, x: 42}, + {_id: 3, no_x: 0}, + {_id: 4, x: 42}, + {_id: 5, x: 0}, + {_id: 6, no_x: 0}, + {_id: 7, x: 42}, + {_id: 8, no_x: 0}, + {_id: 9, x: 0}, + {_id: 10, x: 42}, + ]; + runPerPathFiltersTest({ + docs: docs, + query: {x: 42}, + projection: {_id: 1}, + expected: [{_id: 0}, {_id: 2}, {_id: 4}, {_id: 7}, {_id: 10}], + testDescription: "SingleColumnMatchNoMatchMissingValues" + }); +})(); + +// Check zig-zagging of two filters. We cannot assert through a JS test that the cursors for the +// filtered columns are advanced as described here in the comments, but the test attempts to +// exercise various match/no-match/missing combinations of values across columns. +(function testPerPathFilters_TwoColumns() { + const docs = [ + {_id: 0, x: 0, y: 0}, // start by iterating x + {_id: 1, x: 42, y: 42}, // seek into y and match! - continue iterating on x + {_id: 2, x: 42, no_y: 0}, // seeking into y skips to n:3 + {_id: 3, x: 42, y: 0}, // iterating on y + {_id: 4, x: 42, y: 42}, // seek into x and match! - continue iterating on y + {_id: 5, no_x: 0, y: 42}, // seek into x skips to n:6 + {_id: 6, x: 42, y: 0}, // seek into y but no match - iterate on y + {_id: 7, x: 0, y: 42}, // seek into x but no match - iterate on x + {_id: 8, no_x: 0, no_y: 0}, // skipped by x + {_id: 9, x: 42, y: 42}, // seek into y and match! + ]; + // Adding into the projection specification non-existent fields doesn't change the output but + // helps to flush out bugs with incorrect indexing of filtered paths among all others. + runPerPathFiltersTest({ + docs: docs, + query: {x: 42, y: 42}, + projection: {_id: 1, a: 1, xa: 1, xb: 1, ya: 1}, + expected: [{_id: 1}, {_id: 4}, {_id: 9}], + testDescription: "TwoColumns" + }); +})(); + +// Check zig-zagging of three filters. +(function testPerPathFilters_ThreeColumns() { + const docs = [ + {_id: 0, x: 0, y: 42, z: 42}, // x + {_id: 1, x: 42, y: 42, z: 0}, // x->y->z + {_id: 2, x: 0, y: 42, z: 42}, // z->x + {_id: 3, x: 0, y: 42, z: 42}, // x + {_id: 4, x: 42, no_y: 0, z: 42}, // x->y + {_id: 5, x: 42, y: 0, z: 42}, // y + {_id: 6, x: 42, y: 42, z: 42}, // match! ->y + {_id: 7, x: 42, y: 42, z: 42}, // match! ->y + {_id: 8, no_x: 0, y: 42, z: 42}, // y->z->x + {_id: 9, x: 42, y: 42, no_z: 0}, // x + {_id: 10, x: 42, y: 42, z: 42}, // match! + ]; + // Adding into the projection specification non-existent fields doesn't change the output but + // helps to flush out bugs with incorrect indexing of filtered paths among all others. + runPerPathFiltersTest({ + docs: docs, + query: {x: 42, y: 42, z: 42}, + projection: {_id: 1, a: 1, b: 1, xa: 1, xb: 1, ya: 1, za: 1}, + expected: [{_id: 6}, {_id: 7}, {_id: 10}], + testDescription: "ThreeColumns" + }); +})(); + +// Check projection of filtered columns. +(function testPerPathFilters_ProjectFilteredColumn() { + const docs = [ + {_id: 0, x: {y: 42}}, + {_id: 1, x: {y: 42, z: 0}}, + {_id: 2, x: [0, {y: 42}, {y: 0}, {z: 0}]}, + ]; + runPerPathFiltersTest({ + docs: docs, + query: {"x.y": 42}, + projection: {_id: 1, "x.y": 1}, + expected: [{_id: 0, x: {y: 42}}, {_id: 1, x: {y: 42}}, {_id: 2, x: [{y: 42}, {y: 0}, {}]}], + testDescription: "ProjectFilteredColumn" + }); +})(); + +// Check correctness when have both per-path and residual filters. +(function testPerPathFilters_PerPathAndResidualFilters() { + const docs = [ + {_id: 0, x: 42, no_y: 0}, + {_id: 1, x: 42, y: 0}, + {_id: 2, x: 0, no_y: 0}, + {_id: 3, x: 0, y: 0}, + {_id: 4, no_x: 0, no_y: 0}, + {_id: 5, x: 42, no_y: 0}, + ]; + runPerPathFiltersTest({ + docs: docs, + query: {x: 42, y: {$exists: false}}, // {$exists: false} causes the residual filter + projection: {_id: 1}, + expected: [{_id: 0}, {_id: 5}], + testDescription: "PerPathAndResidualFilters" + }); +})(); + +// Check degenerate case with no paths. +(function testPerPathFilters_NoPathsProjected() { + const docs = [ + {_id: 0, x: 42}, + {_id: 1, x: 0}, + {_id: 2, no_x: 42}, + ]; + runPerPathFiltersTest({ + docs: docs, + query: {x: 42}, + projection: {_id: 0, a: {$literal: 1}}, + expected: [{a: 1}], + testDescription: "NoPathsProjected" + }); + runPerPathFiltersTest({ + docs: docs, + query: {}, + projection: {_id: 0, a: {$literal: 1}}, + expected: [{a: 1}, {a: 1}, {a: 1}], + testDescription: "NoPathsProjected (and no filter)" + }); +})(); + +// While using columnar indexes doesn't guarantee a specific field ordering in the result objects, +// we still try to provide a stable experience to the user, so we output "_id" first and other +// fields in alphabetically ascending order. +(function testPerPathFilters_FieldOrder() { + const docs = [{z: 42, a: 42, _id: 42, _a: 42}]; + + coll_filters.drop(); + coll_filters.insert(docs); + assert.commandWorked(coll_filters.createIndex({"$**": "columnstore"})); + + let res = tojson(coll_filters.find({}, {_a: 1, a: 1, z: 1}).toArray()[0]); + let expected = '{ "_id" : 42, "_a" : 42, "a" : 42, "z" : 42 }'; + assert(res == expected, `actual: ${res} != expected: ${expected} in **TEST** field order 1`); + + // Having a filter on a path that is also being projected, should not affect the order. + res = tojson(coll_filters.find({z: 42}, {_a: 1, a: 1, z: 1}).toArray()[0]); + expected = '{ "_id" : 42, "_a" : 42, "a" : 42, "z" : 42 }'; + assert(res == expected, `actual: ${res} != expected: ${expected} in **TEST** field order 2`); + + // Omitting the "_id" field should not affect the order. + res = tojson(coll_filters.find({a: 42}, {_id: 0, _a: 1, z: 1}).toArray()[0]); + expected = '{ "_a" : 42, "z" : 42 }'; + assert(res == expected, `actual: ${res} != expected: ${expected} in **TEST** field order 3`); +})(); + +// Sanity test that per-column filtering is meeting the efficiency expectations. +(function testPerPathFilters_FieldOrder() { + coll_filters.drop(); + + const docsCount = 1000; + const expectedToMatchCount = 10; + for (let i = 0; i < docsCount; i++) { + coll_filters.insert({_id: i, x: i % 2, y: i % (docsCount / expectedToMatchCount)}); + } + assert.commandWorked(coll_filters.createIndex({"$**": "columnstore"})); + + assert.eq(coll_filters.find({x: 1, y: 1}, {_id: 1, x: 1}).toArray().length, + expectedToMatchCount); + const explain = coll_filters.find({x: 1, y: 1}, {_id: 1, x: 1}).explain("executionStats"); + + const columnScanStages = getSbePlanStages(explain, "COLUMN_SCAN"); + assert.eq(columnScanStages.length, 1, `Could not find 'COLUMN_SCAN' stage: ${tojson(explain)}`); + + const columns = columnScanStages[0].columns; + assertArrayEq({ + actual: columnScanStages[0].paths, + expected: ["_id", "x", "y"], + extraErrorMsg: 'Paths used by column scan stage' + }); + assert.eq(Object.keys(columns).length, 3, `Used colums ${columns}`); + + const _id = columns._id; + const x = columns.x; + const y = columns.y; + + assert(_id.usedInOutput, "_id column should be used in output"); + assert(x.usedInOutput, "x column should be used in output"); + assert(!y.usedInOutput, "y column should be used only for filtering"); + + // When there are no missing fields, the number of "next" calls in zig-zag search algorithm is + // equal to the number of documents docsCount (NB: in non-filtered search the number of "next" + // calls is close to k*docsCount where k is the number of paths). + assert.eq(_id.numNexts, 0, 'Should not iterate on non-filtered column'); + assert.eq(x.numNexts + y.numNexts, docsCount, 'Total count of "next" calls'); + + // The columns with higher selectivity should be preferred by the zig-zag search for driving the + // "next" calls. Due to the regularity of data in this test (if "y" matched "x" would match as + // well), after the first match "y" column completely takes over. + assert.gt(y.numNexts, 0.9 * docsCount, 'high selectivity should drive "next" calls'); + + // We seek into each column to set up the cursors, after that seeks into _id should only happen + // on full match, and seeks into x or y should only happen on partial matches. + assert.eq(_id.numSeeks, 1 + expectedToMatchCount, "Seeks into the _id column"); + assert.lt(x.numSeeks + y.numSeeks, + 2 * expectedToMatchCount, + "Number of seeks in filtered columns should be small"); +})(); +})(); |