SERVER-67336 Per-path filters

author: Irina Yatsenko <irina.yatsenko@mongodb.com> 2022-08-04 22:50:44 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-08-04 23:58:18 +0000
commit: ede6b2adff822dd767cef40c5e1351ee8ed27aaa (patch)
tree: 2bffb9eb6be4bcf887914ed5af0c5972a4a75708 /jstests
parent: 441237232383286003f6cf231889fffe93225317 (diff)
download: mongo-ede6b2adff822dd767cef40c5e1351ee8ed27aaa.tar.gz
2 files changed, 446 insertions, 41 deletions
diff --git a/jstests/core/columnstore_index_per_path_filters.js b/jstests/core/columnstore_index_per_path_filters.js
new file mode 100644
index 00000000000..c9dd442f098
--- /dev/null
+++ b/jstests/core/columnstore_index_per_path_filters.js
@@ -0,0 +1,329 @@
+/**
+ * Testing of just the query layer's integration for columnar index when filters are used that
+ * might be pushed down into the column scan stage.
+ *
+ * @tags: [
+ *   # columnstore indexes are new in 6.1.
+ *   requires_fcv_61,
+ *   # Runs explain on an aggregate command which is only compatible with readConcern local.
+ *   assumes_read_concern_unchanged,
+ *   # TODO SERVER-66925 We could potentially need to resume an index build in the event of a
+ *   # stepdown, which is not yet implemented.
+ *   does_not_support_stepdowns,
+ *   uses_column_store_index,
+ * ]
+ */
+(function() {
+"use strict";
+
+load("jstests/aggregation/extras/utils.js");  // For "resultsEq."
+load("jstests/libs/analyze_plan.js");         // For "planHasStage."
+load("jstests/libs/sbe_explain_helpers.js");  // For getSbePlanStages.
+load("jstests/libs/sbe_util.js");             // For "checkSBEEnabled.""
+
+const columnstoreEnabled =
+    checkSBEEnabled(db, ["featureFlagColumnstoreIndexes", "featureFlagSbeFull"]);
+if (!columnstoreEnabled) {
+    jsTestLog("Skipping columnstore index validation test since the feature flag is not enabled.");
+    return;
+}
+
+const coll_filters = db.columnstore_index_per_path_filters;
+function runPerPathFiltersTest({docs, query, projection, expected, testDescription}) {
+    coll_filters.drop();
+    coll_filters.insert(docs);
+    assert.commandWorked(coll_filters.createIndex({"$**": "columnstore"}));
+
+    // Order of the elements within the result and 'expected' is not significant for 'resultsEq' but
+    // use small datasets to make the test failures more readable.
+    const explain = coll_filters.find(query, projection).explain();
+    const errMsg = " **TEST** " + testDescription + tojson(explain);
+    const actual = coll_filters.find(query, projection).toArray();
+    assert(resultsEq(actual, expected),
+           `actual=${tojson(actual)}, expected=${tojson(expected)}${errMsg}`);
+}
+// Sanity check that without filters, the cursors on other columns are advanced correctly.
+(function testPerPathFilters_SingleColumnScalarsAndArrays() {
+    const docs = [
+        {n: 0, x: 42},
+        {n: 1, y: 42},
+        {n: 2, x: 42},
+        {n: 3},
+        {n: 4, x: 42, y: 42},
+        {n: 5, x: 42},
+    ];
+    runPerPathFiltersTest({
+        docs: docs,
+        query: {},
+        projection: {a: 1, _id: 0, n: 1, x: 1, y: 1},
+        expected: docs,
+        testDescription: "SingleColumnScalarsAndArrays"
+    });
+})();
+
+// Checks matching on top-level path that contains scalars and arrays.
+(function testPerPathFilters_SingleColumnScalarsAndArrays() {
+    const docs = [
+        {n: 0, x: 42},
+        {n: 1, x: [[42]]},
+        {n: 2, x: [0, 42]},
+        {n: 3, x: [42, 0]},
+        {n: 4, x: [[42], 0]},
+        {n: 5, x: [{y: 0}, 42]},
+        {n: 6, x: [{y: 0}, [42]]},
+        {n: 7, x: [[0, 1], 42]},
+        {n: 8, x: [[0, 1], [42]]},
+        {n: 9, x: {a: 42}},
+        {n: 10, x: [{a: 42}]},
+        {n: 11, x: [{a: 0}, 42]},
+    ];
+    // Projecting "_id" out and instead projecting as the first (in alphabetical order) column a
+    // non-existing field helps to flush out bugs with incorrect lookup of the filtered columns
+    // among all columns involved in the query.
+    runPerPathFiltersTest({
+        docs: docs,
+        query: {x: 42},
+        projection: {a: 1, _id: 0, n: 1},
+        expected: [{n: 0}, {n: 2}, {n: 3}, {n: 5}, {n: 7}, {n: 11}],
+        testDescription: "SingleColumnScalarsAndArrays"
+    });
+})();
+
+// Checks matching on sub-path that contains scalars and arrays.
+(function testPerPathFilters_SingleColumnScalarsAndArrays_SubField() {
+    const docs = [
+        {_id: 0, x: {y: 42}},
+        {_id: 1, x: {y: [0, 42, {z: 1}]}},
+        {_id: 2, x: {y: 0}},
+        {_id: 3, x: [{y: 0}, {y: 42}]},
+        {_id: 4, x: [{y: 0}, {y: 1}]},
+        {_id: 5, x: [42, {y: 0}, {z: 42}]},
+        {_id: 6, x: 0},
+        {_id: 7, x: {z: 0}},
+        {_id: 8, x: {y: {z: 0}}},
+        {_id: 9, x: [{y: [0, 42]}, {y: 1}]},
+        {_id: 10, x: [{y: [[42], 0]}]},
+        {_id: 11, x: [[{y: 42}, 0], 1]}
+    ];
+    runPerPathFiltersTest({
+        docs: docs,
+        query: {"x.y": 42},
+        projection: {_id: 1},
+        expected: [{_id: 0}, {_id: 1}, {_id: 3}, {_id: 9}],
+        testDescription: "SingleColumnScalarsAndArrays_SubField"
+    });
+})();
+
+// Check that a single filtered column correctly handles matching, no-matching and missing values.
+(function testPerPathFilters_SingleColumnMatchNoMatchMissingValues() {
+    const docs = [
+        {_id: 0, x: 42},
+        {_id: 1, x: 0},
+        {_id: 2, x: 42},
+        {_id: 3, no_x: 0},
+        {_id: 4, x: 42},
+        {_id: 5, x: 0},
+        {_id: 6, no_x: 0},
+        {_id: 7, x: 42},
+        {_id: 8, no_x: 0},
+        {_id: 9, x: 0},
+        {_id: 10, x: 42},
+    ];
+    runPerPathFiltersTest({
+        docs: docs,
+        query: {x: 42},
+        projection: {_id: 1},
+        expected: [{_id: 0}, {_id: 2}, {_id: 4}, {_id: 7}, {_id: 10}],
+        testDescription: "SingleColumnMatchNoMatchMissingValues"
+    });
+})();
+
+// Check zig-zagging of two filters. We cannot assert through a JS test that the cursors for the
+// filtered columns are advanced as described here in the comments, but the test attempts to
+// exercise various match/no-match/missing combinations of values across columns.
+(function testPerPathFilters_TwoColumns() {
+    const docs = [
+        {_id: 0, x: 0, y: 0},        // start by iterating x
+        {_id: 1, x: 42, y: 42},      // seek into y and match! - continue iterating on x
+        {_id: 2, x: 42, no_y: 0},    // seeking into y skips to n:3
+        {_id: 3, x: 42, y: 0},       // iterating on y
+        {_id: 4, x: 42, y: 42},      // seek into x and match! - continue iterating on y
+        {_id: 5, no_x: 0, y: 42},    // seek into x skips to n:6
+        {_id: 6, x: 42, y: 0},       // seek into y but no match - iterate on y
+        {_id: 7, x: 0, y: 42},       // seek into x but no match - iterate on x
+        {_id: 8, no_x: 0, no_y: 0},  // skipped by x
+        {_id: 9, x: 42, y: 42},      // seek into y and match!
+    ];
+    // Adding into the projection specification non-existent fields doesn't change the output but
+    // helps to flush out bugs with incorrect indexing of filtered paths among all others.
+    runPerPathFiltersTest({
+        docs: docs,
+        query: {x: 42, y: 42},
+        projection: {_id: 1, a: 1, xa: 1, xb: 1, ya: 1},
+        expected: [{_id: 1}, {_id: 4}, {_id: 9}],
+        testDescription: "TwoColumns"
+    });
+})();
+
+// Check zig-zagging of three filters.
+(function testPerPathFilters_ThreeColumns() {
+    const docs = [
+        {_id: 0, x: 0, y: 42, z: 42},     // x
+        {_id: 1, x: 42, y: 42, z: 0},     // x->y->z
+        {_id: 2, x: 0, y: 42, z: 42},     // z->x
+        {_id: 3, x: 0, y: 42, z: 42},     // x
+        {_id: 4, x: 42, no_y: 0, z: 42},  // x->y
+        {_id: 5, x: 42, y: 0, z: 42},     // y
+        {_id: 6, x: 42, y: 42, z: 42},    // match! ->y
+        {_id: 7, x: 42, y: 42, z: 42},    // match! ->y
+        {_id: 8, no_x: 0, y: 42, z: 42},  // y->z->x
+        {_id: 9, x: 42, y: 42, no_z: 0},  // x
+        {_id: 10, x: 42, y: 42, z: 42},   // match!
+    ];
+    // Adding into the projection specification non-existent fields doesn't change the output but
+    // helps to flush out bugs with incorrect indexing of filtered paths among all others.
+    runPerPathFiltersTest({
+        docs: docs,
+        query: {x: 42, y: 42, z: 42},
+        projection: {_id: 1, a: 1, b: 1, xa: 1, xb: 1, ya: 1, za: 1},
+        expected: [{_id: 6}, {_id: 7}, {_id: 10}],
+        testDescription: "ThreeColumns"
+    });
+})();
+
+// Check projection of filtered columns.
+(function testPerPathFilters_ProjectFilteredColumn() {
+    const docs = [
+        {_id: 0, x: {y: 42}},
+        {_id: 1, x: {y: 42, z: 0}},
+        {_id: 2, x: [0, {y: 42}, {y: 0}, {z: 0}]},
+    ];
+    runPerPathFiltersTest({
+        docs: docs,
+        query: {"x.y": 42},
+        projection: {_id: 1, "x.y": 1},
+        expected: [{_id: 0, x: {y: 42}}, {_id: 1, x: {y: 42}}, {_id: 2, x: [{y: 42}, {y: 0}, {}]}],
+        testDescription: "ProjectFilteredColumn"
+    });
+})();
+
+// Check correctness when have both per-path and residual filters.
+(function testPerPathFilters_PerPathAndResidualFilters() {
+    const docs = [
+        {_id: 0, x: 42, no_y: 0},
+        {_id: 1, x: 42, y: 0},
+        {_id: 2, x: 0, no_y: 0},
+        {_id: 3, x: 0, y: 0},
+        {_id: 4, no_x: 0, no_y: 0},
+        {_id: 5, x: 42, no_y: 0},
+    ];
+    runPerPathFiltersTest({
+        docs: docs,
+        query: {x: 42, y: {$exists: false}},  // {$exists: false} causes the residual filter
+        projection: {_id: 1},
+        expected: [{_id: 0}, {_id: 5}],
+        testDescription: "PerPathAndResidualFilters"
+    });
+})();
+
+// Check degenerate case with no paths.
+(function testPerPathFilters_NoPathsProjected() {
+    const docs = [
+        {_id: 0, x: 42},
+        {_id: 1, x: 0},
+        {_id: 2, no_x: 42},
+    ];
+    runPerPathFiltersTest({
+        docs: docs,
+        query: {x: 42},
+        projection: {_id: 0, a: {$literal: 1}},
+        expected: [{a: 1}],
+        testDescription: "NoPathsProjected"
+    });
+    runPerPathFiltersTest({
+        docs: docs,
+        query: {},
+        projection: {_id: 0, a: {$literal: 1}},
+        expected: [{a: 1}, {a: 1}, {a: 1}],
+        testDescription: "NoPathsProjected (and no filter)"
+    });
+})();
+
+// While using columnar indexes doesn't guarantee a specific field ordering in the result objects,
+// we still try to provide a stable experience to the user, so we output "_id" first and other
+// fields in alphabetically ascending order.
+(function testPerPathFilters_FieldOrder() {
+    const docs = [{z: 42, a: 42, _id: 42, _a: 42}];
+
+    coll_filters.drop();
+    coll_filters.insert(docs);
+    assert.commandWorked(coll_filters.createIndex({"$**": "columnstore"}));
+
+    let res = tojson(coll_filters.find({}, {_a: 1, a: 1, z: 1}).toArray()[0]);
+    let expected = '{ "_id" : 42, "_a" : 42, "a" : 42, "z" : 42 }';
+    assert(res == expected, `actual: ${res} != expected: ${expected} in **TEST** field order 1`);
+
+    // Having a filter on a path that is also being projected, should not affect the order.
+    res = tojson(coll_filters.find({z: 42}, {_a: 1, a: 1, z: 1}).toArray()[0]);
+    expected = '{ "_id" : 42, "_a" : 42, "a" : 42, "z" : 42 }';
+    assert(res == expected, `actual: ${res} != expected: ${expected} in **TEST** field order 2`);
+
+    // Omitting the "_id" field should not affect the order.
+    res = tojson(coll_filters.find({a: 42}, {_id: 0, _a: 1, z: 1}).toArray()[0]);
+    expected = '{ "_a" : 42, "z" : 42 }';
+    assert(res == expected, `actual: ${res} != expected: ${expected} in **TEST** field order 3`);
+})();
+
+// Sanity test that per-column filtering is meeting the efficiency expectations.
+(function testPerPathFilters_FieldOrder() {
+    coll_filters.drop();
+
+    const docsCount = 1000;
+    const expectedToMatchCount = 10;
+    for (let i = 0; i < docsCount; i++) {
+        coll_filters.insert({_id: i, x: i % 2, y: i % (docsCount / expectedToMatchCount)});
+    }
+    assert.commandWorked(coll_filters.createIndex({"$**": "columnstore"}));
+
+    assert.eq(coll_filters.find({x: 1, y: 1}, {_id: 1, x: 1}).toArray().length,
+              expectedToMatchCount);
+    const explain = coll_filters.find({x: 1, y: 1}, {_id: 1, x: 1}).explain("executionStats");
+
+    const columnScanStages = getSbePlanStages(explain, "COLUMN_SCAN");
+    assert.eq(columnScanStages.length, 1, `Could not find 'COLUMN_SCAN' stage: ${tojson(explain)}`);
+
+    const columns = columnScanStages[0].columns;
+    assertArrayEq({
+        actual: columnScanStages[0].paths,
+        expected: ["_id", "x", "y"],
+        extraErrorMsg: 'Paths used by column scan stage'
+    });
+    assert.eq(Object.keys(columns).length, 3, `Used colums ${columns}`);
+
+    const _id = columns._id;
+    const x = columns.x;
+    const y = columns.y;
+
+    assert(_id.usedInOutput, "_id column should be used in output");
+    assert(x.usedInOutput, "x column should be used in output");
+    assert(!y.usedInOutput, "y column should be used only for filtering");
+
+    // When there are no missing fields, the number of "next" calls in zig-zag search algorithm is
+    // equal to the number of documents docsCount (NB: in non-filtered search the number of "next"
+    // calls is close to k*docsCount where k is the number of paths).
+    assert.eq(_id.numNexts, 0, 'Should not iterate on non-filtered column');
+    assert.eq(x.numNexts + y.numNexts, docsCount, 'Total count of "next" calls');
+
+    // The columns with higher selectivity should be preferred by the zig-zag search for driving the
+    // "next" calls. Due to the regularity of data in this test (if "y" matched "x" would match as
+    // well), after the first match "y" column completely takes over.
+    assert.gt(y.numNexts, 0.9 * docsCount, 'high selectivity should drive "next" calls');
+
+    // We seek into each column to set up the cursors, after that seeks into _id should only happen
+    // on full match, and seeks into x or y should only happen on partial matches.
+    assert.eq(_id.numSeeks, 1 + expectedToMatchCount, "Seeks into the _id column");
+    assert.lt(x.numSeeks + y.numSeeks,
+              2 * expectedToMatchCount,
+              "Number of seeks in filtered columns should be small");
+})();
+})();
diff --git a/jstests/noPassthroughWithMongod/column_scan_explain.js b/jstests/noPassthroughWithMongod/column_scan_explain.js
index e137d714514..1f2a60e8253 100644
--- a/jstests/noPassthroughWithMongod/column_scan_explain.js
+++ b/jstests/noPassthroughWithMongod/column_scan_explain.js
@@ -4,6 +4,7 @@
 (function() {
 "use strict";
 
+load("jstests/aggregation/extras/utils.js");  // For assertArrayEq
 load("jstests/libs/analyze_plan.js");         // For planHasStage.
 load("jstests/libs/sbe_util.js");             // For checkSBEEnabled.
 load("jstests/libs/sbe_explain_helpers.js");  // For getSbePlanStages.
@@ -34,35 +35,54 @@ assert.commandWorked(coll.insertMany(docs));
 
     const columnScanStages = getSbePlanStages(explain, "COLUMN_SCAN");
     assert.eq(columnScanStages.length, 1, `Could not find 'COLUMN_SCAN' stage: ${tojson(explain)}`);
+    const columnScan = columnScanStages[0];
+
+    assertArrayEq({
+        actual: columnScan.paths,
+        expected: ["_id", "x", "y.a"],
+        extraErrorMsg: 'Paths used by column scan stage'
+    });
 
     // Verifying column fields.
-    const columns = columnScanStages[0].columns;
-    assert.eq(Object.keys(columns).length, 4, 'Number of columns should be 4.');
+    const columns = columnScan.columns;
+    assert.eq(
+        Object.keys(columns).length, 4, `Should access 4 columns but accessed: ${tojson(columns)}`);
+
+    // We seek into each column once, when setting up the cursors. The dense column is the first to
+    // hit EOF after iterating over all documents so other columns iterate at least one time less.
     const expectedColumns = {
-        "<<RowId Column>>": {"numNexts": 5, "numSeeks": 1, "usedInOutput": false},
-        "_id": {"numNexts": 5, "numSeeks": 1, "usedInOutput": true},
-        "x": {"numNexts": 5, "numSeeks": 1, "usedInOutput": true},
+        "<<RowId Column>>": {"numNexts": docs.length, "numSeeks": 1, "usedInOutput": false},
+        "_id": {"numNexts": docs.length - 1, "numSeeks": 1, "usedInOutput": true},
+        "x": {"numNexts": docs.length - 1, "numSeeks": 1, "usedInOutput": true},
         "y.a": {"numNexts": 1, "numSeeks": 1, "usedInOutput": true}
     };
     for (const [columnName, expectedObj] of Object.entries(expectedColumns)) {
         assert.eq(sortDoc(columns[columnName]),
                   sortDoc(expectedObj),
-                  `Mismatching entry for column ${columnName}`);
+                  `Mismatching entry for column ${tojson(columnName)}`);
     }
 
     // Verifying parent column fields.
-    const parentColumns = columnScanStages[0].parentColumns;
-    assert.eq(Object.keys(parentColumns).length, 1, 'Number of parent columns should be 1.');
+    const parentColumns = columnScan.parentColumns;
+    assert.eq(Object.keys(parentColumns).length,
+              1,
+              `Should access 1 parent column but accessed: ${tojson(parentColumns)}`);
     // Expecting 4 lookups on the "y" parent column for the 3 docs which didn't have a "y.a"
-    // value and 1 for an unsuccessful call to seek.
+    // value and 1 for an unsuccessful call to seek. We should not iterate over parent columns.
     assert.eq(sortDoc(parentColumns.y),
               {"numNexts": 0, "numSeeks": 4},
-              'Mismatching entry for parent column \'y\'');
-
-    assert.eq(explain.executionStats.totalKeysExamined, 24, `Mismatch in totalKeysExamined.`);
-    assert.eq(
-        columnScanStages[0].numRowStoreFetches, 0, 'Number of row store fetches should be 0.');
-    assert.eq(columnScanStages[0].nReturned, 5, 'Number returned should be 5.');
+              'Mismatching entry for parent column "y"');
+
+    // 'totalKeysExamined' should be equal to the sum of "next" and "seek" calls across all columns.
+    assert.eq(explain.executionStats.totalKeysExamined,
+              columns["<<RowId Column>>"].numNexts + columns["<<RowId Column>>"].numSeeks +
+                  columns["_id"].numNexts + columns["_id"].numSeeks + columns["x"].numNexts +
+                  columns["x"].numSeeks + columns["y.a"].numNexts + columns["y.a"].numSeeks +
+                  parentColumns["y"].numNexts + parentColumns["y"].numSeeks,
+              `Mismatch in totalKeysExamined.`);
+
+    assert.eq(columnScan.numRowStoreFetches, 0, 'Mismatch in numRowStoreFetches');
+    assert.eq(columnScan.nReturned, docs.length, 'nReturned: should return all docs');
 }());
 
 // Test the explain output for a scan on a nonexistent field.
@@ -71,29 +91,42 @@ assert.commandWorked(coll.insertMany(docs));
 
     const columnScanStages = getSbePlanStages(explain, "COLUMN_SCAN");
     assert.eq(columnScanStages.length, 1, `Could not find 'COLUMN_SCAN' stage: ${tojson(explain)}`);
+    const columnScan = columnScanStages[0];
+
+    assertArrayEq({
+        actual: columnScan.paths,
+        expected: ["_id", "z"],
+        extraErrorMsg: 'Paths used by column scan stage'
+    });
 
     // Verifying column fields.
-    const columns = columnScanStages[0].columns;
-    assert.eq(Object.keys(columns).length, 3, 'Number of columns should be 3.');
+    const columns = columnScan.columns;
+    assert.eq(
+        Object.keys(columns).length, 3, `Should access 3 columns but accessed: ${tojson(columns)}`);
     const expectedColumns = {
-        "<<RowId Column>>": {"numNexts": 5, "numSeeks": 1, "usedInOutput": false},
-        "_id": {"numNexts": 5, "numSeeks": 1, "usedInOutput": true},
+        "<<RowId Column>>": {"numNexts": docs.length, "numSeeks": 1, "usedInOutput": false},
+        "_id": {"numNexts": docs.length - 1, "numSeeks": 1, "usedInOutput": true},
         "z": {"numNexts": 0, "numSeeks": 1, "usedInOutput": true},
     };
     for (const [columnName, expectedObj] of Object.entries(expectedColumns)) {
         assert.eq(sortDoc(columns[columnName]),
                   sortDoc(expectedObj),
-                  `Mismatching entry for column ${columnName}`);
+                  `Mismatching entry for column "${columnName}"`);
     }
 
     // Verifying parent column fields.
-    const parentColumns = columnScanStages[0].parentColumns;
-    assert.eq(parentColumns, {});
-
-    assert.eq(explain.executionStats.totalKeysExamined, 13, `Mismatch in totalKeysExamined.`);
-    assert.eq(
-        columnScanStages[0].numRowStoreFetches, 0, 'Number of row store fetches should be 0.');
-    assert.eq(columnScanStages[0].nReturned, 5, 'Number returned should be 5.');
+    const parentColumns = columnScan.parentColumns;
+    assert.eq(parentColumns, {}, "Should not access parent columns");
+
+    // 'totalKeysExamined' should be equal to the sum of "next" and "seek" calls across all columns.
+    assert.eq(explain.executionStats.totalKeysExamined,
+              columns["<<RowId Column>>"].numNexts + columns["<<RowId Column>>"].numSeeks +
+                  columns["_id"].numNexts + columns["_id"].numSeeks + columns["z"].numNexts +
+                  columns["z"].numSeeks,
+              `Mismatch in totalKeysExamined.`);
+
+    assert.eq(columnScan.numRowStoreFetches, 0, 'Mismatch in numRowStoreFetches');
+    assert.eq(columnScan.nReturned, docs.length, 'nReturned: should return all docs');
 }());
 
 // Test the explain output for a scan on a 2-level nested field.
@@ -102,14 +135,22 @@ assert.commandWorked(coll.insertMany(docs));
 
     const columnScanStages = getSbePlanStages(explain, "COLUMN_SCAN");
     assert.eq(columnScanStages.length, 1, `Could not find 'COLUMN_SCAN' stage: ${tojson(explain)}`);
+    const columnScan = columnScanStages[0];
+
+    assertArrayEq({
+        actual: columnScan.paths,
+        expected: ["_id", "y.b.c"],
+        extraErrorMsg: 'Paths used by column scan stage'
+    });
 
     // Verifying column fields.
-    const columns = columnScanStages[0].columns;
-    assert.eq(Object.keys(columns).length, 3, 'Number of columns should be 3.');
+    const columns = columnScan.columns;
+    assert.eq(
+        Object.keys(columns).length, 3, `Should access 3 columns but accessed: ${tojson(columns)}`);
     const expectedColumns = {
-        "<<RowId Column>>": {"numNexts": 5, "numSeeks": 1, "usedInOutput": false},
-        "_id": {"numNexts": 5, "numSeeks": 1, "usedInOutput": true},
-        "y.b.c": {"numNexts": 2, "numSeeks": 1, "usedInOutput": true},
+        "<<RowId Column>>": {"numNexts": docs.length, "numSeeks": 1, "usedInOutput": false},
+        "_id": {"numNexts": docs.length - 1, "numSeeks": 1, "usedInOutput": true},
+        "y.b.c": {"numNexts": 1, "numSeeks": 1, "usedInOutput": true},
     };
     for (const [columnName, expectedObj] of Object.entries(expectedColumns)) {
         assert.eq(sortDoc(columns[columnName]),
@@ -118,22 +159,57 @@ assert.commandWorked(coll.insertMany(docs));
     }
 
     // Verifying parent column fields.
-    const parentColumns = columnScanStages[0].parentColumns;
-    assert.eq(Object.keys(parentColumns).length, 2, 'Number of parent columns should be 2.');
+    const parentColumns = columnScan.parentColumns;
+    assert.eq(Object.keys(parentColumns).length,
+              2,
+              `Should access 1 parent column but accessed: ${tojson(parentColumns)}`);
     // Expecting 3 lookups on the "y" parent column for the 2 docs which didn't have a "y.b"
-    // value and 1 unsuccessful call to seek.
+    // value and 1 unsuccessful call to seek. We should not iterate over parent columns.
     assert.eq(sortDoc(parentColumns.y),
               {"numNexts": 0, "numSeeks": 3},
-              'Mismatching entry for parent column \'y\'');
+              'Mismatching entry for parent column "y"');
     // Expecting 4 lookups on the "y.b" parent column for the 3 docs that didn't have a "y.b.c"
     // value and 1 unsuccessful call to seek.
     assert.eq(sortDoc(parentColumns['y.b']),
               {"numNexts": 0, "numSeeks": 4},
-              'Mismatching entry for parent column \'y.b\'');
+              'Mismatching entry for parent column "y.b"');
+
+    // 'totalKeysExamined' should be equal to the sum of "next" and "seek" calls across all columns.
+    assert.eq(explain.executionStats.totalKeysExamined,
+              columns["<<RowId Column>>"].numNexts + columns["<<RowId Column>>"].numSeeks +
+                  columns["_id"].numNexts + columns["_id"].numSeeks + columns["y.b.c"].numNexts +
+                  columns["y.b.c"].numSeeks + parentColumns["y.b"].numNexts +
+                  parentColumns["y.b"].numSeeks + parentColumns["y"].numNexts +
+                  parentColumns["y"].numSeeks,
+              `Mismatch in totalKeysExamined.`);
+
+    assert.eq(columnScan.numRowStoreFetches, 0, 'Mismatch in numRowStoreFetches');
+    assert.eq(columnScan.nReturned, docs.length, 'nReturned: should return all docs');
+}());
 
-    assert.eq(explain.executionStats.totalKeysExamined, 22, `Mismatch in totalKeysExamined.`);
-    assert.eq(
-        columnScanStages[0].numRowStoreFetches, 0, 'Number of row store fetches should be 0.');
-    assert.eq(columnScanStages[0].nReturned, 5, 'Number returned should be 5.');
+// Test fallback to the row store.
+(function testWithFallbackToRowstore() {
+    const coll_rowstore = db.column_scan_explain_rowstore;
+    coll_rowstore.drop();
+    assert.commandWorked(coll_rowstore.createIndex({"$**": "columnstore"}));
+
+    const docs_rowstore = [
+        {_id: 0, x: {y: 42, z: 0}},
+        {_id: 1, x: {y: {z: 0}}},  // fallback
+        {_id: 2, x: [{y: 42}, 0]},
+        {_id: 3, x: [{y: 42}, {z: 0}]},
+        {_id: 4, x: [{y: [42, 43]}, {z: 0}]},
+        {_id: 5, x: [{y: [42, {z: 0}]}, {z: 0}]},  // fallback
+        {_id: 6, x: 42},
+    ];
+    coll_rowstore.insert(docs_rowstore);
+    const explain = coll_rowstore.find({}, {_id: 0, "x.y": 1}).explain("executionStats");
+
+    const columnScanStages = getSbePlanStages(explain, "COLUMN_SCAN");
+    assert.eq(columnScanStages.length, 1, `Could not find 'COLUMN_SCAN' stage: ${tojson(explain)}`);
+    const columnScan = columnScanStages[0];
+
+    assert.eq(columnScan.numRowStoreFetches, 2, 'Mismatch in numRowStoreFetches');
+    assert.eq(columnScan.nReturned, docs_rowstore.length, 'nReturned: should return all docs');
 }());
 }());
author	Irina Yatsenko <irina.yatsenko@mongodb.com>	2022-08-04 22:50:44 +0000
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2022-08-04 23:58:18 +0000
commit	ede6b2adff822dd767cef40c5e1351ee8ed27aaa (patch)
tree	2bffb9eb6be4bcf887914ed5af0c5972a4a75708 /jstests
parent	441237232383286003f6cf231889fffe93225317 (diff)
download	mongo-ede6b2adff822dd767cef40c5e1351ee8ed27aaa.tar.gz