diff options
author | Charlie Swanson <charlie.swanson@mongodb.com> | 2022-08-04 13:05:28 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-08-04 14:07:38 +0000 |
commit | 6ddb2b3ffedf07ddab76da3d89902739a7faedcf (patch) | |
tree | 150d5b5630ca5b0def3bc1c5985f37ad3841ea1a | |
parent | 78e3577ee036a3573386adef986e076767e54624 (diff) | |
download | mongo-6ddb2b3ffedf07ddab76da3d89902739a7faedcf.tar.gz |
SERVER-62985 Add planning logic to support hinting a columnar index
-rw-r--r-- | jstests/core/notablescan.js | 4 | ||||
-rw-r--r-- | jstests/core/notablescan_capped.js | 5 | ||||
-rw-r--r-- | jstests/core/projection_semantics.js | 343 | ||||
-rw-r--r-- | jstests/noPassthroughWithMongod/column_index_skeleton.js | 556 | ||||
-rw-r--r-- | src/mongo/db/query/get_executor.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner.cpp | 438 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_columnar_test.cpp | 189 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_params.h | 2 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_test_lib.cpp | 44 | ||||
-rw-r--r-- | src/mongo/db/query/query_solution.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/query/query_solution.h | 8 | ||||
-rw-r--r-- | src/mongo/db/query/util/set_util.h | 46 |
12 files changed, 732 insertions, 907 deletions
diff --git a/jstests/core/notablescan.js b/jstests/core/notablescan.js index baef5d56ae6..ddcf7ba99a8 100644 --- a/jstests/core/notablescan.js +++ b/jstests/core/notablescan.js @@ -44,8 +44,8 @@ try { err = assert.throws(function() { t.find({a: 1}).hint({$natural: 1}).toArray(); }); - assert.includes(err.toString(), - "hint $natural is not allowed, because 'notablescan' is enabled"); + assert.includes(err.toString(), "$natural"); + assert.includes(err.toString(), "notablescan"); t.createIndex({a: 1}); assert.eq(0, t.find({a: 1, b: 1}).itcount()); diff --git a/jstests/core/notablescan_capped.js b/jstests/core/notablescan_capped.js index 6ae04f58bb8..5dad30552f1 100644 --- a/jstests/core/notablescan_capped.js +++ b/jstests/core/notablescan_capped.js @@ -25,9 +25,8 @@ try { err = assert.throws(function() { t.find({a: 1}).tailable(true).next(); }); - assert.includes( - err.toString(), - "Running with 'notablescan', so tailable cursors (which always do a table scan) are not allowed"); + assert.includes(err.toString(), "tailable"); + assert.includes(err.toString(), "notablescan"); } finally { // We assume notablescan was false before this test started and restore that diff --git a/jstests/core/projection_semantics.js b/jstests/core/projection_semantics.js index 42605b3df49..71808561f47 100644 --- a/jstests/core/projection_semantics.js +++ b/jstests/core/projection_semantics.js @@ -1,24 +1,72 @@ /** * Tests the behavior of projection for dotted paths, including edge cases where the path only * sometimes exists. + * + * TODO SERVER-63947 these tags shouldn't be necessary if we remove the explicit columnstore index + * mention. + * @tags: [ + * # columnstore indexes are new in 6.1. + * requires_fcv_61, + * # We could potentially need to resume an index build in the event of a stepdown, which is not + * # yet implemented. + * does_not_support_stepdowns, + * # Columnstore indexes are incompatible with clustered collections. + * incompatible_with_clustered_collection, + * ] */ (function() { "use strict"; +load("jstests/libs/sbe_util.js"); // For checkSBEEnabled. +const columnstoreEnabled = + checkSBEEnabled(db, ["featureFlagColumnstoreIndexes", "featureFlagSbeFull"]); + const coll = db.projection_semantics; +coll.drop(); + +let globalIdCounter = 0; + +function assertCommandWorkedOrFailedWithCode(cmdRes, code) { + if (!cmdRes.ok) { + assert.assertFailedWithCode(cmdRes, code); + } + assert.commandWorked(cmdRes); +} // Tests that when 'projection' is applied to 'input', we get 'expectedOutput'. // Tests that this remains true if indexes are added, or if we use aggregation instead of find. -function testInputOutput({input, projection, expectedOutput, interestingIndexes = []}) { - coll.drop(); +function testInputOutput( + {input, projection, expectedOutput, interestingIndexes = [], excludeColumnStore = false}) { + if (!input.hasOwnProperty("_id")) { + input = Object.merge({_id: ++globalIdCounter}, input); + } assert.commandWorked(coll.insert(input)); - assert.docEq(coll.findOne({}, projection), expectedOutput); + assert.docEq( + coll.find({_id: input._id}, projection).limit(1).hint({$natural: 1}).toArray()[0], + expectedOutput, + () => + tojson(coll.find({_id: input._id}, projection).limit(1).hint({$natural: 1}).explain())); for (let indexSpec of interestingIndexes) { - assert.commandWorked(coll.createIndex(indexSpec)); - assert.docEq(coll.find({}, projection).hint(indexSpec).toArray()[0], expectedOutput); - assert.commandWorked(coll.dropIndex(indexSpec)); + assertCommandWorkedOrFailedWithCode(coll.createIndex(indexSpec), + ErrorCodes.IndexAlreadyExists); + assert.docEq(coll.find({_id: input._id}, projection).hint(indexSpec).toArray()[0], + expectedOutput); + } + if (columnstoreEnabled && !excludeColumnStore) { + const columnStore = {"$**": "columnstore"}; + assertCommandWorkedOrFailedWithCode(coll.createIndex(columnStore), + ErrorCodes.IndexAlreadyExists); + assert.docEq(coll.find({_id: input._id}, projection).hint(columnStore).toArray()[0], + expectedOutput, + () => tojson(coll.find({_id: input._id}, projection) + .hint(columnStore) + .explain("executionStats"))); } - assert.docEq(coll.aggregate([{$project: projection}]).toArray()[0], expectedOutput); + + assert.docEq( + coll.aggregate([{$match: {_id: input._id}}, {$project: projection}], {hint: {$natural: 1}}) + .toArray()[0], + expectedOutput); } // The basics: what happens when I include a top-level field? @@ -32,21 +80,28 @@ function testInputOutput({input, projection, expectedOutput, interestingIndexes }); // Test some basic "normal" cases. - testIncludeA({_id: 0, a: "demo", b: "other", x: "extra"}, {_id: 0, a: "demo"}); - testIncludeA({_id: 1, a: "demo", aWSuffix: "other", 'a.b': "extra"}, {_id: 1, a: "demo"}); - testIncludeA({_id: 2, a: null, b: "other", x: "extra"}, {_id: 2, a: null}); + testIncludeA({_id: ++globalIdCounter, a: "demo", b: "other", x: "extra"}, + {_id: globalIdCounter, a: "demo"}); + testIncludeA({_id: ++globalIdCounter, a: "demo", aWSuffix: "other", 'a.b': "extra"}, + {_id: globalIdCounter, a: "demo"}); + testIncludeA({_id: ++globalIdCounter, a: null, b: "other", x: "extra"}, + {_id: globalIdCounter, a: null}); // Test including "a" when "a" is missing/not present. // TODO SERVER-23229 this will return different results if there is a covering index, so here // but not elsewhere we don't use any "interestingIndexes". testInputOutput({ - input: {_id: 0, b: "other", x: "extra"}, + input: {_id: ++globalIdCounter, b: "other", x: "extra"}, + projection: {a: 1}, + expectedOutput: {_id: globalIdCounter}, + interestingIndexes: [] + }); + testInputOutput({ + input: {_id: ++globalIdCounter}, projection: {a: 1}, - expectedOutput: {_id: 0}, + expectedOutput: {_id: globalIdCounter}, interestingIndexes: [] }); - testInputOutput( - {input: {_id: 0}, projection: {a: 1}, expectedOutput: {_id: 0}, interestingIndexes: []}); // Test a range of interesting values for "a". We expect everything to be preserved unmodified. const testIdentityInclusionA = (input) => testInputOutput({ @@ -55,20 +110,20 @@ function testInputOutput({input, projection, expectedOutput, interestingIndexes expectedOutput: input, interestingIndexes: [{_id: 1, a: 1}, {a: 1, _id: 1}] }); - testIdentityInclusionA({_id: 1, a: null}); - testIdentityInclusionA({_id: 2, a: undefined}); - testIdentityInclusionA({_id: 3, a: {}}); - testIdentityInclusionA({_id: 4, a: []}); - testIdentityInclusionA({_id: 5, a: {x: 1, b: "scalar"}}); - testIdentityInclusionA({_id: 6, a: "scalar"}); - testIdentityInclusionA({_id: 7, a: {b: {}}}); - testIdentityInclusionA({_id: 8, a: [null]}); - testIdentityInclusionA({_id: 9, a: ["scalar"]}); - testIdentityInclusionA({_id: 10, a: [[]]}); - testIdentityInclusionA({_id: 11, a: [{}]}); - testIdentityInclusionA({_id: 12, a: [1, {}, 2]}); - testIdentityInclusionA({_id: 13, a: [[1, 2], [{}], 2]}); - testIdentityInclusionA({_id: 14, a: [{b: "scalar"}]}); + testIdentityInclusionA({_id: ++globalIdCounter, a: null}); + testIdentityInclusionA({_id: ++globalIdCounter, a: undefined}); + testIdentityInclusionA({_id: ++globalIdCounter, a: {}}); + testIdentityInclusionA({_id: ++globalIdCounter, a: []}); + testIdentityInclusionA({_id: ++globalIdCounter, a: {x: 1, b: "scalar"}}); + testIdentityInclusionA({_id: ++globalIdCounter, a: "scalar"}); + testIdentityInclusionA({_id: ++globalIdCounter, a: {b: {}}}); + testIdentityInclusionA({_id: ++globalIdCounter, a: [null]}); + testIdentityInclusionA({_id: ++globalIdCounter, a: ["scalar"]}); + testIdentityInclusionA({_id: ++globalIdCounter, a: [[]]}); + testIdentityInclusionA({_id: ++globalIdCounter, a: [{}]}); + testIdentityInclusionA({_id: ++globalIdCounter, a: [1, {}, 2]}); + testIdentityInclusionA({_id: ++globalIdCounter, a: [[1, 2], [{}], 2]}); + testIdentityInclusionA({_id: ++globalIdCounter, a: [{b: "scalar"}]}); // Now test with the same documents but excluding the "_id" field. const testIncludeOnlyA = (input, output) => testInputOutput({ @@ -82,29 +137,34 @@ function testInputOutput({input, projection, expectedOutput, interestingIndexes }); // The "basics" again. - testIncludeOnlyA({_id: 0, a: "demo", b: "other", x: "extra"}, {a: "demo"}); - testIncludeOnlyA({_id: 1, a: "demo", aWSuffix: "other", 'a.b': "extra"}, {a: "demo"}); - testIncludeOnlyA({_id: 2, a: null, b: "other", x: "extra"}, {a: null}); + testIncludeOnlyA({_id: ++globalIdCounter, a: "demo", b: "other", x: "extra"}, {a: "demo"}); + testIncludeOnlyA({_id: ++globalIdCounter, a: "demo", aWSuffix: "other", 'a.b': "extra"}, + {a: "demo"}); + testIncludeOnlyA({_id: ++globalIdCounter, a: null, b: "other", x: "extra"}, {a: null}); // Missing 'a' value again. // TODO SERVER-23229 this will return different results if there is a covering index, so here // but not elsewhere we don't use any "interestingIndexes". testInputOutput({ - input: {_id: 0, b: "other", x: "extra"}, + input: {_id: ++globalIdCounter, b: "other", x: "extra"}, + projection: {a: 1, _id: 0}, + expectedOutput: {}, + interestingIndexes: [] + }); + testInputOutput({ + input: {_id: ++globalIdCounter}, projection: {a: 1, _id: 0}, expectedOutput: {}, interestingIndexes: [] }); - testInputOutput( - {input: {_id: 0}, projection: {a: 1, _id: 0}, expectedOutput: {}, interestingIndexes: []}); // Just a couple of the cases above to confirm the same behavior just without the _id. - testIncludeOnlyA({_id: 1, a: null}, {a: null}); - testIncludeOnlyA({_id: 3, a: {}}, {a: {}}); - testIncludeOnlyA({_id: 4, a: []}, {a: []}); - testIncludeOnlyA({_id: 5, a: {x: 1, b: "scalar"}}, {a: {x: 1, b: "scalar"}}); - testIncludeOnlyA({_id: 7, a: {b: {}}}, {a: {b: {}}}); - testIncludeOnlyA({_id: 14, a: [{b: "scalar"}]}, {a: [{b: "scalar"}]}); + testIncludeOnlyA({_id: ++globalIdCounter, a: null}, {a: null}); + testIncludeOnlyA({_id: ++globalIdCounter, a: {}}, {a: {}}); + testIncludeOnlyA({_id: ++globalIdCounter, a: []}, {a: []}); + testIncludeOnlyA({_id: ++globalIdCounter, a: {x: 1, b: "scalar"}}, {a: {x: 1, b: "scalar"}}); + testIncludeOnlyA({_id: ++globalIdCounter, a: {b: {}}}, {a: {b: {}}}); + testIncludeOnlyA({_id: ++globalIdCounter, a: [{b: "scalar"}]}, {a: [{b: "scalar"}]}); }()); // Now test one level of nesting - a single "dot" in the path. @@ -119,17 +179,21 @@ function testInputOutput({input, projection, expectedOutput, interestingIndexes // Test some basic "normal" cases. // Test that it excludes extra fields at the root and at the sub-document. - testIncludeADotB({_id: 0, a: {b: "demo", y: "extra"}, x: "extra"}, {_id: 0, a: {b: "demo"}}); + testIncludeADotB({_id: ++globalIdCounter, a: {b: "demo", y: "extra"}, x: "extra"}, + {_id: globalIdCounter, a: {b: "demo"}}); // Test that (at least for now) the dotted path doesn't work great here. - testIncludeADotB({_id: 1, a: {b: "demo"}, 'a.b': "extra"}, {_id: 1, a: {b: "demo"}}); + testIncludeADotB({_id: ++globalIdCounter, a: {b: "demo"}, 'a.b': "extra"}, + {_id: globalIdCounter, a: {b: "demo"}}); // Test that '_id' within a sub-document is excluded. - testIncludeADotB({_id: 2, a: {b: "demo", _id: "extra"}}, {_id: 2, a: {b: "demo"}}); + testIncludeADotB({_id: ++globalIdCounter, a: {b: "demo", _id: "extra"}}, + {_id: globalIdCounter, a: {b: "demo"}}); // Test array use case. - testIncludeADotB({_id: 3, a: [{b: 1, c: 1}, {b: 2, c: 2}], x: "extra"}, - {_id: 3, a: [{b: 1}, {b: 2}]}); + testIncludeADotB({_id: ++globalIdCounter, a: [{b: 1, c: 1}, {b: 2, c: 2}], x: "extra"}, + {_id: globalIdCounter, a: [{b: 1}, {b: 2}]}); // Test that a missing field within an object in an array will show up as an empty object. - testIncludeADotB({_id: 4, a: [{b: 1, c: 1}, {c: 2}], x: "extra"}, {_id: 4, a: [{b: 1}, {}]}); + testIncludeADotB({_id: ++globalIdCounter, a: [{b: 1, c: 1}, {c: 2}], x: "extra"}, + {_id: globalIdCounter, a: [{b: 1}, {}]}); // Test including "a.b" when "a.b" is missing/not present. // @@ -144,10 +208,10 @@ function testInputOutput({input, projection, expectedOutput, interestingIndexes const testADotBNoIndexes = (input, output) => testInputOutput( {input: input, projection: {'a.b': 1}, expectedOutput: output, interestingIndexes: []}); - testADotBNoIndexes({_id: 0, b: "other", x: "extra"}, {_id: 0}); - testADotBNoIndexes({_id: 1}, {_id: 1}); - testADotBNoIndexes({_id: 2, a: {}}, {_id: 2, a: {}}); - testADotBNoIndexes({_id: 3, a: "scalar"}, {_id: 3}); + testADotBNoIndexes({_id: ++globalIdCounter, b: "other", x: "extra"}, {_id: globalIdCounter}); + testADotBNoIndexes({_id: ++globalIdCounter}, {_id: globalIdCounter}); + testADotBNoIndexes({_id: ++globalIdCounter, a: {}}, {_id: globalIdCounter, a: {}}); + testADotBNoIndexes({_id: ++globalIdCounter, a: "scalar"}, {_id: globalIdCounter}); const testIncludeOnlyADotB = (input, output) => testInputOutput({ input: input, @@ -157,30 +221,34 @@ function testInputOutput({input, projection, expectedOutput, interestingIndexes }); // The "basics" again - no _id this time. - testIncludeOnlyADotB({_id: 0, a: {b: "demo", y: "extra"}, x: "extra"}, {a: {b: "demo"}}); - testIncludeOnlyADotB({_id: 1, a: {b: "demo"}, 'a.b': "extra"}, {a: {b: "demo"}}); - testIncludeOnlyADotB({_id: 2, a: {b: "demo", _id: "extra"}}, {a: {b: "demo"}}); + testIncludeOnlyADotB({_id: ++globalIdCounter, a: {b: "demo", y: "extra"}, x: "extra"}, + {a: {b: "demo"}}); + testIncludeOnlyADotB({_id: ++globalIdCounter, a: {b: "demo"}, 'a.b': "extra"}, + {a: {b: "demo"}}); + testIncludeOnlyADotB({_id: ++globalIdCounter, a: {b: "demo", _id: "extra"}}, {a: {b: "demo"}}); - testIncludeOnlyADotB({_id: 3, a: [{b: 1, c: 1}, {b: 2, c: 2}], x: "extra"}, + testIncludeOnlyADotB({_id: ++globalIdCounter, a: [{b: 1, c: 1}, {b: 2, c: 2}], x: "extra"}, {a: [{b: 1}, {b: 2}]}); - testIncludeOnlyADotB({_id: 4, a: [{b: 1, c: 1}, {c: 2}], x: "extra"}, {a: [{b: 1}, {}]}); + testIncludeOnlyADotB({_id: ++globalIdCounter, a: [{b: 1, c: 1}, {c: 2}], x: "extra"}, + {a: [{b: 1}, {}]}); // More cases where 'a.b' doesn't exist - but with arrays this time. - testIncludeOnlyADotB({_id: 4, a: [], x: "extra"}, {a: []}); - testIncludeOnlyADotB({_id: 5, a: [{}, {}], x: "extra"}, {a: [{}, {}]}); - testIncludeOnlyADotB({_id: 6, a: ["scalar", "scalar"], x: "extra"}, {a: []}); - testIncludeOnlyADotB({_id: 7, a: [null]}, {a: []}); + testIncludeOnlyADotB({_id: ++globalIdCounter, a: [], x: "extra"}, {a: []}); + testIncludeOnlyADotB({_id: ++globalIdCounter, a: [{}, {}], x: "extra"}, {a: [{}, {}]}); + testIncludeOnlyADotB({_id: ++globalIdCounter, a: ["scalar", "scalar"], x: "extra"}, {a: []}); + testIncludeOnlyADotB({_id: ++globalIdCounter, a: [null]}, {a: []}); // This is an interesting case: the scalars are ignored but the shadow documents are preserved. - testIncludeOnlyADotB({_id: 8, a: ["scalar", {}, "scalar", {c: 1}], x: "extra"}, {a: [{}, {}]}); + testIncludeOnlyADotB({_id: ++globalIdCounter, a: ["scalar", {}, "scalar", {c: 1}], x: "extra"}, + {a: [{}, {}]}); // Further interest: the array within the array is preserved. - testIncludeOnlyADotB({_id: 9, a: [[]]}, {a: [[]]}); + testIncludeOnlyADotB({_id: ++globalIdCounter, a: [[]]}, {a: [[]]}); // But not the scalar elements of it. - testIncludeOnlyADotB({_id: 10, a: [[1, 2, 3]]}, {a: [[]]}); + testIncludeOnlyADotB({_id: ++globalIdCounter, a: [[1, 2, 3]]}, {a: [[]]}); // But if there's a "b" again we see that. - testIncludeOnlyADotB({_id: 10, a: [[1, {b: 1}, {b: 2, c: 2}, "scalar"]]}, + testIncludeOnlyADotB({_id: ++globalIdCounter, a: [[1, {b: 1}, {b: 2, c: 2}, "scalar"]]}, {a: [[{b: 1}, {b: 2}]]}); testIncludeOnlyADotB({ - _id: 10, + _id: ++globalIdCounter, a: [ ["x", {b: 1}, {b: 2, c: 2}, "x"], [[{b: 1}]], @@ -194,7 +262,7 @@ function testInputOutput({input, projection, expectedOutput, interestingIndexes [{b: 1}, [{b: 2}], [[{b: [2]}]]], ] }); - testIncludeOnlyADotB({_id: 11, a: [[], [[], [], [1], [{c: 1}]], {b: 1}]}, { + testIncludeOnlyADotB({_id: ++globalIdCounter, a: [[], [[], [], [1], [{c: 1}]], {b: 1}]}, { a: [ [], [[], [], [], [{}]], @@ -210,17 +278,20 @@ function testInputOutput({input, projection, expectedOutput, interestingIndexes interestingIndexes: [{a: 1}, {'a.b': 1}, {'a.c': 1}, {'a.b': 1, 'a.c': 1}, {'a.b': 1, 'a.c': -1}] }); - testIncludeOnlyADotBAndADotC({_id: 0, a: {b: "scalar", c: "scalar", d: "extra"}}, - {a: {b: "scalar", c: "scalar"}}); - testIncludeOnlyADotBAndADotC({_id: 1, a: [{b: 1, c: 2, d: 3}, {b: 4, c: 5, d: 6}]}, - {a: [{b: 1, c: 2}, {b: 4, c: 5}]}); + testIncludeOnlyADotBAndADotC( + {_id: ++globalIdCounter, a: {b: "scalar", c: "scalar", d: "extra"}}, + {a: {b: "scalar", c: "scalar"}}); + testIncludeOnlyADotBAndADotC( + {_id: ++globalIdCounter, a: [{b: 1, c: 2, d: 3}, {b: 4, c: 5, d: 6}]}, + {a: [{b: 1, c: 2}, {b: 4, c: 5}]}); // Array cases where one or both of the paths don't exist. - testIncludeOnlyADotBAndADotC({_id: 5, a: [{b: 1, c: 2}, {b: 3, d: 4}]}, + testIncludeOnlyADotBAndADotC({_id: ++globalIdCounter, a: [{b: 1, c: 2}, {b: 3, d: 4}]}, {a: [{b: 1, c: 2}, {b: 3}]}); - testIncludeOnlyADotBAndADotC({_id: 6, a: [{c: 1, d: 2}, {b: 3, d: 4}]}, {a: [{c: 1}, {b: 3}]}); - testIncludeOnlyADotBAndADotC({_id: 7, a: []}, {a: []}); - testIncludeOnlyADotBAndADotC({_id: 8, a: [{b: 1, c: 2}, "extra", {b: 3, c: 4}]}, + testIncludeOnlyADotBAndADotC({_id: ++globalIdCounter, a: [{c: 1, d: 2}, {b: 3, d: 4}]}, + {a: [{c: 1}, {b: 3}]}); + testIncludeOnlyADotBAndADotC({_id: ++globalIdCounter, a: []}, {a: []}); + testIncludeOnlyADotBAndADotC({_id: ++globalIdCounter, a: [{b: 1, c: 2}, "extra", {b: 3, c: 4}]}, {a: [{b: 1, c: 2}, {b: 3, c: 4}]}); // Non-array cases where one or both of the paths don't exist. @@ -234,9 +305,11 @@ function testInputOutput({input, projection, expectedOutput, interestingIndexes interestingIndexes: [] }); - testIncludeADotBAndCNoIndexes({_id: 2, a: {b: "scalar", d: "extra"}}, {a: {b: "scalar"}}); - testIncludeADotBAndCNoIndexes({_id: 3, a: {c: "scalar", d: "extra"}}, {a: {c: "scalar"}}); - testIncludeADotBAndCNoIndexes({_id: 4, a: {d: "extra"}}, {a: {}}); + testIncludeADotBAndCNoIndexes({_id: ++globalIdCounter, a: {b: "scalar", d: "extra"}}, + {a: {b: "scalar"}}); + testIncludeADotBAndCNoIndexes({_id: ++globalIdCounter, a: {c: "scalar", d: "extra"}}, + {a: {c: "scalar"}}); + testIncludeADotBAndCNoIndexes({_id: ++globalIdCounter, a: {d: "extra"}}, {a: {}}); }()); (function testInclusionLevelsOfNesting() { @@ -428,48 +501,60 @@ function testInputOutput({input, projection, expectedOutput, interestingIndexes // Now test the exclusion semantics. This part is a lot smaller since a lot of the behaviors mirror // inclusion projection. (function testExclusionSemantics() { + // Need a new helper to exclude column store coverage, since column store indexes cannot be used + // for exclusion projections. + const testInputOutputExclusion = ({input, projection, expectedOutput}) => testInputOutput({ + input: input, + projection: projection, + expectedOutput: expectedOutput, + excludeColumnStore: true + }); + // Test some basic top-level flat examples. - testInputOutput({ - input: {_id: 0, a: 1, b: 1, c: 1}, + testInputOutputExclusion({ + input: {_id: ++globalIdCounter, a: 1, b: 1, c: 1}, projection: {a: 0}, - expectedOutput: {_id: 0, b: 1, c: 1} + expectedOutput: {_id: globalIdCounter, b: 1, c: 1} }); - testInputOutput({ - input: {_id: 0, a: 1, b: 1, c: 1}, + testInputOutputExclusion({ + input: {_id: ++globalIdCounter, a: 1, b: 1, c: 1}, projection: {a: 0, b: 0}, - expectedOutput: {_id: 0, c: 1} + expectedOutput: {_id: globalIdCounter, c: 1} }); // Test some dotted examples. - testInputOutput({ - input: {_id: 0, a: {b: 1, c: 1, d: 1}, x: {y: 1, z: 1}}, + testInputOutputExclusion({ + input: {_id: ++globalIdCounter, a: {b: 1, c: 1, d: 1}, x: {y: 1, z: 1}}, projection: {"a.b": 0}, - expectedOutput: {_id: 0, a: {c: 1, d: 1}, x: {y: 1, z: 1}} + expectedOutput: {_id: globalIdCounter, a: {c: 1, d: 1}, x: {y: 1, z: 1}} }); // One notable difference between inclusion and exclusion projections is that parent's scalar // values remain untouched during an exclusion projection. The "scalar" here remains. In an // inclusion projection, these would disappear. - testInputOutput({ - input: {_id: 0, a: ["scalar", {b: 1, c: 1, d: 1}, {b: 2, c: 2}, {b: 3}]}, + testInputOutputExclusion({ + input: {_id: ++globalIdCounter, a: ["scalar", {b: 1, c: 1, d: 1}, {b: 2, c: 2}, {b: 3}]}, projection: {"a.b": 0}, - expectedOutput: {_id: 0, a: ["scalar", {c: 1, d: 1}, {c: 2}, {}]} + expectedOutput: {_id: globalIdCounter, a: ["scalar", {c: 1, d: 1}, {c: 2}, {}]} }); - testInputOutput({ - input: {_id: 0, a: [{b: 1, c: 1, d: 1}, {b: 2, c: 2}, {b: 3}]}, + testInputOutputExclusion({ + input: {_id: ++globalIdCounter, a: [{b: 1, c: 1, d: 1}, {b: 2, c: 2}, {b: 3}]}, projection: {"a.b": 0, "a.c": 0}, - expectedOutput: {_id: 0, a: [{d: 1}, {}, {}]} + expectedOutput: {_id: globalIdCounter, a: [{d: 1}, {}, {}]} }); - testInputOutput({ - input: {_id: 0, a: [{b: 1, c: 1, d: 1}, {b: 2, c: 2}, {b: 3}]}, + testInputOutputExclusion({ + input: {_id: ++globalIdCounter, a: [{b: 1, c: 1, d: 1}, {b: 2, c: 2}, {b: 3}]}, projection: {"a.b": 0, "a.c": 0}, - expectedOutput: {_id: 0, a: [{d: 1}, {}, {}]} + expectedOutput: {_id: globalIdCounter, a: [{d: 1}, {}, {}]} }); - testInputOutput( - {input: {_id: 0, a: []}, projection: {"a.b": 0}, expectedOutput: {_id: 0, a: []}}); - testInputOutput({ - input: {_id: 0, a: [[], [{b: [[1, 2], {c: 1, d: 1}]}]]}, + testInputOutputExclusion({ + input: {_id: ++globalIdCounter, a: []}, + projection: {"a.b": 0}, + expectedOutput: {_id: globalIdCounter, a: []} + }); + testInputOutputExclusion({ + input: {_id: ++globalIdCounter, a: [[], [{b: [[1, 2], {c: 1, d: 1}]}]]}, projection: {"a.b.c": 0}, - expectedOutput: {_id: 0, a: [[], [{b: [[1, 2], {d: 1}]}]]} + expectedOutput: {_id: globalIdCounter, a: [[], [{b: [[1, 2], {d: 1}]}]]} }); }()); @@ -478,35 +563,37 @@ function testInputOutput({input, projection, expectedOutput, interestingIndexes (function testComputedProjections() { // Test some basic top-level flat examples. testInputOutput({ - input: {_id: 0, a: 1, b: 1, c: 1}, + input: {_id: ++globalIdCounter, a: 1, b: 1, c: 1}, projection: {a: {$literal: 0}}, - expectedOutput: {_id: 0, a: 0} + expectedOutput: {_id: globalIdCounter, a: 0} }); testInputOutput({ - input: {_id: 0, a: 1, b: 1, c: 1}, + input: {_id: ++globalIdCounter, a: 1, b: 1, c: 1}, projection: {a: {$literal: 0}, b: "string"}, - expectedOutput: {_id: 0, a: 0, b: "string"} + expectedOutput: {_id: globalIdCounter, a: 0, b: "string"} }); // Test some dotted examples. testInputOutput({ - input: {_id: 0, a: {b: 1, c: 1, d: 1}, x: {y: 1, z: 1}}, + input: {_id: ++globalIdCounter, a: {b: 1, c: 1, d: 1}, x: {y: 1, z: 1}}, projection: {"a.b": "new value"}, - expectedOutput: {_id: 0, a: {b: "new value"}} + expectedOutput: {_id: globalIdCounter, a: {b: "new value"}} }); // One notable difference for computed projections is that they overwrite scalars rather than // ignoring or removing them. testInputOutput({ - input: {_id: 0, a: ["scalar", {b: 1, c: 1, d: 1}, {b: 2, c: 2}, {b: 3}]}, + input: {_id: ++globalIdCounter, a: ["scalar", {b: 1, c: 1, d: 1}, {b: 2, c: 2}, {b: 3}]}, projection: {"a.b": "new value"}, - expectedOutput: - {_id: 0, a: [{b: "new value"}, {b: "new value"}, {b: "new value"}, {b: "new value"}]} + expectedOutput: { + _id: globalIdCounter, + a: [{b: "new value"}, {b: "new value"}, {b: "new value"}, {b: "new value"}] + } }); testInputOutput({ - input: {_id: 0, a: [{b: 1, c: 1, d: 1}, {b: 2, c: 2}, {b: 3}]}, + input: {_id: ++globalIdCounter, a: [{b: 1, c: 1, d: 1}, {b: 2, c: 2}, {b: 3}]}, projection: {"a.b": "new value", "a.c": "new C"}, expectedOutput: { - _id: 0, + _id: globalIdCounter, a: [ {b: "new value", c: "new C"}, {b: "new value", c: "new C"}, @@ -515,10 +602,10 @@ function testInputOutput({input, projection, expectedOutput, interestingIndexes } }); testInputOutput({ - input: {_id: 0, a: [{b: 1, c: 1, d: 1}, {b: 2, c: 2}, {b: 3}]}, + input: {_id: ++globalIdCounter, a: [{b: 1, c: 1, d: 1}, {b: 2, c: 2}, {b: 3}]}, projection: {"a.b": "new value", "a.c": "new C"}, expectedOutput: { - _id: 0, + _id: globalIdCounter, a: [ {b: "new value", c: "new C"}, {b: "new value", c: "new C"}, @@ -527,30 +614,44 @@ function testInputOutput({input, projection, expectedOutput, interestingIndexes } }); testInputOutput({ - input: {_id: 0, a: []}, + input: {_id: ++globalIdCounter, a: []}, projection: {"a.b": "new value"}, - expectedOutput: {_id: 0, a: []} + expectedOutput: {_id: globalIdCounter, a: []} }); // Computed projections will traverse through double arrays and preserve structure. For example, // we preserve two brackets inside the existing 'b: [[1,2]]' and leave the empty array in 'a' // untouched rather than replace it with {b: {c: "new value"}}. testInputOutput({ - input: {_id: 0, a: [[], [{b: [[1, 2], {c: 1, d: 1}]}]]}, + input: {_id: ++globalIdCounter, a: [[], [{b: [[1, 2], {c: 1, d: 1}]}]]}, projection: {"a.b.c": "new value"}, - expectedOutput: - {_id: 0, a: [[], [{b: [[{c: "new value"}, {c: "new value"}], {c: "new value"}]}]]} + expectedOutput: { + _id: globalIdCounter, + a: [[], [{b: [[{c: "new value"}, {c: "new value"}], {c: "new value"}]}]] + } }); }()); // Test some miscellaneous properties of projections. (function testMiscellaneousProjections() { // Test including and excluding _id only. - testInputOutput({input: {_id: 0}, projection: {_id: 1}, expectedOutput: {_id: 0}}); - testInputOutput({input: {_id: 0}, projection: {_id: 0}, expectedOutput: {}}); testInputOutput({ - input: {_id: 0, a: 1, b: 1}, + input: {_id: ++globalIdCounter}, + projection: {_id: 1}, + expectedOutput: {_id: globalIdCounter} + }); + testInputOutput({ + input: {_id: ++globalIdCounter}, + projection: {_id: 0}, + expectedOutput: {}, + // Column store indexes do not support exclusion projections. + excludeColumnStore: true, + }); + testInputOutput({ + input: {_id: ++globalIdCounter, a: 1, b: 1}, projection: {_id: 0}, expectedOutput: {a: 1, b: 1}, + // Column store indexes do not support exclusion projections. + excludeColumnStore: true, }); // Test that you can specify nested paths with dots or as sub-objects and it'll mean the @@ -572,6 +673,8 @@ function testInputOutput({input, projection, expectedOutput, interestingIndexes input: {measurements: {temperature: 20, pressure: 0.7, humidity: 0.4, time: new Date()}}, projection: {measurements: {humidity: 0, time: 0}, _id: 0}, expectedOutput: {measurements: {temperature: 20, pressure: 0.7}}, + // Column store indexes do not support exclusion projections. + excludeColumnStore: true, }); }()); }()); diff --git a/jstests/noPassthroughWithMongod/column_index_skeleton.js b/jstests/noPassthroughWithMongod/column_index_skeleton.js deleted file mode 100644 index 368b9c7b3bc..00000000000 --- a/jstests/noPassthroughWithMongod/column_index_skeleton.js +++ /dev/null @@ -1,556 +0,0 @@ -/** - * Testing of just the query layer's integration for columnar index. - * This test is intended to be temporary. - */ -(function() { -"use strict"; - -load("jstests/libs/fail_point_util.js"); -load("jstests/libs/analyze_plan.js"); -load("jstests/libs/sbe_util.js"); // For checkSBEEnabled. - -const isSBEEnabled = checkSBEEnabled(db, ["featureFlagColumnstoreIndexes", "featureFlagSbeFull"]); - -if (!isSBEEnabled) { - // This test is only relevant when SBE is enabled. - jsTestLog("Skipping columnstore test since either SBE or columnstore are disabled."); - return; -} - -const testDB = db; -const coll = db.column_index_skeleton; -coll.drop(); -assert.commandWorked(coll.createIndex({"$**": "columnstore"})); - -const docs = [ - { - - }, - {"a": null}, - {"a": "scalar"}, - { - "a": { - - } - }, - {"a": {"x": 1, "b": "scalar"}}, - { - "a": { - "b": { - - } - } - }, - { - "a": { - "x": 1, - "b": { - - } - } - }, - {"a": {"x": 1, "b": {"x": 1}}}, - {"a": {"b": {"c": "scalar"}}}, - {"a": {"b": {"c": null}}}, - { - "a": { - "b": { - "c": [ - [1, 2], - [{ - - }], - 2 - ] - } - } - }, - {"a": {"x": 1, "b": {"x": 1, "c": ["scalar"]}}}, - {"a": {"x": 1, "b": {"c": {"x": 1}}}}, - {"a": {"b": []}}, - {"a": {"b": [null]}}, - {"a": {"b": ["scalar"]}}, - {"a": {"b": [[]]}}, - { - "a": { - "b": [ - 1, - { - - }, - 2 - ] - } - }, - { - "a": { - "b": [ - [1, 2], - [{ - - }], - 2 - ] - } - }, - { - "a": { - "x": 1, - "b": [ - [1, 2], - [{ - - }], - 2 - ] - } - }, - {"a": {"b": [{"c": "scalar"}]}}, - {"a": {"b": [{"c": "scalar"}, {"c": "scalar2"}]}}, - { - "a": { - "b": [{ - "c": [ - [1, 2], - [{ - - }], - 2 - ] - }] - } - }, - {"a": {"b": [1, {"c": "scalar"}, 2]}}, - { - "a": { - "b": [ - 1, - { - "c": [ - [1, 2], - [{ - - }], - 2 - ] - }, - 2 - ] - } - }, - { - "a": { - "x": 1, - "b": [ - 1, - { - "c": [ - [1, 2], - [{ - - }], - 2 - ] - }, - 2 - ] - } - }, - {"a": {"b": [[1, 2], [{"c": "scalar"}], 2]}}, - { - "a": { - "b": [ - [1, 2], - [{ - "c": [ - [1, 2], - [{ - - }], - 2 - ] - }], - 2 - ] - } - }, - { - "a": { - "x": 1, - "b": [ - [1, 2], - [{ - "c": [ - [1, 2], - [{ - - }], - 2 - ] - }], - 2 - ] - } - }, - {"a": []}, - {"a": [null]}, - {"a": ["scalar"]}, - {"a": [[]]}, - { - "a": [{ - - }] - }, - { - "a": [ - 1, - { - - }, - 2 - ] - }, - { - "a": [ - [1, 2], - [{ - - }], - 2 - ] - }, - {"a": [{"b": "scalar"}]}, - {"a": [{"b": null}]}, - {"a": [1, {"b": "scalar"}, 2]}, - {"a": [1, {"b": []}, 2]}, - {"a": [1, {"b": [null]}, 2]}, - {"a": [1, {"b": ["scalar"]}, 2]}, - {"a": [1, {"b": [[]]}, 2]}, - {"a": [{"b": []}]}, - {"a": [{"b": ["scalar"]}]}, - {"a": [{"b": [[]]}]}, - { - "a": [{ - "b": { - - } - }] - }, - {"a": [{"b": {"c": "scalar"}}]}, - { - "a": [{ - "b": { - "c": [ - [1, 2], - [{ - - }], - 2 - ] - } - }] - }, - {"a": [{"b": {"x": 1}}]}, - {"a": [{"b": {"x": 1, "c": "scalar"}}]}, - {"a": [{"b": [{"c": "scalar"}]}]}, - {"a": [{"b": [{"c": ["scalar"]}]}]}, - {"a": [{"b": [1, {"c": ["scalar"]}, 2]}]}, - { - "a": [{ - "b": [{ - - }] - }] - }, - { - "a": [{ - "b": [ - [1, 2], - [{ - - }], - 2 - ] - }] - }, - {"a": [{"b": [[1, 2], [{"c": "scalar"}], 2]}]}, - {"a": [{"b": [[1, 2], [{"c": ["scalar"]}], 2]}]}, - { - "a": [ - 1, - { - "b": { - - } - }, - 2 - ] - }, - {"a": [1, {"b": {"c": "scalar"}}, 2]}, - {"a": [1, {"b": {"c": {"x": 1}}}, 2]}, - { - "a": [ - 1, - { - "b": { - "c": [ - 1, - { - - }, - 2 - ] - } - }, - 2 - ] - }, - {"a": [1, {"b": {"x": 1}}, 2]}, - {"a": [1, {"b": {"x": 1, "c": "scalar"}}, 2]}, - {"a": [1, {"b": {"x": 1, "c": [[]]}}, 2]}, - { - "a": [ - 1, - { - "b": { - "x": 1, - "c": [ - 1, - { - - }, - 2 - ] - } - }, - 2 - ] - }, - { - "a": [ - 1, - { - "b": [{ - - }] - }, - 2 - ] - }, - {"a": [1, {"b": [{"c": "scalar"}]}, 2]}, - {"a": [1, {"b": [{"c": {"x": 1}}]}, 2]}, - { - "a": [ - 1, - { - "b": [{ - "c": [ - 1, - { - - }, - 2 - ] - }] - }, - 2 - ] - }, - { - "a": [ - 1, - { - "b": [ - 1, - { - - }, - 2 - ] - }, - 2 - ] - }, - {"a": [1, {"b": [1, {"c": null}, 2]}, 2]}, - {"a": [1, {"b": [1, {"c": "scalar"}, 2]}, 2]}, - { - "a": [ - 1, - { - "b": [ - 1, - { - "c": [ - 1, - { - - }, - 2 - ] - }, - 2 - ] - }, - 2 - ] - }, - { - "a": [ - 1, - { - "b": [ - [1, 2], - [{ - - }], - 2 - ] - }, - 2 - ] - }, - {"a": [1, {"b": [[1, 2], [{"c": "scalar"}], 2]}, 2]}, - { - "a": [ - 1, - { - "b": [ - [1, 2], - [{ - "c": [ - 1, - { - - }, - 2 - ] - }], - 2 - ] - }, - 2 - ] - }, - {"a": [[1, 2], [{"b": "scalar"}], 2]}, - {"a": [[1, 2], [{"b": {"x": 1, "c": "scalar"}}], 2]}, - { - "a": [ - [1, 2], - [{ - "b": { - "x": 1, - "c": [ - 1, - { - - }, - 2 - ] - } - }], - 2 - ] - }, - {"a": [[1, 2], [{"b": []}], 2]}, - {"a": [[1, 2], [{"b": [1, {"c": "scalar"}, 2]}], 2]}, - {"a": [[1, 2], [{"b": [[1, 2], [{"c": "scalar"}], 2]}], 2]}, - { - "a": [ - [1, 2], - [{ - "b": [ - [1, 2], - [{ - "c": [ - [1, 2], - [{ - - }], - 2 - ] - }], - 2 - ] - }], - 2 - ] - }, - { - "a": [{ - "b": [ - {"c": 1}, - { - - } - ] - }] - }, - {"a": [{"b": [{"c": 1}, {"d": 1}]}]}, - { - "a": [ - {"b": {"c": 1}}, - { - "b": { - - } - } - ] - }, - {"a": [{"b": {"c": 1}}, {"b": {"d": 1}}]}, - { - "a": [ - {"b": {"c": 1}}, - { - - } - ] - }, - {"a": [{"b": {"c": 1}}, {"b": null}]}, - {"a": [{"b": {"c": 1}}, {"b": []}]}, - {"a": [{"b": []}, {"b": []}]}, - - {a: {b: [{c: [1, 2]}]}}, - {a: {b: {c: [1, 2]}}}, - {a: [[1, 2], [{b: [[1, 2], [{c: [[1, 2], [{}], 2]}], 2]}], 2]}, -]; - -let docNum = 0; -let bulk = coll.initializeUnorderedBulkOp(); -for (let doc of docs) { - // Intentionally not using _id as the unique identifier, to avoid getting IDHACK plans when we - // query by it. - let numObj = {num: docNum++}; - let insertObj = {}; - Object.assign(insertObj, numObj, doc); - if (docNum % 2 == 0) { - insertObj.optionalField = "foo"; - } - bulk.insert(insertObj); -} -bulk.execute(); - -const kProjection = { - _id: 0, - "a.b.c": 1, - num: 1, - optionalField: 1 -}; - -// Run an explain. -const expl = coll.find({}, kProjection).explain(); -assert(planHasStage(db, expl, "COLUMN_SCAN"), expl); - -// Run a query getting all of the results using the column index. -let results = coll.find({}, kProjection).toArray(); -assert.gt(results.length, 0); - -for (let res of results) { - const trueResult = coll.find({num: res.num}, kProjection).hint({$natural: 1}).toArray()[0]; - const originalDoc = coll.findOne({num: res.num}); - assert.eq(res, trueResult, originalDoc); - print("Test passed for " + res.num); -} - -// Drop the collection so that validation doesn't happen :) -coll.drop(); -})(); diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index b2292c0dc44..9025c6b4cbe 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -325,7 +325,7 @@ void fillOutPlannerParams(OperationContext* opCtx, canonicalQuery, collection, plannerParams->indices, - plannerParams->columnarIndexes); + plannerParams->columnStoreIndexes); // If query supports index filters, filter params.indices by indices in query settings. // Ignore index filters when it is possible to use the id-hack. diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp index 83cc6617d44..9f33ae1c38b 100644 --- a/src/mongo/db/query/query_planner.cpp +++ b/src/mongo/db/query/query_planner.cpp @@ -44,12 +44,14 @@ #include "mongo/db/matcher/expression_algo.h" #include "mongo/db/matcher/expression_geo.h" #include "mongo/db/matcher/expression_text.h" +#include "mongo/db/pipeline/dependencies.h" #include "mongo/db/pipeline/document_source_group.h" #include "mongo/db/pipeline/document_source_lookup.h" #include "mongo/db/query/canonical_query.h" #include "mongo/db/query/classic_plan_cache.h" #include "mongo/db/query/collation/collation_index_key.h" #include "mongo/db/query/collation/collator_interface.h" +#include "mongo/db/query/index_entry.h" #include "mongo/db/query/internal_plans.h" #include "mongo/db/query/plan_cache.h" #include "mongo/db/query/plan_enumerator.h" @@ -61,6 +63,7 @@ #include "mongo/db/query/query_planner.h" #include "mongo/db/query/query_planner_common.h" #include "mongo/db/query/query_solution.h" +#include "mongo/db/query/util/set_util.h" #include "mongo/logv2/log.h" #include "mongo/util/assert_util_core.h" @@ -140,9 +143,28 @@ Status tagOrChildAccordingToCache(PlanCacheIndexTree* compositeCacheData, } /** - * Returns whether the hintedIndex matches the cluster key. When hinting by index name, - * 'hintObj' takes the shape of {$hint: <indexName>}. When hinting by key pattern, - * 'hintObj' represents the actual key pattern (eg: {_id: 1}). + * Returns whether the hint matches the given index. When hinting by index name, 'hintObj' takes the + * shape of {$hint: <indexName>}. When hinting by key pattern, 'hintObj' represents the actual key + * pattern (eg: {_id: 1}). + */ +bool hintMatchesNameOrPattern(const BSONObj& hintObj, + StringData indexName, + BSONObj indexKeyPattern) { + + BSONElement firstHintElt = hintObj.firstElement(); + if (firstHintElt.fieldNameStringData() == "$hint"_sd && + firstHintElt.type() == BSONType::String) { + // An index name is provided by the hint. + return indexName == firstHintElt.valueStringData(); + ; + } + + // An index spec is provided by the hint. + return hintObj.woCompare(indexKeyPattern) == 0; +} + +/** + * Returns whether the hintedIndex matches the cluster key. */ bool hintMatchesClusterKey(const boost::optional<ClusteredCollectionInfo>& clusteredInfo, const BSONObj& hintObj) { @@ -153,23 +175,24 @@ bool hintMatchesClusterKey(const boost::optional<ClusteredCollectionInfo>& clust auto clusteredIndexSpec = clusteredInfo->getIndexSpec(); - BSONElement firstHintElt = hintObj.firstElement(); - if (firstHintElt.fieldNameStringData() == "$hint"_sd && - firstHintElt.type() == BSONType::String) { - // An index name is provided by the hint. - - // The clusteredIndex's name should always be filled in with a default value when not - // specified upon creation. - tassert(6012100, - "clusteredIndex's 'name' field should be filled in by default after creation", - clusteredIndexSpec.getName()); - - auto hintName = firstHintElt.valueStringData(); - return hintName == clusteredIndexSpec.getName().value(); - } + // The clusteredIndex's name should always be filled in with a default value when not + // specified upon creation. + tassert(6012100, + "clusteredIndex's 'ne' field should be filled in by default after creation", + clusteredIndexSpec.getName()); + return hintMatchesNameOrPattern( + hintObj, clusteredIndexSpec.getName().value(), clusteredIndexSpec.getKey()); +} - // An index spec is provided by the hint. - return hintObj.woCompare(clusteredIndexSpec.getKey()) == 0; +/** + * Returns whether the hintedIndex matches the columnstore index. + */ +bool hintMatchesColumnStoreIndex(const BSONObj& hintObj, const ColumnIndexEntry& columnStoreIndex) { + // TODO SERVER-68400: Should be possible to have some other keypattern. + return hintMatchesNameOrPattern(hintObj, + columnStoreIndex.catalogName, + BSON("$**" + << "columnstore")); } /** @@ -200,22 +223,101 @@ std::pair<DepsTracker, DepsTracker> computeDeps(const QueryPlannerParams& params return {std::move(filterDeps), std::move(outputDeps)}; } -void tryToAddColumnScan(const QueryPlannerParams& params, - const CanonicalQuery& query, - std::vector<std::unique_ptr<QuerySolution>>& out) { - if (params.columnarIndexes.empty()) { - return; +Status columnScanIsPossibleStatus(const CanonicalQuery& query, const QueryPlannerParams& params) { + if (params.columnStoreIndexes.empty()) { + return {ErrorCodes::InvalidOptions, "No columnstore indexes available"}; + } + if (!query.isSbeCompatible()) { + return {ErrorCodes::NotImplemented, + "A columnstore index can only be used with queries in the SBE engine. The given " + "query is not eligible for this engine (yet)"}; + } + if (query.getForceClassicEngine()) { + return {ErrorCodes::InvalidOptions, + "A columnstore index can only be used with queries in the SBE engine, but the " + "query specified to force the classic engine"}; + } + return Status::OK(); +} + +bool columnScanIsPossible(const CanonicalQuery& query, const QueryPlannerParams& params) { + return columnScanIsPossibleStatus(query, params).isOK(); +} + +std::unique_ptr<QuerySolution> makeColumnScanPlan( + const CanonicalQuery& query, + const QueryPlannerParams& params, + const ColumnIndexEntry& columnStoreIndex, + DepsTracker filterDeps, + DepsTracker outputDeps, + StringMap<std::unique_ptr<MatchExpression>> filterSplitByColumn, + std::unique_ptr<MatchExpression> residualPredicate) { + dassert(columnScanIsPossible(query, params)); + + // TODO SERVER-67140: Check if the columnar index actually provides the fields we need. + return QueryPlannerAnalysis::analyzeDataAccess( + query, + params, + std::make_unique<ColumnIndexScanNode>(columnStoreIndex, + std::move(outputDeps.fields), + std::move(filterDeps.fields), + std::move(filterSplitByColumn), + std::move(residualPredicate))); +} + +/** + * A helper function which applies a heuristic to determine if a COLUMN_SCAN plan would examine few + * enough fields to be considered faster than a COLLSCAN. + */ +Status checkFieldLimits(const OrderedPathSet& filterDeps, + const OrderedPathSet& outputDeps, + const StringMap<std::unique_ptr<MatchExpression>>& filterSplitByColumn) { + + const int nReferencedFields = + static_cast<int>(set_util::setUnion(filterDeps, outputDeps).size()); + const int maxNumFields = filterSplitByColumn.size() > 0 + ? internalQueryMaxNumberOfFieldsToChooseFilteredColumnScan.load() + : internalQueryMaxNumberOfFieldsToChooseUnfilteredColumnScan.load(); + if (nReferencedFields > maxNumFields) { + return Status{ErrorCodes::Error{6430508}, + str::stream() << "referenced too many fields. nReferenced=" + << nReferencedFields << ", limit=" << maxNumFields}; + } + return Status::OK(); +} +/** + * Attempts to build a plan using a columnstore index. Returns a non-OK status if it can't build + * one + * - with the code and message indicating the problem - or a QuerySolution if it can. + */ +StatusWith<std::unique_ptr<QuerySolution>> tryToBuildColumnScan( + const QueryPlannerParams& params, + const CanonicalQuery& query, + const boost::optional<ColumnIndexEntry>& hintedIndex = boost::none) { + if (auto status = columnScanIsPossibleStatus(query, params); !status.isOK()) { + return status; + } + + invariant(params.columnStoreIndexes.size() >= 1); + const auto& columnStoreIndex = hintedIndex.value_or(params.columnStoreIndexes.front()); + if (!hintedIndex && params.columnStoreIndexes.size() > 1) { + // TODO SERVER-67140 only warnn if there is more than one index that is actually eligible + // for use. + LOGV2_DEBUG(6298500, + 2, + "Multiple column store indexes present. Selecting the first " + "one arbitrarily", + "indexName"_attr = columnStoreIndex.catalogName); } - invariant(params.columnarIndexes.size() == 1); auto [filterDeps, outputDeps] = computeDeps(params, query); if (filterDeps.needWholeDocument || outputDeps.needWholeDocument) { // We only want to use the columnar index if we can avoid fetching the whole document. - return; - } - if (!query.isSbeCompatible() || query.getForceClassicEngine()) { - // We only support column scans in SBE. - return; + // TODO SERVER-66284 Would like to enable a plan when hinted, even if we need the whole + // document. Something like COLUMN_SCAN -> FETCH. + return {ErrorCodes::Error{6298501}, + "cannot use columnstore index because the query requires seeing the entire " + "document"}; } // TODO SERVER-67140: Check if the columnar index actually provides the fields we need. @@ -227,31 +329,21 @@ void tryToAddColumnScan(const QueryPlannerParams& params, } else { residualPredicate = query.root()->shallowClone(); } - const bool canPushFilters = filterSplitByColumn.size() > 0; - - auto columnScan = std::make_unique<ColumnIndexScanNode>(params.columnarIndexes.front(), - std::move(outputDeps.fields), - std::move(filterDeps.fields), - std::move(filterSplitByColumn), - std::move(residualPredicate)); - - const int nReferencedFields = static_cast<int>(columnScan->allFields.size()); - const int maxNumFields = canPushFilters - ? internalQueryMaxNumberOfFieldsToChooseFilteredColumnScan.load() - : internalQueryMaxNumberOfFieldsToChooseUnfilteredColumnScan.load(); - if (nReferencedFields > maxNumFields) { - LOGV2_DEBUG(6430508, - 5, - "Opting out of column scan plan due to too many referenced fields", - "nReferencedFields"_attr = nReferencedFields, - "maxNumFields"_attr = maxNumFields, - "canPushFilters"_attr = canPushFilters); - return; + auto fieldLimitStatus = + checkFieldLimits(filterDeps.fields, outputDeps.fields, filterSplitByColumn); + + if (fieldLimitStatus.isOK() || hintedIndex) { + // We have a hint, or few enough dependencies that we suspect a column scan is still + // better than a collection scan. Build it and return it. + return makeColumnScanPlan(query, + params, + columnStoreIndex, + std::move(filterDeps), + std::move(outputDeps), + std::move(filterSplitByColumn), + std::move(residualPredicate)); } - - // We have few enough dependencies that we suspect a column scan is still better than a - // collection scan. Add that solution. - out.push_back(QueryPlannerAnalysis::analyzeDataAccess(query, params, std::move(columnScan))); + return Status{ErrorCodes::Error{6298502}, "columnstore index is not applicable for this query"}; } } // namespace @@ -259,7 +351,6 @@ using std::numeric_limits; using std::unique_ptr; namespace dps = ::mongo::dotted_path_support; - // Copied verbatim from db/index.h static bool isIdIndex(const BSONObj& pattern) { BSONObjIterator i(pattern); @@ -741,6 +832,94 @@ StatusWith<std::unique_ptr<QuerySolution>> QueryPlanner::planFromCache( return {std::move(soln)}; } +/** + * For some reason this type is hard to construct inline and keep the compiler happy. Convenience + * helper to do so since we do it a couple times. + */ +StatusWith<std::vector<std::unique_ptr<QuerySolution>>> singleSolution( + std::unique_ptr<QuerySolution> soln) { + std::vector<std::unique_ptr<QuerySolution>> out; + out.push_back(std::move(soln)); + return {std::move(out)}; +} + +bool canTableScan(const QueryPlannerParams& params) { + return !(params.options & QueryPlannerParams::NO_TABLE_SCAN); +} + +StatusWith<std::vector<std::unique_ptr<QuerySolution>>> attemptCollectionScan( + const CanonicalQuery& query, bool isTailable, const QueryPlannerParams& params) { + if (!canTableScan(params)) { + return Status(ErrorCodes::NoQueryExecutionPlans, + "not allowed to output a collection scan because 'notablescan' is enabled"); + } + if (auto soln = buildCollscanSoln(query, isTailable, params)) { + return singleSolution(std::move(soln)); + } + return Status(ErrorCodes::NoQueryExecutionPlans, "Failed to build collection scan soln"); +} + +StatusWith<std::vector<std::unique_ptr<QuerySolution>>> handleNaturalHint( + const CanonicalQuery& query, + const QueryPlannerParams& params, + BSONElement naturalHint, + bool isTailable) { + // The hint can be {$natural: +/-1}. If this happens, output a collscan. We expect any + // $natural sort to have been normalized to a $natural hint upstream. Additionally, if + // the hint matches the collection's cluster key, we also output a collscan utilizing + // the cluster key. + + // Perform validation specific to $natural. + LOGV2_DEBUG(20969, 5, "Forcing a table scan due to hinted $natural"); + if (!query.getFindCommandRequest().getMin().isEmpty() || + !query.getFindCommandRequest().getMax().isEmpty()) { + return Status(ErrorCodes::NoQueryExecutionPlans, + "min and max are incompatible with $natural"); + } + auto result = attemptCollectionScan(query, isTailable, params); + if (result.isOK()) { + return result; + } + return result.getStatus().withContext("could not force a collection scan with a $natural hint"); +} + +StatusWith<std::vector<std::unique_ptr<QuerySolution>>> handleClusteredScanHint( + const CanonicalQuery& query, const QueryPlannerParams& params, bool isTailable) { + // Perform validation specific to hinting on a cluster key. + BSONObj minObj = query.getFindCommandRequest().getMin(); + BSONObj maxObj = query.getFindCommandRequest().getMax(); + + const auto clusterKey = params.clusteredInfo->getIndexSpec().getKey(); + + // Check if the query collator is compatible with the collection collator for the + // provided min and max values. + if ((!minObj.isEmpty() && + !indexCompatibleMaxMin( + minObj, query.getCollator(), params.clusteredCollectionCollator, clusterKey)) || + (!maxObj.isEmpty() && + !indexCompatibleMaxMin( + maxObj, query.getCollator(), params.clusteredCollectionCollator, clusterKey))) { + return Status(ErrorCodes::Error(6137400), + "The clustered index is not compatible with the values provided " + "for min/max due to the query collation"); + } + + auto wellSorted = [&minObj, &maxObj, collator = query.getCollator()]() { + if (collator) { + auto min = stripFieldNamesAndApplyCollation(minObj, collator); + auto max = stripFieldNamesAndApplyCollation(maxObj, collator); + return min.woCompare(max) < 0; + } else { + return minObj.woCompare(maxObj) < 0; + } + }; + if (!minObj.isEmpty() && !maxObj.isEmpty() && !wellSorted()) { + return Status(ErrorCodes::Error(6137401), "max() must be greater than min()"); + } + return attemptCollectionScan(query, isTailable, params); +} + + StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan( const CanonicalQuery& query, const QueryPlannerParams& params) { // It's a little silly to ask for a count and for owned data. This could indicate a bug @@ -764,106 +943,50 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan( "index"_attr = params.indices[i].toString()); } - const bool canTableScan = !(params.options & QueryPlannerParams::NO_TABLE_SCAN); const bool isTailable = query.getFindCommandRequest().getTailable(); // If the query requests a tailable cursor, the only solution is a collscan + filter with // tailable set on the collscan. if (isTailable) { - if (!canTableScan) { - return Status( - ErrorCodes::NoQueryExecutionPlans, - "Running with 'notablescan', so tailable cursors (which always do a table " - "scan) are not allowed"); + auto collScanResult = attemptCollectionScan(query, isTailable, params); + if (collScanResult.isOK()) { + return collScanResult; } - auto soln = buildCollscanSoln(query, isTailable, params); - if (!soln) { - return Status(ErrorCodes::NoQueryExecutionPlans, - "Failed to build collection scan soln"); + return collScanResult.getStatus().withContext( + "query is tailable so must do a collection scan"); + } + + // Hints require us to only consider the hinted index. If index filters in the query settings + // were used to override the allowed indices for planning, we should not use the hinted index + // requested in the query. + boost::optional<BSONObj> hintedIndexBson; + if (!params.indexFiltersApplied) { + if (auto hintObj = query.getFindCommandRequest().getHint(); !hintObj.isEmpty()) { + hintedIndexBson = hintObj; } - std::vector<std::unique_ptr<QuerySolution>> out; - out.push_back(std::move(soln)); - return {std::move(out)}; - } - - if (!query.getFindCommandRequest().getHint().isEmpty()) { - const BSONObj& hintObj = query.getFindCommandRequest().getHint(); - const auto naturalHint = hintObj[query_request_helper::kNaturalSortField]; - if (naturalHint || hintMatchesClusterKey(params.clusteredInfo, hintObj)) { - // The hint can be {$natural: +/-1}. If this happens, output a collscan. We expect - // any $natural sort to have been normalized to a $natural hint upstream. - // Additionally, if the hint matches the collection's cluster key, we also output a - // collscan utilizing the cluster key. - - if (naturalHint) { - // Perform validation specific to $natural. - LOGV2_DEBUG(20969, 5, "Forcing a table scan due to hinted $natural"); - if (!canTableScan) { - return Status(ErrorCodes::NoQueryExecutionPlans, - "hint $natural is not allowed, because 'notablescan' is enabled"); - } - if (!query.getFindCommandRequest().getMin().isEmpty() || - !query.getFindCommandRequest().getMax().isEmpty()) { - return Status(ErrorCodes::NoQueryExecutionPlans, - "min and max are incompatible with $natural"); - } - } else { - // Perform validation specific to hinting on a cluster key. - BSONObj minObj = query.getFindCommandRequest().getMin(); - BSONObj maxObj = query.getFindCommandRequest().getMax(); - - const auto clusterKey = params.clusteredInfo->getIndexSpec().getKey(); - - // Check if the query collator is compatible with the collection collator for - // the provided min and max values. - if ((!minObj.isEmpty() && - !indexCompatibleMaxMin(minObj, - query.getCollator(), - params.clusteredCollectionCollator, - clusterKey)) || - (!maxObj.isEmpty() && - !indexCompatibleMaxMin(maxObj, - query.getCollator(), - params.clusteredCollectionCollator, - clusterKey))) { - return Status(ErrorCodes::Error(6137400), - "The clustered index is not compatible with the values provided " - "for min/max due to the query collation"); - } + } - auto wellSorted = [&minObj, &maxObj, collator = query.getCollator()]() { - if (collator) { - auto min = stripFieldNamesAndApplyCollation(minObj, collator); - auto max = stripFieldNamesAndApplyCollation(maxObj, collator); - return min.woCompare(max) < 0; - } else { - return minObj.woCompare(maxObj) < 0; + if (hintedIndexBson) { + // If we have a hint, check if it matches any "special" index before proceeding. + const auto& hintObj = *hintedIndexBson; + if (const auto naturalHint = hintObj[query_request_helper::kNaturalSortField]) { + return handleNaturalHint(query, params, naturalHint, isTailable); + } else if (hintMatchesClusterKey(params.clusteredInfo, hintObj)) { + return handleClusteredScanHint(query, params, isTailable); + } else { + for (auto&& columnIndex : params.columnStoreIndexes) { + if (hintMatchesColumnStoreIndex(hintObj, columnIndex)) { + // Hint matches - either build the plan or fail. + auto statusWithSoln = tryToBuildColumnScan(params, query, columnIndex); + if (!statusWithSoln.isOK()) { + return statusWithSoln.getStatus(); } - }; - if (!minObj.isEmpty() && !maxObj.isEmpty() && !wellSorted()) { - return Status(ErrorCodes::Error(6137401), "max() must be greater than min()"); + return singleSolution(std::move(statusWithSoln.getValue())); } } - - auto soln = buildCollscanSoln(query, isTailable, params); - if (!soln) { - return Status(ErrorCodes::NoQueryExecutionPlans, - "Failed to build collection scan soln"); - } - std::vector<std::unique_ptr<QuerySolution>> out; - out.push_back(std::move(soln)); - return {std::move(out)}; } } - // Hints require us to only consider the hinted index. If index filters in the query - // settings were used to override the allowed indices for planning, we should not use the - // hinted index requested in the query. - BSONObj hintedIndex; - if (!params.indexFiltersApplied) { - hintedIndex = query.getFindCommandRequest().getHint(); - } - // Either the list of indices passed in by the caller, or the list of indices filtered // according to the hint. This list is later expanded in order to allow the planner to // handle wildcard indexes. @@ -871,10 +994,10 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan( // Will hold a copy of the index entry chosen by the hint. boost::optional<IndexEntry> hintedIndexEntry; - if (hintedIndex.isEmpty()) { + if (!hintedIndexBson) { fullIndexList = params.indices; } else { - fullIndexList = QueryPlannerIXSelect::findIndexesByHint(hintedIndex, params.indices); + fullIndexList = QueryPlannerIXSelect::findIndexesByHint(*hintedIndexBson, params.indices); if (fullIndexList.empty()) { return Status(ErrorCodes::BadValue, @@ -964,9 +1087,7 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan( return Status(ErrorCodes::NoQueryExecutionPlans, "Sort and covering analysis failed while planning hint/min/max query"); } - std::vector<std::unique_ptr<QuerySolution>> out; - out.push_back(std::move(soln)); - return {std::move(out)}; + return singleSolution(std::move(soln)); } for (size_t i = 0; i < relevantIndices.size(); ++i) { @@ -1148,7 +1269,7 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan( // desired behavior when an index is hinted that is not relevant to the query. In the case // that // $** index is hinted, we do not want this behavior. - if (!hintedIndex.isEmpty() && relevantIndices.size() == 1) { + if (hintedIndexBson && relevantIndices.size() == 1) { if (out.size() > 0) { return {std::move(out)}; } @@ -1159,15 +1280,12 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan( } // Return hinted index solution if found. - auto soln = buildWholeIXSoln(relevantIndices.front(), query, params); - if (!soln) { - return Status(ErrorCodes::NoQueryExecutionPlans, - "Failed to build whole-index solution for $hint"); + if (auto soln = buildWholeIXSoln(relevantIndices.front(), query, params)) { + LOGV2_DEBUG(20980, 5, "Planner: outputting soln that uses hinted index as scan"); + return singleSolution(std::move(soln)); } - LOGV2_DEBUG(20980, 5, "Planner: outputting soln that uses hinted index as scan"); - std::vector<std::unique_ptr<QuerySolution>> out; - out.push_back(std::move(soln)); - return {std::move(out)}; + return Status(ErrorCodes::NoQueryExecutionPlans, + "Failed to build whole-index solution for $hint"); } // If a sort order is requested, there may be an index that provides it, even if that @@ -1300,7 +1418,7 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan( if (params.options & QueryPlannerParams::GENERATE_COVERED_IXSCANS && out.size() == 0 && query.getQueryObj().isEmpty() && projection && !projection->requiresDocument()) { - const auto* indicesToConsider = hintedIndex.isEmpty() ? &fullIndexList : &relevantIndices; + const auto* indicesToConsider = hintedIndexBson ? &relevantIndices : &fullIndexList; for (auto&& index : *indicesToConsider) { if (index.type != INDEX_BTREE || index.multikey || index.sparse || index.filterExpr || !CollatorInterface::collatorsMatch(index.collator, query.getCollator())) { @@ -1330,7 +1448,9 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan( // Check whether we're eligible to use the columnar index, assuming no other indexes can be // used. if (out.empty()) { - tryToAddColumnScan(params, query, out); + if (auto statusWithSoln = tryToBuildColumnScan(params, query); statusWithSoln.isOK()) { + out.emplace_back(std::move(statusWithSoln.getValue())); + } } // The caller can explicitly ask for a collscan. @@ -1338,7 +1458,7 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan( // No indexed plans? We must provide a collscan if possible or else we can't run the query. bool collScanRequired = 0 == out.size(); - if (collScanRequired && !canTableScan) { + if (collScanRequired && !canTableScan(params)) { return Status(ErrorCodes::NoQueryExecutionPlans, "No indexed plans available, and running with 'notablescan'"); } @@ -1347,7 +1467,7 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan( // Also, if a hint is specified it indicates that we MUST use it. bool possibleToCollscan = !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) && - !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT) && hintedIndex.isEmpty(); + !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT) && !hintedIndexBson; if (collScanRequired && !possibleToCollscan) { return Status(ErrorCodes::NoQueryExecutionPlans, "No query solutions"); } diff --git a/src/mongo/db/query/query_planner_columnar_test.cpp b/src/mongo/db/query/query_planner_columnar_test.cpp index aeb3f26b69f..ab52067bf66 100644 --- a/src/mongo/db/query/query_planner_columnar_test.cpp +++ b/src/mongo/db/query/query_planner_columnar_test.cpp @@ -68,8 +68,8 @@ protected: kInternalQueryMaxNumberOfFieldsToChooseFilteredColumnScanDefault); } - void addColumnarIndexAndEnableFilterSplitting() { - params.columnarIndexes.emplace_back(kIndexName); + void addColumnStoreIndexAndEnableFilterSplitting(StringData indexName = kIndexName) { + params.columnStoreIndexes.emplace_back(indexName.toString()); params.options |= QueryPlannerParams::GENERATE_PER_COLUMN_FILTERS; } @@ -88,8 +88,8 @@ private: RAIIServerParameterControllerForTest _controllerSBE{"internalQueryForceClassicEngine", false}; }; -TEST_F(QueryPlannerColumnarTest, InclusionProjectionUsesColumnarIndex) { - addColumnarIndexAndEnableFilterSplitting(); +TEST_F(QueryPlannerColumnarTest, InclusionProjectionUsesColumnStoreIndex) { + addColumnStoreIndexAndEnableFilterSplitting(); runQuerySortProj(BSON("a" << BSON("$gt" << 3)), BSONObj(), BSON("a" << 1 << "_id" << 0)); @@ -103,8 +103,8 @@ TEST_F(QueryPlannerColumnarTest, InclusionProjectionUsesColumnarIndex) { })"); } -TEST_F(QueryPlannerColumnarTest, ComputedProjectionUsesColumnarIndex) { - addColumnarIndexAndEnableFilterSplitting(); +TEST_F(QueryPlannerColumnarTest, ComputedProjectionUsesColumnStoreIndex) { + addColumnStoreIndexAndEnableFilterSplitting(); runQuerySortProj( BSON("a" << BSON("$gt" << 3)), @@ -126,8 +126,8 @@ TEST_F(QueryPlannerColumnarTest, ComputedProjectionUsesColumnarIndex) { })"); } -TEST_F(QueryPlannerColumnarTest, ExpressionProjectionUsesColumnarIndex) { - addColumnarIndexAndEnableFilterSplitting(); +TEST_F(QueryPlannerColumnarTest, ExpressionProjectionUsesColumnStoreIndex) { + addColumnStoreIndexAndEnableFilterSplitting(); runQuerySortProj(BSON("a" << BSON("$gt" << 3)), BSONObj(), fromjson(R"({ a: 1, @@ -152,7 +152,7 @@ TEST_F(QueryPlannerColumnarTest, ExpressionProjectionUsesColumnarIndex) { } TEST_F(QueryPlannerColumnarTest, ImplicitlyIncludedIdIsIncludedInProjectedFields) { - addColumnarIndexAndEnableFilterSplitting(); + addColumnStoreIndexAndEnableFilterSplitting(); runQuerySortProj(BSON("a" << BSON("$gt" << 3)), BSONObj(), BSON("a" << 1)); @@ -166,8 +166,8 @@ TEST_F(QueryPlannerColumnarTest, ImplicitlyIncludedIdIsIncludedInProjectedFields })"); } -TEST_F(QueryPlannerColumnarTest, InclusionProjectionWithSortUsesColumnarIndexAndBlockingSort) { - addColumnarIndexAndEnableFilterSplitting(); +TEST_F(QueryPlannerColumnarTest, InclusionProjectionWithSortUsesColumnStoreIndexAndBlockingSort) { + addColumnStoreIndexAndEnableFilterSplitting(); runQuerySortProj(BSONObj(), BSON("a" << 1), BSON("a" << 1 << "_id" << 0)); @@ -182,7 +182,7 @@ TEST_F(QueryPlannerColumnarTest, InclusionProjectionWithSortUsesColumnarIndexAnd } TEST_F(QueryPlannerColumnarTest, SortOnSeparateColumnAddsThatColumnToColumnScan) { - addColumnarIndexAndEnableFilterSplitting(); + addColumnStoreIndexAndEnableFilterSplitting(); runQuerySortProj(BSONObj(), BSON("b" << 1), BSON("a" << 1 << "_id" << 0)); @@ -201,24 +201,24 @@ TEST_F(QueryPlannerColumnarTest, SortOnSeparateColumnAddsThatColumnToColumnScan) })"); } -TEST_F(QueryPlannerColumnarTest, ExclusionProjectionDoesNotUseColumnarIndex) { - addColumnarIndexAndEnableFilterSplitting(); +TEST_F(QueryPlannerColumnarTest, ExclusionProjectionDoesNotUseColumnStoreIndex) { + addColumnStoreIndexAndEnableFilterSplitting(); runQuerySortProj(BSONObj(), BSONObj(), BSON("a" << 0 << "_id" << 0)); assertNumSolutions(1U); assertSolutionExists(R"({proj: {spec: {a: 0, _id: 0}, node: {cscan: {dir: 1}}}})"); } -TEST_F(QueryPlannerColumnarTest, NoProjectionDoesNotUseColumnarIndex) { - addColumnarIndexAndEnableFilterSplitting(); +TEST_F(QueryPlannerColumnarTest, NoProjectionDoesNotUseColumnStoreIndex) { + addColumnStoreIndexAndEnableFilterSplitting(); runQuerySortProj(BSON("a" << 1), BSONObj(), BSONObj()); assertNumSolutions(1U); assertSolutionExists(R"({cscan: {dir: 1, filter: {a: {$eq: 1}}}})"); } -TEST_F(QueryPlannerColumnarTest, ProjectionWithTooManyFieldsDoesNotUseColumnarIndex) { - addColumnarIndexAndEnableFilterSplitting(); +TEST_F(QueryPlannerColumnarTest, ProjectionWithTooManyFieldsDoesNotUseColumnStoreIndex) { + addColumnStoreIndexAndEnableFilterSplitting(); internalQueryMaxNumberOfFieldsToChooseUnfilteredColumnScan.store(2); runQuerySortProj(BSONObj(), BSONObj(), BSON("a" << 1 << "b" << 1 << "c" << 1)); @@ -226,8 +226,8 @@ TEST_F(QueryPlannerColumnarTest, ProjectionWithTooManyFieldsDoesNotUseColumnarIn assertSolutionExists(R"({proj: {spec: {a: 1, b: 1, c: 1}, node: {cscan: {dir: 1}}}})"); } -TEST_F(QueryPlannerColumnarTest, ExpressionProjectionWithTooManyFieldsDoesnotUseColumnarIndex) { - addColumnarIndexAndEnableFilterSplitting(); +TEST_F(QueryPlannerColumnarTest, ExpressionProjectionWithTooManyFieldsDoesnotUseColumnStoreIndex) { + addColumnStoreIndexAndEnableFilterSplitting(); internalQueryMaxNumberOfFieldsToChooseUnfilteredColumnScan.store(2); // This will need 3 fields for the $concat, so should not be able to use a column scan. @@ -239,7 +239,7 @@ TEST_F(QueryPlannerColumnarTest, ExpressionProjectionWithTooManyFieldsDoesnotUse // Test with a number of fields equal to the limit. TEST_F(QueryPlannerColumnarTest, ImplicitIdCountsTowardsFieldLimit) { - addColumnarIndexAndEnableFilterSplitting(); + addColumnStoreIndexAndEnableFilterSplitting(); internalQueryMaxNumberOfFieldsToChooseUnfilteredColumnScan.store(2); runQuerySortProj( @@ -248,8 +248,8 @@ TEST_F(QueryPlannerColumnarTest, ImplicitIdCountsTowardsFieldLimit) { assertSolutionExists(R"({proj: {spec: {a: 1, b: 1}, node: {cscan: {dir: 1}}}})"); } -TEST_F(QueryPlannerColumnarTest, ProjectionWithJustEnoughFieldsDoesUseColumnarIndex) { - addColumnarIndexAndEnableFilterSplitting(); +TEST_F(QueryPlannerColumnarTest, ProjectionWithJustEnoughFieldsDoesUseColumnStoreIndex) { + addColumnStoreIndexAndEnableFilterSplitting(); internalQueryMaxNumberOfFieldsToChooseUnfilteredColumnScan.store(2); // Without the '_id' this should be eligible. @@ -258,8 +258,8 @@ TEST_F(QueryPlannerColumnarTest, ProjectionWithJustEnoughFieldsDoesUseColumnarIn assertSolutionExists("{column_scan: {outputFields: ['a', 'b']}}"); } -TEST_F(QueryPlannerColumnarTest, DottedProjectionTooManyFieldsDoesNotUseColumnarIndex) { - addColumnarIndexAndEnableFilterSplitting(); +TEST_F(QueryPlannerColumnarTest, DottedProjectionTooManyFieldsDoesNotUseColumnStoreIndex) { + addColumnStoreIndexAndEnableFilterSplitting(); internalQueryMaxNumberOfFieldsToChooseUnfilteredColumnScan.store(2); runQuerySortProj(BSONObj(), BSONObj(), BSON("a" << 1 << "b" << BSON("c" << 1 << "d" << 1))); @@ -268,8 +268,8 @@ TEST_F(QueryPlannerColumnarTest, DottedProjectionTooManyFieldsDoesNotUseColumnar } TEST_F(QueryPlannerColumnarTest, - ProjectionWithTooManyFieldsDoesNotUseColumnarIndexUnsupportedPredicate) { - addColumnarIndexAndEnableFilterSplitting(); + ProjectionWithTooManyFieldsDoesNotUseColumnStoreIndexUnsupportedPredicate) { + addColumnStoreIndexAndEnableFilterSplitting(); internalQueryMaxNumberOfFieldsToChooseUnfilteredColumnScan.store(2); runQuerySortProj(BSON("unsupported" << BSON("$exists" << false)), @@ -279,8 +279,87 @@ TEST_F(QueryPlannerColumnarTest, assertSolutionExists(R"({proj: {spec: {a: 1, b: 1, c: 1}, node: {cscan: {dir: 1}}}})"); } -TEST_F(QueryPlannerColumnarTest, StandardIndexPreferredOverColumnarIndex) { - addColumnarIndexAndEnableFilterSplitting(); +TEST_F(QueryPlannerColumnarTest, HintOverridesFieldLimitUnfiltered) { + addColumnStoreIndexAndEnableFilterSplitting(); + internalQueryMaxNumberOfFieldsToChooseUnfilteredColumnScan.store(2); + runQuerySortProjSkipLimitHint(BSONObj(), + BSONObj(), + BSON("a" << 1 << "b" << 1 << "c" << 1 << "d" << 1), + 0, + 0, + BSON("$**" + << "columnstore")); + assertNumSolutions(1U); + assertSolutionExists(R"({column_scan: {outputFields: ["a", "b", "c", "d", "_id"]}})"); +} + +TEST_F(QueryPlannerColumnarTest, HintOverridesFieldLimitFiltered) { + addColumnStoreIndexAndEnableFilterSplitting(); + internalQueryMaxNumberOfFieldsToChooseFilteredColumnScan.store(2); + runQuerySortProjSkipLimitHint(BSON("a" + << "selective"), + BSONObj(), + BSON("a" << 1 << "b" << 1 << "c" << 1 << "d" << 1), + 0, + 0, + BSON("$**" + << "columnstore")); + assertNumSolutions(1U); + assertSolutionExists(R"({ + column_scan: { + outputFields: ["a", "b", "c", "d", "_id"], + matchFields: ["a"] + } + })"); +} + +// TODO SERVER-66284 Ideally this wouldn't fail. +TEST_F(QueryPlannerColumnarTest, HintFailsWhenFetchIsRequired) { + addColumnStoreIndexAndEnableFilterSplitting(); + internalQueryMaxNumberOfFieldsToChooseFilteredColumnScan.store(2); + ASSERT_THROWS( + runQuerySortProjSkipLimitHint(BSON("a" + << "selective"), + BSONObj(), + BSONObj(), + 0, + 0, + BSON("$**" + << "columnstore")), + unittest::TestAssertionFailureException); // Expect the test fixture to assert on status. +} + +TEST_F(QueryPlannerColumnarTest, HintCausesErrorIfIndexNotFound) { + // Missing addColumnStoreIndexAndEnableFilterSplitting(); + ASSERT_THROWS( + runQuerySortProjSkipLimitHint(BSON("a" + << "selective"), + BSONObj(), + BSON("a" << 1), + 0, + 0, + BSON("$**" + << "columnstore")), + unittest::TestAssertionFailureException); // Expect the test fixture to assert on status. +} + +TEST_F(QueryPlannerColumnarTest, HintCausesErrorIfQueryDoesNotSupportSBE) { + addColumnStoreIndexAndEnableFilterSplitting(); + setMarkQueriesSbeCompatible(false); + ASSERT_THROWS( + runQuerySortProjSkipLimitHint(BSON("a" + << "selective"), + BSONObj(), + BSONObj(), + 0, + 0, + BSON("$**" + << "columnstore")), + unittest::TestAssertionFailureException); // Expect the test fixture to assert on status. +} + +TEST_F(QueryPlannerColumnarTest, StandardIndexPreferredOverColumnStoreIndex) { + addColumnStoreIndexAndEnableFilterSplitting(); addIndex(BSON("a" << 1)); runQuerySortProj(BSON("a" << 5), BSONObj(), BSON("a" << 1 << "_id" << 0)); @@ -290,7 +369,7 @@ TEST_F(QueryPlannerColumnarTest, StandardIndexPreferredOverColumnarIndex) { } TEST_F(QueryPlannerColumnarTest, IneligiblePredicateNeedsToBeAppliedAfterAssembly) { - addColumnarIndexAndEnableFilterSplitting(); + addColumnStoreIndexAndEnableFilterSplitting(); runQuerySortProj(BSON("a" << BSONNULL), BSONObj(), BSON("a" << 1 << "_id" << 0)); assertNumSolutions(1U); @@ -304,8 +383,8 @@ TEST_F(QueryPlannerColumnarTest, IneligiblePredicateNeedsToBeAppliedAfterAssembl })"); } -TEST_F(QueryPlannerColumnarTest, MultiplePredicatesAllowedWithColumnarIndex) { - addColumnarIndexAndEnableFilterSplitting(); +TEST_F(QueryPlannerColumnarTest, MultiplePredicatesAllowedWithColumnStoreIndex) { + addColumnStoreIndexAndEnableFilterSplitting(); runQuerySortProj(BSON("a" << 2 << "b" << 3), BSONObj(), BSON("a" << 1 << "_id" << 0)); assertNumSolutions(1U); @@ -325,7 +404,7 @@ TEST_F(QueryPlannerColumnarTest, MultiplePredicatesAllowedWithColumnarIndex) { TEST_F(QueryPlannerColumnarTest, TooManyProjectedFieldsDisqualifiesColumnScanEvenWithEligiblePredicates) { - addColumnarIndexAndEnableFilterSplitting(); + addColumnStoreIndexAndEnableFilterSplitting(); internalQueryMaxNumberOfFieldsToChooseFilteredColumnScan.store(2); runQuerySortProj(BSON("a" << 2 << "b" << 3), BSONObj(), BSON("a" << 1 << "b" << 1 << "c" << 1)); @@ -334,7 +413,7 @@ TEST_F(QueryPlannerColumnarTest, } TEST_F(QueryPlannerColumnarTest, TooManyFilteredFieldsDisqualifiesColumnScan) { - addColumnarIndexAndEnableFilterSplitting(); + addColumnStoreIndexAndEnableFilterSplitting(); internalQueryMaxNumberOfFieldsToChooseFilteredColumnScan.store(2); runQuerySortProj(BSON("a" << 2 << "b" << 3 << "c" << 4), @@ -345,7 +424,7 @@ TEST_F(QueryPlannerColumnarTest, TooManyFilteredFieldsDisqualifiesColumnScan) { } TEST_F(QueryPlannerColumnarTest, FilterDependingOnWholeDocumentDisqualifiesColumnScan) { - addColumnarIndexAndEnableFilterSplitting(); + addColumnStoreIndexAndEnableFilterSplitting(); // The projection only needs 1 field, but the match references '$$ROOT' so needs the whole // document. @@ -357,7 +436,7 @@ TEST_F(QueryPlannerColumnarTest, FilterDependingOnWholeDocumentDisqualifiesColum assertSolutionExists(R"({proj: {spec: {b: 1, _id: 0}, node: {cscan: {dir: 1}}}})"); } TEST_F(QueryPlannerColumnarTest, CombinationOfProjectedAndMatchedFieldsDisqualifiesColumnScan) { - addColumnarIndexAndEnableFilterSplitting(); + addColumnStoreIndexAndEnableFilterSplitting(); // Neither the match nor the project mentions 4 fields, but together they exceed the threshhold. internalQueryMaxNumberOfFieldsToChooseFilteredColumnScan.store(4); @@ -369,7 +448,7 @@ TEST_F(QueryPlannerColumnarTest, CombinationOfProjectedAndMatchedFieldsDisqualif } TEST_F(QueryPlannerColumnarTest, NumberOfFieldsComputedUsingSetSize) { - addColumnarIndexAndEnableFilterSplitting(); + addColumnStoreIndexAndEnableFilterSplitting(); // If there are 3 fields referenced in the match and 3 in the projection, but they overlap, we // should be OK to use column scan. @@ -392,7 +471,7 @@ TEST_F(QueryPlannerColumnarTest, NumberOfFieldsComputedUsingSetSize) { })"); } TEST_F(QueryPlannerColumnarTest, ComplexPredicateSplitDemo) { - addColumnarIndexAndEnableFilterSplitting(); + addColumnStoreIndexAndEnableFilterSplitting(); auto complexPredicate = fromjson(R"({ a: {$gte: 0, $lt: 10}, @@ -422,7 +501,7 @@ TEST_F(QueryPlannerColumnarTest, ComplexPredicateSplitDemo) { } TEST_F(QueryPlannerColumnarTest, ComplexPredicateSplitsIntoParts) { - addColumnarIndexAndEnableFilterSplitting(); + addColumnStoreIndexAndEnableFilterSplitting(); // Same predicate as above, except with exists: false, which disqualifies the whole thing. auto complexPredicate = fromjson(R"({ @@ -457,7 +536,7 @@ TEST_F(QueryPlannerColumnarTest, ComplexPredicateSplitsIntoParts) { } TEST_F(QueryPlannerColumnarTest, EmptyQueryPredicateIsEligible) { - addColumnarIndexAndEnableFilterSplitting(); + addColumnStoreIndexAndEnableFilterSplitting(); runQuerySortProj(BSONObj(), BSONObj(), BSON("a" << 1 << "_id" << 0)); assertNumSolutions(1U); @@ -466,7 +545,7 @@ TEST_F(QueryPlannerColumnarTest, EmptyQueryPredicateIsEligible) { } TEST_F(QueryPlannerColumnarTest, GroupTest) { - addColumnarIndexAndEnableFilterSplitting(); + addColumnStoreIndexAndEnableFilterSplitting(); auto pipeline = Pipeline::parse({fromjson("{$group: {_id: '$foo', s: {$sum: '$x'}}}")}, expCtx); @@ -503,7 +582,7 @@ TEST_F(QueryPlannerColumnarTest, GroupTest) { } TEST_F(QueryPlannerColumnarTest, MatchGroupTest) { - addColumnarIndexAndEnableFilterSplitting(); + addColumnStoreIndexAndEnableFilterSplitting(); auto pipeline = Pipeline::parse({fromjson("{$group: {_id: '$foo', s: {$sum: '$x'}}}")}, expCtx); @@ -547,7 +626,7 @@ TEST_F(QueryPlannerColumnarTest, MatchGroupTest) { } TEST_F(QueryPlannerColumnarTest, MatchGroupWithOverlappingFieldsTest) { - addColumnarIndexAndEnableFilterSplitting(); + addColumnStoreIndexAndEnableFilterSplitting(); auto pipeline = Pipeline::parse( {fromjson("{$group: {_id: '$foo', s: {$sum: '$x'}, name: {$first: '$name'}}}")}, expCtx); @@ -589,7 +668,7 @@ TEST_F(QueryPlannerColumnarTest, MatchGroupWithOverlappingFieldsTest) { // Test that if a dotted path is requested then we need to add a PROJECTION_DEFAULT stage on top of // the COLUMN_SCAN. TEST_F(QueryPlannerColumnarTest, DottedFieldsRequireProjectionStage) { - addColumnarIndexAndEnableFilterSplitting(); + addColumnStoreIndexAndEnableFilterSplitting(); runQuerySortProj( BSON("a" << BSON("$gt" << 3)), BSONObj(), BSON("a" << 1 << "b.c" << 1 << "_id" << 0)); @@ -614,7 +693,7 @@ TEST_F(QueryPlannerColumnarTest, DottedFieldsRequireProjectionStage) { // non-ambiguous for field path expressions like in a $group stage, but is not fully correct for a // normal projection. This o TEST_F(QueryPlannerColumnarTest, DottedFieldsWithGroupStageDoesNotRequireProjection) { - addColumnarIndexAndEnableFilterSplitting(); + addColumnStoreIndexAndEnableFilterSplitting(); auto pipeline = Pipeline::parse( {fromjson("{$group: {_id: '$foo.bar', s: {$sum: '$x.y'}, name: {$first: '$name'}}}")}, @@ -660,7 +739,7 @@ TEST_F(QueryPlannerColumnarTest, DottedFieldsWithGroupStageDoesNotRequireProject } TEST_F(QueryPlannerColumnarTest, ShardKeyFieldsIncluded) { - addColumnarIndexAndEnableFilterSplitting(); + addColumnStoreIndexAndEnableFilterSplitting(); params.options |= QueryPlannerParams::INCLUDE_SHARD_FILTER; params.shardKey = BSON("sk1" << 1 << "sk2.nested" << 1); @@ -689,7 +768,7 @@ TEST_F(QueryPlannerColumnarTest, ShardKeyFieldsIncluded) { } TEST_F(QueryPlannerColumnarTest, ShardKeyFieldsCountTowardsFieldLimit) { - addColumnarIndexAndEnableFilterSplitting(); + addColumnStoreIndexAndEnableFilterSplitting(); params.options |= QueryPlannerParams::INCLUDE_SHARD_FILTER; params.shardKey = BSON("sk1" << 1 << "sk2.nested" << 1); @@ -716,8 +795,22 @@ TEST_F(QueryPlannerColumnarTest, ShardKeyFieldsCountTowardsFieldLimit) { })"); } +TEST_F(QueryPlannerColumnarTest, SelectsFirstFromMultipleEligibleColumnStoreIndexes) { + addColumnStoreIndexAndEnableFilterSplitting("first index"_sd); + params.columnStoreIndexes.emplace_back("second index"); + + runQuerySortProj(BSONObj(), BSONObj(), BSON("a" << 1 << "_id" << 0)); + assertSolutionExists(R"({ + column_scan: { + indexName: 'first index', + outputFields: ['a'], + matchFields: [] + } + })"); +} + TEST_F(QueryPlannerColumnarTest, FullPredicateOption) { - params.columnarIndexes.emplace_back(kIndexName); + params.columnStoreIndexes.emplace_back(kIndexName); // Filter that could be pushed down, but isn't due to the lack of the // GENERATE_PER_COLUMN_FILTER flag. diff --git a/src/mongo/db/query/query_planner_params.h b/src/mongo/db/query/query_planner_params.h index 3a7dc82c79f..811c2613c20 100644 --- a/src/mongo/db/query/query_planner_params.h +++ b/src/mongo/db/query/query_planner_params.h @@ -168,7 +168,7 @@ struct QueryPlannerParams { std::vector<IndexEntry> indices; // Columnar indexes available. - std::vector<ColumnIndexEntry> columnarIndexes; + std::vector<ColumnIndexEntry> columnStoreIndexes; // What's our shard key? If INCLUDE_SHARD_FILTER is set we will create a shard filtering // stage. If we know the shard key, we can perform covering analysis instead of always diff --git a/src/mongo/db/query/query_planner_test_lib.cpp b/src/mongo/db/query/query_planner_test_lib.cpp index ce1754ba817..1984edba47b 100644 --- a/src/mongo/db/query/query_planner_test_lib.cpp +++ b/src/mongo/db/query/query_planner_test_lib.cpp @@ -64,6 +64,9 @@ using std::string; Status filterMatches(const BSONObj& testFilter, const MatchExpression* trueFilter, std::unique_ptr<CollatorInterface> collator) { + if (!trueFilter) { + return {ErrorCodes::Error{6298503}, "actual (true) filter was null"}; + } std::unique_ptr<MatchExpression> trueFilterClone(trueFilter->shallowClone()); MatchExpression::sortTree(trueFilterClone.get()); @@ -154,6 +157,22 @@ Status columnIxScanFiltersByPathMatch( return Status::OK(); } +Status indexNamesMatch(BSONElement expectedIndexName, std::string actualIndexName) { + if (expectedIndexName.type() != BSONType::String) { + return {ErrorCodes::Error{5619234}, + str::stream() << "Provided JSON gave a 'ixscan' with a 'name', but the name " + "was not an string: " + << expectedIndexName}; + } + if (expectedIndexName.valueStringData() != actualIndexName) { + return {ErrorCodes::Error{5619235}, + str::stream() << "Provided JSON gave a 'column_scan' with an 'indexName' which did " + "not match. Expected: " + << expectedIndexName << " Found: " << actualIndexName}; + } + return Status::OK(); +} + template <typename Iterable> Status stringSetsMatch(BSONElement expectedStringArrElem, Iterable actualStrings, @@ -521,20 +540,11 @@ Status QueryPlannerTestLib::solutionMatches(const BSONObj& testSoln, } } - BSONElement name = ixscanObj["name"]; + auto name = ixscanObj["name"]; if (!name.eoo()) { - if (name.type() != BSONType::String) { - return {ErrorCodes::Error{5619234}, - str::stream() - << "Provided JSON gave a 'ixscan' with a 'name', but the name " - "was not an string: " - << name}; - } - if (name.valueStringData() != ixn->index.identifier.catalogName) { - return {ErrorCodes::Error{5619235}, - str::stream() << "Provided JSON gave a 'ixscan' with a 'name' which did " - "not match. Expected: " - << name << " Found: " << ixn->index.identifier.catalogName}; + if (auto nameStatus = indexNamesMatch(name, ixn->index.identifier.catalogName); + !nameStatus.isOK()) { + return nameStatus; } } @@ -1301,6 +1311,14 @@ Status QueryPlannerTestLib::solutionMatches(const BSONObj& testSoln, } auto obj = expectedElem.Obj(); + if (auto indexName = obj["indexName"]) { + if (auto nameStatus = + indexNamesMatch(indexName, actualColumnIxScanNode->indexEntry.catalogName); + !nameStatus.isOK()) { + return nameStatus; + } + } + if (auto outputFields = obj["outputFields"]) { if (auto outputStatus = stringSetsMatch(outputFields, diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp index 838fae34aba..f1f2295a727 100644 --- a/src/mongo/db/query/query_solution.cpp +++ b/src/mongo/db/query/query_solution.cpp @@ -51,6 +51,7 @@ #include "mongo/db/query/planner_wildcard_helpers.h" #include "mongo/db/query/projection_ast_util.h" #include "mongo/db/query/query_planner_common.h" +#include "mongo/db/query/util/set_util.h" namespace mongo { @@ -1104,6 +1105,7 @@ ColumnIndexScanNode::ColumnIndexScanNode(ColumnIndexEntry indexEntry, : indexEntry(std::move(indexEntry)), outputFields(std::move(outputFieldsIn)), matchFields(std::move(matchFieldsIn)), + allFields(set_util::setUnion(outputFields, matchFields)), filtersByPath(std::move(filtersByPath)), postAssemblyFilter(std::move(postAssemblyFilter)) { allFields = outputFields; diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h index 5f379bc5b05..50a35027936 100644 --- a/src/mongo/db/query/query_solution.h +++ b/src/mongo/db/query/query_solution.h @@ -558,6 +558,10 @@ struct ColumnIndexScanNode : public QuerySolutionNode { // 'postAssemblyFilter'. OrderedPathSet matchFields; + // A cached copy of the union of the above two field sets which we expect to be frequently asked + // for. + OrderedPathSet allFields; + // A column scan can apply a filter to the columns directly while scanning, or to a document // assembled from the scanned columns. @@ -568,10 +572,6 @@ struct ColumnIndexScanNode : public QuerySolutionNode { // An optional filter to apply after assembling a document from all scanned columns. For // example: {$or: [{a: 2}, {b: 2}]}. std::unique_ptr<MatchExpression> postAssemblyFilter; - - // A cached copy of the union of the above two field sets which we expect to be frequently asked - // for. - OrderedPathSet allFields; }; /** diff --git a/src/mongo/db/query/util/set_util.h b/src/mongo/db/query/util/set_util.h new file mode 100644 index 00000000000..ab513eead71 --- /dev/null +++ b/src/mongo/db/query/util/set_util.h @@ -0,0 +1,46 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include <algorithm> +#include <set> + +#pragma once + +namespace mongo::set_util { + +template <typename T, typename Comparator> +std::set<T, Comparator> setUnion(const std::set<T, Comparator>& set1, + const std::set<T, Comparator>& set2) { + std::set<T, Comparator> out; + std::set_union( + set1.begin(), set1.end(), set2.begin(), set2.end(), std::inserter(out, out.begin())); + return out; +} + +} // namespace mongo::set_util |