diff options
author | David Storch <david.storch@10gen.com> | 2017-05-03 16:52:01 -0400 |
---|---|---|
committer | David Storch <david.storch@10gen.com> | 2017-05-05 16:20:29 -0400 |
commit | 9b1f1f9d1225711aa049725174e6ca88ea24e9c8 (patch) | |
tree | d4c46d5149939f0f2901131c85d99cdc070e736e | |
parent | 960e238d4ae9fadb02b8f9566a392d232d6fdf27 (diff) | |
download | mongo-9b1f1f9d1225711aa049725174e6ca88ea24e9c8.tar.gz |
SERVER-28952 fix distinct planning for multikey indices
(cherry picked from commit 566694eb312d7eaaf9e6e756e42cf863ad7f0f58)
-rw-r--r-- | jstests/core/distinct_multikey.js | 126 | ||||
-rw-r--r-- | src/mongo/db/query/get_executor.cpp | 31 |
2 files changed, 150 insertions, 7 deletions
diff --git a/jstests/core/distinct_multikey.js b/jstests/core/distinct_multikey.js new file mode 100644 index 00000000000..ac9e69ff00f --- /dev/null +++ b/jstests/core/distinct_multikey.js @@ -0,0 +1,126 @@ +/** + * Tests for distinct planning and execution in the presence of multikey indexes. + */ +(function() { + "use strict"; + + load("jstests/libs/analyze_plan.js"); + + const isMMAPv1 = jsTest.options().storageEngine === "mmapv1"; + + let coll = db.jstest_distinct_multikey; + coll.drop(); + assert.commandWorked(coll.createIndex({a: 1})); + assert.writeOK(coll.insert({a: [1, 2, 3]})); + assert.writeOK(coll.insert({a: [2, 3, 4]})); + assert.writeOK(coll.insert({a: [5, 6, 7]})); + + // Test that distinct can correctly use a multikey index when there is no predicate. + let result = coll.distinct("a"); + assert.eq([1, 2, 3, 4, 5, 6, 7], result.sort()); + let explain = coll.explain("queryPlanner").distinct("a"); + assert(planHasStage(explain.queryPlanner.winningPlan, "PROJECTION")); + assert(planHasStage(explain.queryPlanner.winningPlan, "DISTINCT_SCAN")); + + // Test that distinct can correctly use a multikey index when there is a predicate. This query + // should not be eligible for the distinct scan and cannot be covered. + result = coll.distinct("a", {a: 3}); + assert.eq([1, 2, 3, 4], result.sort()); + explain = coll.explain("queryPlanner").distinct("a", {a: 3}); + assert(planHasStage(explain.queryPlanner.winningPlan, "FETCH")); + assert(planHasStage(explain.queryPlanner.winningPlan, "IXSCAN")); + + // Test distinct over a dotted multikey field, with a predicate. + coll.drop(); + assert.commandWorked(coll.createIndex({"a.b": 1})); + assert.writeOK(coll.insert({a: {b: [1, 2, 3]}})); + assert.writeOK(coll.insert({a: {b: [2, 3, 4]}})); + + result = coll.distinct("a.b", {"a.b": 3}); + assert.eq([1, 2, 3, 4], result.sort()); + explain = coll.explain("queryPlanner").distinct("a.b", {"a.b": 3}); + assert(planHasStage(explain.queryPlanner.winningPlan, "FETCH")); + assert(planHasStage(explain.queryPlanner.winningPlan, "IXSCAN")); + + // Test that the distinct scan can be used when there is a predicate and the index is not + // multikey. + coll.drop(); + assert.commandWorked(coll.createIndex({a: 1})); + assert.writeOK(coll.insert({a: 1})); + assert.writeOK(coll.insert({a: 2})); + assert.writeOK(coll.insert({a: 3})); + + result = coll.distinct("a", {a: {$gte: 2}}); + assert.eq([2, 3], result.sort()); + explain = coll.explain("queryPlanner").distinct("a", {a: {$gte: 2}}); + assert(planHasStage(explain.queryPlanner.winningPlan, "PROJECTION")); + assert(planHasStage(explain.queryPlanner.winningPlan, "DISTINCT_SCAN")); + + // Test a distinct which can use a multikey index, where the field being distinct'ed is not + // multikey. + coll.drop(); + assert.commandWorked(coll.createIndex({a: 1, b: 1})); + assert.writeOK(coll.insert({a: 1, b: [2, 3]})); + assert.writeOK(coll.insert({a: 8, b: [3, 4]})); + assert.writeOK(coll.insert({a: 7, b: [4, 5]})); + + result = coll.distinct("a", {a: {$gte: 2}}); + assert.eq([7, 8], result.sort()); + explain = coll.explain("queryPlanner").distinct("a", {a: {$gte: 2}}); + if (isMMAPv1) { + // MMAPv1 does not support path-level multikey metadata tracking. It cannot use a distinct + // scan since it does not know that the "a" field is not multikey. + assert(planHasStage(explain.queryPlanner.winningPlan, "FETCH")); + assert(planHasStage(explain.queryPlanner.winningPlan, "IXSCAN")); + } else { + assert(planHasStage(explain.queryPlanner.winningPlan, "PROJECTION")); + assert(planHasStage(explain.queryPlanner.winningPlan, "DISTINCT_SCAN")); + } + + // Test distinct over a trailing multikey field. + result = coll.distinct("b", {a: {$gte: 2}}); + assert.eq([3, 4, 5], result.sort()); + explain = coll.explain("queryPlanner").distinct("b", {a: {$gte: 2}}); + assert(planHasStage(explain.queryPlanner.winningPlan, "FETCH")); + assert(planHasStage(explain.queryPlanner.winningPlan, "IXSCAN")); + + // Test distinct over a trailing non-multikey field, where the leading field is multikey. + coll.drop(); + assert.commandWorked(coll.createIndex({a: 1, b: 1})); + assert.writeOK(coll.insert({a: [2, 3], b: 1})); + assert.writeOK(coll.insert({a: [3, 4], b: 8})); + assert.writeOK(coll.insert({a: [3, 5], b: 7})); + + result = coll.distinct("b", {a: 3}); + assert.eq([1, 7, 8], result.sort()); + explain = coll.explain("queryPlanner").distinct("b", {a: 3}); + if (isMMAPv1) { + // MMAPv1 does not support path-level multikey metadata tracking. It cannot use a distinct + // scan since it does not know that the "a" field is not multikey. + assert(planHasStage(explain.queryPlanner.winningPlan, "FETCH")); + assert(planHasStage(explain.queryPlanner.winningPlan, "IXSCAN")); + } else { + assert(planHasStage(explain.queryPlanner.winningPlan, "PROJECTION")); + assert(planHasStage(explain.queryPlanner.winningPlan, "DISTINCT_SCAN")); + } + + // Test distinct over a trailing non-multikey dotted path where the leading field is multikey. + coll.drop(); + assert.commandWorked(coll.createIndex({a: 1, "b.c": 1})); + assert.writeOK(coll.insert({a: [2, 3], b: {c: 1}})); + assert.writeOK(coll.insert({a: [3, 4], b: {c: 8}})); + assert.writeOK(coll.insert({a: [3, 5], b: {c: 7}})); + + result = coll.distinct("b.c", {a: 3}); + assert.eq([1, 7, 8], result.sort()); + explain = coll.explain("queryPlanner").distinct("b.c", {a: 3}); + if (isMMAPv1) { + // MMAPv1 does not support path-level multikey metadata tracking. It cannot use a distinct + // scan since it does not know that the "a" field is not multikey. + assert(planHasStage(explain.queryPlanner.winningPlan, "FETCH")); + assert(planHasStage(explain.queryPlanner.winningPlan, "IXSCAN")); + } else { + assert(planHasStage(explain.queryPlanner.winningPlan, "PROJECTION")); + assert(planHasStage(explain.queryPlanner.winningPlan, "DISTINCT_SCAN")); + } +}()); diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index 3cc981a03a8..d6c0cc2bd8e 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -1354,21 +1354,38 @@ bool turnIxscanIntoDistinctIxscan(QuerySolution* soln, const string& field) { return false; } - // Make a new DistinctNode. We will swap this for the ixscan in the provided solution. - auto distinctNode = stdx::make_unique<DistinctNode>(indexScanNode->index); - distinctNode->direction = indexScanNode->direction; - distinctNode->bounds = indexScanNode->bounds; - // Figure out which field we're skipping to the next value of. - distinctNode->fieldNo = 0; + int fieldNo = 0; BSONObjIterator it(indexScanNode->index.keyPattern); while (it.more()) { if (field == it.next().fieldName()) { break; } - distinctNode->fieldNo++; + ++fieldNo; } + // We should not use a distinct scan if the field over which we are computing the distinct is + // multikey. + if (indexScanNode->index.multikey) { + const auto& multikeyPaths = indexScanNode->index.multikeyPaths; + if (multikeyPaths.empty()) { + // We don't have path-level multikey information available. + return false; + } + + if (!multikeyPaths[fieldNo].empty()) { + // Path-level multikey information indicates that the distinct key contains at least one + // array component. + return false; + } + } + + // Make a new DistinctNode. We will swap this for the ixscan in the provided solution. + auto distinctNode = stdx::make_unique<DistinctNode>(indexScanNode->index); + distinctNode->direction = indexScanNode->direction; + distinctNode->bounds = indexScanNode->bounds; + distinctNode->fieldNo = fieldNo; + if (fetchNode) { // If there is a fetch node, then there is no need for the projection. The fetch node should // become the new root, with the distinct as its child. The PROJECT=>FETCH=>IXSCAN tree |