summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Storch <david.storch@10gen.com>2017-05-03 16:52:01 -0400
committerDavid Storch <david.storch@10gen.com>2017-05-05 16:20:29 -0400
commit9b1f1f9d1225711aa049725174e6ca88ea24e9c8 (patch)
treed4c46d5149939f0f2901131c85d99cdc070e736e
parent960e238d4ae9fadb02b8f9566a392d232d6fdf27 (diff)
downloadmongo-9b1f1f9d1225711aa049725174e6ca88ea24e9c8.tar.gz
SERVER-28952 fix distinct planning for multikey indices
(cherry picked from commit 566694eb312d7eaaf9e6e756e42cf863ad7f0f58)
-rw-r--r--jstests/core/distinct_multikey.js126
-rw-r--r--src/mongo/db/query/get_executor.cpp31
2 files changed, 150 insertions, 7 deletions
diff --git a/jstests/core/distinct_multikey.js b/jstests/core/distinct_multikey.js
new file mode 100644
index 00000000000..ac9e69ff00f
--- /dev/null
+++ b/jstests/core/distinct_multikey.js
@@ -0,0 +1,126 @@
+/**
+ * Tests for distinct planning and execution in the presence of multikey indexes.
+ */
+(function() {
+ "use strict";
+
+ load("jstests/libs/analyze_plan.js");
+
+ const isMMAPv1 = jsTest.options().storageEngine === "mmapv1";
+
+ let coll = db.jstest_distinct_multikey;
+ coll.drop();
+ assert.commandWorked(coll.createIndex({a: 1}));
+ assert.writeOK(coll.insert({a: [1, 2, 3]}));
+ assert.writeOK(coll.insert({a: [2, 3, 4]}));
+ assert.writeOK(coll.insert({a: [5, 6, 7]}));
+
+ // Test that distinct can correctly use a multikey index when there is no predicate.
+ let result = coll.distinct("a");
+ assert.eq([1, 2, 3, 4, 5, 6, 7], result.sort());
+ let explain = coll.explain("queryPlanner").distinct("a");
+ assert(planHasStage(explain.queryPlanner.winningPlan, "PROJECTION"));
+ assert(planHasStage(explain.queryPlanner.winningPlan, "DISTINCT_SCAN"));
+
+ // Test that distinct can correctly use a multikey index when there is a predicate. This query
+ // should not be eligible for the distinct scan and cannot be covered.
+ result = coll.distinct("a", {a: 3});
+ assert.eq([1, 2, 3, 4], result.sort());
+ explain = coll.explain("queryPlanner").distinct("a", {a: 3});
+ assert(planHasStage(explain.queryPlanner.winningPlan, "FETCH"));
+ assert(planHasStage(explain.queryPlanner.winningPlan, "IXSCAN"));
+
+ // Test distinct over a dotted multikey field, with a predicate.
+ coll.drop();
+ assert.commandWorked(coll.createIndex({"a.b": 1}));
+ assert.writeOK(coll.insert({a: {b: [1, 2, 3]}}));
+ assert.writeOK(coll.insert({a: {b: [2, 3, 4]}}));
+
+ result = coll.distinct("a.b", {"a.b": 3});
+ assert.eq([1, 2, 3, 4], result.sort());
+ explain = coll.explain("queryPlanner").distinct("a.b", {"a.b": 3});
+ assert(planHasStage(explain.queryPlanner.winningPlan, "FETCH"));
+ assert(planHasStage(explain.queryPlanner.winningPlan, "IXSCAN"));
+
+ // Test that the distinct scan can be used when there is a predicate and the index is not
+ // multikey.
+ coll.drop();
+ assert.commandWorked(coll.createIndex({a: 1}));
+ assert.writeOK(coll.insert({a: 1}));
+ assert.writeOK(coll.insert({a: 2}));
+ assert.writeOK(coll.insert({a: 3}));
+
+ result = coll.distinct("a", {a: {$gte: 2}});
+ assert.eq([2, 3], result.sort());
+ explain = coll.explain("queryPlanner").distinct("a", {a: {$gte: 2}});
+ assert(planHasStage(explain.queryPlanner.winningPlan, "PROJECTION"));
+ assert(planHasStage(explain.queryPlanner.winningPlan, "DISTINCT_SCAN"));
+
+ // Test a distinct which can use a multikey index, where the field being distinct'ed is not
+ // multikey.
+ coll.drop();
+ assert.commandWorked(coll.createIndex({a: 1, b: 1}));
+ assert.writeOK(coll.insert({a: 1, b: [2, 3]}));
+ assert.writeOK(coll.insert({a: 8, b: [3, 4]}));
+ assert.writeOK(coll.insert({a: 7, b: [4, 5]}));
+
+ result = coll.distinct("a", {a: {$gte: 2}});
+ assert.eq([7, 8], result.sort());
+ explain = coll.explain("queryPlanner").distinct("a", {a: {$gte: 2}});
+ if (isMMAPv1) {
+ // MMAPv1 does not support path-level multikey metadata tracking. It cannot use a distinct
+ // scan since it does not know that the "a" field is not multikey.
+ assert(planHasStage(explain.queryPlanner.winningPlan, "FETCH"));
+ assert(planHasStage(explain.queryPlanner.winningPlan, "IXSCAN"));
+ } else {
+ assert(planHasStage(explain.queryPlanner.winningPlan, "PROJECTION"));
+ assert(planHasStage(explain.queryPlanner.winningPlan, "DISTINCT_SCAN"));
+ }
+
+ // Test distinct over a trailing multikey field.
+ result = coll.distinct("b", {a: {$gte: 2}});
+ assert.eq([3, 4, 5], result.sort());
+ explain = coll.explain("queryPlanner").distinct("b", {a: {$gte: 2}});
+ assert(planHasStage(explain.queryPlanner.winningPlan, "FETCH"));
+ assert(planHasStage(explain.queryPlanner.winningPlan, "IXSCAN"));
+
+ // Test distinct over a trailing non-multikey field, where the leading field is multikey.
+ coll.drop();
+ assert.commandWorked(coll.createIndex({a: 1, b: 1}));
+ assert.writeOK(coll.insert({a: [2, 3], b: 1}));
+ assert.writeOK(coll.insert({a: [3, 4], b: 8}));
+ assert.writeOK(coll.insert({a: [3, 5], b: 7}));
+
+ result = coll.distinct("b", {a: 3});
+ assert.eq([1, 7, 8], result.sort());
+ explain = coll.explain("queryPlanner").distinct("b", {a: 3});
+ if (isMMAPv1) {
+ // MMAPv1 does not support path-level multikey metadata tracking. It cannot use a distinct
+ // scan since it does not know that the "a" field is not multikey.
+ assert(planHasStage(explain.queryPlanner.winningPlan, "FETCH"));
+ assert(planHasStage(explain.queryPlanner.winningPlan, "IXSCAN"));
+ } else {
+ assert(planHasStage(explain.queryPlanner.winningPlan, "PROJECTION"));
+ assert(planHasStage(explain.queryPlanner.winningPlan, "DISTINCT_SCAN"));
+ }
+
+ // Test distinct over a trailing non-multikey dotted path where the leading field is multikey.
+ coll.drop();
+ assert.commandWorked(coll.createIndex({a: 1, "b.c": 1}));
+ assert.writeOK(coll.insert({a: [2, 3], b: {c: 1}}));
+ assert.writeOK(coll.insert({a: [3, 4], b: {c: 8}}));
+ assert.writeOK(coll.insert({a: [3, 5], b: {c: 7}}));
+
+ result = coll.distinct("b.c", {a: 3});
+ assert.eq([1, 7, 8], result.sort());
+ explain = coll.explain("queryPlanner").distinct("b.c", {a: 3});
+ if (isMMAPv1) {
+ // MMAPv1 does not support path-level multikey metadata tracking. It cannot use a distinct
+ // scan since it does not know that the "a" field is not multikey.
+ assert(planHasStage(explain.queryPlanner.winningPlan, "FETCH"));
+ assert(planHasStage(explain.queryPlanner.winningPlan, "IXSCAN"));
+ } else {
+ assert(planHasStage(explain.queryPlanner.winningPlan, "PROJECTION"));
+ assert(planHasStage(explain.queryPlanner.winningPlan, "DISTINCT_SCAN"));
+ }
+}());
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
index 3cc981a03a8..d6c0cc2bd8e 100644
--- a/src/mongo/db/query/get_executor.cpp
+++ b/src/mongo/db/query/get_executor.cpp
@@ -1354,21 +1354,38 @@ bool turnIxscanIntoDistinctIxscan(QuerySolution* soln, const string& field) {
return false;
}
- // Make a new DistinctNode. We will swap this for the ixscan in the provided solution.
- auto distinctNode = stdx::make_unique<DistinctNode>(indexScanNode->index);
- distinctNode->direction = indexScanNode->direction;
- distinctNode->bounds = indexScanNode->bounds;
-
// Figure out which field we're skipping to the next value of.
- distinctNode->fieldNo = 0;
+ int fieldNo = 0;
BSONObjIterator it(indexScanNode->index.keyPattern);
while (it.more()) {
if (field == it.next().fieldName()) {
break;
}
- distinctNode->fieldNo++;
+ ++fieldNo;
}
+ // We should not use a distinct scan if the field over which we are computing the distinct is
+ // multikey.
+ if (indexScanNode->index.multikey) {
+ const auto& multikeyPaths = indexScanNode->index.multikeyPaths;
+ if (multikeyPaths.empty()) {
+ // We don't have path-level multikey information available.
+ return false;
+ }
+
+ if (!multikeyPaths[fieldNo].empty()) {
+ // Path-level multikey information indicates that the distinct key contains at least one
+ // array component.
+ return false;
+ }
+ }
+
+ // Make a new DistinctNode. We will swap this for the ixscan in the provided solution.
+ auto distinctNode = stdx::make_unique<DistinctNode>(indexScanNode->index);
+ distinctNode->direction = indexScanNode->direction;
+ distinctNode->bounds = indexScanNode->bounds;
+ distinctNode->fieldNo = fieldNo;
+
if (fetchNode) {
// If there is a fetch node, then there is no need for the projection. The fetch node should
// become the new root, with the distinct as its child. The PROJECT=>FETCH=>IXSCAN tree