-rw-r--r--  jstests/noPassthroughWithMongod/all_paths_index_multikey.js | 146
-rw-r--r--  src/mongo/db/catalog/collection_info_cache_impl.cpp | 2
-rw-r--r--  src/mongo/db/exec/projection_exec_agg.cpp | 17
-rw-r--r--  src/mongo/db/exec/projection_exec_agg.h | 3
-rw-r--r--  src/mongo/db/exec/projection_exec_agg_test.cpp | 19
-rw-r--r--  src/mongo/db/field_ref.cpp | 35
-rw-r--r--  src/mongo/db/field_ref.h | 29
-rw-r--r--  src/mongo/db/field_ref_test.cpp | 37
-rw-r--r--  src/mongo/db/query/SConscript | 1
-rw-r--r--  src/mongo/db/query/index_bounds_builder.cpp | 25
-rw-r--r--  src/mongo/db/query/index_bounds_builder.h | 10
-rw-r--r--  src/mongo/db/query/planner_access.cpp | 120
-rw-r--r--  src/mongo/db/query/planner_access.h | 2
-rw-r--r--  src/mongo/db/query/planner_allpaths_helpers.cpp | 249
-rw-r--r--  src/mongo/db/query/planner_allpaths_helpers.h | 101
-rw-r--r--  src/mongo/db/query/planner_ixselect.cpp | 36
-rw-r--r--  src/mongo/db/query/query_planner_all_paths_index_test.cpp | 367
17 files changed, 1099 insertions(+), 100 deletions(-)
diff --git a/jstests/noPassthroughWithMongod/all_paths_index_multikey.js b/jstests/noPassthroughWithMongod/all_paths_index_multikey.js
index 81ba7aa7b62..375bc704ef4 100644
--- a/jstests/noPassthroughWithMongod/all_paths_index_multikey.js
+++ b/jstests/noPassthroughWithMongod/all_paths_index_multikey.js
@@ -69,25 +69,38 @@
for (let op of operationList) {
for (let path of pathList) {
const query = {[path]: op.expression};
- // Explain the query, and determine whether an indexed solution is available.
- const explain = assert.commandWorked(coll.find(query).explain());
- const ixScans = getPlanStages(explain.queryPlanner.winningPlan, "IXSCAN");
- // If we expect the current path to have been excluded based on the $** keyPattern
- // or projection, confirm that no indexed solution was found.
- if (!expectedPaths.includes(path)) {
- assert.eq(ixScans.length, 0, path);
- continue;
- }
- // Verify that the winning plan uses the $** index with the expected path.
- assert.eq(ixScans.length, 1);
- assert.docEq(ixScans[0].keyPattern, {"$_path": 1, [path]: 1});
- // Verify that the results obtained from the $** index are identical to a COLLSCAN.
- assertArrayEq(coll.find(query).toArray(),
- coll.find(query).hint({$natural: 1}).toArray());
+ assertAllPathsQuery(query, expectedPaths.includes(path) ? path : null);
}
}
}
+ // Runs a single allPaths query test. If 'expectedPath' is non-null, verifies that there is an
+ // indexed solution that uses the $** index with the given path string. If 'expectedPath' is
+ // null, verifies that no indexed solution was found. If 'explainStats' is non-empty, verifies
+ // that the query's explain output reflects the given stats.
+ function assertAllPathsQuery(query, expectedPath, explainStats = {}) {
+ // Explain the query, and determine whether an indexed solution is available.
+ const explainOutput = coll.find(query).explain("executionStats");
+ const ixScans = getPlanStages(explainOutput.queryPlanner.winningPlan, "IXSCAN");
+ // If we expect the current path to have been excluded based on the $** keyPattern
+ // or projection, confirm that no indexed solution was found.
+ if (!expectedPath) {
+ assert.eq(ixScans.length, 0);
+ return;
+ }
+ // Verify that the winning plan uses the $** index with the expected path.
+ assert.eq(ixScans.length, 1);
+ assert.docEq(ixScans[0].keyPattern, {"$_path": 1, [expectedPath]: 1});
+ // Verify that the results obtained from the $** index are identical to a COLLSCAN.
+ assertArrayEq(coll.find(query).toArray(), coll.find(query).hint({$natural: 1}).toArray());
+ // Verify that the explain output reflects the given 'explainStats'.
+ for (let stat in explainStats) {
+ assert.eq(explainStats[stat],
+ stat.split('.').reduce((obj, i) => obj[i], explainOutput),
+ explainOutput);
+ }
+ }
+
// Required in order to build $** indexes.
assert.commandWorked(
db.adminCommand({setParameter: 1, internalQueryAllowAllPathsIndexes: true}));
@@ -128,6 +141,109 @@
coll.find({"b.c.d": {$elemMatch: {"e.f": {$gt: 0, $lt: 9}}}})
.hint({$natural: 1})
.itcount());
+
+ // Fieldname-or-array-index query tests.
+ assert(coll.drop());
+ assert.commandWorked(coll.createIndex({"$**": 1}));
+
+ // Insert some documents that exhibit a mix of numeric fieldnames and array indices.
+ assert.commandWorked(coll.insert({_id: 1, a: [{b: [{c: 1}]}]}));
+ assert.commandWorked(coll.insert({_id: 2, a: [{b: [{c: 0}, {c: 1}]}]}));
+ assert.commandWorked(coll.insert({_id: 3, a: {'0': [{b: {'1': {c: 1}}}, {d: 1}]}}));
+ assert.commandWorked(coll.insert({_id: 4, a: [{b: [{1: {c: 1}}]}]}));
+ assert.commandWorked(
+ coll.insert({_id: 5, a: [{b: [{'1': {c: {'2': {d: [0, 1, 2, 3, {e: 1}]}}}}]}]}));
+
+ /*
+ * Multikey Metadata Keys:
+ * {'': 1, '': 'a'}
+ * {'': 1, '': 'a.0'}
+ * {'': 1, '': 'a.b'}
+ * {'': 1, '': 'a.b.1.c.2.d'}
+ * Keys:
+ * {'': 'a.b.c', '': 1} // _id: 1, a,b multikey
+ * {'': 'a.b.c', '': 0} // _id: 2, a,b multikey
+ * {'': 'a.b.c', '': 1} // _id: 2, a,b multikey
+ * {'': 'a.0.b.1.c', '': 1} // _id: 3, '0, 1' are fieldnames, a.0 multikey
+ * {'': 'a.0.d', '': 1} // _id: 3, '0' is fieldname, a.0 multikey
+ * {'': 'a.b.1.c', '': 1} // _id: 4, '1' is fieldname, a,b multikey
+ * {'': 'a.b.1.c.2.d', '': 0} // _id: 5, a,b,a.b.1.c.2.d multikey, '1' is fieldname
+ * {'': 'a.b.1.c.2.d', '': 1} // _id: 5
+ * {'': 'a.b.1.c.2.d', '': 2} // _id: 5
+ * {'': 'a.b.1.c.2.d', '': 3} // _id: 5
+ * {'': 'a.b.1.c.2.d.e', '': 1} // _id: 5
+ */
+
+ // Test that a query with multiple numeric path components returns all relevant documents,
+ // whether the numeric path component refers to a fieldname or array index in each doc:
+ //
+ // _id:1 will be captured by the special fieldname-or-array-index bounds 'a.b.c', but will
+ // be filtered out by the INEXACT_FETCH since it has no array index or fieldname 'b.1'.
+ // _id:2 will match both 'a.0' and 'b.1' by array index.
+ // _id:3 will match both 'a.0' and 'b.1' by fieldname.
+ // _id:4 will match 'a.0' by array index and 'b.1' by fieldname.
+ // _id:5 is not captured by the special fieldname-or-array-index bounds.
+ //
+ // We examine the solution's 'nReturned' versus 'totalDocsExamined' to confirm this.
+ // totalDocsExamined: [_id:1, _id:2, _id:3, _id:4], nReturned: [_id:2, _id:3, _id:4]
+ assertAllPathsQuery({'a.0.b.1.c': 1},
+ 'a.0.b.1.c',
+ {'executionStats.nReturned': 3, 'executionStats.totalDocsExamined': 4});
+
+ // Test that we can query a specific field of an array whose fieldname is itself numeric.
+ assertAllPathsQuery({'a.0.1.d': 1},
+ 'a.0.1.d',
+ {'executionStats.nReturned': 1, 'executionStats.totalDocsExamined': 1});
+
+ // Test that we can query a primitive value at a specific array index.
+ assertAllPathsQuery({'a.0.b.1.c.2.d.3': 3},
+ 'a.0.b.1.c.2.d.3',
+ {'executionStats.nReturned': 1, 'executionStats.totalDocsExamined': 1});
+
+ // Test that a $** index can't be used for a query through more than 8 nested array indices.
+ assert.commandWorked(
+ coll.insert({_id: 6, a: [{b: [{c: [{d: [{e: [{f: [{g: [{h: [{i: [1]}]}]}]}]}]}]}]}]}));
+ // We can query up to a depth of 8 arrays via specific indices, but not through 9 or more.
+ assertAllPathsQuery({'a.0.b.0.c.0.d.0.e.0.f.0.g.0.h.0.i': 1},
+ 'a.0.b.0.c.0.d.0.e.0.f.0.g.0.h.0.i');
+ assertAllPathsQuery({'a.0.b.0.c.0.d.0.e.0.f.0.g.0.h.0.i.0': 1}, null);
+
+ // Test that fieldname-or-array-index queries do not inappropriately trim predicates; that
+ // is, all predicates on the field are added to a FETCH filter above the IXSCAN.
+ assert(coll.drop());
+ assert.commandWorked(coll.createIndex({"$**": 1}));
+
+ assert.commandWorked(coll.insert({_id: 1, a: [0, 1, 2]}));
+ assert.commandWorked(coll.insert({_id: 2, a: [1, 2, 3]}));
+ assert.commandWorked(coll.insert({_id: 3, a: [2, 3, 4], b: [5, 6, 7]}));
+ assert.commandWorked(coll.insert({_id: 4, a: [3, 4, 5], b: [6, 7, 8], c: {'0': 9}}));
+ assert.commandWorked(coll.insert({_id: 5, a: [4, 5, 6], b: [7, 8, 9], c: {'0': 10}}));
+ assert.commandWorked(coll.insert({_id: 6, a: [5, 6, 7], b: [8, 9, 10], c: {'0': 11}}));
+
+ assertAllPathsQuery({"a.0": {$gt: 1, $lt: 4}}, 'a.0', {'executionStats.nReturned': 2});
+ assertAllPathsQuery({"a.1": {$gte: 1, $lte: 4}}, 'a.1', {'executionStats.nReturned': 4});
+ assertAllPathsQuery({"b.2": {$in: [5, 9]}}, 'b.2', {'executionStats.nReturned': 1});
+ assertAllPathsQuery({"c.0": {$in: [10, 11]}}, 'c.0', {'executionStats.nReturned': 2});
+
+ // Test that the $** index doesn't trim predicates when planning across multiple nested
+ // $and/$or expressions on various fieldname-or-array-index paths.
+ const trimTestQuery = {
+ $or: [
+ {"a.0": {$gte: 0, $lt: 3}, "a.1": {$in: [2, 3, 4]}},
+ {"b.1": {$gt: 6, $lte: 9}, "c.0": {$gt: 9, $lt: 12}}
+ ]
+ };
+ const trimTestExplain = coll.find(trimTestQuery).explain("executionStats");
+ // Verify that the expected number of documents were matched, and the $** index was used.
+ // Matched documents: [_id:2, _id:3, _id:5, _id:6]
+ assert.eq(trimTestExplain.executionStats.nReturned, 4);
+ const trimTestIxScans = getPlanStages(trimTestExplain.queryPlanner.winningPlan, "IXSCAN");
+ for (let ixScan of trimTestIxScans) {
+ assert.eq(ixScan.keyPattern["$_path"], 1);
+ }
+ // Finally, confirm that a collection scan produces the same results.
+ assertArrayEq(coll.find(trimTestQuery).toArray(),
+ coll.find(trimTestQuery).hint({$natural: 1}).toArray());
} finally {
// Disable $** indexes once the tests have either completed or failed.
assert.commandWorked(
diff --git a/src/mongo/db/catalog/collection_info_cache_impl.cpp b/src/mongo/db/catalog/collection_info_cache_impl.cpp
index eed5b2843e3..38adae1cf13 100644
--- a/src/mongo/db/catalog/collection_info_cache_impl.cpp
+++ b/src/mongo/db/catalog/collection_info_cache_impl.cpp
@@ -102,7 +102,7 @@ void CollectionInfoCacheImpl::computeIndexKeys(OperationContext* opCtx) {
// If a subtree was specified in the keyPattern, or if an inclusion projection is
// present, then we need only index the path(s) preserved by the projection.
for (const auto& path : pathProj->getExhaustivePaths()) {
- _indexedPaths.addPath(path);
+ _indexedPaths.addPath(path.dottedField());
}
}
} else if (descriptor->getAccessMethodName() == IndexNames::TEXT) {
diff --git a/src/mongo/db/exec/projection_exec_agg.cpp b/src/mongo/db/exec/projection_exec_agg.cpp
index 7c292ac4a81..ff2d2d062cb 100644
--- a/src/mongo/db/exec/projection_exec_agg.cpp
+++ b/src/mongo/db/exec/projection_exec_agg.cpp
@@ -71,12 +71,18 @@ public:
_projection = ParsedAggregationProjection::create(
expCtx, projSpec, idPolicy, recursionPolicy, ProjectionParseMode::kBanComputedFields);
+
+ // For an inclusion, record the exhaustive set of fields retained by the projection.
+ if (getType() == ProjectionType::kInclusionProjection) {
+ DepsTracker depsTracker;
+ _projection->addDependencies(&depsTracker);
+ for (auto&& field : depsTracker.fields)
+ _exhaustivePaths.insert(FieldRef{field});
+ }
}
- std::set<std::string> getExhaustivePaths() const {
- DepsTracker depsTracker;
- _projection->addDependencies(&depsTracker);
- return depsTracker.fields;
+ const std::set<FieldRef>& getExhaustivePaths() const {
+ return _exhaustivePaths;
}
ProjectionType getType() const {
@@ -116,6 +122,7 @@ private:
}
std::unique_ptr<ParsedAggregationProjection> _projection;
+ std::set<FieldRef> _exhaustivePaths;
};
// ProjectionExecAgg's constructor and destructor are defined here, at a point where the
@@ -151,7 +158,7 @@ stdx::unordered_set<std::string> ProjectionExecAgg::applyProjectionToFields(
return _exec->applyProjectionToFields(fields);
}
-std::set<std::string> ProjectionExecAgg::getExhaustivePaths() const {
+const std::set<FieldRef>& ProjectionExecAgg::getExhaustivePaths() const {
return _exec->getExhaustivePaths();
}
} // namespace mongo
diff --git a/src/mongo/db/exec/projection_exec_agg.h b/src/mongo/db/exec/projection_exec_agg.h
index 90ad2b7ef55..9b3e9884397 100644
--- a/src/mongo/db/exec/projection_exec_agg.h
+++ b/src/mongo/db/exec/projection_exec_agg.h
@@ -31,6 +31,7 @@
#include <memory>
#include "mongo/bson/bsonobj.h"
+#include "mongo/db/field_ref.h"
namespace mongo {
@@ -78,7 +79,7 @@ public:
* empty set if the exhaustive set cannot be determined. An inclusion will always produce an
* exhaustive set; an exclusion will always produce an empty set.
*/
- std::set<std::string> getExhaustivePaths() const;
+ const std::set<FieldRef>& getExhaustivePaths() const;
ProjectionType getType() const;
diff --git a/src/mongo/db/exec/projection_exec_agg_test.cpp b/src/mongo/db/exec/projection_exec_agg_test.cpp
index 5105677f3f0..1aaa0668954 100644
--- a/src/mongo/db/exec/projection_exec_agg_test.cpp
+++ b/src/mongo/db/exec/projection_exec_agg_test.cpp
@@ -61,6 +61,15 @@ std::unique_ptr<ProjectionExecAgg> makeProjectionWithDefaultIdExclusionAndNested
projSpec, DefaultIdPolicy::kExcludeId, ArrayRecursionPolicy::kRecurseNestedArrays);
}
+std::set<FieldRef> toFieldRefs(const std::set<std::string>& stringPaths) {
+ std::set<FieldRef> fieldRefs;
+ std::transform(stringPaths.begin(),
+ stringPaths.end(),
+ std::inserter(fieldRefs, fieldRefs.begin()),
+ [](const auto& path) { return FieldRef(path); });
+ return fieldRefs;
+}
+
//
// Error cases.
//
@@ -190,7 +199,7 @@ TEST(ProjectionExecAggTests, InclusionFieldPathsWithImplicitIdInclusion) {
// Extract the exhaustive set of paths that will be preserved by the projection.
auto exhaustivePaths = parsedProject->getExhaustivePaths();
- std::set<std::string> expectedPaths{"_id", "a.b.c", "d"};
+ std::set<FieldRef> expectedPaths = toFieldRefs({"_id", "a.b.c", "d"});
// Verify that the exhaustive set of paths is as expected.
ASSERT(exhaustivePaths == expectedPaths);
@@ -203,7 +212,7 @@ TEST(ProjectionExecAggTests, InclusionFieldPathsWithExplicitIdInclusion) {
// Extract the exhaustive set of paths that will be preserved by the projection.
auto exhaustivePaths = parsedProject->getExhaustivePaths();
- std::set<std::string> expectedPaths{"_id", "a.b.c", "d"};
+ std::set<FieldRef> expectedPaths = toFieldRefs({"_id", "a.b.c", "d"});
// Verify that the exhaustive set of paths is as expected.
ASSERT(exhaustivePaths == expectedPaths);
@@ -216,7 +225,7 @@ TEST(ProjectionExecAggTests, InclusionFieldPathsWithExplicitIdInclusionIdOnly) {
// Extract the exhaustive set of paths that will be preserved by the projection.
auto exhaustivePaths = parsedProject->getExhaustivePaths();
- std::set<std::string> expectedPaths{"_id"};
+ std::set<FieldRef> expectedPaths = toFieldRefs({"_id"});
// Verify that the exhaustive set of paths is as expected.
ASSERT(exhaustivePaths == expectedPaths);
@@ -229,7 +238,7 @@ TEST(ProjectionExecAggTests, InclusionFieldPathsWithImplicitIdExclusion) {
// Extract the exhaustive set of paths that will be preserved by the projection.
auto exhaustivePaths = parsedProject->getExhaustivePaths();
- std::set<std::string> expectedPaths{"a.b.c", "d"};
+ std::set<FieldRef> expectedPaths = toFieldRefs({"a.b.c", "d"});
// Verify that the exhaustive set of paths is as expected.
ASSERT(exhaustivePaths == expectedPaths);
@@ -242,7 +251,7 @@ TEST(ProjectionExecAggTests, InclusionFieldPathsWithExplicitIdExclusion) {
// Extract the exhaustive set of paths that will be preserved by the projection.
auto exhaustivePaths = parsedProject->getExhaustivePaths();
- std::set<std::string> expectedPaths{"a.b.c", "d"};
+ std::set<FieldRef> expectedPaths = toFieldRefs({"a.b.c", "d"});
// Verify that the exhaustive set of paths is as expected.
ASSERT(exhaustivePaths == expectedPaths);
diff --git a/src/mongo/db/field_ref.cpp b/src/mongo/db/field_ref.cpp
index 2e905090307..1bce17b20e7 100644
--- a/src/mongo/db/field_ref.cpp
+++ b/src/mongo/db/field_ref.cpp
@@ -26,9 +26,12 @@
* it in the license file.
*/
+#include "mongo/platform/basic.h"
+
#include "mongo/db/field_ref.h"
-#include <algorithm> // for min
+#include <algorithm>
+#include <cctype>
#include "mongo/util/assert_util.h"
@@ -226,6 +229,10 @@ bool FieldRef::isPrefixOf(const FieldRef& other) const {
return common == _size && other._size > common;
}
+bool FieldRef::isPrefixOfOrEqualTo(const FieldRef& other) const {
+ return isPrefixOf(other) || *this == other;
+}
+
size_t FieldRef::commonPrefixSize(const FieldRef& other) const {
if (_size == 0 || other._size == 0) {
return 0;
@@ -244,6 +251,32 @@ size_t FieldRef::commonPrefixSize(const FieldRef& other) const {
return prefixSize;
}
+bool FieldRef::isNumericPathComponent(StringData component) {
+ return !component.empty() && !(component.size() > 1 && component[0] == '0') &&
+ std::all_of(component.begin(), component.end(), [](auto c) { return std::isdigit(c); });
+}
+
+bool FieldRef::isNumericPathComponent(size_t i) const {
+ return FieldRef::isNumericPathComponent(getPart(i));
+}
+
+bool FieldRef::hasNumericPathComponents() const {
+ for (size_t i = 0; i < numParts(); ++i) {
+ if (isNumericPathComponent(i))
+ return true;
+ }
+ return false;
+}
+
+std::set<size_t> FieldRef::getNumericPathComponents(size_t startPart) const {
+ std::set<size_t> numericPathComponents;
+ for (auto i = startPart; i < numParts(); ++i) {
+ if (isNumericPathComponent(i))
+ numericPathComponents.insert(i);
+ }
+ return numericPathComponents;
+}
+
StringData FieldRef::dottedField(size_t offset) const {
return dottedSubstring(offset, numParts());
}
diff --git a/src/mongo/db/field_ref.h b/src/mongo/db/field_ref.h
index ad354a0f39c..96368b8f547 100644
--- a/src/mongo/db/field_ref.h
+++ b/src/mongo/db/field_ref.h
@@ -30,6 +30,7 @@
#include <boost/optional.hpp>
#include <iosfwd>
+#include <set>
#include <string>
#include <vector>
@@ -50,6 +51,12 @@ namespace mongo {
*/
class FieldRef {
public:
+ /**
+ * Returns true if the argument is a numeric string which is eligible to act as the key name for
+ * an element in a BSON array; in other words, the string matches the regex ^(0|[1-9]+[0-9]*)$.
+ */
+ static bool isNumericPathComponent(StringData component);
+
FieldRef() = default;
explicit FieldRef(StringData path);
@@ -94,11 +101,33 @@ public:
bool isPrefixOf(const FieldRef& other) const;
/**
+ * Returns true if 'this' FieldRef is a prefix of 'other', or if both paths are identical.
+ */
+ bool isPrefixOfOrEqualTo(const FieldRef& other) const;
+
+ /**
* Returns the number of field parts in the prefix that 'this' and 'other' share.
*/
size_t commonPrefixSize(const FieldRef& other) const;
/**
+ * Returns true if the specified path component is a numeric string which is eligible to act as
+ * the key name for an element in a BSON array; in other words, the fieldname matches the regex
+ * ^(0|[1-9]+[0-9]*)$.
+ */
+ bool isNumericPathComponent(size_t i) const;
+
+ /**
+ * Returns true if this FieldRef has any numeric path components.
+ */
+ bool hasNumericPathComponents() const;
+
+ /**
+ * Returns the positions of all numeric path components, starting from the given position.
+ */
+ std::set<size_t> getNumericPathComponents(size_t startPart = 0) const;
+
+ /**
* Returns a StringData of the full dotted field in its current state (i.e., some parts may
* have been replaced since the parse() call).
*/
diff --git a/src/mongo/db/field_ref_test.cpp b/src/mongo/db/field_ref_test.cpp
index 654444bc472..616b8a4d5cc 100644
--- a/src/mongo/db/field_ref_test.cpp
+++ b/src/mongo/db/field_ref_test.cpp
@@ -166,25 +166,39 @@ TEST(Prefix, Normal) {
prefix.parse("a.b");
ASSERT_TRUE(prefix.isPrefixOf(base));
+ ASSERT_TRUE(prefix.isPrefixOfOrEqualTo(base));
prefix.parse("a");
ASSERT_TRUE(prefix.isPrefixOf(base));
+ ASSERT_TRUE(prefix.isPrefixOfOrEqualTo(base));
+
+ prefix.parse("a.b.c");
+ ASSERT_FALSE(prefix.isPrefixOf(base));
+ ASSERT_TRUE(prefix.isPrefixOfOrEqualTo(base));
}
TEST(Prefix, Dotted) {
FieldRef prefix("a.0"), base("a.0.c");
ASSERT_TRUE(prefix.isPrefixOf(base));
+ ASSERT_TRUE(prefix.isPrefixOfOrEqualTo(base));
+
+ prefix.parse("a.0.c");
+ ASSERT_FALSE(prefix.isPrefixOf(base));
+ ASSERT_TRUE(prefix.isPrefixOfOrEqualTo(base));
}
TEST(Prefix, NoPrefixes) {
FieldRef prefix("a.b"), base("a.b");
ASSERT_FALSE(prefix.isPrefixOf(base));
+ ASSERT_TRUE(prefix.isPrefixOfOrEqualTo(base));
base.parse("a");
ASSERT_FALSE(prefix.isPrefixOf(base));
+ ASSERT_FALSE(prefix.isPrefixOfOrEqualTo(base));
base.parse("b");
ASSERT_FALSE(prefix.isPrefixOf(base));
+ ASSERT_FALSE(prefix.isPrefixOfOrEqualTo(base));
}
TEST(Prefix, EmptyBase) {
@@ -837,4 +851,27 @@ TEST(RemoveLastPartLong, FieldRefFromCopyAssignmentIsADeepCopy) {
ASSERT_EQ(other, FieldRef("a.b.c"));
}
+TEST(NumericPathComponents, CanIdentifyNumericPathComponents) {
+ FieldRef path("a.0.b.1.c");
+ ASSERT(!path.isNumericPathComponent(0));
+ ASSERT(path.isNumericPathComponent(1));
+ ASSERT(!path.isNumericPathComponent(2));
+ ASSERT(path.isNumericPathComponent(3));
+ ASSERT(!path.isNumericPathComponent(4));
+}
+
+TEST(NumericPathComponents, CanObtainAllNumericPathComponents) {
+ FieldRef path("a.0.b.1.c.2.d");
+ std::set<size_t> expectedComponents{size_t(1), size_t(3), size_t(5)};
+ auto numericPathComponents = path.getNumericPathComponents();
+ ASSERT(numericPathComponents == expectedComponents);
+}
+
+TEST(NumericPathComponents, FieldsWithLeadingZeroesAreNotConsideredNumeric) {
+ FieldRef path("a.0.b.01.c.2.d");
+ std::set<size_t> expectedComponents{size_t(1), size_t(5)};
+ auto numericPathComponents = path.getNumericPathComponents();
+ ASSERT(numericPathComponents == expectedComponents);
+}
+
} // namespace
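(Aside: for readers skimming the FieldRef changes above, here is a self-contained sketch of the numeric-component rule that isNumericPathComponent() and the new tests exercise. It uses only the standard library; the free function name and the sample strings are illustrative stand-ins, not part of the patch.)

#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>

// A component may act as an array index only if it matches ^(0|[1-9][0-9]*)$:
// non-empty, all digits, and no leading zero unless the component is exactly "0".
bool isNumericPathComponent(const std::string& component) {
    if (component.empty())
        return false;
    if (component.size() > 1 && component[0] == '0')
        return false;
    return std::all_of(component.begin(), component.end(), [](unsigned char c) {
        return std::isdigit(c) != 0;
    });
}

int main() {
    for (const std::string& s : {"0", "1", "01", "10", "2a", ""}) {
        std::cout << "'" << s << "': " << (isNumericPathComponent(s) ? "numeric" : "not numeric")
                  << "\n";  // expect: "0", "1" and "10" numeric; "01", "2a" and "" not.
    }
    return 0;
}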
diff --git a/src/mongo/db/query/SConscript b/src/mongo/db/query/SConscript
index 4975a56e8d4..3ce7ee94e67 100644
--- a/src/mongo/db/query/SConscript
+++ b/src/mongo/db/query/SConscript
@@ -24,6 +24,7 @@ env.Library(
"plan_cache_indexability.cpp",
"plan_enumerator.cpp",
"planner_access.cpp",
+ "planner_allpaths_helpers.cpp",
"planner_analysis.cpp",
"planner_ixselect.cpp",
"query_planner.cpp",
diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp
index 026efa45ba8..13d45890e72 100644
--- a/src/mongo/db/query/index_bounds_builder.cpp
+++ b/src/mongo/db/query/index_bounds_builder.cpp
@@ -45,6 +45,7 @@
#include "mongo/db/query/expression_index.h"
#include "mongo/db/query/expression_index_knobs.h"
#include "mongo/db/query/indexability.h"
+#include "mongo/db/query/planner_allpaths_helpers.h"
#include "mongo/db/query/planner_ixselect.h"
#include "mongo/db/query/query_knobs.h"
#include "mongo/util/log.h"
@@ -56,6 +57,8 @@ namespace mongo {
namespace {
+namespace app = ::mongo::all_paths_planning;
+
// Helper for checking that an OIL "appears" to be ascending given one interval.
void assertOILIsAscendingLocally(const vector<Interval>& intervals, size_t idx) {
// Each individual interval being examined should be ascending or none.
@@ -335,6 +338,22 @@ void IndexBoundsBuilder::translate(const MatchExpression* expr,
const IndexEntry& index,
OrderedIntervalList* oilOut,
BoundsTightness* tightnessOut) {
+ // Fill out the bounds and tightness appropriate for the given predicate.
+ _translatePredicate(expr, elt, index, oilOut, tightnessOut);
+
+ // Under certain circumstances, queries on a $** index require that the bounds' tightness be
+ // adjusted regardless of the predicate. Having filled out the initial bounds, we apply any
+ // necessary changes to the tightness here.
+ if (index.type == IndexType::INDEX_ALLPATHS) {
+ *tightnessOut = app::applyAllPathsIndexScanBoundsTightness(index, *tightnessOut);
+ }
+}
+
+void IndexBoundsBuilder::_translatePredicate(const MatchExpression* expr,
+ const BSONElement& elt,
+ const IndexEntry& index,
+ OrderedIntervalList* oilOut,
+ BoundsTightness* tightnessOut) {
// We expect that the OIL we are constructing starts out empty.
invariant(oilOut->intervals.empty());
@@ -352,12 +371,12 @@ void IndexBoundsBuilder::translate(const MatchExpression* expr,
if (MatchExpression::ELEM_MATCH_VALUE == expr->matchType()) {
OrderedIntervalList acc;
- translate(expr->getChild(0), elt, index, &acc, tightnessOut);
+ _translatePredicate(expr->getChild(0), elt, index, &acc, tightnessOut);
for (size_t i = 1; i < expr->numChildren(); ++i) {
OrderedIntervalList next;
BoundsTightness tightness;
- translate(expr->getChild(i), elt, index, &next, &tightness);
+ _translatePredicate(expr->getChild(i), elt, index, &next, &tightness);
intersectize(next, &acc);
}
@@ -395,7 +414,7 @@ void IndexBoundsBuilder::translate(const MatchExpression* expr,
return;
}
- translate(child, elt, index, oilOut, tightnessOut);
+ _translatePredicate(child, elt, index, oilOut, tightnessOut);
oilOut->complement();
// Until the index distinguishes between missing values and literal null values, we cannot
diff --git a/src/mongo/db/query/index_bounds_builder.h b/src/mongo/db/query/index_bounds_builder.h
index 9e58584e698..92af526dd59 100644
--- a/src/mongo/db/query/index_bounds_builder.h
+++ b/src/mongo/db/query/index_bounds_builder.h
@@ -201,6 +201,16 @@ public:
bool* startKeyInclusive,
BSONObj* endKey,
bool* endKeyInclusive);
+
+private:
+ /**
+ * Performs the heavy lifting for IndexBoundsBuilder::translate().
+ */
+ static void _translatePredicate(const MatchExpression* expr,
+ const BSONElement& elt,
+ const IndexEntry& index,
+ OrderedIntervalList* oilOut,
+ BoundsTightness* tightnessOut);
};
} // namespace mongo
diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp
index 1193aef1772..2be774dafcf 100644
--- a/src/mongo/db/query/planner_access.cpp
+++ b/src/mongo/db/query/planner_access.cpp
@@ -45,6 +45,7 @@
#include "mongo/db/query/index_bounds_builder.h"
#include "mongo/db/query/index_tag.h"
#include "mongo/db/query/indexability.h"
+#include "mongo/db/query/planner_allpaths_helpers.h"
#include "mongo/db/query/query_knobs.h"
#include "mongo/db/query/query_planner.h"
#include "mongo/db/query/query_planner_common.h"
@@ -56,6 +57,7 @@ namespace {
using namespace mongo;
+namespace app = ::mongo::all_paths_planning;
namespace dps = ::mongo::dotted_path_support;
/**
@@ -528,6 +530,81 @@ void QueryPlannerAccess::finishTextNode(QuerySolutionNode* node, const IndexEntr
tn->indexPrefix = prefixBob.obj();
}
+void QueryPlannerAccess::finishAllPathsIndexScanNode(QuerySolutionNode* node,
+ const IndexEntry& plannerIndex) {
+ // We should only ever reach this point if we are an IndexScanNode for a $** index.
+ invariant(plannerIndex.type == IndexType::INDEX_ALLPATHS);
+ invariant(node && node->getType() == STAGE_IXSCAN);
+
+ // Sanity check the QuerySolutionNode's copy of the IndexEntry.
+ IndexScanNode* scan = static_cast<IndexScanNode*>(node);
+ invariant(scan->index.multikeyPaths.size() == 1);
+ invariant(scan->index == plannerIndex);
+
+ // Obtain some references to make the remainder of this function more legible.
+ auto& bounds = scan->bounds;
+ auto& index = scan->index;
+
+ // For $** indexes, the IndexEntry key pattern is {'path.to.field': ±1} but the actual keys in
+ // the index are of the form {'$_path': ±1, 'path.to.field': ±1}, where the value of the first
+ // field in each key is 'path.to.field'. We push a new entry into the bounds vector for the
+ // leading '$_path' bound here. We also push corresponding fields into the IndexScanNode's
+ // keyPattern and its multikeyPaths vector.
+ invariant(plannerIndex.keyPattern.nFields() == 1);
+ invariant(bounds.fields.size() == 1);
+ invariant(!bounds.fields.front().name.empty());
+ index.multikeyPaths.insert(index.multikeyPaths.begin(), std::set<std::size_t>{});
+ bounds.fields.insert(bounds.fields.begin(), {"$_path"});
+ index.keyPattern =
+ BSON("$_path" << index.keyPattern.firstElement() << index.keyPattern.firstElement());
+
+ // Create a FieldRef to perform any necessary manipulations on the query path string.
+ FieldRef queryPath{plannerIndex.keyPattern.firstElementFieldName()};
+ auto& multikeyPaths = index.multikeyPaths.back();
+
+ // If the bounds are [MinKey,MaxKey] then we must retrieve all documents which include the given
+ // path. We must therefore add bounds that encompass all its subpaths, specifically the interval
+ // ["path.","path/") on "$_path".
+ const bool isMinMaxInclusive = !bounds.fields.back().intervals.empty() &&
+ bounds.fields.back().intervals.front().isMinToMaxInclusive();
+
+ // Helper function to check whether the final path component in 'queryPath' is an array index.
+ const auto lastFieldIsArrayIndex = [&multikeyPaths](const auto& queryPath) {
+ return (queryPath.numParts() > 1u && multikeyPaths.count(queryPath.numParts() - 2u) &&
+ queryPath.isNumericPathComponent(queryPath.numParts() - 1u));
+ };
+
+ // For [MinKey,MaxKey] bounds, we build a range interval on all subpaths of the query path(s).
+ // We must therefore trim any trailing array indices from the query path before generating the
+ // fieldname-or-array power set, in order to avoid overlapping the final set of bounds. For
+ // instance, the untrimmed query path 'a.0' will produce paths 'a' and 'a.0' if 'a' is multikey,
+ // and so we would end up with bounds [['a','a'], ['a.','a/'], ['a.0','a.0'], ['a.0.','a.0/']].
+ // The latter two are subsets of the ['a.', 'a/'] interval.
+ while (isMinMaxInclusive && lastFieldIsArrayIndex(queryPath)) {
+ queryPath.removeLastPart();
+ }
+
+ // Account for fieldname-or-array-index semantics. $** indexes do not explicitly encode array
+ // indices in their keys, so if this query traverses one or more multikey fields via an array
+ // index (e.g. query 'a.0.b' where 'a' is an array), then we must generate bounds on all array-
+ // and non-array permutations of the path in order to produce INEXACT_FETCH bounds.
+ auto paths = app::generateFieldNameOrArrayIndexPathSet(multikeyPaths, queryPath);
+
+ // Add a $_path point-interval for each path that needs to be traversed in the index. If the
+ // bounds on these paths are MinKey-MaxKey, then for each applicable path we must add a range
+ // interval on all its subpaths, i.e. ["path.","path/").
+ static const char subPathStart = '.', subPathEnd = static_cast<char>('.' + 1);
+ auto& pathIntervals = bounds.fields.front().intervals;
+ for (const auto& fieldPath : paths) {
+ auto path = fieldPath.dottedField().toString();
+ pathIntervals.push_back(IndexBoundsBuilder::makePointInterval(path));
+ if (isMinMaxInclusive) {
+ pathIntervals.push_back(IndexBoundsBuilder::makeRangeInterval(
+ path + subPathStart, path + subPathEnd, BoundInclusion::kIncludeStartKeyOnly));
+ }
+ }
+}
+
bool QueryPlannerAccess::orNeedsFetch(const ScanBuildingState* scanState) {
if (scanState->loosestBounds == IndexBoundsBuilder::EXACT) {
return false;
@@ -577,19 +654,20 @@ void QueryPlannerAccess::finishLeafNode(QuerySolutionNode* node, const IndexEntr
const StageType type = node->getType();
if (STAGE_TEXT == type) {
- finishTextNode(node, index);
- return;
+ return finishTextNode(node, index);
}
IndexEntry* nodeIndex = nullptr;
- IndexBounds* bounds = NULL;
+ IndexBounds* bounds = nullptr;
if (STAGE_GEO_NEAR_2D == type) {
GeoNear2DNode* gnode = static_cast<GeoNear2DNode*>(node);
bounds = &gnode->baseBounds;
+ nodeIndex = &gnode->index;
} else if (STAGE_GEO_NEAR_2DSPHERE == type) {
GeoNear2DSphereNode* gnode = static_cast<GeoNear2DSphereNode*>(node);
bounds = &gnode->baseBounds;
+ nodeIndex = &gnode->index;
} else {
verify(type == STAGE_IXSCAN);
IndexScanNode* scan = static_cast<IndexScanNode*>(node);
@@ -597,36 +675,11 @@ void QueryPlannerAccess::finishLeafNode(QuerySolutionNode* node, const IndexEntr
bounds = &scan->bounds;
}
- // For $** indexes, the IndexEntry key pattern is {'path.to.field': ±1} but the actual keys in
- // the index are of the form {'$_path': ±1, 'path.to.field': ±1}, where the value of the first
- // field in each key is 'path.to.field'. We push a point-interval on 'path.to.field' into the
- // bounds vector for the leading '$_path' bound here. We also push corresponding fields into the
- // IndexScanNode's keyPattern and its multikeyPaths vector.
+ // If this is a $** index, update and populate the keyPattern, bounds, and multikeyPaths.
if (index.type == IndexType::INDEX_ALLPATHS) {
- invariant(bounds->fields.size() == 1 && index.keyPattern.nFields() == 1);
- invariant(!bounds->fields.front().name.empty());
- invariant(nodeIndex);
- bounds->fields.insert(bounds->fields.begin(), {"$_path"});
- bounds->fields.front().intervals.push_back(
- IndexBoundsBuilder::makePointInterval(nodeIndex->keyPattern.firstElementFieldName()));
- // If the bounds are [MinKey, MaxKey] then we're querying for any values in the given
- // path. Therefore we must add bounds that allow subpaths (specifically the bound
- // ["path.","path/") on "$_path").
- const auto& intervals = bounds->fields.back().intervals;
- if (!intervals.empty() && intervals.front().isMinToMaxInclusive()) {
- bounds->fields.front().intervals.push_back(IndexBoundsBuilder::makeRangeInterval(
- bounds->fields.back().name + '.',
- bounds->fields.back().name + static_cast<char>('.' + 1),
- BoundInclusion::kIncludeStartKeyOnly));
- }
- nodeIndex->keyPattern = BSON("$_path" << nodeIndex->keyPattern.firstElement()
- << nodeIndex->keyPattern.firstElement());
- nodeIndex->multikeyPaths.insert(nodeIndex->multikeyPaths.begin(), std::set<std::size_t>{});
+ finishAllPathsIndexScanNode(node, index);
}
- // If the QuerySolutionNode's IndexEntry is available, use its keyPattern.
- const auto& keyPattern = (nodeIndex ? nodeIndex->keyPattern : index.keyPattern);
-
// Find the first field in the scan's bounds that was not filled out.
// TODO: could cache this.
size_t firstEmptyField = 0;
@@ -639,12 +692,11 @@ void QueryPlannerAccess::finishLeafNode(QuerySolutionNode* node, const IndexEntr
// All fields are filled out with bounds, nothing to do.
if (firstEmptyField == bounds->fields.size()) {
- IndexBoundsBuilder::alignBounds(bounds, keyPattern);
- return;
+ return IndexBoundsBuilder::alignBounds(bounds, nodeIndex->keyPattern);
}
// Skip ahead to the firstEmptyField-th element, where we begin filling in bounds.
- BSONObjIterator it(keyPattern);
+ BSONObjIterator it(nodeIndex->keyPattern);
for (size_t i = 0; i < firstEmptyField; ++i) {
verify(it.more());
it.next();
@@ -667,7 +719,7 @@ void QueryPlannerAccess::finishLeafNode(QuerySolutionNode* node, const IndexEntr
// We create bounds assuming a forward direction but can easily reverse bounds to align
// according to our desired direction.
- IndexBoundsBuilder::alignBounds(bounds, keyPattern);
+ IndexBoundsBuilder::alignBounds(bounds, nodeIndex->keyPattern);
}
void QueryPlannerAccess::findElemMatchChildren(const MatchExpression* node,
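(Aside: a minimal model of the '$_path' bounds that finishAllPathsIndexScanNode() ends up producing. The helper name, the pair-of-strings stand-in for mongo's Interval, and the example paths are illustrative only; the real node also carries the value-field bounds and the rewritten key pattern.)

#include <iostream>
#include <set>
#include <string>
#include <utility>
#include <vector>

// For every candidate path we emit a point interval on '$_path'; if the value-field bounds are
// [MinKey, MaxKey] (an "exists"-style predicate), we also emit the half-open subpath range
// ["path.", "path/"), which covers every dotted subpath of 'path'.
std::vector<std::pair<std::string, std::string>> buildPathBounds(const std::set<std::string>& paths,
                                                                 bool boundsAreMinToMaxInclusive) {
    const char subPathStart = '.';
    const char subPathEnd = static_cast<char>('.' + 1);  // '/', the character following '.'
    std::vector<std::pair<std::string, std::string>> intervals;
    for (const auto& path : paths) {
        intervals.emplace_back(path, path);  // point interval [path, path]
        if (boundsAreMinToMaxInclusive) {
            intervals.emplace_back(path + subPathStart, path + subPathEnd);
        }
    }
    return intervals;
}

int main() {
    // Query path 'a.0.b' with 'a' multikey expands to the candidate paths {'a.0.b', 'a.b'};
    // with MinKey-MaxKey value bounds, each path also contributes its subpath range.
    for (const auto& interval : buildPathBounds({"a.0.b", "a.b"}, true)) {
        std::cout << interval.first << " .. " << interval.second << "\n";
    }
    return 0;
}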
diff --git a/src/mongo/db/query/planner_access.h b/src/mongo/db/query/planner_access.h
index e2097f2ab62..cf559315f2b 100644
--- a/src/mongo/db/query/planner_access.h
+++ b/src/mongo/db/query/planner_access.h
@@ -388,6 +388,8 @@ private:
static void finishTextNode(QuerySolutionNode* node, const IndexEntry& index);
+ static void finishAllPathsIndexScanNode(QuerySolutionNode* node, const IndexEntry& index);
+
/**
* Add the filter 'match' to the query solution node 'node'. Takes
* ownership of 'match'.
diff --git a/src/mongo/db/query/planner_allpaths_helpers.cpp b/src/mongo/db/query/planner_allpaths_helpers.cpp
new file mode 100644
index 00000000000..b56de29e577
--- /dev/null
+++ b/src/mongo/db/query/planner_allpaths_helpers.cpp
@@ -0,0 +1,249 @@
+/**
+ * Copyright (C) 2018 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kQuery
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/query/planner_allpaths_helpers.h"
+
+#include <vector>
+
+#include "mongo/bson/util/builder.h"
+#include "mongo/util/log.h"
+
+namespace mongo {
+namespace all_paths_planning {
+namespace {
+/**
+ * Compares the path 'fieldNameOrArrayIndexPath' to 'staticComparisonPath', ignoring any array
+ * indices present in the former if they are not present in the latter. The 'multikeyPathComponents'
+ * set contains the path positions that are known to be arrays; only numerical path components that
+ * immediately follow an array field are considered array indices. If 'fieldNameOrArrayIndexPath' is
+ * 'a.0.b', it will match 'staticComparisonPath' 'a.0.b', and it will also match 'a.b' but only if
+ * 'multikeyPathComponents' indicates that 'a' is an array.
+ */
+bool fieldNameOrArrayIndexPathMatches(const FieldRef& fieldNameOrArrayIndexPath,
+ const FieldRef& staticComparisonPath,
+ const std::set<size_t>& multikeyPathComponents) {
+ // Can't be equal if 'staticComparisonPath' has more parts than 'fieldNameOrArrayIndexPath'.
+ if (staticComparisonPath.numParts() > fieldNameOrArrayIndexPath.numParts()) {
+ return false;
+ }
+ size_t offset = 0;
+ for (size_t i = 0; i < fieldNameOrArrayIndexPath.numParts(); ++i) {
+ if (i - offset >= staticComparisonPath.numParts()) {
+ return false;
+ }
+ if (fieldNameOrArrayIndexPath.getPart(i) == staticComparisonPath.getPart(i - offset)) {
+ continue;
+ } else if (multikeyPathComponents.count(i - 1) &&
+ fieldNameOrArrayIndexPath.isNumericPathComponent(i)) {
+ ++offset;
+ continue;
+ }
+ return false;
+ }
+ // Ensure that we matched the entire 'staticComparisonPath' dotted string.
+ return fieldNameOrArrayIndexPath.numParts() == staticComparisonPath.numParts() + offset;
+}
+
+/**
+ * Returns true if 'multikeyPathSet' contains a FieldRef that matches 'pathToLookup' exactly, or
+ * matches 'pathToLookup' when the latter's array indices are ignored.
+ */
+bool fieldNameOrArrayIndexPathSetContains(const std::set<FieldRef>& multikeyPathSet,
+ const std::set<std::size_t>& multikeyPathComponents,
+ const FieldRef& pathToLookup) {
+ // Fast-path check for an exact match. If there is no exact match and 'pathToLookup' has no
+ // numeric path components, then 'multikeyPathSet' does not contain the path.
+ if (multikeyPathSet.count(pathToLookup)) {
+ return true;
+ } else if (!pathToLookup.hasNumericPathComponents()) {
+ return false;
+ }
+ // Determine whether any of the 'multikeyPathSet' entries match 'pathToLookup' under relaxed
+ // fieldname-or-array-index constraints.
+ return std::any_of(
+ multikeyPathSet.begin(), multikeyPathSet.end(), [&](const auto& multikeyPath) {
+ return fieldNameOrArrayIndexPathMatches(
+ pathToLookup, multikeyPath, multikeyPathComponents);
+ });
+}
+
+/**
+ * Returns the positions of all path components in 'queryPath' that may be interpreted as array
+ * indices by the query system. We obtain this list by finding all multikey path components that
+ * have a numerical path component immediately after.
+ */
+std::vector<size_t> findArrayIndexPathComponents(const std::set<std::size_t>& multikeyPaths,
+ const FieldRef& queryPath) {
+ std::vector<size_t> arrayIndices;
+ for (auto i : multikeyPaths) {
+ if (i < queryPath.numParts() - 1 && queryPath.isNumericPathComponent(i + 1)) {
+ arrayIndices.push_back(i + 1);
+ }
+ }
+ return arrayIndices;
+}
+
+/**
+ * Returns a FieldRef of the full dotted path, minus the components listed in 'skipComponents'.
+ */
+FieldRef pathWithoutSpecifiedComponents(const FieldRef& path,
+ const std::set<size_t>& skipComponents) {
+ // If 'skipComponents' is empty, we return 'path' immediately.
+ if (skipComponents.empty()) {
+ return path;
+ }
+ StringBuilder ss;
+ size_t startPart = 0;
+ for (const auto& skipPart : skipComponents) {
+ ss << (ss.len() && !ss.stringData().endsWith(".") ? "." : "")
+ << path.dottedSubstring(startPart, skipPart);
+ startPart = skipPart + 1;
+ }
+ if (startPart < path.numParts()) {
+ ss << (ss.len() && !ss.stringData().endsWith(".") ? "." : "")
+ << path.dottedSubstring(startPart, path.numParts());
+ }
+ return FieldRef{ss.str()};
+}
+} // namespace
+
+MultikeyPaths buildMultiKeyPathsForExpandedAllPathsIndexEntry(
+ const FieldRef& indexedPath, const std::set<FieldRef>& multikeyPathSet) {
+ FieldRef pathToLookup;
+ std::set<std::size_t> multikeyPaths;
+ for (size_t i = 0; i < indexedPath.numParts(); ++i) {
+ pathToLookup.appendPart(indexedPath.getPart(i));
+ if (fieldNameOrArrayIndexPathSetContains(multikeyPathSet, multikeyPaths, pathToLookup)) {
+ multikeyPaths.insert(i);
+ }
+ }
+ return {multikeyPaths};
+}
+
+std::set<FieldRef> generateFieldNameOrArrayIndexPathSet(const std::set<std::size_t>& multikeyPaths,
+ const FieldRef& queryPath) {
+ // We iterate over the power set of array index positions to generate all necessary paths.
+    // The algorithm is unavoidably O(n * 2^n), but we enforce that 'n' is never more than single
+ // digits during the planner's index selection phase.
+ const auto potentialArrayIndices = findArrayIndexPathComponents(multikeyPaths, queryPath);
+ invariant(potentialArrayIndices.size() <= kAllPathsMaxArrayIndexTraversalDepth);
+ invariant(potentialArrayIndices.size() < sizeof(size_t) * 8u);
+ // We iterate over every value [0..2^n), where 'n' is the size of 'potentialArrayIndices',
+ // treating each value as a 'bitMask' of 'n' bits. Each bit in 'bitMask' represents the
+ // entry at the equivalent position in the 'potentialArrayIndices' vector. When a given bit
+ // is set, we treat the corresponding numeric path component as an array index, and generate
+ // a path that omits it. When a bit is not set, we treat the numeric path component as a
+ // literal fieldname, and we generate a path that includes it. Because we iterate over every
+ // value [0..2^n), we ensure that we generate every combination of 'n' bits, and therefore
+ // every possible fieldname and array index path.
+ std::set<FieldRef> paths;
+ for (size_t bitMask = 0; bitMask < (size_t{1} << potentialArrayIndices.size()); ++bitMask) {
+ std::set<size_t> arrayIndicesToSkip;
+ for (size_t i = 0; i < potentialArrayIndices.size(); ++i) {
+ if (bitMask & (size_t{1} << i)) {
+ arrayIndicesToSkip.insert(potentialArrayIndices[i]);
+ }
+ }
+ paths.insert(pathWithoutSpecifiedComponents(queryPath, arrayIndicesToSkip));
+ }
+ return paths;
+}
+
+BoundsTightness applyAllPathsIndexScanBoundsTightness(const IndexEntry& index,
+ BoundsTightness tightnessIn) {
+ // This method should only ever be called for a $** IndexEntry. We expect to be called during
+ // planning, *before* finishAllPathsIndexScanNode has been invoked. The IndexEntry should thus
+ // only have a single keyPattern field and multikeyPath entry, but this is sufficient to
+ // determine whether it will be necessary to adjust the tightness.
+ invariant(index.type == IndexType::INDEX_ALLPATHS);
+ invariant(index.keyPattern.nFields() == 1);
+ invariant(index.multikeyPaths.size() == 1);
+
+ // If the tightness is already INEXACT_FETCH, any further changes are redundant.
+ if (tightnessIn == BoundsTightness::INEXACT_FETCH) {
+ return tightnessIn;
+ }
+
+ // If the query passes through any array indices, we must always fetch and filter the documents.
+ const auto arrayIndicesTraversedByQuery = findArrayIndexPathComponents(
+ index.multikeyPaths.front(), FieldRef{index.keyPattern.firstElementFieldName()});
+
+ // If the list of array indices we traversed is non-empty, set the tightness to INEXACT_FETCH.
+ return (arrayIndicesTraversedByQuery.empty() ? tightnessIn : BoundsTightness::INEXACT_FETCH);
+}
+
+bool validateNumericPathComponents(const MultikeyPaths& multikeyPaths,
+ const std::set<FieldRef>& includedPaths,
+ const FieldRef& queryPath) {
+ // $** multikeyPaths always have a singleton set, since they are single-element indexes.
+ invariant(multikeyPaths.size() == 1);
+
+    // Find the positions of all path components in 'queryPath' that may be treated as array
+    // indices; that is, numerical components which immediately follow a multikey component. For a
+    // queryPath of 'a.2.b' this will return position 1, i.e. the component '2'. If no such array
+    // index component was found, we are clear to proceed with planning.
+ const auto arrayIndices = findArrayIndexPathComponents(multikeyPaths.front(), queryPath);
+ if (arrayIndices.empty()) {
+ return true;
+ }
+ // To support $** fieldname-or-array-index semantics, the planner must generate the power set of
+ // all paths with and without array indices. Because this is O(2^n), we decline to answer
+ // queries that traverse more than 8 levels of array indices.
+ if (arrayIndices.size() > kAllPathsMaxArrayIndexTraversalDepth) {
+ LOG(2) << "Declining to answer query on field '" << queryPath.dottedField()
+ << "' with $** index, as it traverses through more than "
+ << kAllPathsMaxArrayIndexTraversalDepth << " nested array indices.";
+ return false;
+ }
+ // If 'includedPaths' is empty, then either the $** projection is an exclusion, or no explicit
+ // projection was provided. In either case, it is not possible for the query path to lie along
+ // an array index projection, and so we are safe to proceed with planning.
+ if (includedPaths.empty()) {
+ return true;
+ }
+ // Find the $** projected field which prefixes or is equal to the query path. If 'includedPaths'
+ // is non-empty then we are guaranteed that exactly one entry will prefix the query path, since
+ // (a) if no such inclusion exists, an IndexEntry would not have been created for this path, and
+ // (b) conflicting paths, such as 'a.b' and 'a.b.c', are not permitted in projections.
+ auto includePath = std::find_if(
+ includedPaths.begin(), includedPaths.end(), [&queryPath](const auto& includedPath) {
+ return includedPath.isPrefixOfOrEqualTo(queryPath);
+ });
+ invariant(std::next(includePath) == includedPaths.end() || *std::next(includePath) > queryPath);
+
+ // If the projectedPath responsible for including this queryPath prefixes it up to and including
+ // the numerical array index field, then the queryPath lies along a projection through the array
+ // index, and we cannot support the query for the reasons outlined above.
+ return arrayIndices[0] >= includePath->numParts();
+}
+
+} // namespace all_paths_planning
+} // namespace mongo
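(Aside: the bitmask loop in generateFieldNameOrArrayIndexPathSet() above is easy to model in isolation. The sketch below reproduces the power-set expansion over plain dotted-string parts; the function name and the vector-of-parts representation are stand-ins for FieldRef, not the patch's API.)

#include <iostream>
#include <set>
#include <string>
#include <vector>

// 'arrayIndexPositions' lists the components of the split path that may be either a literal
// fieldname or an array index. Each bit of 'bitMask' decides whether the corresponding position
// is dropped (treated as an array index) or kept (treated as a fieldname), so iterating over all
// 2^n masks yields every fieldname-or-array-index combination of the path.
std::set<std::string> expandFieldnameOrArrayIndexPaths(
    const std::vector<std::string>& parts, const std::vector<size_t>& arrayIndexPositions) {
    std::set<std::string> paths;
    for (size_t bitMask = 0; bitMask < (size_t{1} << arrayIndexPositions.size()); ++bitMask) {
        std::set<size_t> skip;
        for (size_t i = 0; i < arrayIndexPositions.size(); ++i) {
            if (bitMask & (size_t{1} << i))
                skip.insert(arrayIndexPositions[i]);
        }
        std::string path;
        for (size_t i = 0; i < parts.size(); ++i) {
            if (skip.count(i))
                continue;
            if (!path.empty())
                path += '.';
            path += parts[i];
        }
        paths.insert(path);
    }
    return paths;
}

int main() {
    // For 'a.0.b.1.c' with 'a' and 'a.b' multikey, positions 1 ('0') and 3 ('1') may be array
    // indices, so the expansion yields: a.0.b.1.c, a.0.b.c, a.b.1.c, a.b.c.
    for (const auto& path : expandFieldnameOrArrayIndexPaths({"a", "0", "b", "1", "c"}, {1, 3})) {
        std::cout << path << "\n";
    }
    return 0;
}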
diff --git a/src/mongo/db/query/planner_allpaths_helpers.h b/src/mongo/db/query/planner_allpaths_helpers.h
new file mode 100644
index 00000000000..4bbb333dead
--- /dev/null
+++ b/src/mongo/db/query/planner_allpaths_helpers.h
@@ -0,0 +1,101 @@
+/**
+ * Copyright (C) 2018 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#pragma once
+
+#include <set>
+
+#include "mongo/db/field_ref.h"
+#include "mongo/db/index/multikey_paths.h"
+#include "mongo/db/query/index_bounds_builder.h"
+#include "mongo/db/query/query_solution.h"
+
+namespace mongo {
+namespace all_paths_planning {
+
+using BoundsTightness = IndexBoundsBuilder::BoundsTightness;
+
+/**
+ * Specifies the maximum depth of nested array indices through which a query may traverse before a
+ * $** index declines to answer it, due to the exponential complexity of the bounds required.
+ */
+static constexpr size_t kAllPathsMaxArrayIndexTraversalDepth = 8u;
+
+/**
+ * Returns a MultikeyPaths which indicates which components of 'indexedPath' are multikey, by
+ * looking up multikeyness in 'multikeyPathSet'.
+ */
+MultikeyPaths buildMultiKeyPathsForExpandedAllPathsIndexEntry(
+ const FieldRef& indexedPath, const std::set<FieldRef>& multikeyPathSet);
+
+/**
+ * Given a query path and the set of known multikey path components, this function outputs the power
+ * set of paths generated by considering each relevant numerical path component first as a literal
+ * fieldname, and then as an array index. In other words, for each numerical path component that
+ * immediately follows a multikey field, we will output one path with the numerical component and
+ * one path without. If both 'a' and 'b' are multikey, then the queryPath 'a.0.b.1.c' will produce
+ * ['a.0.b.1.c', 'a.0.b.c', 'a.b.1.c', 'a.b.c'].
+ */
+std::set<FieldRef> generateFieldNameOrArrayIndexPathSet(const std::set<std::size_t>& multikeyPaths,
+ const FieldRef& queryPath);
+
+/**
+ * In certain circumstances, it is necessary to adjust the tightness of the bounds generated by the
+ * planner for $** indexes. For instance, if the query traverses through one or more arrays via
+ * specific indices, then we must enforce INEXACT_FETCH to ensure correctness, regardless of the
+ * predicate. Given an IndexEntry representing an expanded $** index, we apply any necessary changes
+ * to the tightness here.
+ */
+BoundsTightness applyAllPathsIndexScanBoundsTightness(const IndexEntry& index,
+ BoundsTightness tightnessIn);
+
+/**
+ * Returns false if 'queryPath' includes any numerical path components which render it unanswerable
+ * by the $** index, true otherwise. Specifically, the $** index cannot answer the query if either
+ * of the following scenarios occur:
+ *
+ * - The query path traverses through more than 'kAllPathsMaxArrayIndexTraversalDepth' nested arrays
+ * via explicit array indices.
+ * - The query path lies along a $** projection through an array index.
+ *
+ * For an example of the latter case, say that our query path is 'a.0.b', our projection includes
+ * {'a.0': 1}, and 'a' is multikey. The query semantics will match 'a.0.b' by either field name or
+ * array index against the documents, but because the $** index always projects numeric path
+ * components strictly as field names, the projection {'a.0': 1} cannot correctly support this
+ * query.
+ *
+ * To see why, consider the document {a: [1, 2, 3]}. Query {'a.0': 1} will match this document, but
+ * the projection {'a.0': 1} will produce output document {a: []}, and so we will not index any of
+ * the values [1, 2, 3] for 'a'.
+ */
+bool validateNumericPathComponents(const MultikeyPaths& multikeyPaths,
+ const std::set<FieldRef>& includedPaths,
+ const FieldRef& queryPath);
+
+} // namespace all_paths_planning
+} // namespace mongo
diff --git a/src/mongo/db/query/planner_ixselect.cpp b/src/mongo/db/query/planner_ixselect.cpp
index 4ff04cd959a..8dd796ac18c 100644
--- a/src/mongo/db/query/planner_ixselect.cpp
+++ b/src/mongo/db/query/planner_ixselect.cpp
@@ -44,6 +44,7 @@
#include "mongo/db/query/collation/collator_interface.h"
#include "mongo/db/query/index_tag.h"
#include "mongo/db/query/indexability.h"
+#include "mongo/db/query/planner_allpaths_helpers.h"
#include "mongo/db/query/query_planner_common.h"
#include "mongo/util/log.h"
@@ -51,29 +52,14 @@ namespace mongo {
namespace {
+namespace app = ::mongo::all_paths_planning;
+
std::size_t numPathComponents(StringData path) {
return FieldRef{path}.numParts();
}
/**
- * Returns a MultikeyPaths which indicates which components of 'indexedPath' are multikey, by
- * looking up multikeyness in 'multikeyPathSet'.
- */
-MultikeyPaths buildMultiKeyPathsForExpandedAllPathsIndexEntry(
- const FieldRef& indexedPath, const std::set<FieldRef>& multikeyPathSet) {
- FieldRef pathToLookup;
- std::set<std::size_t> multikeyPathComponents;
- for (size_t i = 0; i < indexedPath.numParts(); ++i) {
- pathToLookup.appendPart(indexedPath.getPart(i));
- if (multikeyPathSet.count(pathToLookup)) {
- multikeyPathComponents.insert(i);
- }
- }
- return {multikeyPathComponents};
-}
-
-/**
- * Given a single allPaths index, and a set of fields which are being queried, create a virtual
+ * Given a single allPaths index, and a set of fields which are being queried, create a 'mock'
* IndexEntry for each of the appropriate fields.
*/
void expandIndex(const IndexEntry& allPathsIndex,
@@ -94,15 +80,25 @@ void expandIndex(const IndexEntry& allPathsIndex,
const auto projectedFields = projExec->applyProjectionToFields(fields);
+ const auto& includedPaths = projExec->getExhaustivePaths();
+
out->reserve(out->size() + projectedFields.size());
for (auto&& fieldName : projectedFields) {
+ // Convert string 'fieldName' into a FieldRef, to better facilitate the subsequent checks.
+ const auto queryPath = FieldRef{fieldName};
// $** indices hold multikey metadata directly in the index keys, rather than in the index
// catalog. In turn, the index key data is used to produce a set of multikey paths
// in-memory. Here we convert this set of all multikey paths into a MultikeyPaths vector
// which will indicate to the downstream planning code which components of 'fieldName' are
// multikey.
- auto multikeyPaths = buildMultiKeyPathsForExpandedAllPathsIndexEntry(
- FieldRef{fieldName}, allPathsIndex.multikeyPathSet);
+ auto multikeyPaths = app::buildMultiKeyPathsForExpandedAllPathsIndexEntry(
+ queryPath, allPathsIndex.multikeyPathSet);
+
+ // Check whether a query on the current fieldpath is answerable by the $** index, given any
+ // numerical path components that may be present in the path string.
+ if (!app::validateNumericPathComponents(multikeyPaths, includedPaths, queryPath)) {
+ continue;
+ }
// The expanded IndexEntry is only considered multikey if the particular path represented by
// this IndexEntry has a multikey path component. For instance, suppose we have index {$**:
diff --git a/src/mongo/db/query/query_planner_all_paths_index_test.cpp b/src/mongo/db/query/query_planner_all_paths_index_test.cpp
index 66b6dd8eeac..0f8aa3830e9 100644
--- a/src/mongo/db/query/query_planner_all_paths_index_test.cpp
+++ b/src/mongo/db/query/query_planner_all_paths_index_test.cpp
@@ -151,7 +151,7 @@ TEST_F(QueryPlannerAllPathsTest, MultiplePredicatesOverMultikeyFieldNoElemMatch)
addAllPathsIndex(BSON("$**" << 1), {"a"});
runQuery(fromjson("{a: {$gt: 0, $lt: 9}}"));
- ASSERT_EQUALS(getNumSolutions(), 2U);
+ assertNumSolutions(2U);
assertSolutionExists(
"{fetch: {filter: {a: {$gt: 0}}, node: "
"{ixscan: {filter: null, pattern: {'$_path': 1, a: 1},"
@@ -166,7 +166,7 @@ TEST_F(QueryPlannerAllPathsTest, MultiplePredicatesOverMultikeyFieldWithElemMatc
addAllPathsIndex(BSON("$**" << 1), {"a"});
runQuery(fromjson("{a: {$elemMatch: {$gt: 0, $lt: 9}}}"));
- ASSERT_EQUALS(getNumSolutions(), 1U);
+ assertNumSolutions(1U);
assertSolutionExists(
"{fetch: {filter: {a: {$elemMatch: {$gt: 0, $lt: 9}}}, node: "
"{ixscan: {filter: null, pattern: {'$_path': 1, a: 1},"
@@ -177,7 +177,7 @@ TEST_F(QueryPlannerAllPathsTest, MultiplePredicatesOverNonMultikeyFieldWithMulti
addAllPathsIndex(BSON("$**" << 1), {"b"});
runQuery(fromjson("{a: {$gt: 0, $lt: 9}}"));
- ASSERT_EQUALS(getNumSolutions(), 1U);
+ assertNumSolutions(1U);
assertSolutionExists(
"{fetch: {filter: null, node: {ixscan: {filter: null, pattern: {'$_path': 1, a: 1},"
"bounds: {'$_path': [['a','a',true,true]], a: [[0,9,false,false]]}}}}}");
@@ -187,7 +187,7 @@ TEST_F(QueryPlannerAllPathsTest, MultiplePredicatesOverNestedFieldWithFirstCompo
addAllPathsIndex(BSON("$**" << 1), {"a"});
runQuery(fromjson("{'a.b': {$gt: 0, $lt: 9}}"));
- ASSERT_EQUALS(getNumSolutions(), 2U);
+ assertNumSolutions(2U);
assertSolutionExists(
"{fetch: {filter: {'a.b': {$gt: 0}}, node: "
"{ixscan: {filter: null, pattern: {'$_path': 1, 'a.b': 1},"
@@ -202,7 +202,7 @@ TEST_F(QueryPlannerAllPathsTest, MultiplePredicatesOverNestedFieldWithElemMatchO
addAllPathsIndex(BSON("$**" << 1), {"a"});
runQuery(fromjson("{a: {$elemMatch: {b: {$gt: 0, $lt: 9}}}}"));
- ASSERT_EQUALS(getNumSolutions(), 1U);
+ assertNumSolutions(1U);
assertSolutionExists(
"{fetch: {filter: {a: {$elemMatch: {b: {$gt: 0, $lt: 9}}}}, node: "
"{ixscan: {filter: null, pattern: {'$_path': 1, 'a.b': 1},"
@@ -214,7 +214,7 @@ TEST_F(QueryPlannerAllPathsTest,
addAllPathsIndex(BSON("$**" << 1), {"a", "a.b"});
runQuery(fromjson("{a: {$elemMatch: {b: {$gt: 0, $lt: 9}}}}"));
- ASSERT_EQUALS(getNumSolutions(), 2U);
+ assertNumSolutions(2U);
assertSolutionExists(
"{fetch: {filter: {a: {$elemMatch: {b: {$gt: 0, $lt: 9}}}}, node: "
"{ixscan: {filter: null, pattern: {'$_path': 1, 'a.b': 1},"
@@ -229,7 +229,7 @@ TEST_F(QueryPlannerAllPathsTest, MultiplePredicatesOverNestedFieldWithTwoElemMat
addAllPathsIndex(BSON("$**" << 1), {"a", "a.b"});
runQuery(fromjson("{a: {$elemMatch: {b: {$elemMatch: {$gt: 0, $lt: 9}}}}}"));
- ASSERT_EQUALS(getNumSolutions(), 1U);
+ assertNumSolutions(1U);
assertSolutionExists(
"{fetch: {filter: {a: {$elemMatch: {b: {$elemMatch: {$gt: 0, $lt: 9}}}}}, node: "
"{ixscan: {filter: null, pattern: {'$_path': 1, 'a.b': 1},"
@@ -240,7 +240,7 @@ TEST_F(QueryPlannerAllPathsTest, ElemMatchOnInnermostMultikeyPathPermitsTightBou
addAllPathsIndex(BSON("$**" << 1), {"a", "a.b", "a.b.c"});
runQuery(fromjson("{'a.b.c': {$elemMatch: {'d.e.f': {$gt: 0, $lt: 9}}}}"));
- ASSERT_EQUALS(getNumSolutions(), 1U);
+ assertNumSolutions(1U);
assertSolutionExists(
"{fetch: {filter: {'a.b.c': {$elemMatch: {'d.e.f': {$gt: 0, $lt: 9}}}}, node: "
"{ixscan: {filter: null, pattern: {'$_path': 1, 'a.b.c.d.e.f': 1},"
@@ -253,7 +253,7 @@ TEST_F(QueryPlannerAllPathsTest, AllPredsEligibleForIndexUseGenerateCandidatePla
runQuery(
fromjson("{'a.b': {$gt: 0, $lt: 9}, 'a.c': {$gt: 11, $lt: 20}, d: {$gt: 31, $lt: 40}}"));
- ASSERT_EQUALS(getNumSolutions(), 4U);
+ assertNumSolutions(4U);
assertSolutionExists(
"{fetch: {filter: {'a.b':{$gt:0,$lt: 9},'a.c':{$gt:11},d:{$gt:31,$lt:40}}, node: "
"{ixscan: {filter: null, pattern: {'$_path': 1, 'a.c': 1},"
@@ -678,7 +678,7 @@ TEST_F(QueryPlannerAllPathsTest, AllPathsIndexDoesNotParticipateInIndexIntersect
// Run a point query on both fields and confirm that an AND_SORTED plan is generated.
runQuery(fromjson("{a:10, b:10}"));
// Three plans are generated: one IXSCAN for each index, and an AND_SORTED on both.
- ASSERT_EQUALS(getNumSolutions(), 3U);
+ assertNumSolutions(3U);
assertSolutionExists(
"{fetch: {filter: {a:10}, node: {ixscan: {filter: null, pattern: {b:1}}}}}");
assertSolutionExists(
@@ -690,7 +690,7 @@ TEST_F(QueryPlannerAllPathsTest, AllPathsIndexDoesNotParticipateInIndexIntersect
// Run a range query on both fields and confirm that an AND_HASH plan is generated.
runQuery(fromjson("{a:{$gt: 10}, b:{$gt: 10}}"));
// Three plans are generated: one IXSCAN for each index, and an AND_HASH on both.
- ASSERT_EQUALS(getNumSolutions(), 3U);
+ assertNumSolutions(3U);
assertSolutionExists(
"{fetch: {filter: {a:{$gt: 10}}, node: {ixscan: {filter: null, pattern: {b:1}}}}}");
assertSolutionExists(
@@ -705,7 +705,7 @@ TEST_F(QueryPlannerAllPathsTest, AllPathsIndexDoesNotParticipateInIndexIntersect
// First re-run the AND_SORTED test.
runQuery(fromjson("{a:10, b:10}"));
// Solution count has increased from 3 to 5, as $** 'duplicates' the {a:1} and {b:1} IXSCANS.
- ASSERT_EQUALS(getNumSolutions(), 5U);
+ assertNumSolutions(5U);
assertSolutionExists(
"{fetch: {filter: {a:10}, node: {ixscan: {filter: null, pattern: {b:1}}}}}");
assertSolutionExists(
@@ -723,7 +723,7 @@ TEST_F(QueryPlannerAllPathsTest, AllPathsIndexDoesNotParticipateInIndexIntersect
// Now re-run the AND_HASH test.
runQuery(fromjson("{a:{$gt: 10}, b:{$gt: 10}}"));
// Solution count has increased from 3 to 5, as $** 'duplicates' the {a:1} and {b:1} IXSCANS.
- ASSERT_EQUALS(getNumSolutions(), 5U);
+ assertNumSolutions(5U);
assertSolutionExists(
"{fetch: {filter: {a:{$gt:10}}, node: {ixscan: {filter: null, pattern: {b:1}}}}}");
assertSolutionExists(
@@ -753,7 +753,7 @@ TEST_F(QueryPlannerAllPathsTest, AllPathsIndexDoesNotSupplyCandidatePlanForTextS
// Confirm that the allPaths index generates candidate plans for queries which do not include a
// $text predicate.
runQuery(fromjson("{a: 10, b: 10}"));
- ASSERT_EQUALS(getNumSolutions(), 2U);
+ assertNumSolutions(2U);
assertSolutionExists(
"{fetch: {filter: {b: 10}, node: {ixscan: {filter: null, pattern: {'$_path': 1, a: 1}}}}}");
assertSolutionExists(
@@ -762,7 +762,7 @@ TEST_F(QueryPlannerAllPathsTest, AllPathsIndexDoesNotSupplyCandidatePlanForTextS
// Confirm that the allPaths index does not produce any candidate plans when a query includes a
// $text predicate, even for non-$text predicates which may be present in the query.
runQuery(fromjson("{a: 10, b: 10, $text: {$search: 'banana'}}"));
- ASSERT_EQUALS(getNumSolutions(), 1U);
+ assertNumSolutions(1U);
assertSolutionExists(
"{fetch: {filter: {b: 10}, node: {text: {prefix: {a: 10}, search: 'banana'}}}}");
}
@@ -1094,4 +1094,341 @@ TEST_F(QueryPlannerAllPathsTest, AllPathsIndexMustUseBlockingSortWhenFieldIsNotI
// TODO SERVER-36517: Add testing for DISTINCT_SCAN.
// TODO SERVER-35331: Add testing for hints.
+//
+// Field name or array index tests.
+//
+
+TEST_F(QueryPlannerAllPathsTest, CannotAnswerQueryThroughArrayIndexProjection) {
+ addAllPathsIndex(BSON("$**" << 1), {"a"}, fromjson("{'a.0': 1, 'a.b': 1}"));
+
+ // Queries whose path lies along a projected array index cannot be answered.
+ runQuery(fromjson("{'a.0': 10}"));
+ assertNumSolutions(1U);
+ assertSolutionExists("{cscan: {dir: 1}}");
+
+ runQuery(fromjson("{'a.0.b': 10}"));
+ assertNumSolutions(1U);
+ assertSolutionExists("{cscan: {dir: 1}}");
+}
+
+TEST_F(QueryPlannerAllPathsTest, CanAnswerQueryOnNonProjectedArrayIndex) {
+ addAllPathsIndex(BSON("$**" << 1), {"a", "c"}, fromjson("{'a.0': 1, b: 1, c: 1}"));
+
+ // Queries on fields that are not projected array indices can be answered, even when such a
+ // projection is present in the 'starPathsTempName' spec.
+ runQuery(fromjson("{b: 10}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: null, node: {ixscan: {filter: null, pattern: {'$_path': 1, b: 1}, "
+ "bounds: {'$_path': [['b','b',true,true]], b: [[10,10,true,true]]}}}}}");
+
+ // Queries on indices of arrays that have been projected in their entirety can be answered.
+ runQuery(fromjson("{'c.0': 10}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: {'c.0': 10}, node: {ixscan: {filter: null, pattern: {'$_path': 1, 'c.0': "
+ "1}, bounds: {'$_path': [['c','c',true,true], ['c.0','c.0',true,true]], 'c.0': "
+ "[[10,10,true,true]]}}}}}");
+}
+
+TEST_F(QueryPlannerAllPathsTest, ShouldGenerateAllPotentialPathBoundsForArrayIndexQueries) {
+ addAllPathsIndex(BSON("a.$**" << 1), {"a", "a.b"});
+
+ // A numeric path component immediately following a multikey path may represent an array index,
+ // a fieldname, or both. The $** index does not record array indices explicitly, so for all such
+ // potential array indices in the query path, we generate bounds that both include and exclude
+ // the numeric path components.
+ runQuery(fromjson("{'a.0': 10}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: {'a.0': 10}, node: {ixscan: {filter: null, pattern: {'$_path': 1, 'a.0': "
+ "1}, bounds: {'$_path': [['a','a',true,true], ['a.0','a.0',true,true]], 'a.0': "
+ "[[10,10,true,true]]}}}}}");
+
+ runQuery(fromjson("{'a.0.b': 10}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: {'a.0.b': 10}, node: {ixscan: {filter: null, pattern: {'$_path': 1, "
+ "'a.0.b': 1}, bounds: {'$_path': [['a.0.b','a.0.b',true,true], ['a.b','a.b',true,true]], "
+ "'a.0.b': [[10,10,true,true]]}}}}}");
+
+ runQuery(fromjson("{'a.0.b.1.c': 10}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: {'a.0.b.1.c': 10}, node: {ixscan: {filter: null, pattern: {'$_path': 1, "
+ "'a.0.b.1.c': 1}, bounds: {'$_path': [['a.0.b.1.c','a.0.b.1.c',true,true], "
+ "['a.0.b.c','a.0.b.c',true,true], ['a.b.1.c','a.b.1.c',true,true], "
+ "['a.b.c','a.b.c',true,true]], 'a.0.b.1.c': [[10,10,true,true]]}}}}}");
+}
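The fan-out asserted in the bounds above can be sketched as follows. This is illustrative only: it treats every non-leading, all-digit component as a potential array index, whereas the planner (per the tests below) additionally requires the preceding path to be multikey and rejects components with leading zeroes.

    #include <cstddef>
    #include <iostream>
    #include <set>
    #include <sstream>
    #include <string>
    #include <utility>
    #include <vector>

    bool isAllDigits(const std::string& s) {
        return !s.empty() && s.find_first_not_of("0123456789") == std::string::npos;
    }

    // Sketch only: expand a dotted query path into every path the $** index may have recorded,
    // by either keeping each candidate numeric component (field name) or dropping it (array
    // index). The first component is never treated as an array index.
    std::set<std::string> expandArrayIndexPaths(const std::string& queryPath) {
        std::vector<std::string> components;
        std::stringstream ss(queryPath);
        for (std::string c; std::getline(ss, c, '.');)
            components.push_back(c);

        std::set<std::string> results{""};
        for (std::size_t i = 0; i < components.size(); ++i) {
            std::set<std::string> next;
            for (const auto& prefix : results) {
                next.insert(prefix.empty() ? components[i] : prefix + "." + components[i]);
                if (i > 0 && isAllDigits(components[i]))
                    next.insert(prefix);  // treat the component as an array index and drop it.
            }
            results = std::move(next);
        }
        return results;
    }

    int main() {
        for (const auto& path : expandArrayIndexPaths("a.0.b.1.c"))
            std::cout << path << "\n";  // a.0.b.1.c, a.0.b.c, a.b.1.c, a.b.c
    }

For 'a.0.b.1.c' this yields exactly the four $_path point bounds asserted in the solution above.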
+
+TEST_F(QueryPlannerAllPathsTest, ShouldAddAllPredicatesToFetchFilterForArrayIndexQueries) {
+ addAllPathsIndex(BSON("$**" << 1), {"a", "a.b"});
+
+ // Ordinarily, a query such as {'a': {$gt: 0, $lt: 5}} on a multikey field 'a' produces an EXACT
+ // IXSCAN on one of the predicates and a FETCH filter on the other. For fieldname-or-array-index
+ // queries on 'a.0' or similar, however, *all* predicates must be added to the FETCH filter to
+ // ensure correctness.
+ runQuery(fromjson("{'a.0': {$gt: 10, $lt: 20}}"));
+ assertNumSolutions(2U);
+ assertSolutionExists(
+ "{fetch: {filter: {'a.0': {$gt: 10, $lt: 20}}, node: {ixscan: {filter: null, pattern: "
+ "{'$_path': 1, 'a.0': 1}, bounds: {'$_path': [['a','a',true,true], "
+ "['a.0','a.0',true,true]], 'a.0': [[-inf,20,true,false]]}}}}}");
+ assertSolutionExists(
+ "{fetch: {filter: {'a.0': {$gt: 10, $lt: 20}}, node: {ixscan: {filter: null, pattern: "
+ "{'$_path': 1, 'a.0': 1}, bounds: {'$_path': [['a','a',true,true], "
+ "['a.0','a.0',true,true]], 'a.0': [[10,inf,false,true]]}}}}}");
+
+ runQuery(fromjson("{'a.0.b': {$gte: 10, $lte: 20, $ne: 15}}"));
+ assertNumSolutions(2U);
+ assertSolutionExists(
+ "{fetch: {filter: {'a.0.b': {$gte: 10, $lte: 20, $ne: 15}}, node: {ixscan: {filter: null, "
+ "pattern: {'$_path': 1, 'a.0.b': 1}, bounds: {'$_path': [['a.0.b','a.0.b',true,true], "
+ "['a.b','a.b',true,true]], 'a.0.b': [[-inf,20.0,true,true]]}}}}}");
+ assertSolutionExists(
+ "{fetch: {filter: {'a.0.b': {$gte: 10, $lte: 20, $ne: 15}}, node: {ixscan: {filter: null, "
+ "pattern: {'$_path': 1, 'a.0.b': 1}, bounds: {'$_path': [['a.0.b','a.0.b',true,true], "
+ "['a.b','a.b',true,true]], 'a.0.b': [[10,inf,true,true]]}}}}}");
+
+ runQuery(fromjson("{'a.0.b.1.c': {$in: [10, 20, 30]}}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: {'a.0.b.1.c': {$in: [10, 20, 30]}}, node: {ixscan: {filter: null, "
+ "pattern: {'$_path': 1, 'a.0.b.1.c': 1}, bounds: {'$_path': "
+ "[['a.0.b.1.c','a.0.b.1.c',true,true], ['a.0.b.c','a.0.b.c',true,true], "
+ "['a.b.1.c','a.b.1.c',true,true], ['a.b.c','a.b.c',true,true]], 'a.0.b.1.c': "
+ "[[10,10,true,true], [20,20,true,true], [30,30,true,true]]}}}}}");
+
+ runQuery(fromjson("{'a.0.b.1.c': {$gte: 10, $lte: 30}, 'a.1.b.0.c': {$in: [10, 20, 30]}}"));
+ assertNumSolutions(3U);
+ assertSolutionExists(
+ "{fetch: {filter: {'a.0.b.1.c': {$gte: 10, $lte: 30}, 'a.1.b.0.c': {$in: [10, 20, 30]}}, "
+ "node: {ixscan: {filter: null, pattern: {'$_path': 1, 'a.0.b.1.c': 1}, bounds: {'$_path': "
+ "[['a.0.b.1.c','a.0.b.1.c',true,true], ['a.0.b.c','a.0.b.c',true,true], "
+ "['a.b.1.c','a.b.1.c',true,true], ['a.b.c','a.b.c',true,true]], 'a.0.b.1.c': "
+ "[[-inf,30,true,true]]}}}}}");
+ assertSolutionExists(
+ "{fetch: {filter: {'a.0.b.1.c': {$gte: 10, $lte: 30}, 'a.1.b.0.c': {$in: [10, 20, 30]}}, "
+ "node: {ixscan: {filter: null, pattern: {'$_path': 1, 'a.0.b.1.c': 1}, bounds: {'$_path': "
+ "[['a.0.b.1.c','a.0.b.1.c',true,true], ['a.0.b.c','a.0.b.c',true,true], "
+ "['a.b.1.c','a.b.1.c',true,true], ['a.b.c','a.b.c',true,true]], 'a.0.b.1.c': "
+ "[[10,inf,true,true]]}}}}}");
+ assertSolutionExists(
+ "{fetch: {filter: {'a.0.b.1.c': {$gte: 10, $lte: 30}, 'a.1.b.0.c': {$in: [10, 20, 30]}}, "
+ "node: {ixscan: {filter: null, pattern: {'$_path': 1, 'a.1.b.0.c': 1}, bounds: {'$_path': "
+ "[['a.1.b.0.c','a.1.b.0.c',true,true], ['a.1.b.c','a.1.b.c',true,true], "
+ "['a.b.0.c','a.b.0.c',true,true], ['a.b.c','a.b.c',true,true]], 'a.1.b.0.c': "
+ "[[10,10,true,true], [20,20,true,true], [30,30,true,true]]}}}}}");
+
+ // Finally, confirm that queries which include a numeric path component on a field that is *not*
+ // multikey produce EXACT bounds and do *not* add their predicates to the filter.
+ runQuery(fromjson("{'d.0': {$gt: 10, $lt: 20}}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: null, node: {ixscan: {filter: null, pattern: {'$_path': 1, 'd.0': 1}, "
+ "bounds: {'$_path': [['d.0','d.0',true,true]], 'd.0': [[10,20,false,false]]}}}}}");
+
+ // Here, in contrast to the multikey case, we generate one indexed solution for each predicate
+ // and only filter the *other* predicate in the FETCH stage.
+ runQuery(fromjson("{'d.0.e': {$gt: 10, $lt: 20}, 'd.1.f': {$in: [10, 20, 30]}}"));
+ assertNumSolutions(2U);
+ assertSolutionExists(
+ "{fetch: {filter: {'d.0.e': {$gt: 10, $lt: 20}}, node: {ixscan: {filter: null, pattern: "
+ "{'$_path': 1, 'd.1.f': 1}, bounds: {'$_path': [['d.1.f','d.1.f',true,true]], 'd.1.f': "
+ "[[10,10,true,true], [20,20,true,true], [30,30,true,true]]}}}}}");
+ assertSolutionExists(
+ "{fetch: {filter: {'d.1.f': {$in: [10, 20, 30]}}, node: {ixscan: {filter: null, pattern: "
+ "{'$_path': 1, 'd.0.e': 1}, bounds: {'$_path': [['d.0.e','d.0.e',true,true]], 'd.0.e': "
+ "[[10,20,false,false]]}}}}}");
+}
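A worked illustration of why the FETCH filter must recheck *all* predicates for fieldname-or-array-index bounds; the sample document and values here are invented for exposition and are not taken from this test.

    #include <cassert>

    int main() {
        // Hypothetical document: {a: [{b: 5}, {b: 25}]}, query: {'a.0.b': {$gt: 10, $lt: 30}}.
        // The $** index stores keys under path 'a.b' for both array elements, and the generated
        // bounds scan 'a.b' as well as 'a.0.b'.
        const int indexedValueUnderAB = 25;  // key contributed by the element {b: 25}.
        const int actualValueAtA0B = 5;      // what 'a.0.b' really resolves to in the document.
        const bool keyInScannedBounds = indexedValueUnderAB > 10 && indexedValueUnderAB < 30;
        const bool documentMatches = actualValueAtA0B > 10 && actualValueAtA0B < 30;
        // The scan produces the document, but only a FETCH filter carrying both predicates
        // (here simply the full range check) correctly rejects it.
        assert(keyInScannedBounds && !documentMatches);
    }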
+
+TEST_F(QueryPlannerAllPathsTest, ShouldOnlyBuildSpecialBoundsForMultikeyPaths) {
+ addAllPathsIndex(BSON("a.$**" << 1), {"a.b", "a.b.c.d"});
+
+ // 'a' is not multikey, so 'a.0' refers specifically to a fieldname rather than an array index,
+ // and special bounds will not be generated.
+ runQuery(fromjson("{'a.0': 10}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: null, node: {ixscan: {filter: null, pattern: {'$_path': 1, 'a.0': 1}, "
+ "bounds: {'$_path': [['a.0','a.0',true,true]], 'a.0': [[10,10,true,true]]}}}}}");
+
+    // 'a' is not multikey, so 'a.0.b' does not refer to the multikey path 'a.b', and 'b.1' is
+    // therefore also strictly a fieldname. Special bounds will not be generated for either.
+ runQuery(fromjson("{'a.0.b.1': 10}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: null, node: {ixscan: {filter: null, pattern: {'$_path': 1, 'a.0.b.1': "
+ "1}, bounds: {'$_path': [['a.0.b.1','a.0.b.1',true,true]], 'a.0.b.1': "
+ "[[10,10,true,true]]}}}}}");
+
+ // 'a.b' is multikey but 'a.b.c' is not, so special bounds will only be generated for 'b.1'.
+ runQuery(fromjson("{'a.b.1.c.2.d': 10}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: {'a.b.1.c.2.d': 10}, node: {ixscan: {filter: null, pattern: {'$_path': "
+ "1, 'a.b.1.c.2.d': 1}, bounds: {'$_path': [['a.b.1.c.2.d','a.b.1.c.2.d',true,true], "
+ "['a.b.c.2.d','a.b.c.2.d',true,true]], 'a.b.1.c.2.d': [[10,10,true,true]]}}}}}");
+
+ // 'a.b' is multikey but 'a.b.c' is not. 'a.b.c.2.d' therefore refers to a different path than
+ // multikey path 'a.[b].c.[d]', so special bounds will be generated for 'b.1' but not for 'd.3'.
+ runQuery(fromjson("{'a.b.1.c.2.d.3.e': 10}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: {'a.b.1.c.2.d.3.e': 10}, node: {ixscan: {filter: null, pattern: "
+ "{'$_path': 1, 'a.b.1.c.2.d.3.e': 1}, bounds: "
+ "{'$_path':[['a.b.1.c.2.d.3.e','a.b.1.c.2.d.3.e',true,true], "
+ "['a.b.c.2.d.3.e','a.b.c.2.d.3.e',true,true]], 'a.b.1.c.2.d.3.e':[[10,10,true,true]]}}}}}");
+
+ // 'a.b' and 'a.b.c.d' are both multikey. Special bounds will be generated for 'b.1' and 'd.3'.
+ runQuery(fromjson("{'a.b.1.c.d.3.e': 10}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: {'a.b.1.c.d.3.e': 10}, node: {ixscan: {filter: null, pattern: {'$_path': "
+ "1, 'a.b.1.c.d.3.e': 1}, bounds: {'$_path':[['a.b.1.c.d.3.e','a.b.1.c.d.3.e',true,true], "
+ "['a.b.1.c.d.e','a.b.1.c.d.e',true,true], ['a.b.c.d.3.e','a.b.c.d.3.e',true,true], "
+ "['a.b.c.d.e','a.b.c.d.e',true,true]], 'a.b.1.c.d.3.e':[[10,10,true,true]]}}}}}");
+}
+
+TEST_F(QueryPlannerAllPathsTest, ShouldGenerateSpecialBoundsForNumericMultikeyPaths) {
+ addAllPathsIndex(BSON("a.$**" << 1), {"a.b", "a.b.0"});
+
+    // 'a.b.0' is itself a multikey path, but since 'a.b' is multikey, 'b.0' may refer to an array
+ // element of 'b'. We generate special bounds for 'b.0'.
+ runQuery(fromjson("{'a.b.0': 10}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: {'a.b.0': 10}, node: {ixscan: {filter: null, pattern: {'$_path': 1, "
+ "'a.b.0': 1}, bounds: {'$_path': [['a.b','a.b',true,true], ['a.b.0','a.b.0',true,true]], "
+ "'a.b.0': [[10,10,true,true]]}}}}}");
+
+ // 'a.b' and 'a.b.0' are both multikey paths; we generate special bounds for 'b.0' and '0.1'.
+ runQuery(fromjson("{'a.b.0.1.c': 10}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: {'a.b.0.1.c': 10}, node: {ixscan: {filter: null, pattern: {'$_path': 1, "
+ "'a.b.0.1.c': 1}, bounds: {'$_path': [['a.b.0.1.c','a.b.0.1.c',true,true], "
+ "['a.b.0.c','a.b.0.c',true,true], ['a.b.1.c','a.b.1.c',true,true], "
+ "['a.b.c','a.b.c',true,true]], 'a.b.0.1.c': [[10,10,true,true]]}}}}}");
+
+ // 'a.b' is multikey but 'a.b.1' is not, so we only generate special bounds for 'b.1'.
+ runQuery(fromjson("{'a.b.1.1.c': 10}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: {'a.b.1.1.c': 10}, node: {ixscan: {filter: null, pattern: {'$_path': 1, "
+ "'a.b.1.1.c': 1}, bounds: {'$_path': [['a.b.1.1.c','a.b.1.1.c',true,true], "
+ "['a.b.1.c','a.b.1.c',true,true]], 'a.b.1.1.c': [[10,10,true,true]]}}}}}");
+}
+
+TEST_F(QueryPlannerAllPathsTest, ShouldNotGenerateSpecialBoundsForFieldNamesWithLeadingZeroes) {
+ addAllPathsIndex(BSON("a.$**" << 1), {"a.b"});
+
+ // 'a.b' is multikey, but '01' is not eligible for consideration as a numeric array index; it is
+ // always strictly a field name. We therefore do not generate special bounds.
+ runQuery(fromjson("{'a.b.01': 10}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: null, node: {ixscan: {filter: null, pattern: {'$_path': 1, 'a.b.01': 1}, "
+ "bounds: {'$_path': [['a.b.01','a.b.01',true,true]], 'a.b.01': [[10,10,true,true]]}}}}}");
+}
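The eligibility rule this test relies on can be stated as a small predicate; couldBeArrayIndex is an illustrative name, not a helper that exists in this patch.

    #include <cassert>
    #include <string>

    // A component can be treated as a potential array index only if it is all digits and has no
    // leading zero; "0" itself is a valid index, but "01" or "007" is always a field name.
    bool couldBeArrayIndex(const std::string& component) {
        if (component.empty() || component.find_first_not_of("0123456789") != std::string::npos)
            return false;
        return component == "0" || component[0] != '0';
    }

    int main() {
        assert(couldBeArrayIndex("0"));
        assert(couldBeArrayIndex("12"));
        assert(!couldBeArrayIndex("01"));
        assert(!couldBeArrayIndex("b"));
    }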
+
+TEST_F(QueryPlannerAllPathsTest, InitialNumericPathComponentIsAlwaysFieldName) {
+ addAllPathsIndex(BSON("$**" << 1), {"0"}, fromjson("{'0': 1}"));
+
+ // '0' is multikey, but the first component in a path can never be an array index since it must
+ // always be a field name. We therefore do not generate special bounds for '0.b'.
+ runQuery(fromjson("{'0.b': 10}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: null, node: {ixscan: {filter: null, pattern: {'$_path': 1, '0.b': 1}, "
+ "bounds: {'$_path': [['0.b','0.b',true,true]], '0.b': [[10,10,true,true]]}}}}}");
+
+ // '0' is multikey, so a query on '0.1.b' will generate special bounds for '1' but not for '0'.
+ runQuery(fromjson("{'0.1.b': 10}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: {'0.1.b': 10}, node: {ixscan: {filter: null, pattern: {'$_path': 1, "
+ "'0.1.b': 1}, bounds: {'$_path': [['0.1.b','0.1.b',true,true], ['0.b','0.b',true,true]], "
+ "'0.1.b': [[10,10,true,true]]}}}}}");
+}
+
+TEST_F(QueryPlannerAllPathsTest, ShouldGenerateSpecialBoundsForNullAndExistenceQueries) {
+ addAllPathsIndex(BSON("a.$**" << 1), {"a", "a.b", "a.b.2", "a.b.2.3", "a.b.2.3.4"});
+
+ runQuery(fromjson("{'a.0.b': {$exists: true}}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: {'a.0.b': {$exists: true}}, node: {ixscan: {filter: null, pattern: "
+ "{'$_path': 1, 'a.0.b': 1}, bounds: {'$_path': [['a.0.b','a.0.b',true,true], "
+ "['a.0.b.','a.0.b/',true,false], ['a.b','a.b',true,true], ['a.b.','a.b/',true,false]], "
+ "'a.0.b': [[{$minKey: 1},{$maxKey: 1},true,true]]}}}}}");
+
+ runQuery(fromjson("{'a.0.b.1.c': {$exists: true, $eq: null}}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: {'a.0.b.1.c': {$exists: true, $eq: null}}, node: {ixscan: {filter:null, "
+ "pattern:{'$_path': 1, 'a.0.b.1.c': 1}, bounds: {'$_path': "
+ "[['a.0.b.1.c','a.0.b.1.c',true,true], ['a.0.b.1.c.','a.0.b.1.c/',true,false], "
+ "['a.0.b.c','a.0.b.c',true,true], ['a.0.b.c.','a.0.b.c/',true,false], "
+ "['a.b.1.c','a.b.1.c',true,true], ['a.b.1.c.','a.b.1.c/',true,false], "
+ "['a.b.c','a.b.c',true,true], ['a.b.c.','a.b.c/',true,false]], 'a.0.b.1.c': [[{$minKey: "
+ "1},{$maxKey: 1},true,true]]}}}}}");
+
+ // Confirm that any trailing array index fields are trimmed before the fieldname-or-array-index
+ // pathset is generated, such that the subpath bounds do not overlap.
+ runQuery(fromjson("{'a.0.b.1': {$exists: true, $eq: null}}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: {'a.0.b.1': {$exists: true, $eq: null}}, node: {ixscan: {filter:null, "
+ "pattern:{'$_path': 1, 'a.0.b.1': 1}, bounds: {'$_path': [['a.0.b','a.0.b',true,true], "
+ "['a.0.b.','a.0.b/',true,false], ['a.b','a.b',true,true], ['a.b.','a.b/',true,false]], "
+ "'a.0.b.1': [[{$minKey: 1},{$maxKey: 1},true,true]]}}}}}");
+
+ runQuery(fromjson("{'a.0.b.2.3.4': {$exists: true, $eq: null}}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: {'a.0.b.2.3.4': {$exists: true, $eq: null}}, node: {ixscan: {filter:null,"
+ "pattern:{'$_path': 1, 'a.0.b.2.3.4': 1}, bounds: {'$_path': [['a.0.b','a.0.b',true,true], "
+ "['a.0.b.','a.0.b/',true,false], ['a.b','a.b',true,true], ['a.b.','a.b/',true,false]], "
+ "'a.0.b.2.3.4': [[{$minKey: 1},{$maxKey: 1},true,true]]}}}}}");
+}
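The ['a.b.', 'a.b/') pairs in the $_path bounds above are prefix intervals: '/' is the ASCII character immediately after '.', so the half-open range covers every proper subpath of 'a.b'. A small sketch, with subpathInterval as an illustrative name:

    #include <cassert>
    #include <string>
    #include <utility>

    // [path + ".", path + "/"): the exclusive upper bound is the same string with the trailing
    // '.' bumped to '/', the next ASCII character, so every "path.<anything>" string falls inside.
    std::pair<std::string, std::string> subpathInterval(const std::string& path) {
        return {path + ".", path + "/"};
    }

    int main() {
        const auto [lo, hi] = subpathInterval("a.b");
        assert(lo == "a.b." && hi == "a.b/");
        assert(std::string("a.b.c") >= lo && std::string("a.b.c") < hi);  // covered subpath.
        assert(!(std::string("a.b") >= lo));                              // "a.b" itself is not.
    }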
+
+TEST_F(QueryPlannerAllPathsTest, ShouldDeclineToAnswerQueriesThatTraverseTooManyArrays) {
+ addAllPathsIndex(BSON("$**" << 1),
+ {"a",
+ "a.b",
+ "a.b.c",
+ "a.b.c.d",
+ "a.b.c.d.e",
+ "a.b.c.d.e.f",
+ "a.b.c.d.e.f.g",
+ "a.b.c.d.e.f.g.h",
+ "a.b.c.d.e.f.g.h.i",
+ "a.b.c.d.e.f.g.h.i.j",
+ "a.b.c.d.e.f.g.h.i.j.k"});
+
+ // We can query through 8 levels of arrays via explicit array indices...
+ runQuery(fromjson("{'a.1.b.2.c.3.d.4.e.5.f.6.g.7.h.8': 1}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {node: {ixscan: {pattern: {$_path: 1, 'a.1.b.2.c.3.d.4.e.5.f.6.g.7.h.8': 1}}}}}");
+
+ // ... but after that, we decline to answer the query using the $** index.
+ runQuery(fromjson("{'a.1.b.2.c.3.d.4.e.5.f.6.g.7.h.8.i.9': 1}"));
+ assertNumSolutions(1U);
+ assertSolutionExists("{cscan: {dir: 1}}");
+
+    // However, we continue to allow implicit (non-array-index) traversal of arrays to any depth.
+ runQuery(fromjson("{'a.1.b.2.c.3.d.4.e.5.f.6.g.7.h.8.i.j.k': 1}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch:{node:{ixscan:{pattern:{$_path:1, 'a.1.b.2.c.3.d.4.e.5.f.6.g.7.h.8.i.j.k': 1}}}}}");
+}
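A note on why a cap is needed at all: each component that could be either a field name or an array index doubles the number of candidate $_path intervals, so k such components produce 2^k bounds. The snippet below simply tabulates that growth; the cutoff of eight is inferred from this test rather than from any constant visible in this diff.

    #include <cstdint>
    #include <iostream>

    int main() {
        // Candidate $_path intervals as a function of ambiguous (fieldname-or-array-index)
        // components, assuming each one independently doubles the expansion.
        for (int k = 1; k <= 9; ++k)
            std::cout << k << " ambiguous components -> " << (std::uint64_t{1} << k)
                      << " path intervals\n";
    }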
+
+// TODO SERVER-35335: Add testing for Min/Max.
+// TODO SERVER-36517: Add testing for DISTINCT_SCAN.
+// TODO SERVER-35336: Add testing for partialFilterExpression.
+// TODO SERVER-35331: Add testing for hints.
+// TODO SERVER-36145: Add testing for non-blocking sort.
+
} // namespace mongo