diff options
-rw-r--r-- | jstests/noPassthroughWithMongod/all_paths_index_multikey.js | 146 | ||||
-rw-r--r-- | src/mongo/db/catalog/collection_info_cache_impl.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/exec/projection_exec_agg.cpp | 17 | ||||
-rw-r--r-- | src/mongo/db/exec/projection_exec_agg.h | 3 | ||||
-rw-r--r-- | src/mongo/db/exec/projection_exec_agg_test.cpp | 19 | ||||
-rw-r--r-- | src/mongo/db/field_ref.cpp | 35 | ||||
-rw-r--r-- | src/mongo/db/field_ref.h | 29 | ||||
-rw-r--r-- | src/mongo/db/field_ref_test.cpp | 37 | ||||
-rw-r--r-- | src/mongo/db/query/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/query/index_bounds_builder.cpp | 25 | ||||
-rw-r--r-- | src/mongo/db/query/index_bounds_builder.h | 10 | ||||
-rw-r--r-- | src/mongo/db/query/planner_access.cpp | 120 | ||||
-rw-r--r-- | src/mongo/db/query/planner_access.h | 2 | ||||
-rw-r--r-- | src/mongo/db/query/planner_allpaths_helpers.cpp | 249 | ||||
-rw-r--r-- | src/mongo/db/query/planner_allpaths_helpers.h | 101 | ||||
-rw-r--r-- | src/mongo/db/query/planner_ixselect.cpp | 36 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_all_paths_index_test.cpp | 367 |
17 files changed, 1099 insertions, 100 deletions
diff --git a/jstests/noPassthroughWithMongod/all_paths_index_multikey.js b/jstests/noPassthroughWithMongod/all_paths_index_multikey.js index 81ba7aa7b62..375bc704ef4 100644 --- a/jstests/noPassthroughWithMongod/all_paths_index_multikey.js +++ b/jstests/noPassthroughWithMongod/all_paths_index_multikey.js @@ -69,25 +69,38 @@ for (let op of operationList) { for (let path of pathList) { const query = {[path]: op.expression}; - // Explain the query, and determine whether an indexed solution is available. - const explain = assert.commandWorked(coll.find(query).explain()); - const ixScans = getPlanStages(explain.queryPlanner.winningPlan, "IXSCAN"); - // If we expect the current path to have been excluded based on the $** keyPattern - // or projection, confirm that no indexed solution was found. - if (!expectedPaths.includes(path)) { - assert.eq(ixScans.length, 0, path); - continue; - } - // Verify that the winning plan uses the $** index with the expected path. - assert.eq(ixScans.length, 1); - assert.docEq(ixScans[0].keyPattern, {"$_path": 1, [path]: 1}); - // Verify that the results obtained from the $** index are identical to a COLLSCAN. - assertArrayEq(coll.find(query).toArray(), - coll.find(query).hint({$natural: 1}).toArray()); + assertAllPathsQuery(query, expectedPaths.includes(path) ? path : null); } } } + // Runs a single allPaths query test. If 'expectedPath' is non-null, verifies that there is an + // indexed solution that uses the $** index with the given path string. If 'expectedPath' is + // null, verifies that no indexed solution was found. If 'explainStats' is non-empty, verifies + // that the query's explain output reflects the given stats. + function assertAllPathsQuery(query, expectedPath, explainStats = {}) { + // Explain the query, and determine whether an indexed solution is available. + const explainOutput = coll.find(query).explain("executionStats"); + const ixScans = getPlanStages(explainOutput.queryPlanner.winningPlan, "IXSCAN"); + // If we expect the current path to have been excluded based on the $** keyPattern + // or projection, confirm that no indexed solution was found. + if (!expectedPath) { + assert.eq(ixScans.length, 0); + return; + } + // Verify that the winning plan uses the $** index with the expected path. + assert.eq(ixScans.length, 1); + assert.docEq(ixScans[0].keyPattern, {"$_path": 1, [expectedPath]: 1}); + // Verify that the results obtained from the $** index are identical to a COLLSCAN. + assertArrayEq(coll.find(query).toArray(), coll.find(query).hint({$natural: 1}).toArray()); + // Verify that the explain output reflects the given 'explainStats'. + for (let stat in explainStats) { + assert.eq(explainStats[stat], + stat.split('.').reduce((obj, i) => obj[i], explainOutput), + explainOutput); + } + } + // Required in order to build $** indexes. assert.commandWorked( db.adminCommand({setParameter: 1, internalQueryAllowAllPathsIndexes: true})); @@ -128,6 +141,109 @@ coll.find({"b.c.d": {$elemMatch: {"e.f": {$gt: 0, $lt: 9}}}}) .hint({$natural: 1}) .itcount()); + + // Fieldname-or-array-index query tests. + assert(coll.drop()); + assert.commandWorked(coll.createIndex({"$**": 1})); + + // Insert some documents that exhibit a mix of numeric fieldnames and array indices. + assert.commandWorked(coll.insert({_id: 1, a: [{b: [{c: 1}]}]})); + assert.commandWorked(coll.insert({_id: 2, a: [{b: [{c: 0}, {c: 1}]}]})); + assert.commandWorked(coll.insert({_id: 3, a: {'0': [{b: {'1': {c: 1}}}, {d: 1}]}})); + assert.commandWorked(coll.insert({_id: 4, a: [{b: [{1: {c: 1}}]}]})); + assert.commandWorked( + coll.insert({_id: 5, a: [{b: [{'1': {c: {'2': {d: [0, 1, 2, 3, {e: 1}]}}}}]}]})); + + /* + * Multikey Metadata Keys: + * {'': 1, '': 'a'} + * {'': 1, '': 'a.0'} + * {'': 1, '': 'a.b'} + * {'': 1, '': 'a.b.1.c.2.d'} + * Keys: + * {'': 'a.b.c', '': 1} // _id: 1, a,b multikey + * {'': 'a.b.c', '': 0} // _id: 2, a,b multikey + * {'': 'a.b.c', '': 1} // _id: 2, a,b multikey + * {'': 'a.0.b.1.c', '': 1} // _id: 3, '0, 1' are fieldnames, a.0 multikey + * {'': 'a.0.d', '': 1} // _id: 3, '0' is fieldname, a.0 multikey + * {'': 'a.b.1.c', '': 1} // _id: 4, '1' is fieldname, a,b multikey + * {'': 'a.b.1.c.2.d', '': 0} // _id: 5, a,b,a.b.1.c.2.d multikey, '1' is fieldname + * {'': 'a.b.1.c.2.d', '': 1} // _id: 5 + * {'': 'a.b.1.c.2.d', '': 2} // _id: 5 + * {'': 'a.b.1.c.2.d', '': 3} // _id: 5 + * {'': 'a.b.1.c.2.d.e', '': 1} // _id: 5 + */ + + // Test that a query with multiple numeric path components returns all relevant documents, + // whether the numeric path component refers to a fieldname or array index in each doc: + // + // _id:1 will be captured by the special fieldname-or-array-index bounds 'a.b.c', but will + // be filtered out by the INEXACT_FETCH since it has no array index or fieldname 'b.1'. + // _id:2 will match both 'a.0' and 'b.1' by array index. + // _id:3 will match both 'a.0' and 'b.1' by fieldname. + // _id:4 will match 'a.0' by array index and 'b.1' by fieldname. + // _id:5 is not captured by the special fieldname-or-array-index bounds. + // + // We examine the solution's 'nReturned' versus 'totalDocsExamined' to confirm this. + // totalDocsExamined: [_id:1, _id:2, _id:3, _id:4], nReturned: [_id:2, _id:3, _id:4] + assertAllPathsQuery({'a.0.b.1.c': 1}, + 'a.0.b.1.c', + {'executionStats.nReturned': 3, 'executionStats.totalDocsExamined': 4}); + + // Test that we can query a specific field of an array whose fieldname is itself numeric. + assertAllPathsQuery({'a.0.1.d': 1}, + 'a.0.1.d', + {'executionStats.nReturned': 1, 'executionStats.totalDocsExamined': 1}); + + // Test that we can query a primitive value at a specific array index. + assertAllPathsQuery({'a.0.b.1.c.2.d.3': 3}, + 'a.0.b.1.c.2.d.3', + {'executionStats.nReturned': 1, 'executionStats.totalDocsExamined': 1}); + + // Test that a $** index can't be used for a query through more than 8 nested array indices. + assert.commandWorked( + coll.insert({_id: 6, a: [{b: [{c: [{d: [{e: [{f: [{g: [{h: [{i: [1]}]}]}]}]}]}]}]}]})); + // We can query up to a depth of 8 arrays via specific indices, but not through 9 or more. + assertAllPathsQuery({'a.0.b.0.c.0.d.0.e.0.f.0.g.0.h.0.i': 1}, + 'a.0.b.0.c.0.d.0.e.0.f.0.g.0.h.0.i'); + assertAllPathsQuery({'a.0.b.0.c.0.d.0.e.0.f.0.g.0.h.0.i.0': 1}, null); + + // Test that fieldname-or-array-index queries do not inappropriately trim predicates; that + // is, all predicates on the field are added to a FETCH filter above the IXSCAN. + assert(coll.drop()); + assert.commandWorked(coll.createIndex({"$**": 1})); + + assert.commandWorked(coll.insert({_id: 1, a: [0, 1, 2]})); + assert.commandWorked(coll.insert({_id: 2, a: [1, 2, 3]})); + assert.commandWorked(coll.insert({_id: 3, a: [2, 3, 4], b: [5, 6, 7]})); + assert.commandWorked(coll.insert({_id: 4, a: [3, 4, 5], b: [6, 7, 8], c: {'0': 9}})); + assert.commandWorked(coll.insert({_id: 5, a: [4, 5, 6], b: [7, 8, 9], c: {'0': 10}})); + assert.commandWorked(coll.insert({_id: 6, a: [5, 6, 7], b: [8, 9, 10], c: {'0': 11}})); + + assertAllPathsQuery({"a.0": {$gt: 1, $lt: 4}}, 'a.0', {'executionStats.nReturned': 2}); + assertAllPathsQuery({"a.1": {$gte: 1, $lte: 4}}, 'a.1', {'executionStats.nReturned': 4}); + assertAllPathsQuery({"b.2": {$in: [5, 9]}}, 'b.2', {'executionStats.nReturned': 1}); + assertAllPathsQuery({"c.0": {$in: [10, 11]}}, 'c.0', {'executionStats.nReturned': 2}); + + // Test that the $** index doesn't trim predicates when planning across multiple nested + // $and/$or expressions on various fieldname-or-array-index paths. + const trimTestQuery = { + $or: [ + {"a.0": {$gte: 0, $lt: 3}, "a.1": {$in: [2, 3, 4]}}, + {"b.1": {$gt: 6, $lte: 9}, "c.0": {$gt: 9, $lt: 12}} + ] + }; + const trimTestExplain = coll.find(trimTestQuery).explain("executionStats"); + // Verify that the expected number of documents were matched, and the $** index was used. + // Matched documents: [_id:2, _id:3, _id:5, _id:6] + assert.eq(trimTestExplain.executionStats.nReturned, 4); + const trimTestIxScans = getPlanStages(trimTestExplain.queryPlanner.winningPlan, "IXSCAN"); + for (let ixScan of trimTestIxScans) { + assert.eq(ixScan.keyPattern["$_path"], 1); + } + // Finally, confirm that a collection scan produces the same results. + assertArrayEq(coll.find(trimTestQuery).toArray(), + coll.find(trimTestQuery).hint({$natural: 1}).toArray()); } finally { // Disable $** indexes once the tests have either completed or failed. assert.commandWorked( diff --git a/src/mongo/db/catalog/collection_info_cache_impl.cpp b/src/mongo/db/catalog/collection_info_cache_impl.cpp index eed5b2843e3..38adae1cf13 100644 --- a/src/mongo/db/catalog/collection_info_cache_impl.cpp +++ b/src/mongo/db/catalog/collection_info_cache_impl.cpp @@ -102,7 +102,7 @@ void CollectionInfoCacheImpl::computeIndexKeys(OperationContext* opCtx) { // If a subtree was specified in the keyPattern, or if an inclusion projection is // present, then we need only index the path(s) preserved by the projection. for (const auto& path : pathProj->getExhaustivePaths()) { - _indexedPaths.addPath(path); + _indexedPaths.addPath(path.dottedField()); } } } else if (descriptor->getAccessMethodName() == IndexNames::TEXT) { diff --git a/src/mongo/db/exec/projection_exec_agg.cpp b/src/mongo/db/exec/projection_exec_agg.cpp index 7c292ac4a81..ff2d2d062cb 100644 --- a/src/mongo/db/exec/projection_exec_agg.cpp +++ b/src/mongo/db/exec/projection_exec_agg.cpp @@ -71,12 +71,18 @@ public: _projection = ParsedAggregationProjection::create( expCtx, projSpec, idPolicy, recursionPolicy, ProjectionParseMode::kBanComputedFields); + + // For an inclusion, record the exhaustive set of fields retained by the projection. + if (getType() == ProjectionType::kInclusionProjection) { + DepsTracker depsTracker; + _projection->addDependencies(&depsTracker); + for (auto&& field : depsTracker.fields) + _exhaustivePaths.insert(FieldRef{field}); + } } - std::set<std::string> getExhaustivePaths() const { - DepsTracker depsTracker; - _projection->addDependencies(&depsTracker); - return depsTracker.fields; + const std::set<FieldRef>& getExhaustivePaths() const { + return _exhaustivePaths; } ProjectionType getType() const { @@ -116,6 +122,7 @@ private: } std::unique_ptr<ParsedAggregationProjection> _projection; + std::set<FieldRef> _exhaustivePaths; }; // ProjectionExecAgg's constructor and destructor are defined here, at a point where the @@ -151,7 +158,7 @@ stdx::unordered_set<std::string> ProjectionExecAgg::applyProjectionToFields( return _exec->applyProjectionToFields(fields); } -std::set<std::string> ProjectionExecAgg::getExhaustivePaths() const { +const std::set<FieldRef>& ProjectionExecAgg::getExhaustivePaths() const { return _exec->getExhaustivePaths(); } } // namespace mongo diff --git a/src/mongo/db/exec/projection_exec_agg.h b/src/mongo/db/exec/projection_exec_agg.h index 90ad2b7ef55..9b3e9884397 100644 --- a/src/mongo/db/exec/projection_exec_agg.h +++ b/src/mongo/db/exec/projection_exec_agg.h @@ -31,6 +31,7 @@ #include <memory> #include "mongo/bson/bsonobj.h" +#include "mongo/db/field_ref.h" namespace mongo { @@ -78,7 +79,7 @@ public: * empty set if the exhaustive set cannot be determined. An inclusion will always produce an * exhaustive set; an exclusion will always produce an empty set. */ - std::set<std::string> getExhaustivePaths() const; + const std::set<FieldRef>& getExhaustivePaths() const; ProjectionType getType() const; diff --git a/src/mongo/db/exec/projection_exec_agg_test.cpp b/src/mongo/db/exec/projection_exec_agg_test.cpp index 5105677f3f0..1aaa0668954 100644 --- a/src/mongo/db/exec/projection_exec_agg_test.cpp +++ b/src/mongo/db/exec/projection_exec_agg_test.cpp @@ -61,6 +61,15 @@ std::unique_ptr<ProjectionExecAgg> makeProjectionWithDefaultIdExclusionAndNested projSpec, DefaultIdPolicy::kExcludeId, ArrayRecursionPolicy::kRecurseNestedArrays); } +std::set<FieldRef> toFieldRefs(const std::set<std::string>& stringPaths) { + std::set<FieldRef> fieldRefs; + std::transform(stringPaths.begin(), + stringPaths.end(), + std::inserter(fieldRefs, fieldRefs.begin()), + [](const auto& path) { return FieldRef(path); }); + return fieldRefs; +} + // // Error cases. // @@ -190,7 +199,7 @@ TEST(ProjectionExecAggTests, InclusionFieldPathsWithImplicitIdInclusion) { // Extract the exhaustive set of paths that will be preserved by the projection. auto exhaustivePaths = parsedProject->getExhaustivePaths(); - std::set<std::string> expectedPaths{"_id", "a.b.c", "d"}; + std::set<FieldRef> expectedPaths = toFieldRefs({"_id", "a.b.c", "d"}); // Verify that the exhaustive set of paths is as expected. ASSERT(exhaustivePaths == expectedPaths); @@ -203,7 +212,7 @@ TEST(ProjectionExecAggTests, InclusionFieldPathsWithExplicitIdInclusion) { // Extract the exhaustive set of paths that will be preserved by the projection. auto exhaustivePaths = parsedProject->getExhaustivePaths(); - std::set<std::string> expectedPaths{"_id", "a.b.c", "d"}; + std::set<FieldRef> expectedPaths = toFieldRefs({"_id", "a.b.c", "d"}); // Verify that the exhaustive set of paths is as expected. ASSERT(exhaustivePaths == expectedPaths); @@ -216,7 +225,7 @@ TEST(ProjectionExecAggTests, InclusionFieldPathsWithExplicitIdInclusionIdOnly) { // Extract the exhaustive set of paths that will be preserved by the projection. auto exhaustivePaths = parsedProject->getExhaustivePaths(); - std::set<std::string> expectedPaths{"_id"}; + std::set<FieldRef> expectedPaths = toFieldRefs({"_id"}); // Verify that the exhaustive set of paths is as expected. ASSERT(exhaustivePaths == expectedPaths); @@ -229,7 +238,7 @@ TEST(ProjectionExecAggTests, InclusionFieldPathsWithImplicitIdExclusion) { // Extract the exhaustive set of paths that will be preserved by the projection. auto exhaustivePaths = parsedProject->getExhaustivePaths(); - std::set<std::string> expectedPaths{"a.b.c", "d"}; + std::set<FieldRef> expectedPaths = toFieldRefs({"a.b.c", "d"}); // Verify that the exhaustive set of paths is as expected. ASSERT(exhaustivePaths == expectedPaths); @@ -242,7 +251,7 @@ TEST(ProjectionExecAggTests, InclusionFieldPathsWithExplicitIdExclusion) { // Extract the exhaustive set of paths that will be preserved by the projection. auto exhaustivePaths = parsedProject->getExhaustivePaths(); - std::set<std::string> expectedPaths{"a.b.c", "d"}; + std::set<FieldRef> expectedPaths = toFieldRefs({"a.b.c", "d"}); // Verify that the exhaustive set of paths is as expected. ASSERT(exhaustivePaths == expectedPaths); diff --git a/src/mongo/db/field_ref.cpp b/src/mongo/db/field_ref.cpp index 2e905090307..1bce17b20e7 100644 --- a/src/mongo/db/field_ref.cpp +++ b/src/mongo/db/field_ref.cpp @@ -26,9 +26,12 @@ * it in the license file. */ +#include "mongo/platform/basic.h" + #include "mongo/db/field_ref.h" -#include <algorithm> // for min +#include <algorithm> +#include <cctype> #include "mongo/util/assert_util.h" @@ -226,6 +229,10 @@ bool FieldRef::isPrefixOf(const FieldRef& other) const { return common == _size && other._size > common; } +bool FieldRef::isPrefixOfOrEqualTo(const FieldRef& other) const { + return isPrefixOf(other) || *this == other; +} + size_t FieldRef::commonPrefixSize(const FieldRef& other) const { if (_size == 0 || other._size == 0) { return 0; @@ -244,6 +251,32 @@ size_t FieldRef::commonPrefixSize(const FieldRef& other) const { return prefixSize; } +bool FieldRef::isNumericPathComponent(StringData component) { + return !component.empty() && !(component.size() > 1 && component[0] == '0') && + std::all_of(component.begin(), component.end(), [](auto c) { return std::isdigit(c); }); +} + +bool FieldRef::isNumericPathComponent(size_t i) const { + return FieldRef::isNumericPathComponent(getPart(i)); +} + +bool FieldRef::hasNumericPathComponents() const { + for (size_t i = 0; i < numParts(); ++i) { + if (isNumericPathComponent(i)) + return true; + } + return false; +} + +std::set<size_t> FieldRef::getNumericPathComponents(size_t startPart) const { + std::set<size_t> numericPathComponents; + for (auto i = startPart; i < numParts(); ++i) { + if (isNumericPathComponent(i)) + numericPathComponents.insert(i); + } + return numericPathComponents; +} + StringData FieldRef::dottedField(size_t offset) const { return dottedSubstring(offset, numParts()); } diff --git a/src/mongo/db/field_ref.h b/src/mongo/db/field_ref.h index ad354a0f39c..96368b8f547 100644 --- a/src/mongo/db/field_ref.h +++ b/src/mongo/db/field_ref.h @@ -30,6 +30,7 @@ #include <boost/optional.hpp> #include <iosfwd> +#include <set> #include <string> #include <vector> @@ -50,6 +51,12 @@ namespace mongo { */ class FieldRef { public: + /** + * Returns true if the argument is a numeric string which is eligible to act as the key name for + * an element in a BSON array; in other words, the string matches the regex ^(0|[1-9]+[0-9]*)$. + */ + static bool isNumericPathComponent(StringData component); + FieldRef() = default; explicit FieldRef(StringData path); @@ -94,11 +101,33 @@ public: bool isPrefixOf(const FieldRef& other) const; /** + * Returns true if 'this' FieldRef is a prefix of 'other', or if both paths are identical. + */ + bool isPrefixOfOrEqualTo(const FieldRef& other) const; + + /** * Returns the number of field parts in the prefix that 'this' and 'other' share. */ size_t commonPrefixSize(const FieldRef& other) const; /** + * Returns true if the specified path component is a numeric string which is eligible to act as + * the key name for an element in a BSON array; in other words, the fieldname matches the regex + * ^(0|[1-9]+[0-9]*)$. + */ + bool isNumericPathComponent(size_t i) const; + + /** + * Returns true if this FieldRef has any numeric path components. + */ + bool hasNumericPathComponents() const; + + /** + * Returns the positions of all numeric path components, starting from the given position. + */ + std::set<size_t> getNumericPathComponents(size_t startPart = 0) const; + + /** * Returns a StringData of the full dotted field in its current state (i.e., some parts may * have been replaced since the parse() call). */ diff --git a/src/mongo/db/field_ref_test.cpp b/src/mongo/db/field_ref_test.cpp index 654444bc472..616b8a4d5cc 100644 --- a/src/mongo/db/field_ref_test.cpp +++ b/src/mongo/db/field_ref_test.cpp @@ -166,25 +166,39 @@ TEST(Prefix, Normal) { prefix.parse("a.b"); ASSERT_TRUE(prefix.isPrefixOf(base)); + ASSERT_TRUE(prefix.isPrefixOfOrEqualTo(base)); prefix.parse("a"); ASSERT_TRUE(prefix.isPrefixOf(base)); + ASSERT_TRUE(prefix.isPrefixOfOrEqualTo(base)); + + prefix.parse("a.b.c"); + ASSERT_FALSE(prefix.isPrefixOf(base)); + ASSERT_TRUE(prefix.isPrefixOfOrEqualTo(base)); } TEST(Prefix, Dotted) { FieldRef prefix("a.0"), base("a.0.c"); ASSERT_TRUE(prefix.isPrefixOf(base)); + ASSERT_TRUE(prefix.isPrefixOfOrEqualTo(base)); + + prefix.parse("a.0.c"); + ASSERT_FALSE(prefix.isPrefixOf(base)); + ASSERT_TRUE(prefix.isPrefixOfOrEqualTo(base)); } TEST(Prefix, NoPrefixes) { FieldRef prefix("a.b"), base("a.b"); ASSERT_FALSE(prefix.isPrefixOf(base)); + ASSERT_TRUE(prefix.isPrefixOfOrEqualTo(base)); base.parse("a"); ASSERT_FALSE(prefix.isPrefixOf(base)); + ASSERT_FALSE(prefix.isPrefixOfOrEqualTo(base)); base.parse("b"); ASSERT_FALSE(prefix.isPrefixOf(base)); + ASSERT_FALSE(prefix.isPrefixOfOrEqualTo(base)); } TEST(Prefix, EmptyBase) { @@ -837,4 +851,27 @@ TEST(RemoveLastPartLong, FieldRefFromCopyAssignmentIsADeepCopy) { ASSERT_EQ(other, FieldRef("a.b.c")); } +TEST(NumericPathComponents, CanIdentifyNumericPathComponents) { + FieldRef path("a.0.b.1.c"); + ASSERT(!path.isNumericPathComponent(0)); + ASSERT(path.isNumericPathComponent(1)); + ASSERT(!path.isNumericPathComponent(2)); + ASSERT(path.isNumericPathComponent(3)); + ASSERT(!path.isNumericPathComponent(4)); +} + +TEST(NumericPathComponents, CanObtainAllNumericPathComponents) { + FieldRef path("a.0.b.1.c.2.d"); + std::set<size_t> expectedComponents{size_t(1), size_t(3), size_t(5)}; + auto numericPathComponents = path.getNumericPathComponents(); + ASSERT(numericPathComponents == expectedComponents); +} + +TEST(NumericPathComponents, FieldsWithLeadingZeroesAreNotConsideredNumeric) { + FieldRef path("a.0.b.01.c.2.d"); + std::set<size_t> expectedComponents{size_t(1), size_t(5)}; + auto numericPathComponents = path.getNumericPathComponents(); + ASSERT(numericPathComponents == expectedComponents); +} + } // namespace diff --git a/src/mongo/db/query/SConscript b/src/mongo/db/query/SConscript index 4975a56e8d4..3ce7ee94e67 100644 --- a/src/mongo/db/query/SConscript +++ b/src/mongo/db/query/SConscript @@ -24,6 +24,7 @@ env.Library( "plan_cache_indexability.cpp", "plan_enumerator.cpp", "planner_access.cpp", + "planner_allpaths_helpers.cpp", "planner_analysis.cpp", "planner_ixselect.cpp", "query_planner.cpp", diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp index 026efa45ba8..13d45890e72 100644 --- a/src/mongo/db/query/index_bounds_builder.cpp +++ b/src/mongo/db/query/index_bounds_builder.cpp @@ -45,6 +45,7 @@ #include "mongo/db/query/expression_index.h" #include "mongo/db/query/expression_index_knobs.h" #include "mongo/db/query/indexability.h" +#include "mongo/db/query/planner_allpaths_helpers.h" #include "mongo/db/query/planner_ixselect.h" #include "mongo/db/query/query_knobs.h" #include "mongo/util/log.h" @@ -56,6 +57,8 @@ namespace mongo { namespace { +namespace app = ::mongo::all_paths_planning; + // Helper for checking that an OIL "appears" to be ascending given one interval. void assertOILIsAscendingLocally(const vector<Interval>& intervals, size_t idx) { // Each individual interval being examined should be ascending or none. @@ -335,6 +338,22 @@ void IndexBoundsBuilder::translate(const MatchExpression* expr, const IndexEntry& index, OrderedIntervalList* oilOut, BoundsTightness* tightnessOut) { + // Fill out the bounds and tightness appropriate for the given predicate. + _translatePredicate(expr, elt, index, oilOut, tightnessOut); + + // Under certain circumstances, queries on a $** index require that the bounds' tightness be + // adjusted regardless of the predicate. Having filled out the initial bounds, we apply any + // necessary changes to the tightness here. + if (index.type == IndexType::INDEX_ALLPATHS) { + *tightnessOut = app::applyAllPathsIndexScanBoundsTightness(index, *tightnessOut); + } +} + +void IndexBoundsBuilder::_translatePredicate(const MatchExpression* expr, + const BSONElement& elt, + const IndexEntry& index, + OrderedIntervalList* oilOut, + BoundsTightness* tightnessOut) { // We expect that the OIL we are constructing starts out empty. invariant(oilOut->intervals.empty()); @@ -352,12 +371,12 @@ void IndexBoundsBuilder::translate(const MatchExpression* expr, if (MatchExpression::ELEM_MATCH_VALUE == expr->matchType()) { OrderedIntervalList acc; - translate(expr->getChild(0), elt, index, &acc, tightnessOut); + _translatePredicate(expr->getChild(0), elt, index, &acc, tightnessOut); for (size_t i = 1; i < expr->numChildren(); ++i) { OrderedIntervalList next; BoundsTightness tightness; - translate(expr->getChild(i), elt, index, &next, &tightness); + _translatePredicate(expr->getChild(i), elt, index, &next, &tightness); intersectize(next, &acc); } @@ -395,7 +414,7 @@ void IndexBoundsBuilder::translate(const MatchExpression* expr, return; } - translate(child, elt, index, oilOut, tightnessOut); + _translatePredicate(child, elt, index, oilOut, tightnessOut); oilOut->complement(); // Until the index distinguishes between missing values and literal null values, we cannot diff --git a/src/mongo/db/query/index_bounds_builder.h b/src/mongo/db/query/index_bounds_builder.h index 9e58584e698..92af526dd59 100644 --- a/src/mongo/db/query/index_bounds_builder.h +++ b/src/mongo/db/query/index_bounds_builder.h @@ -201,6 +201,16 @@ public: bool* startKeyInclusive, BSONObj* endKey, bool* endKeyInclusive); + +private: + /** + * Performs the heavy lifting for IndexBoundsBuilder::translate(). + */ + static void _translatePredicate(const MatchExpression* expr, + const BSONElement& elt, + const IndexEntry& index, + OrderedIntervalList* oilOut, + BoundsTightness* tightnessOut); }; } // namespace mongo diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp index 1193aef1772..2be774dafcf 100644 --- a/src/mongo/db/query/planner_access.cpp +++ b/src/mongo/db/query/planner_access.cpp @@ -45,6 +45,7 @@ #include "mongo/db/query/index_bounds_builder.h" #include "mongo/db/query/index_tag.h" #include "mongo/db/query/indexability.h" +#include "mongo/db/query/planner_allpaths_helpers.h" #include "mongo/db/query/query_knobs.h" #include "mongo/db/query/query_planner.h" #include "mongo/db/query/query_planner_common.h" @@ -56,6 +57,7 @@ namespace { using namespace mongo; +namespace app = ::mongo::all_paths_planning; namespace dps = ::mongo::dotted_path_support; /** @@ -528,6 +530,81 @@ void QueryPlannerAccess::finishTextNode(QuerySolutionNode* node, const IndexEntr tn->indexPrefix = prefixBob.obj(); } +void QueryPlannerAccess::finishAllPathsIndexScanNode(QuerySolutionNode* node, + const IndexEntry& plannerIndex) { + // We should only ever reach this point if we are an IndexScanNode for a $** index. + invariant(plannerIndex.type == IndexType::INDEX_ALLPATHS); + invariant(node && node->getType() == STAGE_IXSCAN); + + // Sanity check the QuerySolutionNode's copy of the IndexEntry. + IndexScanNode* scan = static_cast<IndexScanNode*>(node); + invariant(scan->index.multikeyPaths.size() == 1); + invariant(scan->index == plannerIndex); + + // Obtain some references to make the remainder of this function more legible. + auto& bounds = scan->bounds; + auto& index = scan->index; + + // For $** indexes, the IndexEntry key pattern is {'path.to.field': ±1} but the actual keys in + // the index are of the form {'$_path': ±1, 'path.to.field': ±1}, where the value of the first + // field in each key is 'path.to.field'. We push a new entry into the bounds vector for the + // leading '$_path' bound here. We also push corresponding fields into the IndexScanNode's + // keyPattern and its multikeyPaths vector. + invariant(plannerIndex.keyPattern.nFields() == 1); + invariant(bounds.fields.size() == 1); + invariant(!bounds.fields.front().name.empty()); + index.multikeyPaths.insert(index.multikeyPaths.begin(), std::set<std::size_t>{}); + bounds.fields.insert(bounds.fields.begin(), {"$_path"}); + index.keyPattern = + BSON("$_path" << index.keyPattern.firstElement() << index.keyPattern.firstElement()); + + // Create a FieldRef to perform any necessary manipulations on the query path string. + FieldRef queryPath{plannerIndex.keyPattern.firstElementFieldName()}; + auto& multikeyPaths = index.multikeyPaths.back(); + + // If the bounds are [MinKey,MaxKey] then we must retrieve all documents which include the given + // path. We must therefore add bounds that encompass all its subpaths, specifically the interval + // ["path.","path/") on "$_path". + const bool isMinMaxInclusive = !bounds.fields.back().intervals.empty() && + bounds.fields.back().intervals.front().isMinToMaxInclusive(); + + // Helper function to check whether the final path component in 'queryPath' is an array index. + const auto lastFieldIsArrayIndex = [&multikeyPaths](const auto& queryPath) { + return (queryPath.numParts() > 1u && multikeyPaths.count(queryPath.numParts() - 2u) && + queryPath.isNumericPathComponent(queryPath.numParts() - 1u)); + }; + + // For [MinKey,MaxKey] bounds, we build a range interval on all subpaths of the query path(s). + // We must therefore trim any trailing array indices from the query path before generating the + // fieldname-or-array power set, in order to avoid overlapping the final set of bounds. For + // instance, the untrimmed query path 'a.0' will produce paths 'a' and 'a.0' if 'a' is multikey, + // and so we would end up with bounds [['a','a'], ['a.','a/'], ['a.0','a.0'], ['a.0.','a.0/']]. + // The latter two are subsets of the ['a.', 'a/'] interval. + while (isMinMaxInclusive && lastFieldIsArrayIndex(queryPath)) { + queryPath.removeLastPart(); + } + + // Account for fieldname-or-array-index semantics. $** indexes do not explicitly encode array + // indices in their keys, so if this query traverses one or more multikey fields via an array + // index (e.g. query 'a.0.b' where 'a' is an array), then we must generate bounds on all array- + // and non-array permutations of the path in order to produce INEXACT_FETCH bounds. + auto paths = app::generateFieldNameOrArrayIndexPathSet(multikeyPaths, queryPath); + + // Add a $_path point-interval for each path that needs to be traversed in the index. If the + // bounds on these paths are MinKey-MaxKey, then for each applicable path we must add a range + // interval on all its subpaths, i.e. ["path.","path/"). + static const char subPathStart = '.', subPathEnd = static_cast<char>('.' + 1); + auto& pathIntervals = bounds.fields.front().intervals; + for (const auto& fieldPath : paths) { + auto path = fieldPath.dottedField().toString(); + pathIntervals.push_back(IndexBoundsBuilder::makePointInterval(path)); + if (isMinMaxInclusive) { + pathIntervals.push_back(IndexBoundsBuilder::makeRangeInterval( + path + subPathStart, path + subPathEnd, BoundInclusion::kIncludeStartKeyOnly)); + } + } +} + bool QueryPlannerAccess::orNeedsFetch(const ScanBuildingState* scanState) { if (scanState->loosestBounds == IndexBoundsBuilder::EXACT) { return false; @@ -577,19 +654,20 @@ void QueryPlannerAccess::finishLeafNode(QuerySolutionNode* node, const IndexEntr const StageType type = node->getType(); if (STAGE_TEXT == type) { - finishTextNode(node, index); - return; + return finishTextNode(node, index); } IndexEntry* nodeIndex = nullptr; - IndexBounds* bounds = NULL; + IndexBounds* bounds = nullptr; if (STAGE_GEO_NEAR_2D == type) { GeoNear2DNode* gnode = static_cast<GeoNear2DNode*>(node); bounds = &gnode->baseBounds; + nodeIndex = &gnode->index; } else if (STAGE_GEO_NEAR_2DSPHERE == type) { GeoNear2DSphereNode* gnode = static_cast<GeoNear2DSphereNode*>(node); bounds = &gnode->baseBounds; + nodeIndex = &gnode->index; } else { verify(type == STAGE_IXSCAN); IndexScanNode* scan = static_cast<IndexScanNode*>(node); @@ -597,36 +675,11 @@ void QueryPlannerAccess::finishLeafNode(QuerySolutionNode* node, const IndexEntr bounds = &scan->bounds; } - // For $** indexes, the IndexEntry key pattern is {'path.to.field': ±1} but the actual keys in - // the index are of the form {'$_path': ±1, 'path.to.field': ±1}, where the value of the first - // field in each key is 'path.to.field'. We push a point-interval on 'path.to.field' into the - // bounds vector for the leading '$_path' bound here. We also push corresponding fields into the - // IndexScanNode's keyPattern and its multikeyPaths vector. + // If this is a $** index, update and populate the keyPattern, bounds, and multikeyPaths. if (index.type == IndexType::INDEX_ALLPATHS) { - invariant(bounds->fields.size() == 1 && index.keyPattern.nFields() == 1); - invariant(!bounds->fields.front().name.empty()); - invariant(nodeIndex); - bounds->fields.insert(bounds->fields.begin(), {"$_path"}); - bounds->fields.front().intervals.push_back( - IndexBoundsBuilder::makePointInterval(nodeIndex->keyPattern.firstElementFieldName())); - // If the bounds are [MinKey, MaxKey] then we're querying for any values in the given - // path. Therefore we must add bounds that allow subpaths (specifically the bound - // ["path.","path/") on "$_path"). - const auto& intervals = bounds->fields.back().intervals; - if (!intervals.empty() && intervals.front().isMinToMaxInclusive()) { - bounds->fields.front().intervals.push_back(IndexBoundsBuilder::makeRangeInterval( - bounds->fields.back().name + '.', - bounds->fields.back().name + static_cast<char>('.' + 1), - BoundInclusion::kIncludeStartKeyOnly)); - } - nodeIndex->keyPattern = BSON("$_path" << nodeIndex->keyPattern.firstElement() - << nodeIndex->keyPattern.firstElement()); - nodeIndex->multikeyPaths.insert(nodeIndex->multikeyPaths.begin(), std::set<std::size_t>{}); + finishAllPathsIndexScanNode(node, index); } - // If the QuerySolutionNode's IndexEntry is available, use its keyPattern. - const auto& keyPattern = (nodeIndex ? nodeIndex->keyPattern : index.keyPattern); - // Find the first field in the scan's bounds that was not filled out. // TODO: could cache this. size_t firstEmptyField = 0; @@ -639,12 +692,11 @@ void QueryPlannerAccess::finishLeafNode(QuerySolutionNode* node, const IndexEntr // All fields are filled out with bounds, nothing to do. if (firstEmptyField == bounds->fields.size()) { - IndexBoundsBuilder::alignBounds(bounds, keyPattern); - return; + return IndexBoundsBuilder::alignBounds(bounds, nodeIndex->keyPattern); } // Skip ahead to the firstEmptyField-th element, where we begin filling in bounds. - BSONObjIterator it(keyPattern); + BSONObjIterator it(nodeIndex->keyPattern); for (size_t i = 0; i < firstEmptyField; ++i) { verify(it.more()); it.next(); @@ -667,7 +719,7 @@ void QueryPlannerAccess::finishLeafNode(QuerySolutionNode* node, const IndexEntr // We create bounds assuming a forward direction but can easily reverse bounds to align // according to our desired direction. - IndexBoundsBuilder::alignBounds(bounds, keyPattern); + IndexBoundsBuilder::alignBounds(bounds, nodeIndex->keyPattern); } void QueryPlannerAccess::findElemMatchChildren(const MatchExpression* node, diff --git a/src/mongo/db/query/planner_access.h b/src/mongo/db/query/planner_access.h index e2097f2ab62..cf559315f2b 100644 --- a/src/mongo/db/query/planner_access.h +++ b/src/mongo/db/query/planner_access.h @@ -388,6 +388,8 @@ private: static void finishTextNode(QuerySolutionNode* node, const IndexEntry& index); + static void finishAllPathsIndexScanNode(QuerySolutionNode* node, const IndexEntry& index); + /** * Add the filter 'match' to the query solution node 'node'. Takes * ownership of 'match'. diff --git a/src/mongo/db/query/planner_allpaths_helpers.cpp b/src/mongo/db/query/planner_allpaths_helpers.cpp new file mode 100644 index 00000000000..b56de29e577 --- /dev/null +++ b/src/mongo/db/query/planner_allpaths_helpers.cpp @@ -0,0 +1,249 @@ +/** + * Copyright (C) 2018 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kQuery + +#include "mongo/platform/basic.h" + +#include "mongo/db/query/planner_allpaths_helpers.h" + +#include <vector> + +#include "mongo/bson/util/builder.h" +#include "mongo/util/log.h" + +namespace mongo { +namespace all_paths_planning { +namespace { +/** + * Compares the path 'fieldNameOrArrayIndexPath' to 'staticComparisonPath', ignoring any array + * indices present in the former if they are not present in the latter. The 'multikeyPathComponents' + * set contains the path positions that are known to be arrays; only numerical path components that + * immediately follow an array field are considered array indices. If 'fieldNameOrArrayIndexPath' is + * 'a.0.b', it will match 'staticComparisonPath' 'a.0.b', and it will also match 'a.b' but only if + * 'multikeyPathComponents' indicates that 'a' is an array. + */ +bool fieldNameOrArrayIndexPathMatches(const FieldRef& fieldNameOrArrayIndexPath, + const FieldRef& staticComparisonPath, + const std::set<size_t>& multikeyPathComponents) { + // Can't be equal if 'staticComparisonPath' has more parts than 'fieldNameOrArrayIndexPath'. + if (staticComparisonPath.numParts() > fieldNameOrArrayIndexPath.numParts()) { + return false; + } + size_t offset = 0; + for (size_t i = 0; i < fieldNameOrArrayIndexPath.numParts(); ++i) { + if (i - offset >= staticComparisonPath.numParts()) { + return false; + } + if (fieldNameOrArrayIndexPath.getPart(i) == staticComparisonPath.getPart(i - offset)) { + continue; + } else if (multikeyPathComponents.count(i - 1) && + fieldNameOrArrayIndexPath.isNumericPathComponent(i)) { + ++offset; + continue; + } + return false; + } + // Ensure that we matched the entire 'staticComparisonPath' dotted string. + return fieldNameOrArrayIndexPath.numParts() == staticComparisonPath.numParts() + offset; +} + +/** + * Returns true if 'multikeyPathSet' contains a FieldRef that matches 'pathToLookup' exactly, or + * matches 'pathToLookup' when the latter's array indices are ignored. + */ +bool fieldNameOrArrayIndexPathSetContains(const std::set<FieldRef>& multikeyPathSet, + const std::set<std::size_t>& multikeyPathComponents, + const FieldRef& pathToLookup) { + // Fast-path check for an exact match. If there is no exact match and 'pathToLookup' has no + // numeric path components, then 'multikeyPathSet' does not contain the path. + if (multikeyPathSet.count(pathToLookup)) { + return true; + } else if (!pathToLookup.hasNumericPathComponents()) { + return false; + } + // Determine whether any of the 'multikeyPathSet' entries match 'pathToLookup' under relaxed + // fieldname-or-array-index constraints. + return std::any_of( + multikeyPathSet.begin(), multikeyPathSet.end(), [&](const auto& multikeyPath) { + return fieldNameOrArrayIndexPathMatches( + pathToLookup, multikeyPath, multikeyPathComponents); + }); +} + +/** + * Returns the positions of all path components in 'queryPath' that may be interpreted as array + * indices by the query system. We obtain this list by finding all multikey path components that + * have a numerical path component immediately after. + */ +std::vector<size_t> findArrayIndexPathComponents(const std::set<std::size_t>& multikeyPaths, + const FieldRef& queryPath) { + std::vector<size_t> arrayIndices; + for (auto i : multikeyPaths) { + if (i < queryPath.numParts() - 1 && queryPath.isNumericPathComponent(i + 1)) { + arrayIndices.push_back(i + 1); + } + } + return arrayIndices; +} + +/** + * Returns an std::string of the full dotted field, minus the parts listed in 'skipParts'. + */ +FieldRef pathWithoutSpecifiedComponents(const FieldRef& path, + const std::set<size_t>& skipComponents) { + // If 'skipComponents' is empty, we return 'path' immediately. + if (skipComponents.empty()) { + return path; + } + StringBuilder ss; + size_t startPart = 0; + for (const auto& skipPart : skipComponents) { + ss << (ss.len() && !ss.stringData().endsWith(".") ? "." : "") + << path.dottedSubstring(startPart, skipPart); + startPart = skipPart + 1; + } + if (startPart < path.numParts()) { + ss << (ss.len() && !ss.stringData().endsWith(".") ? "." : "") + << path.dottedSubstring(startPart, path.numParts()); + } + return FieldRef{ss.str()}; +} +} // namespace + +MultikeyPaths buildMultiKeyPathsForExpandedAllPathsIndexEntry( + const FieldRef& indexedPath, const std::set<FieldRef>& multikeyPathSet) { + FieldRef pathToLookup; + std::set<std::size_t> multikeyPaths; + for (size_t i = 0; i < indexedPath.numParts(); ++i) { + pathToLookup.appendPart(indexedPath.getPart(i)); + if (fieldNameOrArrayIndexPathSetContains(multikeyPathSet, multikeyPaths, pathToLookup)) { + multikeyPaths.insert(i); + } + } + return {multikeyPaths}; +} + +std::set<FieldRef> generateFieldNameOrArrayIndexPathSet(const std::set<std::size_t>& multikeyPaths, + const FieldRef& queryPath) { + // We iterate over the power set of array index positions to generate all necessary paths. + // The algorithm is unavoidably O(n2^n), but we enforce that 'n' is never more than single + // digits during the planner's index selection phase. + const auto potentialArrayIndices = findArrayIndexPathComponents(multikeyPaths, queryPath); + invariant(potentialArrayIndices.size() <= kAllPathsMaxArrayIndexTraversalDepth); + invariant(potentialArrayIndices.size() < sizeof(size_t) * 8u); + // We iterate over every value [0..2^n), where 'n' is the size of 'potentialArrayIndices', + // treating each value as a 'bitMask' of 'n' bits. Each bit in 'bitMask' represents the + // entry at the equivalent position in the 'potentialArrayIndices' vector. When a given bit + // is set, we treat the corresponding numeric path component as an array index, and generate + // a path that omits it. When a bit is not set, we treat the numeric path component as a + // literal fieldname, and we generate a path that includes it. Because we iterate over every + // value [0..2^n), we ensure that we generate every combination of 'n' bits, and therefore + // every possible fieldname and array index path. + std::set<FieldRef> paths; + for (size_t bitMask = 0; bitMask < (size_t{1} << potentialArrayIndices.size()); ++bitMask) { + std::set<size_t> arrayIndicesToSkip; + for (size_t i = 0; i < potentialArrayIndices.size(); ++i) { + if (bitMask & (size_t{1} << i)) { + arrayIndicesToSkip.insert(potentialArrayIndices[i]); + } + } + paths.insert(pathWithoutSpecifiedComponents(queryPath, arrayIndicesToSkip)); + } + return paths; +} + +BoundsTightness applyAllPathsIndexScanBoundsTightness(const IndexEntry& index, + BoundsTightness tightnessIn) { + // This method should only ever be called for a $** IndexEntry. We expect to be called during + // planning, *before* finishAllPathsIndexScanNode has been invoked. The IndexEntry should thus + // only have a single keyPattern field and multikeyPath entry, but this is sufficient to + // determine whether it will be necessary to adjust the tightness. + invariant(index.type == IndexType::INDEX_ALLPATHS); + invariant(index.keyPattern.nFields() == 1); + invariant(index.multikeyPaths.size() == 1); + + // If the tightness is already INEXACT_FETCH, any further changes are redundant. + if (tightnessIn == BoundsTightness::INEXACT_FETCH) { + return tightnessIn; + } + + // If the query passes through any array indices, we must always fetch and filter the documents. + const auto arrayIndicesTraversedByQuery = findArrayIndexPathComponents( + index.multikeyPaths.front(), FieldRef{index.keyPattern.firstElementFieldName()}); + + // If the list of array indices we traversed is non-empty, set the tightness to INEXACT_FETCH. + return (arrayIndicesTraversedByQuery.empty() ? tightnessIn : BoundsTightness::INEXACT_FETCH); +} + +bool validateNumericPathComponents(const MultikeyPaths& multikeyPaths, + const std::set<FieldRef>& includedPaths, + const FieldRef& queryPath) { + // $** multikeyPaths always have a singleton set, since they are single-element indexes. + invariant(multikeyPaths.size() == 1); + + // Find the positions of all multikey path components in 'queryPath' that have a numerical path + // component immediately after. For a queryPath of 'a.2.b' this will return position 0; that is, + // 'a'. If no such multikey path was found, we are clear to proceed with planning. + const auto arrayIndices = findArrayIndexPathComponents(multikeyPaths.front(), queryPath); + if (arrayIndices.empty()) { + return true; + } + // To support $** fieldname-or-array-index semantics, the planner must generate the power set of + // all paths with and without array indices. Because this is O(2^n), we decline to answer + // queries that traverse more than 8 levels of array indices. + if (arrayIndices.size() > kAllPathsMaxArrayIndexTraversalDepth) { + LOG(2) << "Declining to answer query on field '" << queryPath.dottedField() + << "' with $** index, as it traverses through more than " + << kAllPathsMaxArrayIndexTraversalDepth << " nested array indices."; + return false; + } + // If 'includedPaths' is empty, then either the $** projection is an exclusion, or no explicit + // projection was provided. In either case, it is not possible for the query path to lie along + // an array index projection, and so we are safe to proceed with planning. + if (includedPaths.empty()) { + return true; + } + // Find the $** projected field which prefixes or is equal to the query path. If 'includedPaths' + // is non-empty then we are guaranteed that exactly one entry will prefix the query path, since + // (a) if no such inclusion exists, an IndexEntry would not have been created for this path, and + // (b) conflicting paths, such as 'a.b' and 'a.b.c', are not permitted in projections. + auto includePath = std::find_if( + includedPaths.begin(), includedPaths.end(), [&queryPath](const auto& includedPath) { + return includedPath.isPrefixOfOrEqualTo(queryPath); + }); + invariant(std::next(includePath) == includedPaths.end() || *std::next(includePath) > queryPath); + + // If the projectedPath responsible for including this queryPath prefixes it up to and including + // the numerical array index field, then the queryPath lies along a projection through the array + // index, and we cannot support the query for the reasons outlined above. + return arrayIndices[0] >= includePath->numParts(); +} + +} // namespace all_paths_planning +} // namespace mongo diff --git a/src/mongo/db/query/planner_allpaths_helpers.h b/src/mongo/db/query/planner_allpaths_helpers.h new file mode 100644 index 00000000000..4bbb333dead --- /dev/null +++ b/src/mongo/db/query/planner_allpaths_helpers.h @@ -0,0 +1,101 @@ +/** + * Copyright (C) 2018 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#pragma once + +#include <set> + +#include "mongo/db/field_ref.h" +#include "mongo/db/index/multikey_paths.h" +#include "mongo/db/query/index_bounds_builder.h" +#include "mongo/db/query/query_solution.h" + +namespace mongo { +namespace all_paths_planning { + +using BoundsTightness = IndexBoundsBuilder::BoundsTightness; + +/** + * Specifies the maximum depth of nested array indices through which a query may traverse before a + * $** declines to answer it, due to the exponential complexity of the bounds required. + */ +static constexpr size_t kAllPathsMaxArrayIndexTraversalDepth = 8u; + +/** + * Returns a MultikeyPaths which indicates which components of 'indexedPath' are multikey, by + * looking up multikeyness in 'multikeyPathSet'. + */ +MultikeyPaths buildMultiKeyPathsForExpandedAllPathsIndexEntry( + const FieldRef& indexedPath, const std::set<FieldRef>& multikeyPathSet); + +/** + * Given a query path and the set of known multikey path components, this function outputs the power + * set of paths generated by considering each relevant numerical path component first as a literal + * fieldname, and then as an array index. In other words, for each numerical path component that + * immediately follows a multikey field, we will output one path with the numerical component and + * one path without. If both 'a' and 'b' are multikey, then the queryPath 'a.0.b.1.c' will produce + * ['a.0.b.1.c', 'a.0.b.c', 'a.b.1.c', 'a.b.c']. + */ +std::set<FieldRef> generateFieldNameOrArrayIndexPathSet(const std::set<std::size_t>& multikeyPaths, + const FieldRef& queryPath); + +/** + * In certain circumstances, it is necessary to adjust the tightness of the bounds generated by the + * planner for $** indexes. For instance, if the query traverses through one or more arrays via + * specific indices, then we must enforce INEXACT_FETCH to ensure correctness, regardless of the + * predicate. Given an IndexEntry representing an expanded $** index, we apply any necessary changes + * to the tightness here. + */ +BoundsTightness applyAllPathsIndexScanBoundsTightness(const IndexEntry& index, + BoundsTightness tightnessIn); + +/** + * Returns false if 'queryPath' includes any numerical path components which render it unanswerable + * by the $** index, true otherwise. Specifically, the $** index cannot answer the query if either + * of the following scenarios occur: + * + * - The query path traverses through more than 'kAllPathsMaxArrayIndexTraversalDepth' nested arrays + * via explicit array indices. + * - The query path lies along a $** projection through an array index. + * + * For an example of the latter case, say that our query path is 'a.0.b', our projection includes + * {'a.0': 1}, and 'a' is multikey. The query semantics will match 'a.0.b' by either field name or + * array index against the documents, but because the $** index always projects numeric path + * components strictly as field names, the projection {'a.0': 1} cannot correctly support this + * query. + * + * To see why, consider the document {a: [1, 2, 3]}. Query {'a.0': 1} will match this document, but + * the projection {'a.0': 1} will produce output document {a: []}, and so we will not index any of + * the values [1, 2, 3] for 'a'. + */ +bool validateNumericPathComponents(const MultikeyPaths& multikeyPaths, + const std::set<FieldRef>& includedPaths, + const FieldRef& queryPath); + +} // namespace all_paths_planning +} // namespace mongo diff --git a/src/mongo/db/query/planner_ixselect.cpp b/src/mongo/db/query/planner_ixselect.cpp index 4ff04cd959a..8dd796ac18c 100644 --- a/src/mongo/db/query/planner_ixselect.cpp +++ b/src/mongo/db/query/planner_ixselect.cpp @@ -44,6 +44,7 @@ #include "mongo/db/query/collation/collator_interface.h" #include "mongo/db/query/index_tag.h" #include "mongo/db/query/indexability.h" +#include "mongo/db/query/planner_allpaths_helpers.h" #include "mongo/db/query/query_planner_common.h" #include "mongo/util/log.h" @@ -51,29 +52,14 @@ namespace mongo { namespace { +namespace app = ::mongo::all_paths_planning; + std::size_t numPathComponents(StringData path) { return FieldRef{path}.numParts(); } /** - * Returns a MultikeyPaths which indicates which components of 'indexedPath' are multikey, by - * looking up multikeyness in 'multikeyPathSet'. - */ -MultikeyPaths buildMultiKeyPathsForExpandedAllPathsIndexEntry( - const FieldRef& indexedPath, const std::set<FieldRef>& multikeyPathSet) { - FieldRef pathToLookup; - std::set<std::size_t> multikeyPathComponents; - for (size_t i = 0; i < indexedPath.numParts(); ++i) { - pathToLookup.appendPart(indexedPath.getPart(i)); - if (multikeyPathSet.count(pathToLookup)) { - multikeyPathComponents.insert(i); - } - } - return {multikeyPathComponents}; -} - -/** - * Given a single allPaths index, and a set of fields which are being queried, create a virtual + * Given a single allPaths index, and a set of fields which are being queried, create 'mock' * IndexEntry for each of the appropriate fields. */ void expandIndex(const IndexEntry& allPathsIndex, @@ -94,15 +80,25 @@ void expandIndex(const IndexEntry& allPathsIndex, const auto projectedFields = projExec->applyProjectionToFields(fields); + const auto& includedPaths = projExec->getExhaustivePaths(); + out->reserve(out->size() + projectedFields.size()); for (auto&& fieldName : projectedFields) { + // Convert string 'fieldName' into a FieldRef, to better facilitate the subsequent checks. + const auto queryPath = FieldRef{fieldName}; // $** indices hold multikey metadata directly in the index keys, rather than in the index // catalog. In turn, the index key data is used to produce a set of multikey paths // in-memory. Here we convert this set of all multikey paths into a MultikeyPaths vector // which will indicate to the downstream planning code which components of 'fieldName' are // multikey. - auto multikeyPaths = buildMultiKeyPathsForExpandedAllPathsIndexEntry( - FieldRef{fieldName}, allPathsIndex.multikeyPathSet); + auto multikeyPaths = app::buildMultiKeyPathsForExpandedAllPathsIndexEntry( + queryPath, allPathsIndex.multikeyPathSet); + + // Check whether a query on the current fieldpath is answerable by the $** index, given any + // numerical path components that may be present in the path string. + if (!app::validateNumericPathComponents(multikeyPaths, includedPaths, queryPath)) { + continue; + } // The expanded IndexEntry is only considered multikey if the particular path represented by // this IndexEntry has a multikey path component. For instance, suppose we have index {$**: diff --git a/src/mongo/db/query/query_planner_all_paths_index_test.cpp b/src/mongo/db/query/query_planner_all_paths_index_test.cpp index 66b6dd8eeac..0f8aa3830e9 100644 --- a/src/mongo/db/query/query_planner_all_paths_index_test.cpp +++ b/src/mongo/db/query/query_planner_all_paths_index_test.cpp @@ -151,7 +151,7 @@ TEST_F(QueryPlannerAllPathsTest, MultiplePredicatesOverMultikeyFieldNoElemMatch) addAllPathsIndex(BSON("$**" << 1), {"a"}); runQuery(fromjson("{a: {$gt: 0, $lt: 9}}")); - ASSERT_EQUALS(getNumSolutions(), 2U); + assertNumSolutions(2U); assertSolutionExists( "{fetch: {filter: {a: {$gt: 0}}, node: " "{ixscan: {filter: null, pattern: {'$_path': 1, a: 1}," @@ -166,7 +166,7 @@ TEST_F(QueryPlannerAllPathsTest, MultiplePredicatesOverMultikeyFieldWithElemMatc addAllPathsIndex(BSON("$**" << 1), {"a"}); runQuery(fromjson("{a: {$elemMatch: {$gt: 0, $lt: 9}}}")); - ASSERT_EQUALS(getNumSolutions(), 1U); + assertNumSolutions(1U); assertSolutionExists( "{fetch: {filter: {a: {$elemMatch: {$gt: 0, $lt: 9}}}, node: " "{ixscan: {filter: null, pattern: {'$_path': 1, a: 1}," @@ -177,7 +177,7 @@ TEST_F(QueryPlannerAllPathsTest, MultiplePredicatesOverNonMultikeyFieldWithMulti addAllPathsIndex(BSON("$**" << 1), {"b"}); runQuery(fromjson("{a: {$gt: 0, $lt: 9}}")); - ASSERT_EQUALS(getNumSolutions(), 1U); + assertNumSolutions(1U); assertSolutionExists( "{fetch: {filter: null, node: {ixscan: {filter: null, pattern: {'$_path': 1, a: 1}," "bounds: {'$_path': [['a','a',true,true]], a: [[0,9,false,false]]}}}}}"); @@ -187,7 +187,7 @@ TEST_F(QueryPlannerAllPathsTest, MultiplePredicatesOverNestedFieldWithFirstCompo addAllPathsIndex(BSON("$**" << 1), {"a"}); runQuery(fromjson("{'a.b': {$gt: 0, $lt: 9}}")); - ASSERT_EQUALS(getNumSolutions(), 2U); + assertNumSolutions(2U); assertSolutionExists( "{fetch: {filter: {'a.b': {$gt: 0}}, node: " "{ixscan: {filter: null, pattern: {'$_path': 1, 'a.b': 1}," @@ -202,7 +202,7 @@ TEST_F(QueryPlannerAllPathsTest, MultiplePredicatesOverNestedFieldWithElemMatchO addAllPathsIndex(BSON("$**" << 1), {"a"}); runQuery(fromjson("{a: {$elemMatch: {b: {$gt: 0, $lt: 9}}}}")); - ASSERT_EQUALS(getNumSolutions(), 1U); + assertNumSolutions(1U); assertSolutionExists( "{fetch: {filter: {a: {$elemMatch: {b: {$gt: 0, $lt: 9}}}}, node: " "{ixscan: {filter: null, pattern: {'$_path': 1, 'a.b': 1}," @@ -214,7 +214,7 @@ TEST_F(QueryPlannerAllPathsTest, addAllPathsIndex(BSON("$**" << 1), {"a", "a.b"}); runQuery(fromjson("{a: {$elemMatch: {b: {$gt: 0, $lt: 9}}}}")); - ASSERT_EQUALS(getNumSolutions(), 2U); + assertNumSolutions(2U); assertSolutionExists( "{fetch: {filter: {a: {$elemMatch: {b: {$gt: 0, $lt: 9}}}}, node: " "{ixscan: {filter: null, pattern: {'$_path': 1, 'a.b': 1}," @@ -229,7 +229,7 @@ TEST_F(QueryPlannerAllPathsTest, MultiplePredicatesOverNestedFieldWithTwoElemMat addAllPathsIndex(BSON("$**" << 1), {"a", "a.b"}); runQuery(fromjson("{a: {$elemMatch: {b: {$elemMatch: {$gt: 0, $lt: 9}}}}}")); - ASSERT_EQUALS(getNumSolutions(), 1U); + assertNumSolutions(1U); assertSolutionExists( "{fetch: {filter: {a: {$elemMatch: {b: {$elemMatch: {$gt: 0, $lt: 9}}}}}, node: " "{ixscan: {filter: null, pattern: {'$_path': 1, 'a.b': 1}," @@ -240,7 +240,7 @@ TEST_F(QueryPlannerAllPathsTest, ElemMatchOnInnermostMultikeyPathPermitsTightBou addAllPathsIndex(BSON("$**" << 1), {"a", "a.b", "a.b.c"}); runQuery(fromjson("{'a.b.c': {$elemMatch: {'d.e.f': {$gt: 0, $lt: 9}}}}")); - ASSERT_EQUALS(getNumSolutions(), 1U); + assertNumSolutions(1U); assertSolutionExists( "{fetch: {filter: {'a.b.c': {$elemMatch: {'d.e.f': {$gt: 0, $lt: 9}}}}, node: " "{ixscan: {filter: null, pattern: {'$_path': 1, 'a.b.c.d.e.f': 1}," @@ -253,7 +253,7 @@ TEST_F(QueryPlannerAllPathsTest, AllPredsEligibleForIndexUseGenerateCandidatePla runQuery( fromjson("{'a.b': {$gt: 0, $lt: 9}, 'a.c': {$gt: 11, $lt: 20}, d: {$gt: 31, $lt: 40}}")); - ASSERT_EQUALS(getNumSolutions(), 4U); + assertNumSolutions(4U); assertSolutionExists( "{fetch: {filter: {'a.b':{$gt:0,$lt: 9},'a.c':{$gt:11},d:{$gt:31,$lt:40}}, node: " "{ixscan: {filter: null, pattern: {'$_path': 1, 'a.c': 1}," @@ -678,7 +678,7 @@ TEST_F(QueryPlannerAllPathsTest, AllPathsIndexDoesNotParticipateInIndexIntersect // Run a point query on both fields and confirm that an AND_SORTED plan is generated. runQuery(fromjson("{a:10, b:10}")); // Three plans are generated: one IXSCAN for each index, and an AND_SORTED on both. - ASSERT_EQUALS(getNumSolutions(), 3U); + assertNumSolutions(3U); assertSolutionExists( "{fetch: {filter: {a:10}, node: {ixscan: {filter: null, pattern: {b:1}}}}}"); assertSolutionExists( @@ -690,7 +690,7 @@ TEST_F(QueryPlannerAllPathsTest, AllPathsIndexDoesNotParticipateInIndexIntersect // Run a range query on both fields and confirm that an AND_HASH plan is generated. runQuery(fromjson("{a:{$gt: 10}, b:{$gt: 10}}")); // Three plans are generated: one IXSCAN for each index, and an AND_HASH on both. - ASSERT_EQUALS(getNumSolutions(), 3U); + assertNumSolutions(3U); assertSolutionExists( "{fetch: {filter: {a:{$gt: 10}}, node: {ixscan: {filter: null, pattern: {b:1}}}}}"); assertSolutionExists( @@ -705,7 +705,7 @@ TEST_F(QueryPlannerAllPathsTest, AllPathsIndexDoesNotParticipateInIndexIntersect // First re-run the AND_SORTED test. runQuery(fromjson("{a:10, b:10}")); // Solution count has increased from 3 to 5, as $** 'duplicates' the {a:1} and {b:1} IXSCANS. - ASSERT_EQUALS(getNumSolutions(), 5U); + assertNumSolutions(5U); assertSolutionExists( "{fetch: {filter: {a:10}, node: {ixscan: {filter: null, pattern: {b:1}}}}}"); assertSolutionExists( @@ -723,7 +723,7 @@ TEST_F(QueryPlannerAllPathsTest, AllPathsIndexDoesNotParticipateInIndexIntersect // Now re-run the AND_HASH test. runQuery(fromjson("{a:{$gt: 10}, b:{$gt: 10}}")); // Solution count has increased from 3 to 5, as $** 'duplicates' the {a:1} and {b:1} IXSCANS. - ASSERT_EQUALS(getNumSolutions(), 5U); + assertNumSolutions(5U); assertSolutionExists( "{fetch: {filter: {a:{$gt:10}}, node: {ixscan: {filter: null, pattern: {b:1}}}}}"); assertSolutionExists( @@ -753,7 +753,7 @@ TEST_F(QueryPlannerAllPathsTest, AllPathsIndexDoesNotSupplyCandidatePlanForTextS // Confirm that the allPaths index generates candidate plans for queries which do not include a // $text predicate. runQuery(fromjson("{a: 10, b: 10}")); - ASSERT_EQUALS(getNumSolutions(), 2U); + assertNumSolutions(2U); assertSolutionExists( "{fetch: {filter: {b: 10}, node: {ixscan: {filter: null, pattern: {'$_path': 1, a: 1}}}}}"); assertSolutionExists( @@ -762,7 +762,7 @@ TEST_F(QueryPlannerAllPathsTest, AllPathsIndexDoesNotSupplyCandidatePlanForTextS // Confirm that the allPaths index does not produce any candidate plans when a query includes a // $text predicate, even for non-$text predicates which may be present in the query. runQuery(fromjson("{a: 10, b: 10, $text: {$search: 'banana'}}")); - ASSERT_EQUALS(getNumSolutions(), 1U); + assertNumSolutions(1U); assertSolutionExists( "{fetch: {filter: {b: 10}, node: {text: {prefix: {a: 10}, search: 'banana'}}}}"); } @@ -1094,4 +1094,341 @@ TEST_F(QueryPlannerAllPathsTest, AllPathsIndexMustUseBlockingSortWhenFieldIsNotI // TODO SERVER-36517: Add testing for DISTINCT_SCAN. // TODO SERVER-35331: Add testing for hints. +// +// Field name or array index tests. +// + +TEST_F(QueryPlannerAllPathsTest, CannotAnswerQueryThroughArrayIndexProjection) { + addAllPathsIndex(BSON("$**" << 1), {"a"}, fromjson("{'a.0': 1, 'a.b': 1}")); + + // Queries whose path lies along a projected array index cannot be answered. + runQuery(fromjson("{'a.0': 10}")); + assertNumSolutions(1U); + assertSolutionExists("{cscan: {dir: 1}}"); + + runQuery(fromjson("{'a.0.b': 10}")); + assertNumSolutions(1U); + assertSolutionExists("{cscan: {dir: 1}}"); +} + +TEST_F(QueryPlannerAllPathsTest, CanAnswerQueryOnNonProjectedArrayIndex) { + addAllPathsIndex(BSON("$**" << 1), {"a", "c"}, fromjson("{'a.0': 1, b: 1, c: 1}")); + + // Queries on fields that are not projected array indices can be answered, even when such a + // projection is present in the 'starPathsTempName' spec. + runQuery(fromjson("{b: 10}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: null, node: {ixscan: {filter: null, pattern: {'$_path': 1, b: 1}, " + "bounds: {'$_path': [['b','b',true,true]], b: [[10,10,true,true]]}}}}}"); + + // Queries on indices of arrays that have been projected in their entirety can be answered. + runQuery(fromjson("{'c.0': 10}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {'c.0': 10}, node: {ixscan: {filter: null, pattern: {'$_path': 1, 'c.0': " + "1}, bounds: {'$_path': [['c','c',true,true], ['c.0','c.0',true,true]], 'c.0': " + "[[10,10,true,true]]}}}}}"); +} + +TEST_F(QueryPlannerAllPathsTest, ShouldGenerateAllPotentialPathBoundsForArrayIndexQueries) { + addAllPathsIndex(BSON("a.$**" << 1), {"a", "a.b"}); + + // A numeric path component immediately following a multikey path may represent an array index, + // a fieldname, or both. The $** index does not record array indices explicitly, so for all such + // potential array indices in the query path, we generate bounds that both include and exclude + // the numeric path components. + runQuery(fromjson("{'a.0': 10}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {'a.0': 10}, node: {ixscan: {filter: null, pattern: {'$_path': 1, 'a.0': " + "1}, bounds: {'$_path': [['a','a',true,true], ['a.0','a.0',true,true]], 'a.0': " + "[[10,10,true,true]]}}}}}"); + + runQuery(fromjson("{'a.0.b': 10}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {'a.0.b': 10}, node: {ixscan: {filter: null, pattern: {'$_path': 1, " + "'a.0.b': 1}, bounds: {'$_path': [['a.0.b','a.0.b',true,true], ['a.b','a.b',true,true]], " + "'a.0.b': [[10,10,true,true]]}}}}}"); + + runQuery(fromjson("{'a.0.b.1.c': 10}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {'a.0.b.1.c': 10}, node: {ixscan: {filter: null, pattern: {'$_path': 1, " + "'a.0.b.1.c': 1}, bounds: {'$_path': [['a.0.b.1.c','a.0.b.1.c',true,true], " + "['a.0.b.c','a.0.b.c',true,true], ['a.b.1.c','a.b.1.c',true,true], " + "['a.b.c','a.b.c',true,true]], 'a.0.b.1.c': [[10,10,true,true]]}}}}}"); +} + +TEST_F(QueryPlannerAllPathsTest, ShouldAddAllPredicatesToFetchFilterForArrayIndexQueries) { + addAllPathsIndex(BSON("$**" << 1), {"a", "a.b"}); + + // Ordinarily, a query such as {'a': {$gt: 0, $lt: 5}} on a multikey field 'a' produces an EXACT + // IXSCAN on one of the predicates and a FETCH filter on the other. For fieldname-or-array-index + // queries on 'a.0' or similar, however, *all* predicates must be added to the FETCH filter to + // ensure correctness. + runQuery(fromjson("{'a.0': {$gt: 10, $lt: 20}}")); + assertNumSolutions(2U); + assertSolutionExists( + "{fetch: {filter: {'a.0': {$gt: 10, $lt: 20}}, node: {ixscan: {filter: null, pattern: " + "{'$_path': 1, 'a.0': 1}, bounds: {'$_path': [['a','a',true,true], " + "['a.0','a.0',true,true]], 'a.0': [[-inf,20,true,false]]}}}}}"); + assertSolutionExists( + "{fetch: {filter: {'a.0': {$gt: 10, $lt: 20}}, node: {ixscan: {filter: null, pattern: " + "{'$_path': 1, 'a.0': 1}, bounds: {'$_path': [['a','a',true,true], " + "['a.0','a.0',true,true]], 'a.0': [[10,inf,false,true]]}}}}}"); + + runQuery(fromjson("{'a.0.b': {$gte: 10, $lte: 20, $ne: 15}}")); + assertNumSolutions(2U); + assertSolutionExists( + "{fetch: {filter: {'a.0.b': {$gte: 10, $lte: 20, $ne: 15}}, node: {ixscan: {filter: null, " + "pattern: {'$_path': 1, 'a.0.b': 1}, bounds: {'$_path': [['a.0.b','a.0.b',true,true], " + "['a.b','a.b',true,true]], 'a.0.b': [[-inf,20.0,true,true]]}}}}}"); + assertSolutionExists( + "{fetch: {filter: {'a.0.b': {$gte: 10, $lte: 20, $ne: 15}}, node: {ixscan: {filter: null, " + "pattern: {'$_path': 1, 'a.0.b': 1}, bounds: {'$_path': [['a.0.b','a.0.b',true,true], " + "['a.b','a.b',true,true]], 'a.0.b': [[10,inf,true,true]]}}}}}"); + + runQuery(fromjson("{'a.0.b.1.c': {$in: [10, 20, 30]}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {'a.0.b.1.c': {$in: [10, 20, 30]}}, node: {ixscan: {filter: null, " + "pattern: {'$_path': 1, 'a.0.b.1.c': 1}, bounds: {'$_path': " + "[['a.0.b.1.c','a.0.b.1.c',true,true], ['a.0.b.c','a.0.b.c',true,true], " + "['a.b.1.c','a.b.1.c',true,true], ['a.b.c','a.b.c',true,true]], 'a.0.b.1.c': " + "[[10,10,true,true], [20,20,true,true], [30,30,true,true]]}}}}}"); + + runQuery(fromjson("{'a.0.b.1.c': {$gte: 10, $lte: 30}, 'a.1.b.0.c': {$in: [10, 20, 30]}}")); + assertNumSolutions(3U); + assertSolutionExists( + "{fetch: {filter: {'a.0.b.1.c': {$gte: 10, $lte: 30}, 'a.1.b.0.c': {$in: [10, 20, 30]}}, " + "node: {ixscan: {filter: null, pattern: {'$_path': 1, 'a.0.b.1.c': 1}, bounds: {'$_path': " + "[['a.0.b.1.c','a.0.b.1.c',true,true], ['a.0.b.c','a.0.b.c',true,true], " + "['a.b.1.c','a.b.1.c',true,true], ['a.b.c','a.b.c',true,true]], 'a.0.b.1.c': " + "[[-inf,30,true,true]]}}}}}"); + assertSolutionExists( + "{fetch: {filter: {'a.0.b.1.c': {$gte: 10, $lte: 30}, 'a.1.b.0.c': {$in: [10, 20, 30]}}, " + "node: {ixscan: {filter: null, pattern: {'$_path': 1, 'a.0.b.1.c': 1}, bounds: {'$_path': " + "[['a.0.b.1.c','a.0.b.1.c',true,true], ['a.0.b.c','a.0.b.c',true,true], " + "['a.b.1.c','a.b.1.c',true,true], ['a.b.c','a.b.c',true,true]], 'a.0.b.1.c': " + "[[10,inf,true,true]]}}}}}"); + assertSolutionExists( + "{fetch: {filter: {'a.0.b.1.c': {$gte: 10, $lte: 30}, 'a.1.b.0.c': {$in: [10, 20, 30]}}, " + "node: {ixscan: {filter: null, pattern: {'$_path': 1, 'a.1.b.0.c': 1}, bounds: {'$_path': " + "[['a.1.b.0.c','a.1.b.0.c',true,true], ['a.1.b.c','a.1.b.c',true,true], " + "['a.b.0.c','a.b.0.c',true,true], ['a.b.c','a.b.c',true,true]], 'a.1.b.0.c': " + "[[10,10,true,true], [20,20,true,true], [30,30,true,true]]}}}}}"); + + // Finally, confirm that queries which include a numeric path component on a field that is *not* + // multikey produce EXACT bounds and do *not* add their predicates to the filter. + runQuery(fromjson("{'d.0': {$gt: 10, $lt: 20}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: null, node: {ixscan: {filter: null, pattern: {'$_path': 1, 'd.0': 1}, " + "bounds: {'$_path': [['d.0','d.0',true,true]], 'd.0': [[10,20,false,false]]}}}}}"); + + // Here, in contrast to the multikey case, we generate one indexed solution for each predicate + // and only filter the *other* predicate in the FETCH stage. + runQuery(fromjson("{'d.0.e': {$gt: 10, $lt: 20}, 'd.1.f': {$in: [10, 20, 30]}}")); + assertNumSolutions(2U); + assertSolutionExists( + "{fetch: {filter: {'d.0.e': {$gt: 10, $lt: 20}}, node: {ixscan: {filter: null, pattern: " + "{'$_path': 1, 'd.1.f': 1}, bounds: {'$_path': [['d.1.f','d.1.f',true,true]], 'd.1.f': " + "[[10,10,true,true], [20,20,true,true], [30,30,true,true]]}}}}}"); + assertSolutionExists( + "{fetch: {filter: {'d.1.f': {$in: [10, 20, 30]}}, node: {ixscan: {filter: null, pattern: " + "{'$_path': 1, 'd.0.e': 1}, bounds: {'$_path': [['d.0.e','d.0.e',true,true]], 'd.0.e': " + "[[10,20,false,false]]}}}}}"); +} + +TEST_F(QueryPlannerAllPathsTest, ShouldOnlyBuildSpecialBoundsForMultikeyPaths) { + addAllPathsIndex(BSON("a.$**" << 1), {"a.b", "a.b.c.d"}); + + // 'a' is not multikey, so 'a.0' refers specifically to a fieldname rather than an array index, + // and special bounds will not be generated. + runQuery(fromjson("{'a.0': 10}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: null, node: {ixscan: {filter: null, pattern: {'$_path': 1, 'a.0': 1}, " + "bounds: {'$_path': [['a.0','a.0',true,true]], 'a.0': [[10,10,true,true]]}}}}}"); + + // 'a' is not multikey, so 'a.0.b' does not refer to multikey path 'a.b' and 'b.1' is therefore + // also strictly a fieldname. Special bounds will not be generated for either. + runQuery(fromjson("{'a.0.b.1': 10}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: null, node: {ixscan: {filter: null, pattern: {'$_path': 1, 'a.0.b.1': " + "1}, bounds: {'$_path': [['a.0.b.1','a.0.b.1',true,true]], 'a.0.b.1': " + "[[10,10,true,true]]}}}}}"); + + // 'a.b' is multikey but 'a.b.c' is not, so special bounds will only be generated for 'b.1'. + runQuery(fromjson("{'a.b.1.c.2.d': 10}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {'a.b.1.c.2.d': 10}, node: {ixscan: {filter: null, pattern: {'$_path': " + "1, 'a.b.1.c.2.d': 1}, bounds: {'$_path': [['a.b.1.c.2.d','a.b.1.c.2.d',true,true], " + "['a.b.c.2.d','a.b.c.2.d',true,true]], 'a.b.1.c.2.d': [[10,10,true,true]]}}}}}"); + + // 'a.b' is multikey but 'a.b.c' is not. 'a.b.c.2.d' therefore refers to a different path than + // multikey path 'a.[b].c.[d]', so special bounds will be generated for 'b.1' but not for 'd.3'. + runQuery(fromjson("{'a.b.1.c.2.d.3.e': 10}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {'a.b.1.c.2.d.3.e': 10}, node: {ixscan: {filter: null, pattern: " + "{'$_path': 1, 'a.b.1.c.2.d.3.e': 1}, bounds: " + "{'$_path':[['a.b.1.c.2.d.3.e','a.b.1.c.2.d.3.e',true,true], " + "['a.b.c.2.d.3.e','a.b.c.2.d.3.e',true,true]], 'a.b.1.c.2.d.3.e':[[10,10,true,true]]}}}}}"); + + // 'a.b' and 'a.b.c.d' are both multikey. Special bounds will be generated for 'b.1' and 'd.3'. + runQuery(fromjson("{'a.b.1.c.d.3.e': 10}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {'a.b.1.c.d.3.e': 10}, node: {ixscan: {filter: null, pattern: {'$_path': " + "1, 'a.b.1.c.d.3.e': 1}, bounds: {'$_path':[['a.b.1.c.d.3.e','a.b.1.c.d.3.e',true,true], " + "['a.b.1.c.d.e','a.b.1.c.d.e',true,true], ['a.b.c.d.3.e','a.b.c.d.3.e',true,true], " + "['a.b.c.d.e','a.b.c.d.e',true,true]], 'a.b.1.c.d.3.e':[[10,10,true,true]]}}}}}"); +} + +TEST_F(QueryPlannerAllPathsTest, ShouldGenerateSpecialBoundsForNumericMultikeyPaths) { + addAllPathsIndex(BSON("a.$**" << 1), {"a.b", "a.b.0"}); + + // 'a.b.0' is itself a multikey path, but since 'a.b' is multikey 'b.0' may refer to an array + // element of 'b'. We generate special bounds for 'b.0'. + runQuery(fromjson("{'a.b.0': 10}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {'a.b.0': 10}, node: {ixscan: {filter: null, pattern: {'$_path': 1, " + "'a.b.0': 1}, bounds: {'$_path': [['a.b','a.b',true,true], ['a.b.0','a.b.0',true,true]], " + "'a.b.0': [[10,10,true,true]]}}}}}"); + + // 'a.b' and 'a.b.0' are both multikey paths; we generate special bounds for 'b.0' and '0.1'. + runQuery(fromjson("{'a.b.0.1.c': 10}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {'a.b.0.1.c': 10}, node: {ixscan: {filter: null, pattern: {'$_path': 1, " + "'a.b.0.1.c': 1}, bounds: {'$_path': [['a.b.0.1.c','a.b.0.1.c',true,true], " + "['a.b.0.c','a.b.0.c',true,true], ['a.b.1.c','a.b.1.c',true,true], " + "['a.b.c','a.b.c',true,true]], 'a.b.0.1.c': [[10,10,true,true]]}}}}}"); + + // 'a.b' is multikey but 'a.b.1' is not, so we only generate special bounds for 'b.1'. + runQuery(fromjson("{'a.b.1.1.c': 10}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {'a.b.1.1.c': 10}, node: {ixscan: {filter: null, pattern: {'$_path': 1, " + "'a.b.1.1.c': 1}, bounds: {'$_path': [['a.b.1.1.c','a.b.1.1.c',true,true], " + "['a.b.1.c','a.b.1.c',true,true]], 'a.b.1.1.c': [[10,10,true,true]]}}}}}"); +} + +TEST_F(QueryPlannerAllPathsTest, ShouldNotGenerateSpecialBoundsForFieldNamesWithLeadingZeroes) { + addAllPathsIndex(BSON("a.$**" << 1), {"a.b"}); + + // 'a.b' is multikey, but '01' is not eligible for consideration as a numeric array index; it is + // always strictly a field name. We therefore do not generate special bounds. + runQuery(fromjson("{'a.b.01': 10}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: null, node: {ixscan: {filter: null, pattern: {'$_path': 1, 'a.b.01': 1}, " + "bounds: {'$_path': [['a.b.01','a.b.01',true,true]], 'a.b.01': [[10,10,true,true]]}}}}}"); +} + +TEST_F(QueryPlannerAllPathsTest, InitialNumericPathComponentIsAlwaysFieldName) { + addAllPathsIndex(BSON("$**" << 1), {"0"}, fromjson("{'0': 1}")); + + // '0' is multikey, but the first component in a path can never be an array index since it must + // always be a field name. We therefore do not generate special bounds for '0.b'. + runQuery(fromjson("{'0.b': 10}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: null, node: {ixscan: {filter: null, pattern: {'$_path': 1, '0.b': 1}, " + "bounds: {'$_path': [['0.b','0.b',true,true]], '0.b': [[10,10,true,true]]}}}}}"); + + // '0' is multikey, so a query on '0.1.b' will generate special bounds for '1' but not for '0'. + runQuery(fromjson("{'0.1.b': 10}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {'0.1.b': 10}, node: {ixscan: {filter: null, pattern: {'$_path': 1, " + "'0.1.b': 1}, bounds: {'$_path': [['0.1.b','0.1.b',true,true], ['0.b','0.b',true,true]], " + "'0.1.b': [[10,10,true,true]]}}}}}"); +} + +TEST_F(QueryPlannerAllPathsTest, ShouldGenerateSpecialBoundsForNullAndExistenceQueries) { + addAllPathsIndex(BSON("a.$**" << 1), {"a", "a.b", "a.b.2", "a.b.2.3", "a.b.2.3.4"}); + + runQuery(fromjson("{'a.0.b': {$exists: true}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {'a.0.b': {$exists: true}}, node: {ixscan: {filter: null, pattern: " + "{'$_path': 1, 'a.0.b': 1}, bounds: {'$_path': [['a.0.b','a.0.b',true,true], " + "['a.0.b.','a.0.b/',true,false], ['a.b','a.b',true,true], ['a.b.','a.b/',true,false]], " + "'a.0.b': [[{$minKey: 1},{$maxKey: 1},true,true]]}}}}}"); + + runQuery(fromjson("{'a.0.b.1.c': {$exists: true, $eq: null}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {'a.0.b.1.c': {$exists: true, $eq: null}}, node: {ixscan: {filter:null, " + "pattern:{'$_path': 1, 'a.0.b.1.c': 1}, bounds: {'$_path': " + "[['a.0.b.1.c','a.0.b.1.c',true,true], ['a.0.b.1.c.','a.0.b.1.c/',true,false], " + "['a.0.b.c','a.0.b.c',true,true], ['a.0.b.c.','a.0.b.c/',true,false], " + "['a.b.1.c','a.b.1.c',true,true], ['a.b.1.c.','a.b.1.c/',true,false], " + "['a.b.c','a.b.c',true,true], ['a.b.c.','a.b.c/',true,false]], 'a.0.b.1.c': [[{$minKey: " + "1},{$maxKey: 1},true,true]]}}}}}"); + + // Confirm that any trailing array index fields are trimmed before the fieldname-or-array-index + // pathset is generated, such that the subpath bounds do not overlap. + runQuery(fromjson("{'a.0.b.1': {$exists: true, $eq: null}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {'a.0.b.1': {$exists: true, $eq: null}}, node: {ixscan: {filter:null, " + "pattern:{'$_path': 1, 'a.0.b.1': 1}, bounds: {'$_path': [['a.0.b','a.0.b',true,true], " + "['a.0.b.','a.0.b/',true,false], ['a.b','a.b',true,true], ['a.b.','a.b/',true,false]], " + "'a.0.b.1': [[{$minKey: 1},{$maxKey: 1},true,true]]}}}}}"); + + runQuery(fromjson("{'a.0.b.2.3.4': {$exists: true, $eq: null}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {'a.0.b.2.3.4': {$exists: true, $eq: null}}, node: {ixscan: {filter:null," + "pattern:{'$_path': 1, 'a.0.b.2.3.4': 1}, bounds: {'$_path': [['a.0.b','a.0.b',true,true], " + "['a.0.b.','a.0.b/',true,false], ['a.b','a.b',true,true], ['a.b.','a.b/',true,false]], " + "'a.0.b.2.3.4': [[{$minKey: 1},{$maxKey: 1},true,true]]}}}}}"); +} + +TEST_F(QueryPlannerAllPathsTest, ShouldDeclineToAnswerQueriesThatTraverseTooManyArrays) { + addAllPathsIndex(BSON("$**" << 1), + {"a", + "a.b", + "a.b.c", + "a.b.c.d", + "a.b.c.d.e", + "a.b.c.d.e.f", + "a.b.c.d.e.f.g", + "a.b.c.d.e.f.g.h", + "a.b.c.d.e.f.g.h.i", + "a.b.c.d.e.f.g.h.i.j", + "a.b.c.d.e.f.g.h.i.j.k"}); + + // We can query through 8 levels of arrays via explicit array indices... + runQuery(fromjson("{'a.1.b.2.c.3.d.4.e.5.f.6.g.7.h.8': 1}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {node: {ixscan: {pattern: {$_path: 1, 'a.1.b.2.c.3.d.4.e.5.f.6.g.7.h.8': 1}}}}}"); + + // ... but after that, we decline to answer the query using the $** index. + runQuery(fromjson("{'a.1.b.2.c.3.d.4.e.5.f.6.g.7.h.8.i.9': 1}")); + assertNumSolutions(1U); + assertSolutionExists("{cscan: {dir: 1}}"); + + // However, we continue to allow implicit (non-indices) traversal of arrays to any depth. + runQuery(fromjson("{'a.1.b.2.c.3.d.4.e.5.f.6.g.7.h.8.i.j.k': 1}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch:{node:{ixscan:{pattern:{$_path:1, 'a.1.b.2.c.3.d.4.e.5.f.6.g.7.h.8.i.j.k': 1}}}}}"); +} + +// TODO SERVER-35335: Add testing for Min/Max. +// TODO SERVER-36517: Add testing for DISTINCT_SCAN. +// TODO SERVER-35336: Add testing for partialFilterExpression. +// TODO SERVER-35331: Add testing for hints. +// TODO SERVER-36145: Add testing for non-blocking sort. + } // namespace mongo |