From 0d6b561250a8093d55deffae24afbdee4adb4758 Mon Sep 17 00:00:00 2001 From: Hana Pearlman Date: Wed, 14 Apr 2021 13:56:25 +0000 Subject: Make some changes to planner_wildcard_helpers to allow for compound wc indexes --- src/mongo/db/index/wildcard_key_generator.cpp | 11 +- src/mongo/db/query/SConscript | 1 + src/mongo/db/query/planner_ixselect.cpp | 3 + src/mongo/db/query/planner_wildcard_helpers.cpp | 133 ++++++--- src/mongo/db/query/planner_wildcard_helpers.h | 2 +- .../db/query/planner_wildcard_helpers_test.cpp | 318 +++++++++++++++++++++ src/mongo/db/query/query_solution.cpp | 28 +- 7 files changed, 441 insertions(+), 55 deletions(-) create mode 100644 src/mongo/db/query/planner_wildcard_helpers_test.cpp diff --git a/src/mongo/db/index/wildcard_key_generator.cpp b/src/mongo/db/index/wildcard_key_generator.cpp index 8b66f209a31..318f2082ac0 100644 --- a/src/mongo/db/index/wildcard_key_generator.cpp +++ b/src/mongo/db/index/wildcard_key_generator.cpp @@ -67,12 +67,15 @@ constexpr StringData WildcardKeyGenerator::kSubtreeSuffix; WildcardProjection WildcardKeyGenerator::createProjectionExecutor(BSONObj keyPattern, BSONObj pathProjection) { - // We should never have a key pattern that contains more than a single element. - invariant(keyPattern.nFields() == 1); - // The _keyPattern is either { "$**": ±1 } for all paths or { "path.$**": ±1 } for a single // subtree. If we are indexing a single subtree, then we will project just that path. - auto indexRoot = keyPattern.firstElement().fieldNameStringData(); + // The folllowing change is a simple fix to obtain the single element in the keyPattern + // representing the wildcard component. + auto wcElem = std::find_if(keyPattern.begin(), keyPattern.end(), [](auto&& elem) { + return elem.fieldNameStringData().endsWith("$**"_sd); + }); + invariant(wcElem != keyPattern.end()); + auto indexRoot = wcElem->fieldNameStringData(); auto suffixPos = indexRoot.find(kSubtreeSuffix); // If we're indexing a single subtree, we can't also specify a path projection. diff --git a/src/mongo/db/query/SConscript b/src/mongo/db/query/SConscript index d4c4a0627f7..b549f2a9dac 100644 --- a/src/mongo/db/query/SConscript +++ b/src/mongo/db/query/SConscript @@ -335,6 +335,7 @@ env.CppUnitTest( "planner_access_test.cpp", "planner_analysis_test.cpp", "planner_ixselect_test.cpp", + "planner_wildcard_helpers_test.cpp", "projection_ast_test.cpp", "projection_test.cpp", "query_planner_array_test.cpp", diff --git a/src/mongo/db/query/planner_ixselect.cpp b/src/mongo/db/query/planner_ixselect.cpp index cde273593ad..9b9ec81ce99 100644 --- a/src/mongo/db/query/planner_ixselect.cpp +++ b/src/mongo/db/query/planner_ixselect.cpp @@ -661,6 +661,9 @@ bool QueryPlannerIXSelect::nodeIsSupportedByWildcardIndex(const MatchExpression* // store keys for nested objects, meaning that any kind of comparison to an object or array // cannot be answered by the index (including with a $in). + // TODO: we should confirm whether this is correct after compound wildcard indexes are + // supported: wildcard indexes can support object queries on the non-WC fields in the index. + if (ComparisonMatchExpression::isComparisonMatchExpression(queryExpr)) { const ComparisonMatchExpression* cmpExpr = static_cast(queryExpr); diff --git a/src/mongo/db/query/planner_wildcard_helpers.cpp b/src/mongo/db/query/planner_wildcard_helpers.cpp index 395d0253e04..7969a2c0d6e 100644 --- a/src/mongo/db/query/planner_wildcard_helpers.cpp +++ b/src/mongo/db/query/planner_wildcard_helpers.cpp @@ -35,6 +35,7 @@ #include +#include "mongo/bson/bsonobjbuilder.h" #include "mongo/bson/util/builder.h" #include "mongo/db/exec/projection_executor_utils.h" #include "mongo/db/index/wildcard_key_generator.h" @@ -44,6 +45,42 @@ namespace mongo { namespace wildcard_planning { namespace { +auto getElement(const BSONObj& keyPattern, int i) { + auto it = keyPattern.begin(); + for (int j = 0; j < i; j++) { + it++; + } + return it; +} + +auto getLastElement(const BSONObj& keyPattern) { + return getElement(keyPattern, keyPattern.nFields() - 1); +} + +auto expandIndexKey(const BSONObj& keyPattern, const StringData& fieldName) { + BSONObjBuilder bb; + for (auto& elem : keyPattern) { + if (elem.fieldNameStringData().endsWith("$**"_sd)) { + bb.appendAs(elem, fieldName); + } else { + bb.append(elem); + } + } + return bb.obj(); +} + +auto insertPathPlaceHolder(const BSONObj& keyPattern) { + BSONObjBuilder bb; + for (auto it = keyPattern.begin(); it != keyPattern.end(); it++) { + if (std::next(it) == keyPattern.end()) { + // 'it' is pointing to the wildcard element, the last field in the keyPattern. + bb.appendAs(*it, "$_path"); + } + bb.append(*it); + } + return bb.obj(); +} + /** * Compares the path 'fieldNameOrArrayIndexPath' to 'staticComparisonPath', ignoring any array * indices present in the former if they are not present in the latter. The 'multikeyPathComponents' @@ -142,16 +179,33 @@ FieldRef pathWithoutSpecifiedComponents(const FieldRef& path, * looking up multikeyness in 'multikeyPathSet'. */ MultikeyPaths buildMultiKeyPathsForExpandedWildcardIndexEntry( - const FieldRef& indexedPath, const std::set& multikeyPathSet) { + const FieldRef& indexedPath, + const std::set& multikeyPathSet, + const BSONObj& keyPattern) { FieldRef pathToLookup; - MultikeyComponents multikeyPaths; + MultikeyPaths multikeyPaths = {}; + + // TODO: what if the non-wildcard entries do represent arrays? Then they should be multikey. + // For each non-wildcard index entry, indicate that they are not multikey. + // Note this assumes only one wildcard entry. + for (auto& elem : keyPattern) { + if (!elem.fieldNameStringData().endsWith("$**")) { + MultikeyComponents comps; + multikeyPaths.insert(multikeyPaths.end(), comps); + } + } + + // Check each part of the wildcard entry 'indexedPath' to determine if it's multikey. + MultikeyComponents comps; for (size_t i = 0; i < indexedPath.numParts(); ++i) { pathToLookup.appendPart(indexedPath.getPart(i)); - if (fieldNameOrArrayIndexPathSetContains(multikeyPathSet, multikeyPaths, pathToLookup)) { - multikeyPaths.insert(i); + if (fieldNameOrArrayIndexPathSetContains(multikeyPathSet, comps, pathToLookup)) { + comps.insert(i); } } - return {multikeyPaths}; + multikeyPaths.insert(multikeyPaths.end(), comps); + + return multikeyPaths; } std::set generateFieldNameOrArrayIndexPathSet(const MultikeyComponents& multikeyPaths, @@ -231,13 +285,10 @@ std::set generateFieldNameOrArrayIndexPathSet(const MultikeyComponents bool validateNumericPathComponents(const MultikeyPaths& multikeyPaths, const std::set& includedPaths, const FieldRef& queryPath) { - // $** multikeyPaths always have a singleton set, since they are single-element indexes. - invariant(multikeyPaths.size() == 1); - // Find the positions of all multikey path components in 'queryPath' that have a numerical path // component immediately after. For a queryPath of 'a.2.b' this will return position 0; that is, // 'a'. If no such multikey path was found, we are clear to proceed with planning. - const auto arrayIndices = findArrayIndexPathComponents(multikeyPaths.front(), queryPath); + const auto arrayIndices = findArrayIndexPathComponents(multikeyPaths.back(), queryPath); if (arrayIndices.empty()) { return true; } @@ -349,9 +400,9 @@ void expandWildcardIndexEntry(const IndexEntry& wildcardIndex, std::vector* out) { invariant(out); invariant(wildcardIndex.type == INDEX_WILDCARD); - // Should only have one field of the form {"path.$**" : 1}. - invariant(wildcardIndex.keyPattern.nFields() == 1); - invariant(wildcardIndex.keyPattern.firstElement().fieldNameStringData().endsWith("$**")); + + // Should have a wilcard key in the index. + invariant(getLastElement(wildcardIndex.keyPattern)->fieldNameStringData().endsWith("$**")); // $** indexes do not keep the multikey metadata inside the index catalog entry, as the amount // of metadata is not bounded. We do not expect IndexEntry objects for $** indexes to have a @@ -378,7 +429,7 @@ void expandWildcardIndexEntry(const IndexEntry& wildcardIndex, // which will indicate to the downstream planning code which components of 'fieldName' are // multikey. auto multikeyPaths = buildMultiKeyPathsForExpandedWildcardIndexEntry( - queryPath, wildcardIndex.multikeyPathSet); + queryPath, wildcardIndex.multikeyPathSet, wildcardIndex.keyPattern); // Check whether a query on the current fieldpath is answerable by the $** index, given any // numerical path components that may be present in the path string. @@ -392,10 +443,10 @@ void expandWildcardIndexEntry(const IndexEntry& wildcardIndex, // we will generate two expanded index entries: one for "a.b" and "c.d". The "a.b" entry // will be marked as multikey because "a" is multikey, whereas the "c.d" entry will not be // marked as multikey. - invariant(multikeyPaths.size() == 1u); - const bool isMultikey = !multikeyPaths[0].empty(); + invariant((int)multikeyPaths.size() == wildcardIndex.keyPattern.nFields()); + const bool isMultikey = !multikeyPaths.back().empty(); - IndexEntry entry(BSON(fieldName << wildcardIndex.keyPattern.firstElement()), + IndexEntry entry(expandIndexKey(wildcardIndex.keyPattern, fieldName), IndexType::INDEX_WILDCARD, IndexDescriptor::kLatestIndexVersion, isMultikey, @@ -424,8 +475,7 @@ BoundsTightness translateWildcardIndexBoundsAndTightness(const IndexEntry& index // only have a single keyPattern field and multikeyPath entry, but this is sufficient to // determine whether it will be necessary to adjust the tightness. invariant(index.type == IndexType::INDEX_WILDCARD); - invariant(index.keyPattern.nFields() == 1); - invariant(index.multikeyPaths.size() == 1); + invariant(index.keyPattern.nFields() == (int)index.multikeyPaths.size()); invariant(oil); // If our bounds include any objects -- anything in the range ({}, []) -- then we will need to @@ -436,14 +486,19 @@ BoundsTightness translateWildcardIndexBoundsAndTightness(const IndexEntry& index // result set should include documents such as {a: {b: null}}; however, the wildcard index key // for this object will be {"": "a.b", "": null}, which means that the original bounds would // skip this document. We must also set the tightness to INEXACT_FETCH to avoid false positives. - if (boundsOverlapObjectTypeBracket(*oil) && !oil->intervals.front().isMinToMax()) { + + // TODO: Not convinced that we are only checking the bounds corresponding to the $** component. + if (boundsOverlapObjectTypeBracket(*oil) && !oil->intervals.back().isMinToMax()) { oil->intervals = {IndexBoundsBuilder::allValues()}; return BoundsTightness::INEXACT_FETCH; } // If the query passes through any array indices, we must always fetch and filter the documents. - const auto arrayIndicesTraversedByQuery = findArrayIndexPathComponents( - index.multikeyPaths.front(), FieldRef{index.keyPattern.firstElementFieldName()}); + // Here we know that the last field in key pattern is the wildcard field-- after expansion, we + // can no longer tell just from looking at the keys. + auto wcElem = getLastElement(index.keyPattern); + const auto arrayIndicesTraversedByQuery = + findArrayIndexPathComponents(index.multikeyPaths.back(), FieldRef{wcElem->fieldName()}); // If the list of array indices we traversed is non-empty, set the tightness to INEXACT_FETCH. return (arrayIndicesTraversedByQuery.empty() ? tightnessIn : BoundsTightness::INEXACT_FETCH); @@ -455,23 +510,23 @@ void finalizeWildcardIndexScanConfiguration(IndexScanNode* scan) { // We should only ever reach this point when processing a $** index. Sanity check the arguments. invariant(index && index->type == IndexType::INDEX_WILDCARD); - invariant(index->keyPattern.nFields() == 1); - invariant(index->multikeyPaths.size() == 1); - invariant(bounds && bounds->fields.size() == 1); + invariant(index->keyPattern.nFields() == (int)index->multikeyPaths.size()); + invariant(bounds && bounds->fields.size() == index->multikeyPaths.size()); invariant(bounds->fields.front().name == index->keyPattern.firstElementFieldName()); // For $** indexes, the IndexEntry key pattern is {'path.to.field': ±1} but the actual keys in // the index are of the form {'$_path': ±1, 'path.to.field': ±1}, where the value of the first // field in each key is 'path.to.field'. We push a new entry into the bounds vector for the // leading '$_path' bound here. We also push corresponding fields into the IndexScanNode's - // keyPattern and its multikeyPaths vector. - index->multikeyPaths.insert(index->multikeyPaths.begin(), MultikeyComponents{}); - bounds->fields.insert(bounds->fields.begin(), {"$_path"}); - index->keyPattern = - BSON("$_path" << index->keyPattern.firstElement() << index->keyPattern.firstElement()); + // keyPattern and its multikeyPaths vector. Note that these key patterns may be prefixed by + // other fields, representing a compound index. The last field is the wildcard element, so we + // add the $_path placeholders right before it. + index->multikeyPaths.insert(std::prev(index->multikeyPaths.end()), MultikeyComponents{}); + bounds->fields.insert(std::prev(bounds->fields.end()), {"$_path"}); + index->keyPattern = insertPathPlaceHolder(index->keyPattern); // Create a FieldRef to perform any necessary manipulations on the query path string. - FieldRef queryPath{std::next(index->keyPattern.begin())->fieldNameStringData()}; + FieldRef queryPath{getLastElement(index->keyPattern)->fieldNameStringData()}; auto& multikeyPaths = index->multikeyPaths.back(); // If the bounds overlap the object type bracket, then we must retrieve all documents which @@ -489,7 +544,7 @@ void finalizeWildcardIndexScanConfiguration(IndexScanNode* scan) { // Add a $_path point-interval for each path that needs to be traversed in the index. If subpath // bounds are required, then we must add a further range interval on ["path.","path/"). static const char subPathStart = '.', subPathEnd = static_cast('.' + 1); - auto& pathIntervals = bounds->fields.front().intervals; + auto& pathIntervals = std::prev(std::prev(bounds->fields.end()))->intervals; for (const auto& fieldPath : paths) { auto path = fieldPath.dottedField().toString(); pathIntervals.push_back(IndexBoundsBuilder::makePointInterval(path)); @@ -514,12 +569,16 @@ bool isWildcardObjectSubpathScan(const IndexScanNode* node) { } // We expect consistent arguments, representing a $** index which has already been finalized. - invariant(node->index.keyPattern.nFields() == 2); - invariant(node->index.multikeyPaths.size() == 2); - invariant(node->bounds.fields.size() == 2); - invariant(node->bounds.fields.front().name == node->index.keyPattern.firstElementFieldName()); - invariant(node->bounds.fields.back().name == - std::next(node->index.keyPattern.begin())->fieldName()); + auto numFields = node->index.keyPattern.nFields(); + invariant((int)node->index.multikeyPaths.size() == numFields); + invariant((int)node->bounds.fields.size() == numFields); + + // The last two elements of the vounds and keyPattern are the $_path placeholder followed by the + // wildcard element. + auto wcElem = getLastElement(node->index.keyPattern); + invariant(node->bounds.fields[numFields - 2].name == + getElement(node->index.keyPattern, numFields - 2)->fieldName()); + invariant(node->bounds.fields[numFields - 1].name == wcElem->fieldName()); // Check the bounds on the query field for any intersections with the object type bracket. return boundsOverlapObjectTypeBracket(node->bounds.fields.back()); diff --git a/src/mongo/db/query/planner_wildcard_helpers.h b/src/mongo/db/query/planner_wildcard_helpers.h index d152ce18cd2..dabcbfd63a3 100644 --- a/src/mongo/db/query/planner_wildcard_helpers.h +++ b/src/mongo/db/query/planner_wildcard_helpers.h @@ -87,7 +87,7 @@ bool isWildcardObjectSubpathScan(const IndexScanNode* node); * Return true if the intervals on the 'value' field will include subobjects, and * thus require the bounds on $_path to include ["path.", "path/"). */ -bool requiresSubpathBounds(const OrderedIntervalList& intervals); +// bool requiresSubpathBounds(const OrderedIntervalList& intervals); } // namespace wildcard_planning } // namespace mongo diff --git a/src/mongo/db/query/planner_wildcard_helpers_test.cpp b/src/mongo/db/query/planner_wildcard_helpers_test.cpp new file mode 100644 index 00000000000..579cb99963b --- /dev/null +++ b/src/mongo/db/query/planner_wildcard_helpers_test.cpp @@ -0,0 +1,318 @@ +/** + * Copyright (C) 2018-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +/** + * This file contains tests for mongo/db/query/planner_ixselect.cpp + */ + +#include "mongo/db/query/index_entry.h" + +#include "mongo/db/index/wildcard_key_generator.h" +#include "mongo/db/pipeline/aggregation_context_fixture.h" +#include "mongo/db/query/planner_wildcard_helpers.h" +#include "mongo/db/query/query_planner_test_fixture.h" +#include "mongo/unittest/bson_test_util.h" + +#include + +namespace mongo { + +using PlannerWildcardHelpersTest = AggregationContextFixture; + +auto getLastElement(const BSONObj& keyPattern) { + auto it = keyPattern.begin(); + while (std::next(it) != keyPattern.end()) { + it++; + } + return it; +} + +/**************** The following section can be moved to planner_ixselect_test.cpp ****************/ +/* + * Will compare 'keyPatterns' with 'entries'. As part of comparing, it will sort both of them. + */ +bool indexEntryKeyPatternsMatch(std::vector* keyPatterns, + std::vector* entries) { + ASSERT_EQ(entries->size(), keyPatterns->size()); + + const auto cmpFn = [](const IndexEntry& a, const IndexEntry& b) { + return SimpleBSONObjComparator::kInstance.evaluate(a.keyPattern < b.keyPattern); + }; + + std::sort(entries->begin(), entries->end(), cmpFn); + std::sort(keyPatterns->begin(), keyPatterns->end(), [](const BSONObj& a, const BSONObj& b) { + return SimpleBSONObjComparator::kInstance.evaluate(a < b); + }); + + return std::equal(keyPatterns->begin(), + keyPatterns->end(), + entries->begin(), + [](const BSONObj& keyPattern, const IndexEntry& ie) -> bool { + return SimpleBSONObjComparator::kInstance.evaluate(keyPattern == + ie.keyPattern); + }); +} + +// Helper which constructs an IndexEntry and returns it along with an owned ProjectionExecutor, +// which is non-null if the requested entry represents a wildcard index and null otherwise. When +// non-null, it simulates the ProjectionExecutor that is owned by the $** IndexAccessMethod. +auto makeIndexEntry(BSONObj keyPattern, + MultikeyPaths multiKeyPaths, + std::set multiKeyPathSet = {}, + BSONObj infoObj = BSONObj()) { + auto wcElem = getLastElement(keyPattern); + auto wcProj = wcElem->fieldNameStringData().endsWith("$**"_sd) + ? std::make_unique(WildcardKeyGenerator::createProjectionExecutor( + keyPattern, infoObj.getObjectField("wildcardProjection"))) + : std::unique_ptr(nullptr); + + auto multiKey = !multiKeyPathSet.empty() || + std::any_of(multiKeyPaths.cbegin(), multiKeyPaths.cend(), [](const auto& entry) { + return !entry.empty(); + }); + return std::make_pair(IndexEntry(keyPattern, + IndexNames::nameToType(IndexNames::findPluginName(keyPattern)), + IndexDescriptor::kLatestIndexVersion, + multiKey, + multiKeyPaths, + multiKeyPathSet, + false, + false, + CoreIndexInfo::Identifier("test_foo"), + nullptr, + {}, + nullptr, + wcProj.get()), + std::move(wcProj)); +} + +std::unique_ptr parseMatchExpression(const BSONObj& obj) { + boost::intrusive_ptr expCtx(new ExpressionContextForTest()); + StatusWithMatchExpression status = MatchExpressionParser::parse(obj, std::move(expCtx)); + ASSERT_TRUE(status.isOK()); + return std::unique_ptr(status.getValue().release()); +} + +TEST_F(PlannerWildcardHelpersTest, ExpandSimpleWildcardIndexEntry) { + std::vector out; + stdx::unordered_set fields{"a"}; + const auto indexEntry = makeIndexEntry(BSON("$**" << 1), {}); + wildcard_planning::expandWildcardIndexEntry(indexEntry.first, fields, &out); + + ASSERT_EQ(out.size(), 1u); + ASSERT_BSONOBJ_EQ(out[0].keyPattern, fromjson("{a: 1}")); +} + +TEST_F(PlannerWildcardHelpersTest, ExpandCompoundWildcardIndexEntry) { + std::vector out; + stdx::unordered_set fields{"a", "b", "c"}; + const auto indexEntry = makeIndexEntry( + BSON("a" << 1 << "$**" << 1), {}, {}, {fromjson("{wildcardProjection: {a: 0}}")}); + wildcard_planning::expandWildcardIndexEntry(indexEntry.first, fields, &out); + + // TODO: if we don't exclude 'a' via the wildcardProjection, then we also get {a: 1, a: 1} in + // the output set. Is this desirable? + ASSERT_EQ(out.size(), 2u); + std::vector expectedKeyPatterns = {fromjson("{a: 1, b: 1}"), fromjson("{a: 1, c: 1}")}; + indexEntryKeyPatternsMatch(&expectedKeyPatterns, &out); +} + +TEST_F(PlannerWildcardHelpersTest, ExpandCompoundWildcardIndexEntryNoMatch) { + std::vector out; + stdx::unordered_set fields{"c", "b"}; + const auto indexEntry = makeIndexEntry( + BSON("a" << 1 << "$**" << 1), {}, {}, {fromjson("{wildcardProjection: {a: 0}}")}); + wildcard_planning::expandWildcardIndexEntry(indexEntry.first, fields, &out); + + // TODO: as a result of the changes to expandWildcardIndexEntry(), we are now outputting some + // indexes that are not useful for the query, e.g. {a: 1, b: 1} for a query referencing only + // 'c' and 'b'. We should make sure we are doing some kind of filtering somewhere for this-- I + // assume it exists for regular indexes somewhere in the query planner. we need to make sure the + // query matches the prefix of the expanded index. + ASSERT_EQ(out.size(), 2u); + std::vector expectedKeyPatterns = {fromjson("{a: 1, b: 1}"), fromjson("{a: 1, c: 1}")}; + indexEntryKeyPatternsMatch(&expectedKeyPatterns, &out); +} + +/*************************************** end section ***************************************/ + +// translateWildcardIndexBoundsAndTightness + +TEST_F(PlannerWildcardHelpersTest, TranslateBoundsWithWildcard) { + // expand first + std::vector out; + stdx::unordered_set fields{"a", "b"}; + const auto indexEntry = makeIndexEntry( + BSON("a" << 1 << "$**" << 1), {}, {}, {fromjson("{wildcardProjection: {a: 0}}")}); + wildcard_planning::expandWildcardIndexEntry(indexEntry.first, fields, &out); + + // This expression can only be over one field. WTS that given a query on a field and a compound + // index on that field (followed by wildcard) that we translate properly. + BSONObj obj = fromjson("{a: {$lte: 1}}"); + auto expr = parseMatchExpression(obj); + BSONElement elt = obj.firstElement(); + OrderedIntervalList oil; + IndexBoundsBuilder::BoundsTightness tightness; + IndexBoundsBuilder::translate(expr.get(), elt, out[0], &oil, &tightness); + ASSERT_EQUALS(oil.name, "a"); + ASSERT_EQUALS(oil.intervals.size(), 1U); + ASSERT_EQUALS( + Interval::INTERVAL_EQUALS, + oil.intervals[0].compare(Interval(fromjson("{'': -Infinity, '': 1}"), true, true))); + ASSERT(tightness == IndexBoundsBuilder::EXACT); +} + +// How to test? +// finalizeWildcardIndexScanConfiguration(IndexScanNode* scan); +// isWildcardObjectSubpathScan(const IndexScanNode* node); + + +/********** The following section can be moved to query_planner_wildcard_index_test.cpp **********/ +class QueryPlannerWildcardTest : public QueryPlannerTest { +protected: + void setUp() final { + QueryPlannerTest::setUp(); + + // We're interested in testing plans that use a $** index, so don't generate collection + // scans. + params.options &= ~QueryPlannerParams::INCLUDE_COLLSCAN; + } + + void addWildcardIndex(BSONObj keyPattern, + const std::set& multikeyPathSet = {}, + BSONObj wildcardProjection = BSONObj{}, + MatchExpression* partialFilterExpr = nullptr, + CollatorInterface* collator = nullptr, + const std::string& indexName = "indexName") { + // Convert the set of std::string to a set of FieldRef. + std::set multikeyFieldRefs; + for (auto&& path : multikeyPathSet) { + ASSERT_TRUE(multikeyFieldRefs.emplace(path).second); + } + ASSERT_EQ(multikeyPathSet.size(), multikeyFieldRefs.size()); + + const bool isMultikey = !multikeyPathSet.empty(); + BSONObj infoObj = BSON("wildcardProjection" << wildcardProjection); + + _proj = WildcardKeyGenerator::createProjectionExecutor(keyPattern, wildcardProjection); + + params.indices.push_back({std::move(keyPattern), + IndexType::INDEX_WILDCARD, + IndexDescriptor::kLatestIndexVersion, + isMultikey, + {}, // multikeyPaths + std::move(multikeyFieldRefs), + false, // sparse + false, // unique + IndexEntry::Identifier{indexName}, + partialFilterExpr, + std::move(infoObj), + collator, + _proj.get_ptr()}); + } + + boost::optional _proj; +}; + +TEST_F(QueryPlannerWildcardTest, CompoundWildcardIndexBasic) { + // TODO: if wildcard projection is not used here, we fail. Same with most of the tests below. + addWildcardIndex(fromjson("{a: 1, '$**': 1}"), {}, fromjson("{a: 0}") /* wildcardProjection */); + + runQuery(fromjson("{a: {$eq: 5}, x: {$lt: 3}}")); + + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {node: {ixscan: {pattern: {a: 1, $_path: 1, x: 1}, bounds: {'a': " + "[[5, 5, true, true]], '$_path': [['x', 'x', true, true]], 'x': [[-Infinity, 3, true, " + "false]]}}}}}"); +} + +// TODO: failing because the bounds for 'x' are invalid, which causes an invariant failure. The +// bounds look empty at the invariant. look empty, causing an invariant failure. +// TEST_F(QueryPlannerWildcardTest, CompoundEqualsNullQueriesDontUseWildcardIndexes) { +// addWildcardIndex(fromjson("{a: 1, '$**': 1}"), {}, fromjson("{a: 0}")); + +// runQuery(fromjson("{a: {$lt: 2}, x: {$eq: null}}")); +// // It's unclear to me what solution we want to see here. Seems to me that we should be able +// // to do an IXSCAN followed by a filter. +// assertNumSolutions(1U); +// assertSolutionExists("{cscan: {dir: 1}}"); +// } + +TEST_F(QueryPlannerWildcardTest, CompoundWildcardWithMultikeyField) { + addWildcardIndex( + fromjson("{a: 1, '$**': 1}"), {"b"} /* 'b' marked as multikey field */, fromjson("{a: 0}")); + runQuery(fromjson("{a: {$eq: 5}, b: {$gt: 0}}")); + + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {node: {ixscan: {pattern: {a: 1, $_path: 1, b: 1}, bounds: {'a': " + "[[5, 5, true, true]], '$_path': [['b','b',true,true]], b: " + "[[0,Infinity,false,true]]}}}}}}"); +} + +TEST_F(QueryPlannerWildcardTest, + CompoundWildcardMultiplePredicatesOverNestedFieldWithFirstComponentMultikey) { + addWildcardIndex(fromjson("{x: 1, '$**': 1}"), {"a"}, fromjson("{x: 0}")); + runQuery(fromjson("{x: {$lt: 2}, 'a.b': {$gt: 0, $lt: 9}}")); + + // TODO: in query_planner_wildcard_index_test.cpp, the original test gave 2 solutions, not one. + // Feels like a bug... + // I have to admit I'm confused about the bounds in the original test too -- For example, in the + // solution here, we can't we use the bounds for 'a.b': [[0, 20, false false]]? + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {'a.b': {$gt: 0}}, node: " + "{ixscan: {filter: null, pattern: {'x': 1, '$_path': 1, 'a.b': 1}," + "bounds: {'x': [[-Infinity, 2, true, false]], '$_path': [['a.b','a.b',true,true]], 'a.b': " + "[[-Infinity,9,true,false]]}}}}}"); +} + + +TEST_F(QueryPlannerWildcardTest, + CompoundWildcardAllPredsEligibleForIndexUseGenerateCandidatePlans) { + addWildcardIndex(fromjson("{x: 1, 'a.$**': 1}"), {"a.b", "a.c"}); + runQuery( + fromjson("{x: {$eq: 2}, 'a.b': {$gt: 0, $lt: 9}, 'a.c': {$gt: 11, $lt: 20}, d: {$gt: 31, " + "$lt: 40}}")); + + // TODO: Same as above: the original test gave 4 solutions, not two. + assertNumSolutions(2U); + assertSolutionExists( + "{fetch: {filter: {'a.b':{$gt:0,$lt: 9},'a.c':{$gt:11},d:{$gt:31,$lt:40}}, node: " + "{ixscan: {filter: null, pattern: {'x': 1, '$_path': 1, 'a.c': 1}," + "bounds: {'x': [[2, 2, true, true]], '$_path': [['a.c','a.c',true,true]], 'a.c': " + "[[-Infinity,20,true,false]]}}}}}"); + assertSolutionExists( + "{fetch: {filter: {'a.b':{$gt:0},'a.c':{$gt:11,$lt:20},d:{$gt:31,$lt:40}}, node: " + "{ixscan: {filter: null, pattern: {'x': 1, '$_path': 1, 'a.b': 1}," + "bounds: {'x': [[2, 2, true, true]], '$_path': [['a.b','a.b',true,true]], 'a.b': " + "[[-Infinity,9,true,false]]}}}}}"); +} +} // namespace mongo \ No newline at end of file diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp index 28663f40e1c..a0c35079455 100644 --- a/src/mongo/db/query/query_solution.cpp +++ b/src/mongo/db/query/query_solution.cpp @@ -536,9 +536,9 @@ FieldAvailability IndexScanNode::getFieldAvailability(const string& field) const size_t keyPatternFieldIndex = 0; for (auto&& elt : index.keyPattern) { // For $** indexes, the keyPattern is prefixed by a virtual field, '$_path'. We therefore - // skip the first keyPattern field when deciding whether we can provide the requested field. - if (index.type == IndexType::INDEX_WILDCARD && !keyPatternFieldIndex) { - invariant(elt.fieldNameStringData() == "$_path"_sd); + // skip this keyPattern field when deciding whether we can provide the requested field. + if (index.type == IndexType::INDEX_WILDCARD && !keyPatternFieldIndex && + elt.fieldNameStringData() == "$_path"_sd) { ++keyPatternFieldIndex; continue; } @@ -766,25 +766,27 @@ ProvidedSortSet computeSortsForScan(const IndexEntry& index, // fact, $-prefixed path components are illegal in queries in most contexts, so misinterpreting // this as a path in user-data could trigger subsequent assertions. if (index.type == IndexType::INDEX_WILDCARD) { - invariant(bounds.fields.size() == 2u); + // invariant(bounds.fields.size() == 2u); // No sorts are provided if the bounds for '$_path' consist of multiple intervals. This can // happen for existence queries. For example, {a: {$exists: true}} results in bounds // [["a","a"], ["a.", "a/")] for '$_path' so that keys from documents where "a" is a nested - // object are in bounds. - if (bounds.fields[0].intervals.size() != 1u) { + // object are in bounds. The '$_path' field must be the second to last field in bounds. + if (bounds.fields[bounds.fields.size() - 2].intervals.size() != 1u) { return {}; } // Strip '$_path' out of 'sortPattern' and then proceed with regular sort analysis. BSONObjIterator it{sortPatternProvidedByIndex}; - invariant(it.more()); - auto pathElement = it.next(); - invariant(pathElement.fieldNameStringData() == "$_path"_sd); - invariant(it.more()); - auto secondElement = it.next(); - invariant(!it.more()); - sortPatternProvidedByIndex = BSONObjBuilder{}.append(secondElement).obj(); + while (it.more()) { + auto pathElement = it.next(); + if (pathElement.fieldNameStringData() == "$_path"_sd) { + invariant(it.more()); + it.next(); + invariant(!it.more()); + } + } + sortPatternProvidedByIndex = sortPatternProvidedByIndex.removeField("$_path"_sd); } // -- cgit v1.2.1