summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBernard Gorman <bernard.gorman@gmail.com>2018-09-12 15:30:56 +0100
committerBernard Gorman <bernard.gorman@gmail.com>2018-10-11 01:23:07 +0100
commit8dab2485c1badc2fd09b84667b4dcfc8b53fef63 (patch)
tree3479da286c74a91a75b1703f422aa886e59bd33d
parente01cdb673978f6d3a9d53bb3a6ba35b34c281b0e (diff)
downloadmongo-8dab2485c1badc2fd09b84667b4dcfc8b53fef63.tar.gz
SERVER-36517 Allow wildcard indexes to provide DISTINCT_SCAN
-rw-r--r--jstests/core/wildcard_index_distinct_scan.js198
-rw-r--r--src/mongo/bson/ordering.h4
-rw-r--r--src/mongo/db/exec/distinct_scan.cpp38
-rw-r--r--src/mongo/db/exec/distinct_scan.h62
-rw-r--r--src/mongo/db/query/get_executor.cpp43
-rw-r--r--src/mongo/db/query/index_bounds_builder.cpp4
-rw-r--r--src/mongo/db/query/planner_access.cpp77
-rw-r--r--src/mongo/db/query/planner_access.h2
-rw-r--r--src/mongo/db/query/planner_ixselect.cpp74
-rw-r--r--src/mongo/db/query/planner_wildcard_helpers.cpp310
-rw-r--r--src/mongo/db/query/planner_wildcard_helpers.h57
-rw-r--r--src/mongo/db/query/query_planner_wildcard_index_test.cpp52
-rw-r--r--src/mongo/db/query/query_solution.cpp9
-rw-r--r--src/mongo/db/query/query_solution.h6
-rw-r--r--src/mongo/db/query/stage_builder.cpp19
-rw-r--r--src/mongo/dbtests/query_stage_distinct.cpp26
16 files changed, 663 insertions, 318 deletions
diff --git a/jstests/core/wildcard_index_distinct_scan.js b/jstests/core/wildcard_index_distinct_scan.js
new file mode 100644
index 00000000000..df831cbc5c9
--- /dev/null
+++ b/jstests/core/wildcard_index_distinct_scan.js
@@ -0,0 +1,198 @@
+/**
+ * Tests that a $** index can provide a DISTINCT_SCAN or indexed solution where appropriate.
+ */
+(function() {
+ "use strict";
+
+ load("jstests/aggregation/extras/utils.js"); // For arrayEq.
+ load("jstests/libs/analyze_plan.js"); // For planHasStage and getPlanStages.
+
+ const assertArrayEq = (l, r) => assert(arrayEq(l, r), tojson(l) + " != " + tojson(r));
+
+ const coll = db.all_paths_distinct_scan;
+ coll.drop();
+
+ // Records whether the field which we are distinct-ing over is multikey.
+ let distinctFieldIsMultikey = false;
+
+ // Insert a set of documents into the collection. The 'listOfValues' argument contains values of
+ // various types, and we insert numerous documents containing each of the values. This allows us
+ // to confirm that 'distinct' with a wildcard index (1) can return values of any type, (2) will
+ // only return the set of unique values, and (3) handles multikey values appropriately in cases
+ // where 'listOfValues' includes an array.
+ function insertTestData(fieldName, listOfValues) {
+ distinctFieldIsMultikey = listOfValues.some((val) => Array.isArray(val));
+ const bulk = coll.initializeUnorderedBulkOp();
+ coll.drop();
+ for (let i = 0; i < 200; i++) {
+ const didx = (i % listOfValues.length);
+ bulk.insert({[fieldName]: listOfValues[didx], b: didx, c: (-i)});
+ }
+ assert.commandWorked(bulk.execute());
+ }
+
+ /**
+ * Runs a single wildcard distinct scan test. If 'expectedPath' is non-null, verifies that there
+ * is an indexed solution that uses the $** index with the given path string. If 'expectedPath'
+ * is null, verifies that no indexed solution was found.
+ */
+ function assertWildcardDistinctScan(
+ {distinctKey, query, pathProjection, expectedScanType, expectedResults, expectedPath}) {
+ // Drop all indexes before running the test. This allows us to perform the distinct with a
+ // COLLSCAN at first, to confirm that the results are as expected.
+ assert.commandWorked(coll.dropIndexes());
+
+ // Confirm that the distinct runs with a COLLSCAN.
+ let winningPlan = coll.explain().distinct(distinctKey, query).queryPlanner.winningPlan;
+ assert(planHasStage(coll.getDB(), winningPlan, "COLLSCAN"));
+ // Run the distinct and confirm that it produces the expected results.
+ assertArrayEq(coll.distinct(distinctKey, query), expectedResults);
+
+ // Build a wildcard index on the collection and re-run the test.
+ const options = (pathProjection ? {wildcardProjection: pathProjection} : {});
+ assert.commandWorked(coll.createIndex({"$**": 1}, options));
+
+ // We expect the following outcomes for a 'distinct' that attempts to use a $** index:
+ // - No query: COLLSCAN.
+ // - Query for object value on distinct field: COLLSCAN.
+ // - Query for non-object value on non-multikey distinct field: DISTINCT_SCAN.
+ // - Query for non-object value on multikey distinct field: IXSCAN with FETCH.
+ // - Query for non-object value on field other than the distinct field: IXSCAN with FETCH.
+ const fetchIsExpected = (expectedScanType !== "DISTINCT_SCAN");
+
+ // Explain the query, and determine whether an indexed solution is available. If
+ // 'expectedPath' is null, then we do not expect the $** index to provide a plan.
+ winningPlan = coll.explain().distinct(distinctKey, query).queryPlanner.winningPlan;
+ if (!expectedPath) {
+ assert(planHasStage(coll.getDB(), winningPlan, "COLLSCAN"));
+ assert.eq(expectedScanType, "COLLSCAN");
+ return;
+ }
+
+ // Confirm that the $** distinct scan produces the expected results.
+ assertArrayEq(coll.distinct(distinctKey, query), expectedResults);
+ // Confirm that the $** plan adheres to 'fetchIsExpected' and 'expectedScanType'.
+ assert.eq(planHasStage(coll.getDB(), winningPlan, "FETCH"), fetchIsExpected);
+ assert(planHasStage(coll.getDB(), winningPlan, expectedScanType));
+ assert.docEq({$_path: 1, [expectedPath]: 1},
+ getPlanStages(winningPlan, expectedScanType).shift().keyPattern);
+ }
+
+ // The set of distinct values that should be produced by each of the test listed below.
+ const distinctValues = [1, 2, "3", null, {c: 5, d: 6}, {d: 6, c: 5}, {}, 9, 10, {e: 11}];
+
+ // Define the set of values that the distinct field may take. The first test case consists
+ // entirely of non-multikey fields, while the second includes multikey fields.
+ const testCases = [
+ // Non-multikey field values.
+ {
+ insertField: "a",
+ queryField: "a",
+ fieldValues: [1, 2, "3", null, {c: 5, d: 6}, {d: 6, c: 5}, {}, 9, 10, {e: 11}]
+ },
+ // Multikey field values. Note that values within arrays are unwrapped by the distinct
+ // scan, and empty arrays are thus not included.
+ {
+ insertField: "a",
+ queryField: "a",
+ fieldValues: [1, 2, "3", null, {c: 5, d: 6}, {d: 6, c: 5}, {}, [], [9, 10], [{e: 11}]]
+ },
+ // Non-multikey dotted field values.
+ {
+ insertField: "a",
+ queryField: "a.x",
+ fieldValues: [
+ {x: 1},
+ {x: 2},
+ {x: "3"},
+ {x: null},
+ {x: {c: 5, d: 6}},
+ {x: {d: 6, c: 5}},
+ {x: {}},
+ {x: 9},
+ {x: 10},
+ {x: {e: 11}}
+ ]
+ },
+ // Multikey dotted field values.
+ {
+ insertField: "a",
+ queryField: "a.x",
+ fieldValues: [
+ [{x: 1}],
+ [{x: 2}],
+ [{x: "3"}],
+ [{x: null}],
+ [{x: {c: 5, d: 6}}],
+ [{x: {d: 6, c: 5}}],
+ [{x: {}}],
+ [{x: []}],
+ [{x: 9}, {x: 10}],
+ [{x: [{e: 11}]}]
+ ]
+ }
+ ];
+
+ // Run all combinations of query, no-query, multikey and non-multikey distinct tests.
+ for (let testCase of testCases) {
+ // Log the start of the test and create the dataset.
+ jsTestLog("Test case: " + tojson(testCase));
+ insertTestData(testCase.insertField, testCase.fieldValues);
+
+ // Test that a $** index cannot provide an indexed 'distinct' without a query.
+ assertWildcardDistinctScan({
+ distinctKey: testCase.queryField,
+ query: {},
+ expectedScanType: "COLLSCAN",
+ expectedResults: distinctValues,
+ expectedPath: null
+ });
+
+ // Test that a $** index can provide an indexed 'distinct' for distinct-key queries.
+ assertWildcardDistinctScan({
+ distinctKey: testCase.queryField,
+ query: {[testCase.queryField]: {$lt: 3}},
+ expectedScanType: (distinctFieldIsMultikey ? "IXSCAN" : "DISTINCT_SCAN"),
+ expectedResults: [1, 2],
+ expectedPath: testCase.queryField
+ });
+
+ // Test that a $** index can provide an indexed 'distinct' for a query on another field.
+ const offset = Math.floor(testCase.fieldValues.length / 2);
+ assertWildcardDistinctScan({
+ distinctKey: testCase.queryField,
+ query: {b: {$gte: offset}},
+ expectedScanType: "IXSCAN",
+ expectedResults: distinctValues.slice(offset),
+ expectedPath: "b"
+ });
+
+ // Test that a $** index cannot provide an indexed 'distinct' for object value queries.
+ assertWildcardDistinctScan({
+ distinctKey: testCase.queryField,
+ query: {[testCase.queryField]: {$gte: {c: 5}}},
+ expectedScanType: "COLLSCAN",
+ expectedResults: [{c: 5, d: 6}, {d: 6, c: 5}, {e: 11}],
+ expectedPath: null
+ });
+
+ // Test that a $** index can provide an indexed 'distinct' for a MinMax query.
+ assertWildcardDistinctScan({
+ distinctKey: testCase.queryField,
+ query: {[testCase.queryField]: {$gte: MinKey, $lte: MaxKey}},
+ expectedScanType: "IXSCAN",
+ expectedResults: distinctValues,
+ expectedPath: testCase.queryField
+ });
+
+ // Test that a $** index cannot provide an indexed 'distinct' for excluded fields.
+ assertWildcardDistinctScan({
+ distinctKey: testCase.queryField,
+ query: {c: {$lt: 0}},
+ pathProjection: {c: 0},
+ expectedScanType: "COLLSCAN",
+ expectedResults: distinctValues,
+ expectedPath: null
+ });
+ }
+})();
diff --git a/src/mongo/bson/ordering.h b/src/mongo/bson/ordering.h
index d6e26e0aadc..b407709f6ca 100644
--- a/src/mongo/bson/ordering.h
+++ b/src/mongo/bson/ordering.h
@@ -45,6 +45,8 @@ class Ordering {
Ordering(unsigned b) : bits(b) {}
public:
+ static constexpr size_t kMaxCompoundIndexKeys = size_t{32};
+
Ordering(const Ordering& r) : bits(r.bits) {}
void operator=(const Ordering& r) {
bits = r.bits;
@@ -78,7 +80,7 @@ public:
BSONElement e = k.next();
if (e.eoo())
break;
- uassert(13103, "too many compound keys", n <= 31);
+ uassert(13103, "too many compound keys", n < kMaxCompoundIndexKeys);
if (e.number() < 0)
b |= (1 << n);
n++;
diff --git a/src/mongo/db/exec/distinct_scan.cpp b/src/mongo/db/exec/distinct_scan.cpp
index 5b1d5ccc545..90b91bcc49a 100644
--- a/src/mongo/db/exec/distinct_scan.cpp
+++ b/src/mongo/db/exec/distinct_scan.cpp
@@ -46,28 +46,22 @@ using stdx::make_unique;
// static
const char* DistinctScan::kStageType = "DISTINCT_SCAN";
-DistinctScan::DistinctScan(OperationContext* opCtx,
- const DistinctParams& params,
- WorkingSet* workingSet)
+DistinctScan::DistinctScan(OperationContext* opCtx, DistinctParams params, WorkingSet* workingSet)
: PlanStage(kStageType, opCtx),
+ _params(std::move(params)),
_workingSet(workingSet),
- _descriptor(params.descriptor),
- _iam(params.descriptor->getIndexCatalog()->getIndex(params.descriptor)),
- _params(params),
- _checker(&_params.bounds, _descriptor->keyPattern(), _params.direction) {
- _specificStats.keyPattern = _params.descriptor->keyPattern();
- if (BSONElement collationElement = _params.descriptor->getInfoElement("collation")) {
- invariant(collationElement.isABSONObj());
- _specificStats.collation = collationElement.Obj().getOwned();
- }
- _specificStats.indexName = _params.descriptor->indexName();
- _specificStats.indexVersion = static_cast<int>(_params.descriptor->version());
- _specificStats.isMultiKey = _params.descriptor->isMultikey(getOpCtx());
- _specificStats.multiKeyPaths = _params.descriptor->getMultikeyPaths(getOpCtx());
- _specificStats.isUnique = _params.descriptor->unique();
- _specificStats.isSparse = _params.descriptor->isSparse();
- _specificStats.isPartial = _params.descriptor->isPartial();
- _specificStats.direction = _params.direction;
+ _iam(_params.accessMethod),
+ _checker(&_params.bounds, _params.keyPattern, _params.scanDirection) {
+ _specificStats.keyPattern = _params.keyPattern;
+ _specificStats.indexName = _params.name;
+ _specificStats.indexVersion = static_cast<int>(_params.version);
+ _specificStats.isMultiKey = _params.isMultiKey;
+ _specificStats.multiKeyPaths = _params.multikeyPaths;
+ _specificStats.isUnique = _params.isUnique;
+ _specificStats.isSparse = _params.isSparse;
+ _specificStats.isPartial = _params.isPartial;
+ _specificStats.direction = _params.scanDirection;
+ _specificStats.collation = _params.collation.getOwned();
// Set up our initial seek. If there is no valid data, just mark as EOF.
_commonStats.isEOF = !_checker.getStartSeekPoint(&_seekPoint);
@@ -80,7 +74,7 @@ PlanStage::StageState DistinctScan::doWork(WorkingSetID* out) {
boost::optional<IndexKeyEntry> kv;
try {
if (!_cursor)
- _cursor = _iam->newCursor(getOpCtx(), _params.direction == 1);
+ _cursor = _iam->newCursor(getOpCtx(), _params.scanDirection == 1);
kv = _cursor->seek(_seekPoint);
} catch (const WriteConflictException&) {
*out = WorkingSet::INVALID_ID;
@@ -119,7 +113,7 @@ PlanStage::StageState DistinctScan::doWork(WorkingSetID* out) {
WorkingSetID id = _workingSet->allocate();
WorkingSetMember* member = _workingSet->get(id);
member->recordId = kv->loc;
- member->keyData.push_back(IndexKeyDatum(_descriptor->keyPattern(), kv->key, _iam));
+ member->keyData.push_back(IndexKeyDatum(_params.keyPattern, kv->key, _iam));
_workingSet->transitionToRecordIdAndIdx(id);
*out = id;
diff --git a/src/mongo/db/exec/distinct_scan.h b/src/mongo/db/exec/distinct_scan.h
index 44ae137dac6..debec3bb30e 100644
--- a/src/mongo/db/exec/distinct_scan.h
+++ b/src/mongo/db/exec/distinct_scan.h
@@ -42,16 +42,51 @@ class IndexAccessMethod;
class IndexDescriptor;
class WorkingSet;
-// TODO SERVER-36517: keyPattern, indexName and multikeyPaths info should be provided explicitly
-// here and adopted by DistinctScan, rather than being resolved via the IndexDescriptor.
struct DistinctParams {
- DistinctParams() : descriptor(NULL), direction(1), fieldNo(0) {}
+ DistinctParams(const IndexDescriptor& descriptor,
+ std::string indexName,
+ BSONObj keyPattern,
+ MultikeyPaths multikeyPaths,
+ bool multikey)
+ : accessMethod(descriptor.getIndexCatalog()->getIndex(&descriptor)),
+ name(std::move(indexName)),
+ keyPattern(std::move(keyPattern)),
+ multikeyPaths(std::move(multikeyPaths)),
+ isMultiKey(multikey),
+ isSparse(descriptor.isSparse()),
+ isUnique(descriptor.unique()),
+ isPartial(descriptor.isPartial()),
+ version(descriptor.version()),
+ collation(descriptor.infoObj()
+ .getObjectField(IndexDescriptor::kCollationFieldName)
+ .getOwned()) {
+ invariant(accessMethod);
+ }
+
+ DistinctParams(OperationContext* opCtx, const IndexDescriptor& descriptor)
+ : DistinctParams(descriptor,
+ descriptor.indexName(),
+ descriptor.keyPattern(),
+ descriptor.getMultikeyPaths(opCtx),
+ descriptor.isMultikey(opCtx)) {}
+
+ const IndexAccessMethod* accessMethod;
+ std::string name;
+
+ BSONObj keyPattern;
+
+ MultikeyPaths multikeyPaths;
+ bool isMultiKey;
+
+ bool isSparse;
+ bool isUnique;
+ bool isPartial;
- // What index are we traversing?
- const IndexDescriptor* descriptor;
+ IndexDescriptor::IndexVersion version;
- // And in what direction?
- int direction;
+ BSONObj collation;
+
+ int scanDirection{1};
// What are the bounds?
IndexBounds bounds;
@@ -61,7 +96,7 @@ struct DistinctParams {
// If we have an index {a:1, b:1} we could use it to distinct over either 'a' or 'b'.
// If we distinct over 'a' the position is 0.
// If we distinct over 'b' the position is 1.
- int fieldNo;
+ int fieldNo{0};
};
/**
@@ -75,7 +110,7 @@ struct DistinctParams {
*/
class DistinctScan final : public PlanStage {
public:
- DistinctScan(OperationContext* opCtx, const DistinctParams& params, WorkingSet* workingSet);
+ DistinctScan(OperationContext* opCtx, DistinctParams params, WorkingSet* workingSet);
StageState doWork(WorkingSetID* out) final;
bool isEOF() final;
@@ -95,18 +130,17 @@ public:
static const char* kStageType;
private:
+ // The parameters used to configure this DistinctScan stage.
+ DistinctParams _params;
+
// The WorkingSet we annotate with results. Not owned by us.
WorkingSet* _workingSet;
- // Index access.
- const IndexDescriptor* _descriptor; // owned by Collection -> IndexCatalog
- const IndexAccessMethod* _iam; // owned by Collection -> IndexCatalog
+ const IndexAccessMethod* _iam;
// The cursor we use to navigate the tree.
std::unique_ptr<SortedDataInterface::Cursor> _cursor;
- DistinctParams _params;
-
// _checker gives us our start key and ensures we stay in bounds.
IndexBoundsChecker _checker;
IndexSeekPoint _seekPoint;
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
index 161b73386dc..572d7007b39 100644
--- a/src/mongo/db/query/get_executor.cpp
+++ b/src/mongo/db/query/get_executor.cpp
@@ -66,6 +66,7 @@
#include "mongo/db/query/plan_executor.h"
#include "mongo/db/query/planner_access.h"
#include "mongo/db/query/planner_analysis.h"
+#include "mongo/db/query/planner_wildcard_helpers.h"
#include "mongo/db/query/query_knobs.h"
#include "mongo/db/query/query_planner.h"
#include "mongo/db/query/query_planner_common.h"
@@ -117,6 +118,7 @@ void filterAllowedIndexEntries(const AllowedIndicesFilter& allowedIndicesFilter,
}
namespace {
+namespace wcp = ::mongo::wildcard_planning;
// The body is below in the "count hack" section but getExecutor calls it.
bool turnIxscanIntoCount(QuerySolution* soln);
} // namespace
@@ -1335,6 +1337,22 @@ bool turnIxscanIntoDistinctIxscan(QuerySolution* soln,
indexScanNode = static_cast<IndexScanNode*>(fetchNode->children[0]);
}
+ if (indexScanNode->index.type == IndexType::INDEX_WILDCARD) {
+ // If the query is on a field other than the distinct key, we may have generated a $** plan
+ // which does not actually contain the distinct key field.
+ if (field != std::next(indexScanNode->index.keyPattern.begin())->fieldName()) {
+ return false;
+ }
+ // If the query includes object bounds, we cannot turn this IXSCAN into a DISTINCT_SCAN.
+ // Wildcard indexes contain multiple keys per object, one for each subpath in ascending
+ // (Path, Value, RecordId) order. If the distinct fields in two successive documents are
+ // objects with the same leaf path values but in different field order, e.g. {a: 1, b: 2}
+ // and {b: 2, a: 1}, we would therefore only return the first document and skip the other.
+ if (wcp::isWildcardObjectSubpathScan(indexScanNode)) {
+ return false;
+ }
+ }
+
// An additional filter must be applied to the data in the key, so we can't just skip
// all the keys with a given value; we must examine every one to find the one that (may)
// pass the filter.
@@ -1440,16 +1458,24 @@ namespace {
QueryPlannerParams fillOutPlannerParamsForDistinct(OperationContext* opCtx,
Collection* collection,
size_t plannerOptions,
- const std::string& distinctKey) {
+ const ParsedDistinct& parsedDistinct) {
QueryPlannerParams plannerParams;
plannerParams.options = QueryPlannerParams::NO_TABLE_SCAN | plannerOptions;
IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(opCtx, false);
+ auto query = parsedDistinct.getQuery()->getQueryRequest().getFilter();
while (ii.more()) {
const IndexDescriptor* desc = ii.next();
IndexCatalogEntry* ice = ii.catalogEntry(desc);
- if (desc->keyPattern().hasField(distinctKey)) {
+ if (desc->keyPattern().hasField(parsedDistinct.getKey())) {
plannerParams.indices.push_back(indexEntryFromIndexCatalogEntry(opCtx, *ice));
+ } else if (desc->getIndexType() == IndexType::INDEX_WILDCARD && !query.isEmpty()) {
+ // Check whether the $** projection captures the field over which we are distinct-ing.
+ const auto proj = WildcardKeyGenerator::createProjectionExec(desc->keyPattern(),
+ desc->pathProjection());
+ if (proj->applyProjectionToOneField(parsedDistinct.getKey())) {
+ plannerParams.indices.push_back(indexEntryFromIndexCatalogEntry(opCtx, *ice));
+ }
}
}
@@ -1485,9 +1511,8 @@ Status getExecutorForSimpleDistinct(OperationContext* opCtx,
invariant(queryOrExecutor->cq);
invariant(!queryOrExecutor->executor);
- // If there's no query, we can just distinct-scan one of the indices.
- // Not every index in plannerParams.indices may be suitable. Refer to
- // getDistinctNodeIndex().
+ // If there's no query, we can just distinct-scan one of the indices. Not every index in
+ // plannerParams.indices may be suitable. Refer to getDistinctNodeIndex().
size_t distinctNodeIndex = 0;
if (!parsedDistinct->getQuery()->getQueryRequest().getFilter().isEmpty() ||
!parsedDistinct->getQuery()->getQueryRequest().getSort().isEmpty() ||
@@ -1632,8 +1657,8 @@ StatusWith<unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutorDistinct(
// We go through normal planning (with limited parameters) to see if we can produce
// a soln with the above properties.
- auto plannerParams = fillOutPlannerParamsForDistinct(
- opCtx, collection, plannerOptions, parsedDistinct->getKey());
+ auto plannerParams =
+ fillOutPlannerParamsForDistinct(opCtx, collection, plannerOptions, *parsedDistinct);
const ExtensionsCallbackReal extensionsCallback(opCtx, &collection->ns());
@@ -1657,8 +1682,8 @@ StatusWith<unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutorDistinct(
auto qr = stdx::make_unique<QueryRequest>(parsedDistinct->getQuery()->getQueryRequest());
// Applying a projection allows the planner to try to give us covered plans that we can turn
- // into the projection hack. getDistinctProjection deals with .find() projection semantics
- // (ie _id:1 being implied by default).
+ // into the projection hack. The getDistinctProjection() function deals with .find() projection
+ // semantics (ie _id:1 being implied by default).
if (qr->getProj().isEmpty()) {
BSONObj projection = getDistinctProjection(parsedDistinct->getKey());
qr->setProj(projection);
diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp
index 0962ded1297..6fcf5e8ceb1 100644
--- a/src/mongo/db/query/index_bounds_builder.cpp
+++ b/src/mongo/db/query/index_bounds_builder.cpp
@@ -57,7 +57,7 @@ namespace mongo {
namespace {
-namespace app = ::mongo::wildcard_planning;
+namespace wcp = ::mongo::wildcard_planning;
// Helper for checking that an OIL "appears" to be ascending given one interval.
void assertOILIsAscendingLocally(const vector<Interval>& intervals, size_t idx) {
@@ -345,7 +345,7 @@ void IndexBoundsBuilder::translate(const MatchExpression* expr,
// adjusted regardless of the predicate. Having filled out the initial bounds, we apply any
// necessary changes to the tightness here.
if (index.type == IndexType::INDEX_WILDCARD) {
- *tightnessOut = app::translateWildcardIndexBoundsAndTightness(index, *tightnessOut, oilOut);
+ *tightnessOut = wcp::translateWildcardIndexBoundsAndTightness(index, *tightnessOut, oilOut);
}
}
diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp
index 6cc93126dad..7bcb08f0832 100644
--- a/src/mongo/db/query/planner_access.cpp
+++ b/src/mongo/db/query/planner_access.cpp
@@ -57,7 +57,7 @@ namespace {
using namespace mongo;
-namespace app = ::mongo::wildcard_planning;
+namespace wcp = ::mongo::wildcard_planning;
namespace dps = ::mongo::dotted_path_support;
/**
@@ -530,79 +530,6 @@ void QueryPlannerAccess::finishTextNode(QuerySolutionNode* node, const IndexEntr
tn->indexPrefix = prefixBob.obj();
}
-void QueryPlannerAccess::finishWildcardIndexScanNode(QuerySolutionNode* node,
- const IndexEntry& plannerIndex) {
- // We should only ever reach this point if we are an IndexScanNode for a $** index.
- invariant(plannerIndex.type == IndexType::INDEX_WILDCARD);
- invariant(node && node->getType() == STAGE_IXSCAN);
-
- // Sanity check the QuerySolutionNode's copy of the IndexEntry.
- IndexScanNode* scan = static_cast<IndexScanNode*>(node);
- invariant(scan->index.multikeyPaths.size() == 1);
- invariant(scan->index == plannerIndex);
-
- // Obtain some references to make the remainder of this function more legible.
- auto& bounds = scan->bounds;
- auto& index = scan->index;
-
- // For $** indexes, the IndexEntry key pattern is {'path.to.field': ±1} but the actual keys in
- // the index are of the form {'$_path': ±1, 'path.to.field': ±1}, where the value of the first
- // field in each key is 'path.to.field'. We push a new entry into the bounds vector for the
- // leading '$_path' bound here. We also push corresponding fields into the IndexScanNode's
- // keyPattern and its multikeyPaths vector.
- invariant(plannerIndex.keyPattern.nFields() == 1);
- invariant(bounds.fields.size() == 1);
- invariant(!bounds.fields.front().name.empty());
- index.multikeyPaths.insert(index.multikeyPaths.begin(), std::set<std::size_t>{});
- bounds.fields.insert(bounds.fields.begin(), {"$_path"});
- index.keyPattern =
- BSON("$_path" << index.keyPattern.firstElement() << index.keyPattern.firstElement());
-
- // Create a FieldRef to perform any necessary manipulations on the query path string.
- FieldRef queryPath{plannerIndex.keyPattern.firstElementFieldName()};
- auto& multikeyPaths = index.multikeyPaths.back();
-
- // Helper function to check whether the final path component in 'queryPath' is an array index.
- const auto lastFieldIsArrayIndex = [&multikeyPaths](const auto& queryPath) {
- return (queryPath.numParts() > 1u && multikeyPaths.count(queryPath.numParts() - 2u) &&
- queryPath.isNumericPathComponentStrict(queryPath.numParts() - 1u));
- };
-
- // If the bounds intersect with any objects, we must add bounds that encompass all its
- // subpaths, specifically the interval ["path.","path/") on "$_path".
- const bool requiresSubpathBounds = app::requiresSubpathBounds(bounds.fields.back());
-
- // For bounds which contain objects, we build a range interval on all subpaths of the query
- // path(s). We must therefore trim any trailing array indices from the query path before
- // generating the fieldname-or-array power set, in order to avoid overlapping the final set of
- // bounds. For instance, the untrimmed query path 'a.0' will produce paths 'a' and 'a.0' if 'a'
- // is multikey, and so we would end up with bounds [['a','a'], ['a.','a/'], ['a.0','a.0'],
- // ['a.0.','a.0/']]. The latter two are subsets of the ['a.', 'a/'] interval.
- while (requiresSubpathBounds && lastFieldIsArrayIndex(queryPath)) {
- queryPath.removeLastPart();
- }
-
- // Account for fieldname-or-array-index semantics. $** indexes do not explicitly encode array
- // indices in their keys, so if this query traverses one or more multikey fields via an array
- // index (e.g. query 'a.0.b' where 'a' is an array), then we must generate bounds on all array-
- // and non-array permutations of the path in order to produce INEXACT_FETCH bounds.
- auto paths = app::generateFieldNameOrArrayIndexPathSet(multikeyPaths, queryPath);
-
- // Add a $_path point-interval for each path that needs to be traversed in the index. If the
- // bounds on these paths include objects, then for each applicable path we must add a range
- // interval on all its subpaths, i.e. ["path.","path/").
- static const char subPathStart = '.', subPathEnd = static_cast<char>('.' + 1);
- auto& pathIntervals = bounds.fields.front().intervals;
- for (const auto& fieldPath : paths) {
- auto path = fieldPath.dottedField().toString();
- pathIntervals.push_back(IndexBoundsBuilder::makePointInterval(path));
- if (requiresSubpathBounds) {
- pathIntervals.push_back(IndexBoundsBuilder::makeRangeInterval(
- path + subPathStart, path + subPathEnd, BoundInclusion::kIncludeStartKeyOnly));
- }
- }
-}
-
bool QueryPlannerAccess::orNeedsFetch(const ScanBuildingState* scanState) {
if (scanState->loosestBounds == IndexBoundsBuilder::EXACT) {
return false;
@@ -675,7 +602,7 @@ void QueryPlannerAccess::finishLeafNode(QuerySolutionNode* node, const IndexEntr
// If this is a $** index, update and populate the keyPattern, bounds, and multikeyPaths.
if (index.type == IndexType::INDEX_WILDCARD) {
- finishWildcardIndexScanNode(node, index);
+ wcp::finalizeWildcardIndexScanConfiguration(nodeIndex, bounds);
}
// Find the first field in the scan's bounds that was not filled out.
diff --git a/src/mongo/db/query/planner_access.h b/src/mongo/db/query/planner_access.h
index fec5ef6b793..e2097f2ab62 100644
--- a/src/mongo/db/query/planner_access.h
+++ b/src/mongo/db/query/planner_access.h
@@ -388,8 +388,6 @@ private:
static void finishTextNode(QuerySolutionNode* node, const IndexEntry& index);
- static void finishWildcardIndexScanNode(QuerySolutionNode* node, const IndexEntry& index);
-
/**
* Add the filter 'match' to the query solution node 'node'. Takes
* ownership of 'match'.
diff --git a/src/mongo/db/query/planner_ixselect.cpp b/src/mongo/db/query/planner_ixselect.cpp
index 4fa9142c12c..62771cbf8b6 100644
--- a/src/mongo/db/query/planner_ixselect.cpp
+++ b/src/mongo/db/query/planner_ixselect.cpp
@@ -52,82 +52,12 @@ namespace mongo {
namespace {
-namespace app = ::mongo::wildcard_planning;
+namespace wcp = ::mongo::wildcard_planning;
std::size_t numPathComponents(StringData path) {
return FieldRef{path}.numParts();
}
-/**
- * Given a single wildcard index, and a set of fields which are being queried, create 'mock'
- * IndexEntry for each of the appropriate fields.
- */
-void expandIndex(const IndexEntry& wildcardIndex,
- const stdx::unordered_set<std::string>& fields,
- vector<IndexEntry>* out) {
- invariant(out);
- invariant(wildcardIndex.type == INDEX_WILDCARD);
- // Should only have one field of the form {"path.$**" : 1}.
- invariant(wildcardIndex.keyPattern.nFields() == 1);
-
- // $** indexes do not keep the multikey metadata inside the index catalog entry, as the amount
- // of metadata is not bounded. We do not expect IndexEntry objects for $** indexes to have a
- // fixed-size vector of multikey metadata until after they are expanded.
- invariant(wildcardIndex.multikeyPaths.empty());
-
- const auto projExec = WildcardKeyGenerator::createProjectionExec(
- wildcardIndex.keyPattern, wildcardIndex.infoObj.getObjectField("wildcardProjection"));
-
- const auto projectedFields = projExec->applyProjectionToFields(fields);
-
- const auto& includedPaths = projExec->getExhaustivePaths();
-
- out->reserve(out->size() + projectedFields.size());
- for (auto&& fieldName : projectedFields) {
- // Convert string 'fieldName' into a FieldRef, to better facilitate the subsequent checks.
- const auto queryPath = FieldRef{fieldName};
- // $** indices hold multikey metadata directly in the index keys, rather than in the index
- // catalog. In turn, the index key data is used to produce a set of multikey paths
- // in-memory. Here we convert this set of all multikey paths into a MultikeyPaths vector
- // which will indicate to the downstream planning code which components of 'fieldName' are
- // multikey.
- auto multikeyPaths = app::buildMultiKeyPathsForExpandedWildcardIndexEntry(
- queryPath, wildcardIndex.multikeyPathSet);
-
- // Check whether a query on the current fieldpath is answerable by the $** index, given any
- // numerical path components that may be present in the path string.
- if (!app::validateNumericPathComponents(multikeyPaths, includedPaths, queryPath)) {
- continue;
- }
-
- // The expanded IndexEntry is only considered multikey if the particular path represented by
- // this IndexEntry has a multikey path component. For instance, suppose we have index {$**:
- // 1} with "a" as the only multikey path. If we have a query on paths "a.b" and "c.d", then
- // we will generate two expanded index entries: one for "a.b" and "c.d". The "a.b" entry
- // will be marked as multikey because "a" is multikey, whereas the "c.d" entry will not be
- // marked as multikey.
- invariant(multikeyPaths.size() == 1u);
- const bool isMultikey = !multikeyPaths[0].empty();
-
- IndexEntry entry(BSON(fieldName << wildcardIndex.keyPattern.firstElement()),
- IndexType::INDEX_WILDCARD,
- isMultikey,
- std::move(multikeyPaths),
- // Expanded index entries always use the fixed-size multikey paths
- // representation, so we purposefully discard 'multikeyPathSet'.
- {},
- true, // sparse
- false, // unique
- {wildcardIndex.identifier.catalogName, fieldName},
- wildcardIndex.filterExpr,
- wildcardIndex.infoObj,
- wildcardIndex.collator);
-
- invariant("$_path"_sd != fieldName);
- out->push_back(std::move(entry));
- }
-}
-
bool canUseWildcardIndex(BSONElement elt, MatchExpression::MatchType matchType) {
if (elt.type() == BSONType::Object) {
// $** indices break nested objects into separate keys, which means we can't naturally
@@ -378,7 +308,7 @@ std::vector<IndexEntry> QueryPlannerIXSelect::expandIndexes(
std::vector<IndexEntry> out;
for (auto&& entry : relevantIndices) {
if (entry.type == IndexType::INDEX_WILDCARD) {
- expandIndex(entry, fields, &out);
+ wcp::expandWildcardIndexEntry(entry, fields, &out);
} else {
out.push_back(entry);
}
diff --git a/src/mongo/db/query/planner_wildcard_helpers.cpp b/src/mongo/db/query/planner_wildcard_helpers.cpp
index d928fc47638..1c0610823be 100644
--- a/src/mongo/db/query/planner_wildcard_helpers.cpp
+++ b/src/mongo/db/query/planner_wildcard_helpers.cpp
@@ -35,27 +35,14 @@
#include <vector>
#include "mongo/bson/util/builder.h"
+#include "mongo/db/index/wildcard_key_generator.h"
+#include "mongo/db/query/index_bounds.h"
#include "mongo/util/log.h"
namespace mongo {
namespace wildcard_planning {
namespace {
/**
- * Returns an OIL that called 'name', with just the interval [{}, []).
- */
-OrderedIntervalList getAllObjectsOIL(const std::string& name) {
- OrderedIntervalList oil(name);
- BSONObjBuilder bob;
- bob.appendMinForType("", BSONType::Object);
- bob.appendMaxForType("", BSONType::Object);
- oil.intervals.push_back(IndexBoundsBuilder::makeRangeInterval(
- bob.obj(), BoundInclusion::kExcludeBothStartAndEndKeys));
-
- return oil;
-}
-
-
-/**
* Compares the path 'fieldNameOrArrayIndexPath' to 'staticComparisonPath', ignoring any array
* indices present in the former if they are not present in the latter. The 'multikeyPathComponents'
* set contains the path positions that are known to be arrays; only numerical path components that
@@ -114,7 +101,9 @@ bool fieldNameOrArrayIndexPathSetContains(const std::set<FieldRef>& multikeyPath
/**
* Returns the positions of all path components in 'queryPath' that may be interpreted as array
* indices by the query system. We obtain this list by finding all multikey path components that
- * have a numerical path component immediately after.
+ * have a numerical path component immediately after. Note that the 'queryPath' argument may be a
+ * prefix of the full path used to generate 'multikeyPaths', and so we must avoid checking path
+ * components beyond the end of 'queryPath'.
*/
std::vector<size_t> findArrayIndexPathComponents(const std::set<std::size_t>& multikeyPaths,
const FieldRef& queryPath) {
@@ -149,8 +138,11 @@ FieldRef pathWithoutSpecifiedComponents(const FieldRef& path,
}
return FieldRef{ss.str()};
}
-} // namespace
+/**
+ * Returns a MultikeyPaths which indicates which components of 'indexedPath' are multikey, by
+ * looking up multikeyness in 'multikeyPathSet'.
+ */
MultikeyPaths buildMultiKeyPathsForExpandedWildcardIndexEntry(
const FieldRef& indexedPath, const std::set<FieldRef>& multikeyPathSet) {
FieldRef pathToLookup;
@@ -193,44 +185,25 @@ std::set<FieldRef> generateFieldNameOrArrayIndexPathSet(const std::set<std::size
return paths;
}
-BoundsTightness translateWildcardIndexBoundsAndTightness(const IndexEntry& index,
- BoundsTightness tightnessIn,
- OrderedIntervalList* oil) {
- // This method should only ever be called for a $** IndexEntry. We expect to be called during
- // planning, *before* finishWildcardIndexScanNode has been invoked. The IndexEntry should thus
- // only have a single keyPattern field and multikeyPath entry, but this is sufficient to
- // determine whether it will be necessary to adjust the tightness.
- invariant(index.type == IndexType::INDEX_WILDCARD);
- invariant(index.keyPattern.nFields() == 1);
- invariant(index.multikeyPaths.size() == 1);
- invariant(oil);
-
- auto* intervals = &oil->intervals;
-
- // If our bounds include any objects -- that is, anything in the range [{}, []) -- then we will
- // need to use "subpath bounds", and include the additional interval ["path.","path/") on
- // "$_path".
- if (requiresSubpathBounds(*oil) && !intervals->front().isMinToMax()) {
- // If we require subpath bounds (and aren't already using bounds from MinKey to MaxKey),
- // then we must use [MinKey, MaxKey] bounds for the value. For example, say our value
- // bounds are [[MinKey, undefined), (null, MaxKey]] (the bounds generated by a {$ne: null}
- // query). Our result set should include documents such as {a: {b: null}}. However, the
- // index key for this object will be {"": "a.b", "": null}. This means if we were to use
- // the original bounds, we would miss this document. We therefore expand the value bounds
- // to included everything. We must also adjust the tightness to be inexact, to avoid false
- // positives.
- *intervals = {IndexBoundsBuilder::allValues()};
- return BoundsTightness::INEXACT_FETCH;
- }
-
- // If the query passes through any array indices, we must always fetch and filter the documents.
- const auto arrayIndicesTraversedByQuery = findArrayIndexPathComponents(
- index.multikeyPaths.front(), FieldRef{index.keyPattern.firstElementFieldName()});
-
- // If the list of array indices we traversed is non-empty, set the tightness to INEXACT_FETCH.
- return (arrayIndicesTraversedByQuery.empty() ? tightnessIn : BoundsTightness::INEXACT_FETCH);
-}
-
+/**
+ * Returns false if 'queryPath' includes any numerical path components which render it unanswerable
+ * by the $** index, true otherwise. Specifically, the $** index cannot answer the query if either
+ * of the following scenarios occur:
+ *
+ * - The query path traverses through more than 'kWildcardMaxArrayIndexTraversalDepth' nested arrays
+ * via explicit array indices.
+ * - The query path lies along a $** projection through an array index.
+ *
+ * For an example of the latter case, say that our query path is 'a.0.b', our projection includes
+ * {'a.0': 1}, and 'a' is multikey. The query semantics will match 'a.0.b' by either field name or
+ * array index against the documents, but because the $** index always projects numeric path
+ * components strictly as field names, the projection {'a.0': 1} cannot correctly support this
+ * query.
+ *
+ * To see why, consider the document {a: [1, 2, 3]}. Query {'a.0': 1} will match this document, but
+ * the projection {'a.0': 1} will produce output document {a: []}, and so we will not index any of
+ * the values [1, 2, 3] for 'a'.
+ */
bool validateNumericPathComponents(const MultikeyPaths& multikeyPaths,
const std::set<FieldRef>& includedPaths,
const FieldRef& queryPath) {
@@ -275,11 +248,228 @@ bool validateNumericPathComponents(const MultikeyPaths& multikeyPaths,
return arrayIndices[0] >= includePath->numParts();
}
-bool requiresSubpathBounds(const OrderedIntervalList& oil) {
- // For intersectize() to work, the OILs' names must match.
- OrderedIntervalList allObjects = getAllObjectsOIL(oil.name);
- IndexBoundsBuilder::intersectize(oil, &allObjects);
- return !allObjects.intervals.empty();
+/**
+ * Queries whose bounds overlap the Object type bracket may require special handling, since the $**
+ * index does not index complete objects but instead only contains the leaves along each of its
+ * subpaths. Since we ban all object-value queries except those on the empty object {}, this will
+ * typically only be relevant for bounds involving MinKey and MaxKey, such as {$exists: true}.
+ */
+bool boundsOverlapObjectTypeBracket(const OrderedIntervalList& oil) {
+ // Create an Interval representing the subrange ({}, []) of the object type bracket. We exclude
+ // both ends of the bracket because $** indexes support queries on empty objects and arrays.
+ static const Interval objectTypeBracketBounds = []() {
+ BSONObjBuilder objBracketBounds;
+ objBracketBounds.appendMinForType("", BSONType::Object);
+ objBracketBounds.appendMaxForType("", BSONType::Object);
+ return IndexBoundsBuilder::makeRangeInterval(objBracketBounds.obj(),
+ BoundInclusion::kExcludeBothStartAndEndKeys);
+ }();
+
+ // Determine whether any of the ordered intervals overlap with the object type bracket. If the
+ // current interval precedes the bracket, we must check the next interval in sequence. If the
+ // interval succeeds the bracket then we can stop checking, since the ordered list is never
+ // descending. If we neither precede nor succeed the object type bracket, then we overlap it.
+ invariant(oil.computeDirection() != Interval::Direction::kDirectionDescending);
+ for (const auto& interval : oil.intervals) {
+ switch (interval.compare(objectTypeBracketBounds)) {
+ case Interval::IntervalComparison::INTERVAL_PRECEDES_COULD_UNION:
+ case Interval::IntervalComparison::INTERVAL_PRECEDES:
+ // Break out of the switch and proceed to check the next interval.
+ break;
+
+ case Interval::IntervalComparison::INTERVAL_SUCCEEDS:
+ return false;
+
+ default:
+ return true;
+ }
+ }
+ // If we're here, then all the OIL's bounds precede the object type bracket.
+ return false;
+}
+} // namespace
+
+void expandWildcardIndexEntry(const IndexEntry& wildcardIndex,
+ const stdx::unordered_set<std::string>& fields,
+ std::vector<IndexEntry>* out) {
+ invariant(out);
+ invariant(wildcardIndex.type == INDEX_WILDCARD);
+ // Should only have one field of the form {"path.$**" : 1}.
+ invariant(wildcardIndex.keyPattern.nFields() == 1);
+ invariant(wildcardIndex.keyPattern.firstElement().fieldNameStringData().endsWith("$**"));
+
+ // $** indexes do not keep the multikey metadata inside the index catalog entry, as the amount
+ // of metadata is not bounded. We do not expect IndexEntry objects for $** indexes to have a
+ // fixed-size vector of multikey metadata until after they are expanded.
+ invariant(wildcardIndex.multikeyPaths.empty());
+
+ const auto projExec = WildcardKeyGenerator::createProjectionExec(
+ wildcardIndex.keyPattern, wildcardIndex.infoObj.getObjectField("wildcardProjection"));
+
+ const auto projectedFields = projExec->applyProjectionToFields(fields);
+
+ const auto& includedPaths = projExec->getExhaustivePaths();
+
+ out->reserve(out->size() + projectedFields.size());
+ for (auto&& fieldName : projectedFields) {
+ // Convert string 'fieldName' into a FieldRef, to better facilitate the subsequent checks.
+ auto queryPath = FieldRef{fieldName};
+ // $** indices hold multikey metadata directly in the index keys, rather than in the index
+ // catalog. In turn, the index key data is used to produce a set of multikey paths
+ // in-memory. Here we convert this set of all multikey paths into a MultikeyPaths vector
+ // which will indicate to the downstream planning code which components of 'fieldName' are
+ // multikey.
+ auto multikeyPaths = buildMultiKeyPathsForExpandedWildcardIndexEntry(
+ queryPath, wildcardIndex.multikeyPathSet);
+
+ // Check whether a query on the current fieldpath is answerable by the $** index, given any
+ // numerical path components that may be present in the path string.
+ if (!validateNumericPathComponents(multikeyPaths, includedPaths, queryPath)) {
+ continue;
+ }
+
+ // The expanded IndexEntry is only considered multikey if the particular path represented by
+ // this IndexEntry has a multikey path component. For instance, suppose we have index {$**:
+ // 1} with "a" as the only multikey path. If we have a query on paths "a.b" and "c.d", then
+ // we will generate two expanded index entries: one for "a.b" and "c.d". The "a.b" entry
+ // will be marked as multikey because "a" is multikey, whereas the "c.d" entry will not be
+ // marked as multikey.
+ invariant(multikeyPaths.size() == 1u);
+ const bool isMultikey = !multikeyPaths[0].empty();
+
+ IndexEntry entry(BSON(fieldName << wildcardIndex.keyPattern.firstElement()),
+ IndexType::INDEX_WILDCARD,
+ isMultikey,
+ std::move(multikeyPaths),
+ // Expanded index entries always use the fixed-size multikey paths
+ // representation, so we purposefully discard 'multikeyPathSet'.
+ {},
+ true, // sparse
+ false, // unique
+ {wildcardIndex.identifier.catalogName, fieldName},
+ wildcardIndex.filterExpr,
+ wildcardIndex.infoObj,
+ wildcardIndex.collator);
+
+ invariant("$_path"_sd != fieldName);
+ out->push_back(std::move(entry));
+ }
+}
+
+BoundsTightness translateWildcardIndexBoundsAndTightness(const IndexEntry& index,
+ BoundsTightness tightnessIn,
+ OrderedIntervalList* oil) {
+ // This method should only ever be called for a $** IndexEntry. We expect to be called during
+ // planning, *before* finishWildcardIndexScanNode has been invoked. The IndexEntry should thus
+ // only have a single keyPattern field and multikeyPath entry, but this is sufficient to
+ // determine whether it will be necessary to adjust the tightness.
+ invariant(index.type == IndexType::INDEX_WILDCARD);
+ invariant(index.keyPattern.nFields() == 1);
+ invariant(index.multikeyPaths.size() == 1);
+ invariant(oil);
+
+ // If our bounds include any objects -- anything in the range ({}, []) -- then we will need to
+ // use subpath bounds; that is, we will add the interval ["path.","path/") at the point where we
+ // finalize the index scan. If the subpath interval is required but the bounds do not already
+ // run from MinKey to MaxKey, then we must expand them to [MinKey, MaxKey]. Consider the case
+ // where out bounds are [[MinKey, undefined), (null, MaxKey]] as generated by {$ne: null}. Our
+ // result set should include documents such as {a: {b: null}}; however, the wildcard index key
+ // for this object will be {"": "a.b", "": null}, which means that the original bounds would
+ // skip this document. We must also set the tightness to INEXACT_FETCH to avoid false positives.
+ if (boundsOverlapObjectTypeBracket(*oil) && !oil->intervals.front().isMinToMax()) {
+ oil->intervals = {IndexBoundsBuilder::allValues()};
+ return BoundsTightness::INEXACT_FETCH;
+ }
+
+ // If the query passes through any array indices, we must always fetch and filter the documents.
+ const auto arrayIndicesTraversedByQuery = findArrayIndexPathComponents(
+ index.multikeyPaths.front(), FieldRef{index.keyPattern.firstElementFieldName()});
+
+ // If the list of array indices we traversed is non-empty, set the tightness to INEXACT_FETCH.
+ return (arrayIndicesTraversedByQuery.empty() ? tightnessIn : BoundsTightness::INEXACT_FETCH);
+}
+
+void finalizeWildcardIndexScanConfiguration(IndexEntry* index, IndexBounds* bounds) {
+ // We should only ever reach this point when processing a $** index. Sanity check the arguments.
+ invariant(index && index->type == IndexType::INDEX_WILDCARD);
+ invariant(index->keyPattern.nFields() == 1);
+ invariant(index->multikeyPaths.size() == 1);
+ invariant(bounds && bounds->fields.size() == 1);
+ invariant(bounds->fields.front().name == index->keyPattern.firstElementFieldName());
+
+ // For $** indexes, the IndexEntry key pattern is {'path.to.field': ±1} but the actual keys in
+ // the index are of the form {'$_path': ±1, 'path.to.field': ±1}, where the value of the first
+ // field in each key is 'path.to.field'. We push a new entry into the bounds vector for the
+ // leading '$_path' bound here. We also push corresponding fields into the IndexScanNode's
+ // keyPattern and its multikeyPaths vector.
+ index->multikeyPaths.insert(index->multikeyPaths.begin(), std::set<std::size_t>{});
+ bounds->fields.insert(bounds->fields.begin(), {"$_path"});
+ index->keyPattern =
+ BSON("$_path" << index->keyPattern.firstElement() << index->keyPattern.firstElement());
+
+ // Create a FieldRef to perform any necessary manipulations on the query path string.
+ FieldRef queryPath{std::next(index->keyPattern.begin())->fieldNameStringData()};
+ auto& multikeyPaths = index->multikeyPaths.back();
+
+ // If the bounds overlap the object type bracket, then we must retrieve all documents which
+ // include the given path. We must therefore add bounds that encompass all its subpaths,
+ // specifically the interval ["path.","path/") on "$_path".
+ const bool requiresSubpathBounds = boundsOverlapObjectTypeBracket(bounds->fields.back());
+
+ // Helper function to check whether the final path component in 'queryPath' is an array index.
+ const auto lastFieldIsArrayIndex = [&multikeyPaths](const auto& queryPath) {
+ return (queryPath.numParts() > 1u && multikeyPaths.count(queryPath.numParts() - 2u) &&
+ queryPath.isNumericPathComponentStrict(queryPath.numParts() - 1u));
+ };
+
+ // If subpath bounds are needed, we build a range interval on all subpaths of the query path(s).
+ // We must therefore trim any trailing array indices from the query path before generating the
+ // fieldname-or-array power set, in order to avoid overlapping the final set of bounds. For
+ // instance, the untrimmed query path 'a.0' will produce paths 'a' and 'a.0' if 'a' is multikey,
+ // and so we would end up with bounds [['a','a'], ['a.','a/'], ['a.0','a.0'], ['a.0.','a.0/']].
+ // The latter two are subsets of the ['a.', 'a/'] interval.
+ while (requiresSubpathBounds && lastFieldIsArrayIndex(queryPath)) {
+ queryPath.removeLastPart();
+ }
+
+ // Account for fieldname-or-array-index semantics. $** indexes do not explicitly encode array
+ // indices in their keys, so if this query traverses one or more multikey fields via an array
+ // index (e.g. query 'a.0.b' where 'a' is an array), then we must generate bounds on all array-
+ // and non-array permutations of the path in order to produce INEXACT_FETCH bounds.
+ auto paths = generateFieldNameOrArrayIndexPathSet(multikeyPaths, queryPath);
+
+ // Add a $_path point-interval for each path that needs to be traversed in the index. If subpath
+ // bounds are required, then we must add a further range interval on ["path.","path/").
+ static const char subPathStart = '.', subPathEnd = static_cast<char>('.' + 1);
+ auto& pathIntervals = bounds->fields.front().intervals;
+ for (const auto& fieldPath : paths) {
+ auto path = fieldPath.dottedField().toString();
+ pathIntervals.push_back(IndexBoundsBuilder::makePointInterval(path));
+ if (requiresSubpathBounds) {
+ pathIntervals.push_back(IndexBoundsBuilder::makeRangeInterval(
+ path + subPathStart, path + subPathEnd, BoundInclusion::kIncludeStartKeyOnly));
+ }
+ }
+ // Ensure that the bounds' intervals are correctly aligned.
+ IndexBoundsBuilder::alignBounds(bounds, index->keyPattern);
+}
+
+bool isWildcardObjectSubpathScan(const IndexScanNode* node) {
+ // If the node is not a $** index scan, return false immediately.
+ if (!node || node->index.type != IndexType::INDEX_WILDCARD) {
+ return false;
+ }
+
+ // We expect consistent arguments, representing a $** index which has already been finalized.
+ invariant(node->index.keyPattern.nFields() == 2);
+ invariant(node->index.multikeyPaths.size() == 2);
+ invariant(node->bounds.fields.size() == 2);
+ invariant(node->bounds.fields.front().name == node->index.keyPattern.firstElementFieldName());
+ invariant(node->bounds.fields.back().name ==
+ std::next(node->index.keyPattern.begin())->fieldName());
+
+ // Check the bounds on the query field for any intersections with the object type bracket.
+ return boundsOverlapObjectTypeBracket(node->bounds.fields.back());
}
} // namespace wildcard_planning
diff --git a/src/mongo/db/query/planner_wildcard_helpers.h b/src/mongo/db/query/planner_wildcard_helpers.h
index ddd0185d7c3..f05c475295f 100644
--- a/src/mongo/db/query/planner_wildcard_helpers.h
+++ b/src/mongo/db/query/planner_wildcard_helpers.h
@@ -33,6 +33,7 @@
#include "mongo/db/field_ref.h"
#include "mongo/db/index/multikey_paths.h"
#include "mongo/db/query/index_bounds_builder.h"
+#include "mongo/db/query/index_entry.h"
#include "mongo/db/query/query_solution.h"
namespace mongo {
@@ -42,27 +43,17 @@ using BoundsTightness = IndexBoundsBuilder::BoundsTightness;
/**
* Specifies the maximum depth of nested array indices through which a query may traverse before a
- * $** declines to answer it, due to the exponential complexity of the bounds required.
+ * $** index declines to answer it, due to the exponential complexity of the bounds required.
*/
static constexpr size_t kWildcardMaxArrayIndexTraversalDepth = 8u;
/**
- * Returns a MultikeyPaths which indicates which components of 'indexedPath' are multikey, by
- * looking up multikeyness in 'multikeyPathSet'.
+ * Given a single wildcard index, and a set of fields which are being queried, create a 'mock'
+ * IndexEntry for each of the query fields and add them into the provided vector.
*/
-MultikeyPaths buildMultiKeyPathsForExpandedWildcardIndexEntry(
- const FieldRef& indexedPath, const std::set<FieldRef>& multikeyPathSet);
-
-/**
- * Given a query path and the set of known multikey path components, this function outputs the power
- * set of paths generated by considering each relevant numerical path component first as a literal
- * fieldname, and then as an array index. In other words, for each numerical path component that
- * immediately follows a multikey field, we will output one path with the numerical component and
- * one path without. If both 'a' and 'b' are multikey, then the queryPath 'a.0.b.1.c' will produce
- * ['a.0.b.1.c', 'a.0.b.c', 'a.b.1.c', 'a.b.c'].
- */
-std::set<FieldRef> generateFieldNameOrArrayIndexPathSet(const std::set<std::size_t>& multikeyPaths,
- const FieldRef& queryPath);
+void expandWildcardIndexEntry(const IndexEntry& wildcardIndex,
+ const stdx::unordered_set<std::string>& fields,
+ std::vector<IndexEntry>* out);
/**
* In certain circumstances, it is necessary to adjust the bounds and tightness generated by the
@@ -76,27 +67,21 @@ BoundsTightness translateWildcardIndexBoundsAndTightness(const IndexEntry& index
OrderedIntervalList* oil);
/**
- * Returns false if 'queryPath' includes any numerical path components which render it unanswerable
- * by the $** index, true otherwise. Specifically, the $** index cannot answer the query if either
- * of the following scenarios occur:
- *
- * - The query path traverses through more than 'kWildcardMaxArrayIndexTraversalDepth' nested arrays
- * via explicit array indices.
- * - The query path lies along a $** projection through an array index.
- *
- * For an example of the latter case, say that our query path is 'a.0.b', our projection includes
- * {'a.0': 1}, and 'a' is multikey. The query semantics will match 'a.0.b' by either field name or
- * array index against the documents, but because the $** index always projects numeric path
- * components strictly as field names, the projection {'a.0': 1} cannot correctly support this
- * query.
- *
- * To see why, consider the document {a: [1, 2, 3]}. Query {'a.0': 1} will match this document, but
- * the projection {'a.0': 1} will produce output document {a: []}, and so we will not index any of
- * the values [1, 2, 3] for 'a'.
+ * During planning, the expanded $** IndexEntry's keyPattern and bounds are in the single-field
+ * format {'path': 1}. Once planning is complete, it is necessary to call this method in order to
+ * prepare the IndexEntry and bounds for execution. This function performs the following actions:
+ * - Converts the keyPattern to the {$_path: 1, "path": 1} format expected by the $** index.
+ * - Adds a new entry '$_path' to the bounds vector, and computes the necessary intervals on it.
+ * - Adds a new, empty entry to 'multikeyPaths' for '$_path'.
+ */
+void finalizeWildcardIndexScanConfiguration(IndexEntry* index, IndexBounds* bounds);
+
+/**
+ * Returns true if the given IndexScanNode is a $** scan whose bounds overlap the object type
+ * bracket. Scans whose bounds include the object bracket have certain limitations for planning
+ * purposes; for instance, they cannot provide covered results or be converted to DISTINCT_SCAN.
*/
-bool validateNumericPathComponents(const MultikeyPaths& multikeyPaths,
- const std::set<FieldRef>& includedPaths,
- const FieldRef& queryPath);
+bool isWildcardObjectSubpathScan(const IndexScanNode* node);
/**
* Return true if the intervals on the 'value' field will include subobjects, and
diff --git a/src/mongo/db/query/query_planner_wildcard_index_test.cpp b/src/mongo/db/query/query_planner_wildcard_index_test.cpp
index 5bc23eb2892..025f1ce9fa5 100644
--- a/src/mongo/db/query/query_planner_wildcard_index_test.cpp
+++ b/src/mongo/db/query/query_planner_wildcard_index_test.cpp
@@ -28,10 +28,14 @@
#include "mongo/platform/basic.h"
+#include "mongo/db/query/planner_wildcard_helpers.h"
#include "mongo/db/query/query_planner_test_fixture.h"
+#include "mongo/unittest/death_test.h"
namespace mongo {
+namespace wcp = ::mongo::wildcard_planning;
+
const std::string kIndexName = "indexName";
/**
@@ -77,6 +81,24 @@ protected:
};
//
+// General planning tests.
+//
+
+DEATH_TEST_F(QueryPlannerWildcardTest, CannotExpandPreExpandedWildcardIndexEntry, "Invariant") {
+ addIndex(BSON("$**" << 1));
+ ASSERT_EQ(params.indices.size(), 2U);
+
+ // Expand the $** index and add the expanded entry to the list of available indices.
+ std::vector<IndexEntry> expandedIndex;
+ wcp::expandWildcardIndexEntry(params.indices.back(), {"a"}, &expandedIndex);
+ ASSERT_EQ(expandedIndex.size(), 1U);
+ params.indices.push_back(expandedIndex.front());
+
+ // Now run a query. This will invariant when the planner expands the expanded index.
+ runQuery(fromjson("{a: 1}"));
+}
+
+//
// Null comparison and existence tests.
//
@@ -619,6 +641,10 @@ TEST_F(QueryPlannerWildcardTest, InBasic) {
"bounds: {'$_path': [['a','a',true,true]], a: [[1,1,true,true],[2,2,true,true]]}}}}}");
}
+//
+// Array tests.
+//
+
TEST_F(QueryPlannerWildcardTest, EqualsEmptyArray) {
addWildcardIndex(BSON("$**" << 1));
runQuery(fromjson("{a: []}"));
@@ -651,6 +677,10 @@ TEST_F(QueryPlannerWildcardTest, EqualsArrayWithValue) {
assertHasOnlyCollscan();
}
+//
+// $in tests.
+//
+
TEST_F(QueryPlannerWildcardTest, InEmptyArray) {
addWildcardIndex(BSON("$**" << 1));
runQuery(fromjson("{a: {$in: [[]]}}"));
@@ -696,6 +726,10 @@ TEST_F(QueryPlannerWildcardTest, InBasicOrEquivalent) {
"bounds: {'$_path': [['a','a',true,true]], a: [[1,1,true,true],[2,2,true,true]]}}}}}");
}
+//
+// Partial index tests.
+//
+
TEST_F(QueryPlannerWildcardTest, PartialIndexCanAnswerPredicateOnFilteredField) {
auto filterObj = fromjson("{a: {$gt: 0}}");
auto filterExpr = QueryPlannerTest::parseMatchExpression(filterObj);
@@ -877,6 +911,10 @@ TEST_F(QueryPlannerWildcardTest, WildcardIndexDoesNotSupplyCandidatePlanForTextS
"{fetch: {filter: {b: 10}, node: {text: {prefix: {a: 10}, search: 'banana'}}}}");
}
+//
+// Negation tests.
+//
+
TEST_F(QueryPlannerWildcardTest, WildcardDoesNotSupportNegationPredicate) {
// Wildcard indexes can't support negation queries because they are sparse, and {a: {$ne: 5}}
// will match documents which don't have an "a" field.
@@ -1005,6 +1043,10 @@ TEST_F(QueryPlannerTest, MultipleWildcardIndexesHintWithPartialFilter) {
assertNumSolutions(0U);
}
+//
+// Object comparison tests.
+//
+
TEST_F(QueryPlannerWildcardTest, WildcardIndexesDoNotSupportObjectEquality) {
addIndex(BSON("$**" << 1));
@@ -1053,6 +1095,10 @@ TEST_F(QueryPlannerWildcardTest, WildcardIndexesDoNotSupportObjectInequality) {
assertSolutionExists("{fetch: {node: {ixscan: {pattern: {$_path: 1, x: 1}}}}}");
}
+//
+// Unsupported values tests.
+//
+
TEST_F(QueryPlannerWildcardTest, WildcardIndexesDoNotSupportInWithUnsupportedValues) {
addIndex(BSON("$**" << 1));
@@ -1096,6 +1142,10 @@ TEST_F(QueryPlannerWildcardTest, WildcardIndexesDoNotSupportElemMatchObject) {
assertHasOnlyCollscan();
}
+//
+// Sorting tests.
+//
+
TEST_F(QueryPlannerWildcardTest, WildcardIndexCanProvideNonBlockingSort) {
addWildcardIndex(BSON("$**" << 1));
@@ -1767,6 +1817,4 @@ TEST_F(QueryPlannerWildcardTest, ContainedOrPushdownWorksWithWildcardIndex) {
"{$_path: [['a','a',true,true]], a:[[1,1,true,true]]}}}}}");
}
-// TODO SERVER-36517: Add testing for DISTINCT_SCAN.
-
} // namespace mongo
diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp
index 567cb958e0a..6ba95a37232 100644
--- a/src/mongo/db/query/query_solution.cpp
+++ b/src/mongo/db/query/query_solution.cpp
@@ -38,12 +38,15 @@
#include "mongo/db/query/collation/collation_index_key.h"
#include "mongo/db/query/index_bounds_builder.h"
#include "mongo/db/query/planner_analysis.h"
+#include "mongo/db/query/planner_wildcard_helpers.h"
#include "mongo/db/query/query_planner_common.h"
namespace mongo {
namespace {
+namespace wcp = ::mongo::wildcard_planning;
+
// Create an ordred interval list which represents the bounds for all BSON elements of type String,
// Object, or Array.
OrderedIntervalList buildStringBoundsOil(const std::string& keyName) {
@@ -536,6 +539,12 @@ void IndexScanNode::appendToString(mongoutils::str::stream* ss, int indent) cons
}
bool IndexScanNode::hasField(const string& field) const {
+ // A $** index whose bounds overlap the object type bracket cannot provide covering, since the
+ // index only contains the leaf nodes along each of the object's subpaths.
+ if (index.type == IndexType::INDEX_WILDCARD && wcp::isWildcardObjectSubpathScan(this)) {
+ return false;
+ }
+
// The index is multikey but does not have any path-level multikeyness information. Such indexes
// can never provide covering.
if (index.multikey && index.multikeyPaths.empty()) {
diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h
index 3eb550761c9..ffd0985a679 100644
--- a/src/mongo/db/query/query_solution.h
+++ b/src/mongo/db/query/query_solution.h
@@ -884,11 +884,13 @@ struct DistinctNode : public QuerySolutionNode {
BSONObjSet sorts;
IndexEntry index;
- int direction;
IndexBounds bounds;
+
const CollatorInterface* queryCollator;
+
// We are distinct-ing over the 'fieldNo'-th field of 'index.keyPattern'.
- int fieldNo;
+ int fieldNo{0};
+ int direction{1};
};
/**
diff --git a/src/mongo/db/query/stage_builder.cpp b/src/mongo/db/query/stage_builder.cpp
index d45f6592308..f368a4699f8 100644
--- a/src/mongo/db/query/stage_builder.cpp
+++ b/src/mongo/db/query/stage_builder.cpp
@@ -311,15 +311,22 @@ PlanStage* buildStages(OperationContext* opCtx,
return nullptr;
}
- DistinctParams params;
-
- params.descriptor = collection->getIndexCatalog()->findIndexByName(
+ auto descriptor = collection->getIndexCatalog()->findIndexByName(
opCtx, dn->index.identifier.catalogName);
- invariant(params.descriptor);
- params.direction = dn->direction;
+ invariant(descriptor);
+
+ // We use the node's internal name, keyPattern and multikey details here. For $**
+ // indexes, these may differ from the information recorded in the index's descriptor.
+ DistinctParams params{*descriptor,
+ dn->index.identifier.catalogName,
+ dn->index.keyPattern,
+ dn->index.multikeyPaths,
+ dn->index.multikey};
+
+ params.scanDirection = dn->direction;
params.bounds = dn->bounds;
params.fieldNo = dn->fieldNo;
- return new DistinctScan(opCtx, params, ws);
+ return new DistinctScan(opCtx, std::move(params), ws);
}
case STAGE_COUNT_SCAN: {
const CountScanNode* csn = static_cast<const CountScanNode*>(root);
diff --git a/src/mongo/dbtests/query_stage_distinct.cpp b/src/mongo/dbtests/query_stage_distinct.cpp
index 1fb9aad1350..d4bbd8a38b8 100644
--- a/src/mongo/dbtests/query_stage_distinct.cpp
+++ b/src/mongo/dbtests/query_stage_distinct.cpp
@@ -130,9 +130,8 @@ public:
coll->getIndexCatalog()->findIndexesByKeyPattern(&_opCtx, BSON("a" << 1), false, &indexes);
ASSERT_EQ(indexes.size(), 1U);
- DistinctParams params;
- params.descriptor = indexes[0];
- params.direction = 1;
+ DistinctParams params{&_opCtx, *indexes[0]};
+ params.scanDirection = 1;
// Distinct-ing over the 0-th field of the keypattern.
params.fieldNo = 0;
// We'll look at all values in the bounds.
@@ -142,7 +141,7 @@ public:
params.bounds.fields.push_back(oil);
WorkingSet ws;
- DistinctScan distinct(&_opCtx, params, &ws);
+ DistinctScan distinct(&_opCtx, std::move(params), &ws);
WorkingSetID wsid;
// Get our first result.
@@ -197,12 +196,11 @@ public:
coll->getIndexCatalog()->findIndexesByKeyPattern(&_opCtx, BSON("a" << 1), false, &indexes);
verify(indexes.size() == 1);
- DistinctParams params;
- params.descriptor = indexes[0];
- ASSERT_TRUE(params.descriptor->isMultikey(&_opCtx));
+ DistinctParams params{&_opCtx, *indexes[0]};
+ ASSERT_TRUE(params.isMultiKey);
- verify(params.descriptor);
- params.direction = 1;
+ verify(params.accessMethod);
+ params.scanDirection = 1;
// Distinct-ing over the 0-th field of the keypattern.
params.fieldNo = 0;
// We'll look at all values in the bounds.
@@ -212,7 +210,7 @@ public:
params.bounds.fields.push_back(oil);
WorkingSet ws;
- DistinctScan distinct(&_opCtx, params, &ws);
+ DistinctScan distinct(&_opCtx, std::move(params), &ws);
// We should see each number in the range [1, 6] exactly once.
std::set<int> seen;
@@ -266,11 +264,9 @@ public:
&_opCtx, BSON("a" << 1 << "b" << 1), false, &indices);
ASSERT_EQ(1U, indices.size());
- DistinctParams params;
- params.descriptor = indices[0];
- ASSERT_TRUE(params.descriptor);
+ DistinctParams params{&_opCtx, *indices[0]};
- params.direction = 1;
+ params.scanDirection = 1;
params.fieldNo = 1;
params.bounds.isSimpleRange = false;
@@ -283,7 +279,7 @@ public:
params.bounds.fields.push_back(bOil);
WorkingSet ws;
- DistinctScan distinct(&_opCtx, params, &ws);
+ DistinctScan distinct(&_opCtx, std::move(params), &ws);
WorkingSetID wsid;
PlanStage::StageState state;