SERVER-36517 Allow wildcard indexes to provide DISTINCT_SCAN

author: Bernard Gorman <bernard.gorman@gmail.com> 2018-09-12 15:30:56 +0100
committer: Bernard Gorman <bernard.gorman@gmail.com> 2018-10-11 01:23:07 +0100
commit: 8dab2485c1badc2fd09b84667b4dcfc8b53fef63 (patch)
tree: 3479da286c74a91a75b1703f422aa886e59bd33d
parent: e01cdb673978f6d3a9d53bb3a6ba35b34c281b0e (diff)
download: mongo-8dab2485c1badc2fd09b84667b4dcfc8b53fef63.tar.gz
16 files changed, 663 insertions, 318 deletions
diff --git a/jstests/core/wildcard_index_distinct_scan.js b/jstests/core/wildcard_index_distinct_scan.js
new file mode 100644
index 00000000000..df831cbc5c9
--- /dev/null
+++ b/jstests/core/wildcard_index_distinct_scan.js
@@ -0,0 +1,198 @@
+/**
+ * Tests that a $** index can provide a DISTINCT_SCAN or indexed solution where appropriate.
+ */
+(function() {
+    "use strict";
+
+    load("jstests/aggregation/extras/utils.js");  // For arrayEq.
+    load("jstests/libs/analyze_plan.js");         // For planHasStage and getPlanStages.
+
+    const assertArrayEq = (l, r) => assert(arrayEq(l, r), tojson(l) + " != " + tojson(r));
+
+    const coll = db.all_paths_distinct_scan;
+    coll.drop();
+
+    // Records whether the field which we are distinct-ing over is multikey.
+    let distinctFieldIsMultikey = false;
+
+    // Insert a set of documents into the collection. The 'listOfValues' argument contains values of
+    // various types, and we insert numerous documents containing each of the values. This allows us
+    // to confirm that 'distinct' with a wildcard index (1) can return values of any type, (2) will
+    // only return the set of unique values, and (3) handles multikey values appropriately in cases
+    // where 'listOfValues' includes an array.
+    function insertTestData(fieldName, listOfValues) {
+        distinctFieldIsMultikey = listOfValues.some((val) => Array.isArray(val));
+        const bulk = coll.initializeUnorderedBulkOp();
+        coll.drop();
+        for (let i = 0; i < 200; i++) {
+            const didx = (i % listOfValues.length);
+            bulk.insert({[fieldName]: listOfValues[didx], b: didx, c: (-i)});
+        }
+        assert.commandWorked(bulk.execute());
+    }
+
+    /**
+     * Runs a single wildcard distinct scan test. If 'expectedPath' is non-null, verifies that there
+     * is an indexed solution that uses the $** index with the given path string. If 'expectedPath'
+     * is null, verifies that no indexed solution was found.
+     */
+    function assertWildcardDistinctScan(
+        {distinctKey, query, pathProjection, expectedScanType, expectedResults, expectedPath}) {
+        // Drop all indexes before running the test. This allows us to perform the distinct with a
+        // COLLSCAN at first, to confirm that the results are as expected.
+        assert.commandWorked(coll.dropIndexes());
+
+        // Confirm that the distinct runs with a COLLSCAN.
+        let winningPlan = coll.explain().distinct(distinctKey, query).queryPlanner.winningPlan;
+        assert(planHasStage(coll.getDB(), winningPlan, "COLLSCAN"));
+        // Run the distinct and confirm that it produces the expected results.
+        assertArrayEq(coll.distinct(distinctKey, query), expectedResults);
+
+        // Build a wildcard index on the collection and re-run the test.
+        const options = (pathProjection ? {wildcardProjection: pathProjection} : {});
+        assert.commandWorked(coll.createIndex({"$**": 1}, options));
+
+        // We expect the following outcomes for a 'distinct' that attempts to use a $** index:
+        // - No query: COLLSCAN.
+        // - Query for object value on distinct field: COLLSCAN.
+        // - Query for non-object value on non-multikey distinct field: DISTINCT_SCAN.
+        // - Query for non-object value on multikey distinct field: IXSCAN with FETCH.
+        // - Query for non-object value on field other than the distinct field: IXSCAN with FETCH.
+        const fetchIsExpected = (expectedScanType !== "DISTINCT_SCAN");
+
+        // Explain the query, and determine whether an indexed solution is available. If
+        // 'expectedPath' is null, then we do not expect the $** index to provide a plan.
+        winningPlan = coll.explain().distinct(distinctKey, query).queryPlanner.winningPlan;
+        if (!expectedPath) {
+            assert(planHasStage(coll.getDB(), winningPlan, "COLLSCAN"));
+            assert.eq(expectedScanType, "COLLSCAN");
+            return;
+        }
+
+        // Confirm that the $** distinct scan produces the expected results.
+        assertArrayEq(coll.distinct(distinctKey, query), expectedResults);
+        // Confirm that the $** plan adheres to 'fetchIsExpected' and 'expectedScanType'.
+        assert.eq(planHasStage(coll.getDB(), winningPlan, "FETCH"), fetchIsExpected);
+        assert(planHasStage(coll.getDB(), winningPlan, expectedScanType));
+        assert.docEq({$_path: 1, [expectedPath]: 1},
+                     getPlanStages(winningPlan, expectedScanType).shift().keyPattern);
+    }
+
+    // The set of distinct values that should be produced by each of the test listed below.
+    const distinctValues = [1, 2, "3", null, {c: 5, d: 6}, {d: 6, c: 5}, {}, 9, 10, {e: 11}];
+
+    // Define the set of values that the distinct field may take. The first test case consists
+    // entirely of non-multikey fields, while the second includes multikey fields.
+    const testCases = [
+        // Non-multikey field values.
+        {
+          insertField: "a",
+          queryField: "a",
+          fieldValues: [1, 2, "3", null, {c: 5, d: 6}, {d: 6, c: 5}, {}, 9, 10, {e: 11}]
+        },
+        // Multikey field values. Note that values within arrays are unwrapped by the distinct
+        // scan, and empty arrays are thus not included.
+        {
+          insertField: "a",
+          queryField: "a",
+          fieldValues: [1, 2, "3", null, {c: 5, d: 6}, {d: 6, c: 5}, {}, [], [9, 10], [{e: 11}]]
+        },
+        // Non-multikey dotted field values.
+        {
+          insertField: "a",
+          queryField: "a.x",
+          fieldValues: [
+              {x: 1},
+              {x: 2},
+              {x: "3"},
+              {x: null},
+              {x: {c: 5, d: 6}},
+              {x: {d: 6, c: 5}},
+              {x: {}},
+              {x: 9},
+              {x: 10},
+              {x: {e: 11}}
+          ]
+        },
+        // Multikey dotted field values.
+        {
+          insertField: "a",
+          queryField: "a.x",
+          fieldValues: [
+              [{x: 1}],
+              [{x: 2}],
+              [{x: "3"}],
+              [{x: null}],
+              [{x: {c: 5, d: 6}}],
+              [{x: {d: 6, c: 5}}],
+              [{x: {}}],
+              [{x: []}],
+              [{x: 9}, {x: 10}],
+              [{x: [{e: 11}]}]
+          ]
+        }
+    ];
+
+    // Run all combinations of query, no-query, multikey and non-multikey distinct tests.
+    for (let testCase of testCases) {
+        // Log the start of the test and create the dataset.
+        jsTestLog("Test case: " + tojson(testCase));
+        insertTestData(testCase.insertField, testCase.fieldValues);
+
+        // Test that a $** index cannot provide an indexed 'distinct' without a query.
+        assertWildcardDistinctScan({
+            distinctKey: testCase.queryField,
+            query: {},
+            expectedScanType: "COLLSCAN",
+            expectedResults: distinctValues,
+            expectedPath: null
+        });
+
+        // Test that a $** index can provide an indexed 'distinct' for distinct-key queries.
+        assertWildcardDistinctScan({
+            distinctKey: testCase.queryField,
+            query: {[testCase.queryField]: {$lt: 3}},
+            expectedScanType: (distinctFieldIsMultikey ? "IXSCAN" : "DISTINCT_SCAN"),
+            expectedResults: [1, 2],
+            expectedPath: testCase.queryField
+        });
+
+        // Test that a $** index can provide an indexed 'distinct' for a query on another field.
+        const offset = Math.floor(testCase.fieldValues.length / 2);
+        assertWildcardDistinctScan({
+            distinctKey: testCase.queryField,
+            query: {b: {$gte: offset}},
+            expectedScanType: "IXSCAN",
+            expectedResults: distinctValues.slice(offset),
+            expectedPath: "b"
+        });
+
+        // Test that a $** index cannot provide an indexed 'distinct' for object value queries.
+        assertWildcardDistinctScan({
+            distinctKey: testCase.queryField,
+            query: {[testCase.queryField]: {$gte: {c: 5}}},
+            expectedScanType: "COLLSCAN",
+            expectedResults: [{c: 5, d: 6}, {d: 6, c: 5}, {e: 11}],
+            expectedPath: null
+        });
+
+        // Test that a $** index can provide an indexed 'distinct' for a MinMax query.
+        assertWildcardDistinctScan({
+            distinctKey: testCase.queryField,
+            query: {[testCase.queryField]: {$gte: MinKey, $lte: MaxKey}},
+            expectedScanType: "IXSCAN",
+            expectedResults: distinctValues,
+            expectedPath: testCase.queryField
+        });
+
+        // Test that a $** index cannot provide an indexed 'distinct' for excluded fields.
+        assertWildcardDistinctScan({
+            distinctKey: testCase.queryField,
+            query: {c: {$lt: 0}},
+            pathProjection: {c: 0},
+            expectedScanType: "COLLSCAN",
+            expectedResults: distinctValues,
+            expectedPath: null
+        });
+    }
+})();
diff --git a/src/mongo/bson/ordering.h b/src/mongo/bson/ordering.h
index d6e26e0aadc..b407709f6ca 100644
--- a/src/mongo/bson/ordering.h
+++ b/src/mongo/bson/ordering.h
@@ -45,6 +45,8 @@ class Ordering {
     Ordering(unsigned b) : bits(b) {}
 
 public:
+    static constexpr size_t kMaxCompoundIndexKeys = size_t{32};
+
     Ordering(const Ordering& r) : bits(r.bits) {}
     void operator=(const Ordering& r) {
         bits = r.bits;
@@ -78,7 +80,7 @@ public:
             BSONElement e = k.next();
             if (e.eoo())
                 break;
-            uassert(13103, "too many compound keys", n <= 31);
+            uassert(13103, "too many compound keys", n < kMaxCompoundIndexKeys);
             if (e.number() < 0)
                 b |= (1 << n);
             n++;
diff --git a/src/mongo/db/exec/distinct_scan.cpp b/src/mongo/db/exec/distinct_scan.cpp
index 5b1d5ccc545..90b91bcc49a 100644
--- a/src/mongo/db/exec/distinct_scan.cpp
+++ b/src/mongo/db/exec/distinct_scan.cpp
@@ -46,28 +46,22 @@ using stdx::make_unique;
 // static
 const char* DistinctScan::kStageType = "DISTINCT_SCAN";
 
-DistinctScan::DistinctScan(OperationContext* opCtx,
-                           const DistinctParams& params,
-                           WorkingSet* workingSet)
+DistinctScan::DistinctScan(OperationContext* opCtx, DistinctParams params, WorkingSet* workingSet)
     : PlanStage(kStageType, opCtx),
+      _params(std::move(params)),
       _workingSet(workingSet),
-      _descriptor(params.descriptor),
-      _iam(params.descriptor->getIndexCatalog()->getIndex(params.descriptor)),
-      _params(params),
-      _checker(&_params.bounds, _descriptor->keyPattern(), _params.direction) {
-    _specificStats.keyPattern = _params.descriptor->keyPattern();
-    if (BSONElement collationElement = _params.descriptor->getInfoElement("collation")) {
-        invariant(collationElement.isABSONObj());
-        _specificStats.collation = collationElement.Obj().getOwned();
-    }
-    _specificStats.indexName = _params.descriptor->indexName();
-    _specificStats.indexVersion = static_cast<int>(_params.descriptor->version());
-    _specificStats.isMultiKey = _params.descriptor->isMultikey(getOpCtx());
-    _specificStats.multiKeyPaths = _params.descriptor->getMultikeyPaths(getOpCtx());
-    _specificStats.isUnique = _params.descriptor->unique();
-    _specificStats.isSparse = _params.descriptor->isSparse();
-    _specificStats.isPartial = _params.descriptor->isPartial();
-    _specificStats.direction = _params.direction;
+      _iam(_params.accessMethod),
+      _checker(&_params.bounds, _params.keyPattern, _params.scanDirection) {
+    _specificStats.keyPattern = _params.keyPattern;
+    _specificStats.indexName = _params.name;
+    _specificStats.indexVersion = static_cast<int>(_params.version);
+    _specificStats.isMultiKey = _params.isMultiKey;
+    _specificStats.multiKeyPaths = _params.multikeyPaths;
+    _specificStats.isUnique = _params.isUnique;
+    _specificStats.isSparse = _params.isSparse;
+    _specificStats.isPartial = _params.isPartial;
+    _specificStats.direction = _params.scanDirection;
+    _specificStats.collation = _params.collation.getOwned();
 
     // Set up our initial seek. If there is no valid data, just mark as EOF.
     _commonStats.isEOF = !_checker.getStartSeekPoint(&_seekPoint);
@@ -80,7 +74,7 @@ PlanStage::StageState DistinctScan::doWork(WorkingSetID* out) {
     boost::optional<IndexKeyEntry> kv;
     try {
         if (!_cursor)
-            _cursor = _iam->newCursor(getOpCtx(), _params.direction == 1);
+            _cursor = _iam->newCursor(getOpCtx(), _params.scanDirection == 1);
         kv = _cursor->seek(_seekPoint);
     } catch (const WriteConflictException&) {
         *out = WorkingSet::INVALID_ID;
@@ -119,7 +113,7 @@ PlanStage::StageState DistinctScan::doWork(WorkingSetID* out) {
             WorkingSetID id = _workingSet->allocate();
             WorkingSetMember* member = _workingSet->get(id);
             member->recordId = kv->loc;
-            member->keyData.push_back(IndexKeyDatum(_descriptor->keyPattern(), kv->key, _iam));
+            member->keyData.push_back(IndexKeyDatum(_params.keyPattern, kv->key, _iam));
             _workingSet->transitionToRecordIdAndIdx(id);
 
             *out = id;
diff --git a/src/mongo/db/exec/distinct_scan.h b/src/mongo/db/exec/distinct_scan.h
index 44ae137dac6..debec3bb30e 100644
--- a/src/mongo/db/exec/distinct_scan.h
+++ b/src/mongo/db/exec/distinct_scan.h
@@ -42,16 +42,51 @@ class IndexAccessMethod;
 class IndexDescriptor;
 class WorkingSet;
 
-// TODO SERVER-36517: keyPattern, indexName and multikeyPaths info should be provided explicitly
-// here and adopted by DistinctScan, rather than being resolved via the IndexDescriptor.
 struct DistinctParams {
-    DistinctParams() : descriptor(NULL), direction(1), fieldNo(0) {}
+    DistinctParams(const IndexDescriptor& descriptor,
+                   std::string indexName,
+                   BSONObj keyPattern,
+                   MultikeyPaths multikeyPaths,
+                   bool multikey)
+        : accessMethod(descriptor.getIndexCatalog()->getIndex(&descriptor)),
+          name(std::move(indexName)),
+          keyPattern(std::move(keyPattern)),
+          multikeyPaths(std::move(multikeyPaths)),
+          isMultiKey(multikey),
+          isSparse(descriptor.isSparse()),
+          isUnique(descriptor.unique()),
+          isPartial(descriptor.isPartial()),
+          version(descriptor.version()),
+          collation(descriptor.infoObj()
+                        .getObjectField(IndexDescriptor::kCollationFieldName)
+                        .getOwned()) {
+        invariant(accessMethod);
+    }
+
+    DistinctParams(OperationContext* opCtx, const IndexDescriptor& descriptor)
+        : DistinctParams(descriptor,
+                         descriptor.indexName(),
+                         descriptor.keyPattern(),
+                         descriptor.getMultikeyPaths(opCtx),
+                         descriptor.isMultikey(opCtx)) {}
+
+    const IndexAccessMethod* accessMethod;
+    std::string name;
+
+    BSONObj keyPattern;
+
+    MultikeyPaths multikeyPaths;
+    bool isMultiKey;
+
+    bool isSparse;
+    bool isUnique;
+    bool isPartial;
 
-    // What index are we traversing?
-    const IndexDescriptor* descriptor;
+    IndexDescriptor::IndexVersion version;
 
-    // And in what direction?
-    int direction;
+    BSONObj collation;
+
+    int scanDirection{1};
 
     // What are the bounds?
     IndexBounds bounds;
@@ -61,7 +96,7 @@ struct DistinctParams {
     // If we have an index {a:1, b:1} we could use it to distinct over either 'a' or 'b'.
     // If we distinct over 'a' the position is 0.
     // If we distinct over 'b' the position is 1.
-    int fieldNo;
+    int fieldNo{0};
 };
 
 /**
@@ -75,7 +110,7 @@ struct DistinctParams {
  */
 class DistinctScan final : public PlanStage {
 public:
-    DistinctScan(OperationContext* opCtx, const DistinctParams& params, WorkingSet* workingSet);
+    DistinctScan(OperationContext* opCtx, DistinctParams params, WorkingSet* workingSet);
 
     StageState doWork(WorkingSetID* out) final;
     bool isEOF() final;
@@ -95,18 +130,17 @@ public:
     static const char* kStageType;
 
 private:
+    // The parameters used to configure this DistinctScan stage.
+    DistinctParams _params;
+
     // The WorkingSet we annotate with results.  Not owned by us.
     WorkingSet* _workingSet;
 
-    // Index access.
-    const IndexDescriptor* _descriptor;  // owned by Collection -> IndexCatalog
-    const IndexAccessMethod* _iam;       // owned by Collection -> IndexCatalog
+    const IndexAccessMethod* _iam;
 
     // The cursor we use to navigate the tree.
     std::unique_ptr<SortedDataInterface::Cursor> _cursor;
 
-    DistinctParams _params;
-
     // _checker gives us our start key and ensures we stay in bounds.
     IndexBoundsChecker _checker;
     IndexSeekPoint _seekPoint;
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
index 161b73386dc..572d7007b39 100644
--- a/src/mongo/db/query/get_executor.cpp
+++ b/src/mongo/db/query/get_executor.cpp
@@ -66,6 +66,7 @@
 #include "mongo/db/query/plan_executor.h"
 #include "mongo/db/query/planner_access.h"
 #include "mongo/db/query/planner_analysis.h"
+#include "mongo/db/query/planner_wildcard_helpers.h"
 #include "mongo/db/query/query_knobs.h"
 #include "mongo/db/query/query_planner.h"
 #include "mongo/db/query/query_planner_common.h"
@@ -117,6 +118,7 @@ void filterAllowedIndexEntries(const AllowedIndicesFilter& allowedIndicesFilter,
 }
 
 namespace {
+namespace wcp = ::mongo::wildcard_planning;
 // The body is below in the "count hack" section but getExecutor calls it.
 bool turnIxscanIntoCount(QuerySolution* soln);
 }  // namespace
@@ -1335,6 +1337,22 @@ bool turnIxscanIntoDistinctIxscan(QuerySolution* soln,
         indexScanNode = static_cast<IndexScanNode*>(fetchNode->children[0]);
     }
 
+    if (indexScanNode->index.type == IndexType::INDEX_WILDCARD) {
+        // If the query is on a field other than the distinct key, we may have generated a $** plan
+        // which does not actually contain the distinct key field.
+        if (field != std::next(indexScanNode->index.keyPattern.begin())->fieldName()) {
+            return false;
+        }
+        // If the query includes object bounds, we cannot turn this IXSCAN into a DISTINCT_SCAN.
+        // Wildcard indexes contain multiple keys per object, one for each subpath in ascending
+        // (Path, Value, RecordId) order. If the distinct fields in two successive documents are
+        // objects with the same leaf path values but in different field order, e.g. {a: 1, b: 2}
+        // and {b: 2, a: 1}, we would therefore only return the first document and skip the other.
+        if (wcp::isWildcardObjectSubpathScan(indexScanNode)) {
+            return false;
+        }
+    }
+
     // An additional filter must be applied to the data in the key, so we can't just skip
     // all the keys with a given value; we must examine every one to find the one that (may)
     // pass the filter.
@@ -1440,16 +1458,24 @@ namespace {
 QueryPlannerParams fillOutPlannerParamsForDistinct(OperationContext* opCtx,
                                                    Collection* collection,
                                                    size_t plannerOptions,
-                                                   const std::string& distinctKey) {
+                                                   const ParsedDistinct& parsedDistinct) {
     QueryPlannerParams plannerParams;
     plannerParams.options = QueryPlannerParams::NO_TABLE_SCAN | plannerOptions;
 
     IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(opCtx, false);
+    auto query = parsedDistinct.getQuery()->getQueryRequest().getFilter();
     while (ii.more()) {
         const IndexDescriptor* desc = ii.next();
         IndexCatalogEntry* ice = ii.catalogEntry(desc);
-        if (desc->keyPattern().hasField(distinctKey)) {
+        if (desc->keyPattern().hasField(parsedDistinct.getKey())) {
             plannerParams.indices.push_back(indexEntryFromIndexCatalogEntry(opCtx, *ice));
+        } else if (desc->getIndexType() == IndexType::INDEX_WILDCARD && !query.isEmpty()) {
+            // Check whether the $** projection captures the field over which we are distinct-ing.
+            const auto proj = WildcardKeyGenerator::createProjectionExec(desc->keyPattern(),
+                                                                         desc->pathProjection());
+            if (proj->applyProjectionToOneField(parsedDistinct.getKey())) {
+                plannerParams.indices.push_back(indexEntryFromIndexCatalogEntry(opCtx, *ice));
+            }
         }
     }
 
@@ -1485,9 +1511,8 @@ Status getExecutorForSimpleDistinct(OperationContext* opCtx,
     invariant(queryOrExecutor->cq);
     invariant(!queryOrExecutor->executor);
 
-    // If there's no query, we can just distinct-scan one of the indices.
-    // Not every index in plannerParams.indices may be suitable. Refer to
-    // getDistinctNodeIndex().
+    // If there's no query, we can just distinct-scan one of the indices. Not every index in
+    // plannerParams.indices may be suitable. Refer to getDistinctNodeIndex().
     size_t distinctNodeIndex = 0;
     if (!parsedDistinct->getQuery()->getQueryRequest().getFilter().isEmpty() ||
         !parsedDistinct->getQuery()->getQueryRequest().getSort().isEmpty() ||
@@ -1632,8 +1657,8 @@ StatusWith<unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutorDistinct(
     // We go through normal planning (with limited parameters) to see if we can produce
     // a soln with the above properties.
 
-    auto plannerParams = fillOutPlannerParamsForDistinct(
-        opCtx, collection, plannerOptions, parsedDistinct->getKey());
+    auto plannerParams =
+        fillOutPlannerParamsForDistinct(opCtx, collection, plannerOptions, *parsedDistinct);
 
     const ExtensionsCallbackReal extensionsCallback(opCtx, &collection->ns());
 
@@ -1657,8 +1682,8 @@ StatusWith<unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutorDistinct(
     auto qr = stdx::make_unique<QueryRequest>(parsedDistinct->getQuery()->getQueryRequest());
 
     // Applying a projection allows the planner to try to give us covered plans that we can turn
-    // into the projection hack.  getDistinctProjection deals with .find() projection semantics
-    // (ie _id:1 being implied by default).
+    // into the projection hack. The getDistinctProjection() function deals with .find() projection
+    // semantics (ie _id:1 being implied by default).
     if (qr->getProj().isEmpty()) {
         BSONObj projection = getDistinctProjection(parsedDistinct->getKey());
         qr->setProj(projection);
diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp
index 0962ded1297..6fcf5e8ceb1 100644
--- a/src/mongo/db/query/index_bounds_builder.cpp
+++ b/src/mongo/db/query/index_bounds_builder.cpp
@@ -57,7 +57,7 @@ namespace mongo {
 
 namespace {
 
-namespace app = ::mongo::wildcard_planning;
+namespace wcp = ::mongo::wildcard_planning;
 
 // Helper for checking that an OIL "appears" to be ascending given one interval.
 void assertOILIsAscendingLocally(const vector<Interval>& intervals, size_t idx) {
@@ -345,7 +345,7 @@ void IndexBoundsBuilder::translate(const MatchExpression* expr,
     // adjusted regardless of the predicate. Having filled out the initial bounds, we apply any
     // necessary changes to the tightness here.
     if (index.type == IndexType::INDEX_WILDCARD) {
-        *tightnessOut = app::translateWildcardIndexBoundsAndTightness(index, *tightnessOut, oilOut);
+        *tightnessOut = wcp::translateWildcardIndexBoundsAndTightness(index, *tightnessOut, oilOut);
     }
 }
 
diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp
index 6cc93126dad..7bcb08f0832 100644
--- a/src/mongo/db/query/planner_access.cpp
+++ b/src/mongo/db/query/planner_access.cpp
@@ -57,7 +57,7 @@ namespace {
 
 using namespace mongo;
 
-namespace app = ::mongo::wildcard_planning;
+namespace wcp = ::mongo::wildcard_planning;
 namespace dps = ::mongo::dotted_path_support;
 
 /**
@@ -530,79 +530,6 @@ void QueryPlannerAccess::finishTextNode(QuerySolutionNode* node, const IndexEntr
     tn->indexPrefix = prefixBob.obj();
 }
 
-void QueryPlannerAccess::finishWildcardIndexScanNode(QuerySolutionNode* node,
-                                                     const IndexEntry& plannerIndex) {
-    // We should only ever reach this point if we are an IndexScanNode for a $** index.
-    invariant(plannerIndex.type == IndexType::INDEX_WILDCARD);
-    invariant(node && node->getType() == STAGE_IXSCAN);
-
-    // Sanity check the QuerySolutionNode's copy of the IndexEntry.
-    IndexScanNode* scan = static_cast<IndexScanNode*>(node);
-    invariant(scan->index.multikeyPaths.size() == 1);
-    invariant(scan->index == plannerIndex);
-
-    // Obtain some references to make the remainder of this function more legible.
-    auto& bounds = scan->bounds;
-    auto& index = scan->index;
-
-    // For $** indexes, the IndexEntry key pattern is {'path.to.field': ±1} but the actual keys in
-    // the index are of the form {'$_path': ±1, 'path.to.field': ±1}, where the value of the first
-    // field in each key is 'path.to.field'. We push a new entry into the bounds vector for the
-    // leading '$_path' bound here. We also push corresponding fields into the IndexScanNode's
-    // keyPattern and its multikeyPaths vector.
-    invariant(plannerIndex.keyPattern.nFields() == 1);
-    invariant(bounds.fields.size() == 1);
-    invariant(!bounds.fields.front().name.empty());
-    index.multikeyPaths.insert(index.multikeyPaths.begin(), std::set<std::size_t>{});
-    bounds.fields.insert(bounds.fields.begin(), {"$_path"});
-    index.keyPattern =
-        BSON("$_path" << index.keyPattern.firstElement() << index.keyPattern.firstElement());
-
-    // Create a FieldRef to perform any necessary manipulations on the query path string.
-    FieldRef queryPath{plannerIndex.keyPattern.firstElementFieldName()};
-    auto& multikeyPaths = index.multikeyPaths.back();
-
-    // Helper function to check whether the final path component in 'queryPath' is an array index.
-    const auto lastFieldIsArrayIndex = [&multikeyPaths](const auto& queryPath) {
-        return (queryPath.numParts() > 1u && multikeyPaths.count(queryPath.numParts() - 2u) &&
-                queryPath.isNumericPathComponentStrict(queryPath.numParts() - 1u));
-    };
-
-    // If the bounds intersect with any objects, we must add bounds that encompass all its
-    // subpaths, specifically the interval ["path.","path/") on "$_path".
-    const bool requiresSubpathBounds = app::requiresSubpathBounds(bounds.fields.back());
-
-    // For bounds which contain objects, we build a range interval on all subpaths of the query
-    // path(s). We must therefore trim any trailing array indices from the query path before
-    // generating the fieldname-or-array power set, in order to avoid overlapping the final set of
-    // bounds. For instance, the untrimmed query path 'a.0' will produce paths 'a' and 'a.0' if 'a'
-    // is multikey, and so we would end up with bounds [['a','a'], ['a.','a/'], ['a.0','a.0'],
-    // ['a.0.','a.0/']]. The latter two are subsets of the ['a.', 'a/'] interval.
-    while (requiresSubpathBounds && lastFieldIsArrayIndex(queryPath)) {
-        queryPath.removeLastPart();
-    }
-
-    // Account for fieldname-or-array-index semantics. $** indexes do not explicitly encode array
-    // indices in their keys, so if this query traverses one or more multikey fields via an array
-    // index (e.g. query 'a.0.b' where 'a' is an array), then we must generate bounds on all array-
-    // and non-array permutations of the path in order to produce INEXACT_FETCH bounds.
-    auto paths = app::generateFieldNameOrArrayIndexPathSet(multikeyPaths, queryPath);
-
-    // Add a $_path point-interval for each path that needs to be traversed in the index. If the
-    // bounds on these paths include objects, then for each applicable path we must add a range
-    // interval on all its subpaths, i.e. ["path.","path/").
-    static const char subPathStart = '.', subPathEnd = static_cast<char>('.' + 1);
-    auto& pathIntervals = bounds.fields.front().intervals;
-    for (const auto& fieldPath : paths) {
-        auto path = fieldPath.dottedField().toString();
-        pathIntervals.push_back(IndexBoundsBuilder::makePointInterval(path));
-        if (requiresSubpathBounds) {
-            pathIntervals.push_back(IndexBoundsBuilder::makeRangeInterval(
-                path + subPathStart, path + subPathEnd, BoundInclusion::kIncludeStartKeyOnly));
-        }
-    }
-}
-
 bool QueryPlannerAccess::orNeedsFetch(const ScanBuildingState* scanState) {
     if (scanState->loosestBounds == IndexBoundsBuilder::EXACT) {
         return false;
@@ -675,7 +602,7 @@ void QueryPlannerAccess::finishLeafNode(QuerySolutionNode* node, const IndexEntr
 
     // If this is a $** index, update and populate the keyPattern, bounds, and multikeyPaths.
     if (index.type == IndexType::INDEX_WILDCARD) {
-        finishWildcardIndexScanNode(node, index);
+        wcp::finalizeWildcardIndexScanConfiguration(nodeIndex, bounds);
     }
 
     // Find the first field in the scan's bounds that was not filled out.
diff --git a/src/mongo/db/query/planner_access.h b/src/mongo/db/query/planner_access.h
index fec5ef6b793..e2097f2ab62 100644
--- a/src/mongo/db/query/planner_access.h
+++ b/src/mongo/db/query/planner_access.h
@@ -388,8 +388,6 @@ private:
 
     static void finishTextNode(QuerySolutionNode* node, const IndexEntry& index);
 
-    static void finishWildcardIndexScanNode(QuerySolutionNode* node, const IndexEntry& index);
-
     /**
      * Add the filter 'match' to the query solution node 'node'. Takes
      * ownership of 'match'.
diff --git a/src/mongo/db/query/planner_ixselect.cpp b/src/mongo/db/query/planner_ixselect.cpp
index 4fa9142c12c..62771cbf8b6 100644
--- a/src/mongo/db/query/planner_ixselect.cpp
+++ b/src/mongo/db/query/planner_ixselect.cpp
@@ -52,82 +52,12 @@ namespace mongo {
 
 namespace {
 
-namespace app = ::mongo::wildcard_planning;
+namespace wcp = ::mongo::wildcard_planning;
 
 std::size_t numPathComponents(StringData path) {
     return FieldRef{path}.numParts();
 }
 
-/**
- * Given a single wildcard index, and a set of fields which are being queried, create 'mock'
- * IndexEntry for each of the appropriate fields.
- */
-void expandIndex(const IndexEntry& wildcardIndex,
-                 const stdx::unordered_set<std::string>& fields,
-                 vector<IndexEntry>* out) {
-    invariant(out);
-    invariant(wildcardIndex.type == INDEX_WILDCARD);
-    // Should only have one field of the form {"path.$**" : 1}.
-    invariant(wildcardIndex.keyPattern.nFields() == 1);
-
-    // $** indexes do not keep the multikey metadata inside the index catalog entry, as the amount
-    // of metadata is not bounded. We do not expect IndexEntry objects for $** indexes to have a
-    // fixed-size vector of multikey metadata until after they are expanded.
-    invariant(wildcardIndex.multikeyPaths.empty());
-
-    const auto projExec = WildcardKeyGenerator::createProjectionExec(
-        wildcardIndex.keyPattern, wildcardIndex.infoObj.getObjectField("wildcardProjection"));
-
-    const auto projectedFields = projExec->applyProjectionToFields(fields);
-
-    const auto& includedPaths = projExec->getExhaustivePaths();
-
-    out->reserve(out->size() + projectedFields.size());
-    for (auto&& fieldName : projectedFields) {
-        // Convert string 'fieldName' into a FieldRef, to better facilitate the subsequent checks.
-        const auto queryPath = FieldRef{fieldName};
-        // $** indices hold multikey metadata directly in the index keys, rather than in the index
-        // catalog. In turn, the index key data is used to produce a set of multikey paths
-        // in-memory. Here we convert this set of all multikey paths into a MultikeyPaths vector
-        // which will indicate to the downstream planning code which components of 'fieldName' are
-        // multikey.
-        auto multikeyPaths = app::buildMultiKeyPathsForExpandedWildcardIndexEntry(
-            queryPath, wildcardIndex.multikeyPathSet);
-
-        // Check whether a query on the current fieldpath is answerable by the $** index, given any
-        // numerical path components that may be present in the path string.
-        if (!app::validateNumericPathComponents(multikeyPaths, includedPaths, queryPath)) {
-            continue;
-        }
-
-        // The expanded IndexEntry is only considered multikey if the particular path represented by
-        // this IndexEntry has a multikey path component. For instance, suppose we have index {$**:
-        // 1} with "a" as the only multikey path. If we have a query on paths "a.b" and "c.d", then
-        // we will generate two expanded index entries: one for "a.b" and "c.d". The "a.b" entry
-        // will be marked as multikey because "a" is multikey, whereas the "c.d" entry will not be
-        // marked as multikey.
-        invariant(multikeyPaths.size() == 1u);
-        const bool isMultikey = !multikeyPaths[0].empty();
-
-        IndexEntry entry(BSON(fieldName << wildcardIndex.keyPattern.firstElement()),
-                         IndexType::INDEX_WILDCARD,
-                         isMultikey,
-                         std::move(multikeyPaths),
-                         // Expanded index entries always use the fixed-size multikey paths
-                         // representation, so we purposefully discard 'multikeyPathSet'.
-                         {},
-                         true,   // sparse
-                         false,  // unique
-                         {wildcardIndex.identifier.catalogName, fieldName},
-                         wildcardIndex.filterExpr,
-                         wildcardIndex.infoObj,
-                         wildcardIndex.collator);
-
-        invariant("$_path"_sd != fieldName);
-        out->push_back(std::move(entry));
-    }
-}
-
 bool canUseWildcardIndex(BSONElement elt, MatchExpression::MatchType matchType) {
     if (elt.type() == BSONType::Object) {
         // $** indices break nested objects into separate keys, which means we can't naturally
@@ -378,7 +308,7 @@ std::vector<IndexEntry> QueryPlannerIXSelect::expandIndexes(
     std::vector<IndexEntry> out;
     for (auto&& entry : relevantIndices) {
         if (entry.type == IndexType::INDEX_WILDCARD) {
-            expandIndex(entry, fields, &out);
+            wcp::expandWildcardIndexEntry(entry, fields, &out);
         } else {
             out.push_back(entry);
         }
diff --git a/src/mongo/db/query/planner_wildcard_helpers.cpp b/src/mongo/db/query/planner_wildcard_helpers.cpp
index d928fc47638..1c0610823be 100644
--- a/src/mongo/db/query/planner_wildcard_helpers.cpp
+++ b/src/mongo/db/query/planner_wildcard_helpers.cpp
@@ -35,27 +35,14 @@
 #include <vector>
 
 #include "mongo/bson/util/builder.h"
+#include "mongo/db/index/wildcard_key_generator.h"
+#include "mongo/db/query/index_bounds.h"
 #include "mongo/util/log.h"
 
 namespace mongo {
 namespace wildcard_planning {
 namespace {
 /**
- * Returns an OIL that called 'name', with just the interval [{}, []).
- */
-OrderedIntervalList getAllObjectsOIL(const std::string& name) {
-    OrderedIntervalList oil(name);
-    BSONObjBuilder bob;
-    bob.appendMinForType("", BSONType::Object);
-    bob.appendMaxForType("", BSONType::Object);
-    oil.intervals.push_back(IndexBoundsBuilder::makeRangeInterval(
-        bob.obj(), BoundInclusion::kExcludeBothStartAndEndKeys));
-
-    return oil;
-}
-
-
-/**
  * Compares the path 'fieldNameOrArrayIndexPath' to 'staticComparisonPath', ignoring any array
  * indices present in the former if they are not present in the latter. The 'multikeyPathComponents'
  * set contains the path positions that are known to be arrays; only numerical path components that
@@ -114,7 +101,9 @@ bool fieldNameOrArrayIndexPathSetContains(const std::set<FieldRef>& multikeyPath
 /**
  * Returns the positions of all path components in 'queryPath' that may be interpreted as array
  * indices by the query system. We obtain this list by finding all multikey path components that
- * have a numerical path component immediately after.
+ * have a numerical path component immediately after. Note that the 'queryPath' argument may be a
+ * prefix of the full path used to generate 'multikeyPaths', and so we must avoid checking path
+ * components beyond the end of 'queryPath'.
  */
 std::vector<size_t> findArrayIndexPathComponents(const std::set<std::size_t>& multikeyPaths,
                                                  const FieldRef& queryPath) {
@@ -149,8 +138,11 @@ FieldRef pathWithoutSpecifiedComponents(const FieldRef& path,
     }
     return FieldRef{ss.str()};
 }
-}  // namespace
 
+/**
+ * Returns a MultikeyPaths which indicates which components of 'indexedPath' are multikey, by
+ * looking up multikeyness in 'multikeyPathSet'.
+ */
 MultikeyPaths buildMultiKeyPathsForExpandedWildcardIndexEntry(
     const FieldRef& indexedPath, const std::set<FieldRef>& multikeyPathSet) {
     FieldRef pathToLookup;
@@ -193,44 +185,25 @@ std::set<FieldRef> generateFieldNameOrArrayIndexPathSet(const std::set<std::size
     return paths;
 }
 
-BoundsTightness translateWildcardIndexBoundsAndTightness(const IndexEntry& index,
-                                                         BoundsTightness tightnessIn,
-                                                         OrderedIntervalList* oil) {
-    // This method should only ever be called for a $** IndexEntry. We expect to be called during
-    // planning, *before* finishWildcardIndexScanNode has been invoked. The IndexEntry should thus
-    // only have a single keyPattern field and multikeyPath entry, but this is sufficient to
-    // determine whether it will be necessary to adjust the tightness.
-    invariant(index.type == IndexType::INDEX_WILDCARD);
-    invariant(index.keyPattern.nFields() == 1);
-    invariant(index.multikeyPaths.size() == 1);
-    invariant(oil);
-
-    auto* intervals = &oil->intervals;
-
-    // If our bounds include any objects -- that is, anything in the range [{}, []) -- then we will
-    // need to use "subpath bounds", and include the additional interval ["path.","path/") on
-    // "$_path".
-    if (requiresSubpathBounds(*oil) && !intervals->front().isMinToMax()) {
-        // If we require subpath bounds (and aren't already using bounds from MinKey to MaxKey),
-        // then we must use [MinKey, MaxKey] bounds for the value.  For example, say our value
-        // bounds are [[MinKey, undefined), (null, MaxKey]] (the bounds generated by a {$ne: null}
-        // query). Our result set should include documents such as {a: {b: null}}. However, the
-        // index key for this object will be {"": "a.b", "": null}. This means if we were to use
-        // the original bounds, we would miss this document. We therefore expand the value bounds
-        // to included everything. We must also adjust the tightness to be inexact, to avoid false
-        // positives.
-        *intervals = {IndexBoundsBuilder::allValues()};
-        return BoundsTightness::INEXACT_FETCH;
-    }
-
-    // If the query passes through any array indices, we must always fetch and filter the documents.
-    const auto arrayIndicesTraversedByQuery = findArrayIndexPathComponents(
-        index.multikeyPaths.front(), FieldRef{index.keyPattern.firstElementFieldName()});
-
-    // If the list of array indices we traversed is non-empty, set the tightness to INEXACT_FETCH.
-    return (arrayIndicesTraversedByQuery.empty() ? tightnessIn : BoundsTightness::INEXACT_FETCH);
-}
-
+/**
+ * Returns false if 'queryPath' includes any numerical path components which render it unanswerable
+ * by the $** index, true otherwise. Specifically, the $** index cannot answer the query if either
+ * of the following scenarios occur:
+ *
+ * - The query path traverses through more than 'kWildcardMaxArrayIndexTraversalDepth' nested arrays
+ * via explicit array indices.
+ * - The query path lies along a $** projection through an array index.
+ *
+ * For an example of the latter case, say that our query path is 'a.0.b', our projection includes
+ * {'a.0': 1}, and 'a' is multikey. The query semantics will match 'a.0.b' by either field name or
+ * array index against the documents, but because the $** index always projects numeric path
+ * components strictly as field names, the projection {'a.0': 1} cannot correctly support this
+ * query.
+ *
+ * To see why, consider the document {a: [1, 2, 3]}. Query {'a.0': 1} will match this document, but
+ * the projection {'a.0': 1} will produce output document {a: []}, and so we will not index any of
+ * the values [1, 2, 3] for 'a'.
+ */
 bool validateNumericPathComponents(const MultikeyPaths& multikeyPaths,
                                    const std::set<FieldRef>& includedPaths,
                                    const FieldRef& queryPath) {
@@ -275,11 +248,228 @@ bool validateNumericPathComponents(const MultikeyPaths& multikeyPaths,
     return arrayIndices[0] >= includePath->numParts();
 }
 
-bool requiresSubpathBounds(const OrderedIntervalList& oil) {
-    // For intersectize() to work, the OILs' names must match.
-    OrderedIntervalList allObjects = getAllObjectsOIL(oil.name);
-    IndexBoundsBuilder::intersectize(oil, &allObjects);
-    return !allObjects.intervals.empty();
+/**
+ * Queries whose bounds overlap the Object type bracket may require special handling, since the $**
+ * index does not index complete objects but instead only contains the leaves along each of its
+ * subpaths. Since we ban all object-value queries except those on the empty object {}, this will
+ * typically only be relevant for bounds involving MinKey and MaxKey, such as {$exists: true}.
+ */
+bool boundsOverlapObjectTypeBracket(const OrderedIntervalList& oil) {
+    // Create an Interval representing the subrange ({}, []) of the object type bracket. We exclude
+    // both ends of the bracket because $** indexes support queries on empty objects and arrays.
+    static const Interval objectTypeBracketBounds = []() {
+        BSONObjBuilder objBracketBounds;
+        objBracketBounds.appendMinForType("", BSONType::Object);
+        objBracketBounds.appendMaxForType("", BSONType::Object);
+        return IndexBoundsBuilder::makeRangeInterval(objBracketBounds.obj(),
+                                                     BoundInclusion::kExcludeBothStartAndEndKeys);
+    }();
+
+    // Determine whether any of the ordered intervals overlap with the object type bracket. If the
+    // current interval precedes the bracket, we must check the next interval in sequence. If the
+    // interval succeeds the bracket then we can stop checking, since the ordered list is never
+    // descending. If we neither precede nor succeed the object type bracket, then we overlap it.
+    invariant(oil.computeDirection() != Interval::Direction::kDirectionDescending);
+    for (const auto& interval : oil.intervals) {
+        switch (interval.compare(objectTypeBracketBounds)) {
+            case Interval::IntervalComparison::INTERVAL_PRECEDES_COULD_UNION:
+            case Interval::IntervalComparison::INTERVAL_PRECEDES:
+                // Break out of the switch and proceed to check the next interval.
+                break;
+
+            case Interval::IntervalComparison::INTERVAL_SUCCEEDS:
+                return false;
+
+            default:
+                return true;
+        }
+    }
+    // If we're here, then all the OIL's bounds precede the object type bracket.
+    return false;
+}
+}  // namespace
+
+void expandWildcardIndexEntry(const IndexEntry& wildcardIndex,
+                              const stdx::unordered_set<std::string>& fields,
+                              std::vector<IndexEntry>* out) {
+    invariant(out);
+    invariant(wildcardIndex.type == INDEX_WILDCARD);
+    // Should only have one field of the form {"path.$**" : 1}.
+    invariant(wildcardIndex.keyPattern.nFields() == 1);
+    invariant(wildcardIndex.keyPattern.firstElement().fieldNameStringData().endsWith("$**"));
+
+    // $** indexes do not keep the multikey metadata inside the index catalog entry, as the amount
+    // of metadata is not bounded. We do not expect IndexEntry objects for $** indexes to have a
+    // fixed-size vector of multikey metadata until after they are expanded.
+    invariant(wildcardIndex.multikeyPaths.empty());
+
+    const auto projExec = WildcardKeyGenerator::createProjectionExec(
+        wildcardIndex.keyPattern, wildcardIndex.infoObj.getObjectField("wildcardProjection"));
+
+    const auto projectedFields = projExec->applyProjectionToFields(fields);
+
+    const auto& includedPaths = projExec->getExhaustivePaths();
+
+    out->reserve(out->size() + projectedFields.size());
+    for (auto&& fieldName : projectedFields) {
+        // Convert string 'fieldName' into a FieldRef, to better facilitate the subsequent checks.
+        auto queryPath = FieldRef{fieldName};
+        // $** indices hold multikey metadata directly in the index keys, rather than in the index
+        // catalog. In turn, the index key data is used to produce a set of multikey paths
+        // in-memory. Here we convert this set of all multikey paths into a MultikeyPaths vector
+        // which will indicate to the downstream planning code which components of 'fieldName' are
+        // multikey.
+        auto multikeyPaths = buildMultiKeyPathsForExpandedWildcardIndexEntry(
+            queryPath, wildcardIndex.multikeyPathSet);
+
+        // Check whether a query on the current fieldpath is answerable by the $** index, given any
+        // numerical path components that may be present in the path string.
+        if (!validateNumericPathComponents(multikeyPaths, includedPaths, queryPath)) {
+            continue;
+        }
+
+        // The expanded IndexEntry is only considered multikey if the particular path represented by
+        // this IndexEntry has a multikey path component. For instance, suppose we have index {$**:
+        // 1} with "a" as the only multikey path. If we have a query on paths "a.b" and "c.d", then
+        // we will generate two expanded index entries: one for "a.b" and "c.d". The "a.b" entry
+        // will be marked as multikey because "a" is multikey, whereas the "c.d" entry will not be
+        // marked as multikey.
+        invariant(multikeyPaths.size() == 1u);
+        const bool isMultikey = !multikeyPaths[0].empty();
+
+        IndexEntry entry(BSON(fieldName << wildcardIndex.keyPattern.firstElement()),
+                         IndexType::INDEX_WILDCARD,
+                         isMultikey,
+                         std::move(multikeyPaths),
+                         // Expanded index entries always use the fixed-size multikey paths
+                         // representation, so we purposefully discard 'multikeyPathSet'.
+                         {},
+                         true,   // sparse
+                         false,  // unique
+                         {wildcardIndex.identifier.catalogName, fieldName},
+                         wildcardIndex.filterExpr,
+                         wildcardIndex.infoObj,
+                         wildcardIndex.collator);
+
+        invariant("$_path"_sd != fieldName);
+        out->push_back(std::move(entry));
+    }
+}
+
+BoundsTightness translateWildcardIndexBoundsAndTightness(const IndexEntry& index,
+                                                         BoundsTightness tightnessIn,
+                                                         OrderedIntervalList* oil) {
+    // This method should only ever be called for a $** IndexEntry. We expect to be called during
+    // planning, *before* finishWildcardIndexScanNode has been invoked. The IndexEntry should thus
+    // only have a single keyPattern field and multikeyPath entry, but this is sufficient to
+    // determine whether it will be necessary to adjust the tightness.
+    invariant(index.type == IndexType::INDEX_WILDCARD);
+    invariant(index.keyPattern.nFields() == 1);
+    invariant(index.multikeyPaths.size() == 1);
+    invariant(oil);
+
+    // If our bounds include any objects -- anything in the range ({}, []) -- then we will need to
+    // use subpath bounds; that is, we will add the interval ["path.","path/") at the point where we
+    // finalize the index scan. If the subpath interval is required but the bounds do not already
+    // run from MinKey to MaxKey, then we must expand them to [MinKey, MaxKey]. Consider the case
+    // where out bounds are [[MinKey, undefined), (null, MaxKey]] as generated by {$ne: null}. Our
+    // result set should include documents such as {a: {b: null}}; however, the wildcard index key
+    // for this object will be {"": "a.b", "": null}, which means that the original bounds would
+    // skip this document. We must also set the tightness to INEXACT_FETCH to avoid false positives.
+    if (boundsOverlapObjectTypeBracket(*oil) && !oil->intervals.front().isMinToMax()) {
+        oil->intervals = {IndexBoundsBuilder::allValues()};
+        return BoundsTightness::INEXACT_FETCH;
+    }
+
+    // If the query passes through any array indices, we must always fetch and filter the documents.
+    const auto arrayIndicesTraversedByQuery = findArrayIndexPathComponents(
+        index.multikeyPaths.front(), FieldRef{index.keyPattern.firstElementFieldName()});
+
+    // If the list of array indices we traversed is non-empty, set the tightness to INEXACT_FETCH.
+    return (arrayIndicesTraversedByQuery.empty() ? tightnessIn : BoundsTightness::INEXACT_FETCH);
+}
+
+void finalizeWildcardIndexScanConfiguration(IndexEntry* index, IndexBounds* bounds) {
+    // We should only ever reach this point when processing a $** index. Sanity check the arguments.
+    invariant(index && index->type == IndexType::INDEX_WILDCARD);
+    invariant(index->keyPattern.nFields() == 1);
+    invariant(index->multikeyPaths.size() == 1);
+    invariant(bounds && bounds->fields.size() == 1);
+    invariant(bounds->fields.front().name == index->keyPattern.firstElementFieldName());
+
+    // For $** indexes, the IndexEntry key pattern is {'path.to.field': ±1} but the actual keys in
+    // the index are of the form {'$_path': ±1, 'path.to.field': ±1}, where the value of the first
+    // field in each key is 'path.to.field'. We push a new entry into the bounds vector for the
+    // leading '$_path' bound here. We also push corresponding fields into the IndexScanNode's
+    // keyPattern and its multikeyPaths vector.
+    index->multikeyPaths.insert(index->multikeyPaths.begin(), std::set<std::size_t>{});
+    bounds->fields.insert(bounds->fields.begin(), {"$_path"});
+    index->keyPattern =
+        BSON("$_path" << index->keyPattern.firstElement() << index->keyPattern.firstElement());
+
+    // Create a FieldRef to perform any necessary manipulations on the query path string.
+    FieldRef queryPath{std::next(index->keyPattern.begin())->fieldNameStringData()};
+    auto& multikeyPaths = index->multikeyPaths.back();
+
+    // If the bounds overlap the object type bracket, then we must retrieve all documents which
+    // include the given path. We must therefore add bounds that encompass all its subpaths,
+    // specifically the interval ["path.","path/") on "$_path".
+    const bool requiresSubpathBounds = boundsOverlapObjectTypeBracket(bounds->fields.back());
+
+    // Helper function to check whether the final path component in 'queryPath' is an array index.
+    const auto lastFieldIsArrayIndex = [&multikeyPaths](const auto& queryPath) {
+        return (queryPath.numParts() > 1u && multikeyPaths.count(queryPath.numParts() - 2u) &&
+                queryPath.isNumericPathComponentStrict(queryPath.numParts() - 1u));
+    };
+
+    // If subpath bounds are needed, we build a range interval on all subpaths of the query path(s).
+    // We must therefore trim any trailing array indices from the query path before generating the
+    // fieldname-or-array power set, in order to avoid overlapping the final set of bounds. For
+    // instance, the untrimmed query path 'a.0' will produce paths 'a' and 'a.0' if 'a' is multikey,
+    // and so we would end up with bounds [['a','a'], ['a.','a/'], ['a.0','a.0'], ['a.0.','a.0/']].
+    // The latter two are subsets of the ['a.', 'a/'] interval.
+    while (requiresSubpathBounds && lastFieldIsArrayIndex(queryPath)) {
+        queryPath.removeLastPart();
+    }
+
+    // Account for fieldname-or-array-index semantics. $** indexes do not explicitly encode array
+    // indices in their keys, so if this query traverses one or more multikey fields via an array
+    // index (e.g. query 'a.0.b' where 'a' is an array), then we must generate bounds on all array-
+    // and non-array permutations of the path in order to produce INEXACT_FETCH bounds.
+    auto paths = generateFieldNameOrArrayIndexPathSet(multikeyPaths, queryPath);
+
+    // Add a $_path point-interval for each path that needs to be traversed in the index. If subpath
+    // bounds are required, then we must add a further range interval on ["path.","path/").
+    static const char subPathStart = '.', subPathEnd = static_cast<char>('.' + 1);
+    auto& pathIntervals = bounds->fields.front().intervals;
+    for (const auto& fieldPath : paths) {
+        auto path = fieldPath.dottedField().toString();
+        pathIntervals.push_back(IndexBoundsBuilder::makePointInterval(path));
+        if (requiresSubpathBounds) {
+            pathIntervals.push_back(IndexBoundsBuilder::makeRangeInterval(
+                path + subPathStart, path + subPathEnd, BoundInclusion::kIncludeStartKeyOnly));
+        }
+    }
+    // Ensure that the bounds' intervals are correctly aligned.
+    IndexBoundsBuilder::alignBounds(bounds, index->keyPattern);
+}
+
+bool isWildcardObjectSubpathScan(const IndexScanNode* node) {
+    // If the node is not a $** index scan, return false immediately.
+    if (!node || node->index.type != IndexType::INDEX_WILDCARD) {
+        return false;
+    }
+
+    // We expect consistent arguments, representing a $** index which has already been finalized.
+    invariant(node->index.keyPattern.nFields() == 2);
+    invariant(node->index.multikeyPaths.size() == 2);
+    invariant(node->bounds.fields.size() == 2);
+    invariant(node->bounds.fields.front().name == node->index.keyPattern.firstElementFieldName());
+    invariant(node->bounds.fields.back().name ==
+              std::next(node->index.keyPattern.begin())->fieldName());
+
+    // Check the bounds on the query field for any intersections with the object type bracket.
+    return boundsOverlapObjectTypeBracket(node->bounds.fields.back());
 }
 
 }  // namespace wildcard_planning
diff --git a/src/mongo/db/query/planner_wildcard_helpers.h b/src/mongo/db/query/planner_wildcard_helpers.h
index ddd0185d7c3..f05c475295f 100644
--- a/src/mongo/db/query/planner_wildcard_helpers.h
+++ b/src/mongo/db/query/planner_wildcard_helpers.h
@@ -33,6 +33,7 @@
 #include "mongo/db/field_ref.h"
 #include "mongo/db/index/multikey_paths.h"
 #include "mongo/db/query/index_bounds_builder.h"
+#include "mongo/db/query/index_entry.h"
 #include "mongo/db/query/query_solution.h"
 
 namespace mongo {
@@ -42,27 +43,17 @@ using BoundsTightness = IndexBoundsBuilder::BoundsTightness;
 
 /**
  * Specifies the maximum depth of nested array indices through which a query may traverse before a
- * $** declines to answer it, due to the exponential complexity of the bounds required.
+ * $** index declines to answer it, due to the exponential complexity of the bounds required.
  */
 static constexpr size_t kWildcardMaxArrayIndexTraversalDepth = 8u;
 
 /**
- * Returns a MultikeyPaths which indicates which components of 'indexedPath' are multikey, by
- * looking up multikeyness in 'multikeyPathSet'.
+ * Given a single wildcard index, and a set of fields which are being queried, create a 'mock'
+ * IndexEntry for each of the query fields and add them into the provided vector.
  */
-MultikeyPaths buildMultiKeyPathsForExpandedWildcardIndexEntry(
-    const FieldRef& indexedPath, const std::set<FieldRef>& multikeyPathSet);
-
-/**
- * Given a query path and the set of known multikey path components, this function outputs the power
- * set of paths generated by considering each relevant numerical path component first as a literal
- * fieldname, and then as an array index. In other words, for each numerical path component that
- * immediately follows a multikey field, we will output one path with the numerical component and
- * one path without. If both 'a' and 'b' are multikey, then the queryPath 'a.0.b.1.c' will produce
- * ['a.0.b.1.c', 'a.0.b.c', 'a.b.1.c', 'a.b.c'].
- */
-std::set<FieldRef> generateFieldNameOrArrayIndexPathSet(const std::set<std::size_t>& multikeyPaths,
-                                                        const FieldRef& queryPath);
+void expandWildcardIndexEntry(const IndexEntry& wildcardIndex,
+                              const stdx::unordered_set<std::string>& fields,
+                              std::vector<IndexEntry>* out);
 
 /**
  * In certain circumstances, it is necessary to adjust the bounds and tightness generated by the
@@ -76,27 +67,21 @@ BoundsTightness translateWildcardIndexBoundsAndTightness(const IndexEntry& index
                                                          OrderedIntervalList* oil);
 
 /**
- * Returns false if 'queryPath' includes any numerical path components which render it unanswerable
- * by the $** index, true otherwise. Specifically, the $** index cannot answer the query if either
- * of the following scenarios occur:
- *
- * - The query path traverses through more than 'kWildcardMaxArrayIndexTraversalDepth' nested arrays
- * via explicit array indices.
- * - The query path lies along a $** projection through an array index.
- *
- * For an example of the latter case, say that our query path is 'a.0.b', our projection includes
- * {'a.0': 1}, and 'a' is multikey. The query semantics will match 'a.0.b' by either field name or
- * array index against the documents, but because the $** index always projects numeric path
- * components strictly as field names, the projection {'a.0': 1} cannot correctly support this
- * query.
- *
- * To see why, consider the document {a: [1, 2, 3]}. Query {'a.0': 1} will match this document, but
- * the projection {'a.0': 1} will produce output document {a: []}, and so we will not index any of
- * the values [1, 2, 3] for 'a'.
+ * During planning, the expanded $** IndexEntry's keyPattern and bounds are in the single-field
+ * format {'path': 1}. Once planning is complete, it is necessary to call this method in order to
+ * prepare the IndexEntry and bounds for execution. This function performs the following actions:
+ * - Converts the keyPattern to the {$_path: 1, "path": 1} format expected by the $** index.
+ * - Adds a new entry '$_path' to the bounds vector, and computes the necessary intervals on it.
+ * - Adds a new, empty entry to 'multikeyPaths' for '$_path'.
+ */
+void finalizeWildcardIndexScanConfiguration(IndexEntry* index, IndexBounds* bounds);
+
+/**
+ * Returns true if the given IndexScanNode is a $** scan whose bounds overlap the object type
+ * bracket. Scans whose bounds include the object bracket have certain limitations for planning
+ * purposes; for instance, they cannot provide covered results or be converted to DISTINCT_SCAN.
  */
-bool validateNumericPathComponents(const MultikeyPaths& multikeyPaths,
-                                   const std::set<FieldRef>& includedPaths,
-                                   const FieldRef& queryPath);
+bool isWildcardObjectSubpathScan(const IndexScanNode* node);
 
 /**
  * Return true if the intervals on the 'value' field will include subobjects, and
diff --git a/src/mongo/db/query/query_planner_wildcard_index_test.cpp b/src/mongo/db/query/query_planner_wildcard_index_test.cpp
index 5bc23eb2892..025f1ce9fa5 100644
--- a/src/mongo/db/query/query_planner_wildcard_index_test.cpp
+++ b/src/mongo/db/query/query_planner_wildcard_index_test.cpp
@@ -28,10 +28,14 @@
 
 #include "mongo/platform/basic.h"
 
+#include "mongo/db/query/planner_wildcard_helpers.h"
 #include "mongo/db/query/query_planner_test_fixture.h"
+#include "mongo/unittest/death_test.h"
 
 namespace mongo {
 
+namespace wcp = ::mongo::wildcard_planning;
+
 const std::string kIndexName = "indexName";
 
 /**
@@ -77,6 +81,24 @@ protected:
 };
 
 //
+// General planning tests.
+//
+
+DEATH_TEST_F(QueryPlannerWildcardTest, CannotExpandPreExpandedWildcardIndexEntry, "Invariant") {
+    addIndex(BSON("$**" << 1));
+    ASSERT_EQ(params.indices.size(), 2U);
+
+    // Expand the $** index and add the expanded entry to the list of available indices.
+    std::vector<IndexEntry> expandedIndex;
+    wcp::expandWildcardIndexEntry(params.indices.back(), {"a"}, &expandedIndex);
+    ASSERT_EQ(expandedIndex.size(), 1U);
+    params.indices.push_back(expandedIndex.front());
+
+    // Now run a query. This will invariant when the planner expands the expanded index.
+    runQuery(fromjson("{a: 1}"));
+}
+
+//
 // Null comparison and existence tests.
 //
 
@@ -619,6 +641,10 @@ TEST_F(QueryPlannerWildcardTest, InBasic) {
         "bounds: {'$_path': [['a','a',true,true]], a: [[1,1,true,true],[2,2,true,true]]}}}}}");
 }
 
+//
+// Array tests.
+//
+
 TEST_F(QueryPlannerWildcardTest, EqualsEmptyArray) {
     addWildcardIndex(BSON("$**" << 1));
     runQuery(fromjson("{a: []}"));
@@ -651,6 +677,10 @@ TEST_F(QueryPlannerWildcardTest, EqualsArrayWithValue) {
     assertHasOnlyCollscan();
 }
 
+//
+// $in tests.
+//
+
 TEST_F(QueryPlannerWildcardTest, InEmptyArray) {
     addWildcardIndex(BSON("$**" << 1));
     runQuery(fromjson("{a: {$in: [[]]}}"));
@@ -696,6 +726,10 @@ TEST_F(QueryPlannerWildcardTest, InBasicOrEquivalent) {
         "bounds: {'$_path': [['a','a',true,true]], a: [[1,1,true,true],[2,2,true,true]]}}}}}");
 }
 
+//
+// Partial index tests.
+//
+
 TEST_F(QueryPlannerWildcardTest, PartialIndexCanAnswerPredicateOnFilteredField) {
     auto filterObj = fromjson("{a: {$gt: 0}}");
     auto filterExpr = QueryPlannerTest::parseMatchExpression(filterObj);
@@ -877,6 +911,10 @@ TEST_F(QueryPlannerWildcardTest, WildcardIndexDoesNotSupplyCandidatePlanForTextS
         "{fetch: {filter: {b: 10}, node: {text: {prefix: {a: 10}, search: 'banana'}}}}");
 }
 
+//
+// Negation tests.
+//
+
 TEST_F(QueryPlannerWildcardTest, WildcardDoesNotSupportNegationPredicate) {
     // Wildcard indexes can't support negation queries because they are sparse, and {a: {$ne: 5}}
     // will match documents which don't have an "a" field.
@@ -1005,6 +1043,10 @@ TEST_F(QueryPlannerTest, MultipleWildcardIndexesHintWithPartialFilter) {
     assertNumSolutions(0U);
 }
 
+//
+// Object comparison tests.
+//
+
 TEST_F(QueryPlannerWildcardTest, WildcardIndexesDoNotSupportObjectEquality) {
     addIndex(BSON("$**" << 1));
 
@@ -1053,6 +1095,10 @@ TEST_F(QueryPlannerWildcardTest, WildcardIndexesDoNotSupportObjectInequality) {
     assertSolutionExists("{fetch: {node: {ixscan: {pattern: {$_path: 1, x: 1}}}}}");
 }
 
+//
+// Unsupported values tests.
+//
+
 TEST_F(QueryPlannerWildcardTest, WildcardIndexesDoNotSupportInWithUnsupportedValues) {
     addIndex(BSON("$**" << 1));
 
@@ -1096,6 +1142,10 @@ TEST_F(QueryPlannerWildcardTest, WildcardIndexesDoNotSupportElemMatchObject) {
     assertHasOnlyCollscan();
 }
 
+//
+// Sorting tests.
+//
+
 TEST_F(QueryPlannerWildcardTest, WildcardIndexCanProvideNonBlockingSort) {
     addWildcardIndex(BSON("$**" << 1));
 
@@ -1767,6 +1817,4 @@ TEST_F(QueryPlannerWildcardTest, ContainedOrPushdownWorksWithWildcardIndex) {
         "{$_path: [['a','a',true,true]], a:[[1,1,true,true]]}}}}}");
 }
 
-// TODO SERVER-36517: Add testing for DISTINCT_SCAN.
-
 }  // namespace mongo
diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp
index 567cb958e0a..6ba95a37232 100644
--- a/src/mongo/db/query/query_solution.cpp
+++ b/src/mongo/db/query/query_solution.cpp
@@ -38,12 +38,15 @@
 #include "mongo/db/query/collation/collation_index_key.h"
 #include "mongo/db/query/index_bounds_builder.h"
 #include "mongo/db/query/planner_analysis.h"
+#include "mongo/db/query/planner_wildcard_helpers.h"
 #include "mongo/db/query/query_planner_common.h"
 
 namespace mongo {
 
 namespace {
 
+namespace wcp = ::mongo::wildcard_planning;
+
 // Create an ordred interval list which represents the bounds for all BSON elements of type String,
 // Object, or Array.
 OrderedIntervalList buildStringBoundsOil(const std::string& keyName) {
@@ -536,6 +539,12 @@ void IndexScanNode::appendToString(mongoutils::str::stream* ss, int indent) cons
 }
 
 bool IndexScanNode::hasField(const string& field) const {
+    // A $** index whose bounds overlap the object type bracket cannot provide covering, since the
+    // index only contains the leaf nodes along each of the object's subpaths.
+    if (index.type == IndexType::INDEX_WILDCARD && wcp::isWildcardObjectSubpathScan(this)) {
+        return false;
+    }
+
     // The index is multikey but does not have any path-level multikeyness information. Such indexes
     // can never provide covering.
     if (index.multikey && index.multikeyPaths.empty()) {
diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h
index 3eb550761c9..ffd0985a679 100644
--- a/src/mongo/db/query/query_solution.h
+++ b/src/mongo/db/query/query_solution.h
@@ -884,11 +884,13 @@ struct DistinctNode : public QuerySolutionNode {
     BSONObjSet sorts;
 
     IndexEntry index;
-    int direction;
     IndexBounds bounds;
+
     const CollatorInterface* queryCollator;
+
     // We are distinct-ing over the 'fieldNo'-th field of 'index.keyPattern'.
-    int fieldNo;
+    int fieldNo{0};
+    int direction{1};
 };
 
 /**
diff --git a/src/mongo/db/query/stage_builder.cpp b/src/mongo/db/query/stage_builder.cpp
index d45f6592308..f368a4699f8 100644
--- a/src/mongo/db/query/stage_builder.cpp
+++ b/src/mongo/db/query/stage_builder.cpp
@@ -311,15 +311,22 @@ PlanStage* buildStages(OperationContext* opCtx,
                 return nullptr;
             }
 
-            DistinctParams params;
-
-            params.descriptor = collection->getIndexCatalog()->findIndexByName(
+            auto descriptor = collection->getIndexCatalog()->findIndexByName(
                 opCtx, dn->index.identifier.catalogName);
-            invariant(params.descriptor);
-            params.direction = dn->direction;
+            invariant(descriptor);
+
+            // We use the node's internal name, keyPattern and multikey details here. For $**
+            // indexes, these may differ from the information recorded in the index's descriptor.
+            DistinctParams params{*descriptor,
+                                  dn->index.identifier.catalogName,
+                                  dn->index.keyPattern,
+                                  dn->index.multikeyPaths,
+                                  dn->index.multikey};
+
+            params.scanDirection = dn->direction;
             params.bounds = dn->bounds;
             params.fieldNo = dn->fieldNo;
-            return new DistinctScan(opCtx, params, ws);
+            return new DistinctScan(opCtx, std::move(params), ws);
         }
         case STAGE_COUNT_SCAN: {
             const CountScanNode* csn = static_cast<const CountScanNode*>(root);
diff --git a/src/mongo/dbtests/query_stage_distinct.cpp b/src/mongo/dbtests/query_stage_distinct.cpp
index 1fb9aad1350..d4bbd8a38b8 100644
--- a/src/mongo/dbtests/query_stage_distinct.cpp
+++ b/src/mongo/dbtests/query_stage_distinct.cpp
@@ -130,9 +130,8 @@ public:
         coll->getIndexCatalog()->findIndexesByKeyPattern(&_opCtx, BSON("a" << 1), false, &indexes);
         ASSERT_EQ(indexes.size(), 1U);
 
-        DistinctParams params;
-        params.descriptor = indexes[0];
-        params.direction = 1;
+        DistinctParams params{&_opCtx, *indexes[0]};
+        params.scanDirection = 1;
         // Distinct-ing over the 0-th field of the keypattern.
         params.fieldNo = 0;
         // We'll look at all values in the bounds.
@@ -142,7 +141,7 @@ public:
         params.bounds.fields.push_back(oil);
 
         WorkingSet ws;
-        DistinctScan distinct(&_opCtx, params, &ws);
+        DistinctScan distinct(&_opCtx, std::move(params), &ws);
 
         WorkingSetID wsid;
         // Get our first result.
@@ -197,12 +196,11 @@ public:
         coll->getIndexCatalog()->findIndexesByKeyPattern(&_opCtx, BSON("a" << 1), false, &indexes);
         verify(indexes.size() == 1);
 
-        DistinctParams params;
-        params.descriptor = indexes[0];
-        ASSERT_TRUE(params.descriptor->isMultikey(&_opCtx));
+        DistinctParams params{&_opCtx, *indexes[0]};
+        ASSERT_TRUE(params.isMultiKey);
 
-        verify(params.descriptor);
-        params.direction = 1;
+        verify(params.accessMethod);
+        params.scanDirection = 1;
         // Distinct-ing over the 0-th field of the keypattern.
         params.fieldNo = 0;
         // We'll look at all values in the bounds.
@@ -212,7 +210,7 @@ public:
         params.bounds.fields.push_back(oil);
 
         WorkingSet ws;
-        DistinctScan distinct(&_opCtx, params, &ws);
+        DistinctScan distinct(&_opCtx, std::move(params), &ws);
 
         // We should see each number in the range [1, 6] exactly once.
         std::set<int> seen;
@@ -266,11 +264,9 @@ public:
             &_opCtx, BSON("a" << 1 << "b" << 1), false, &indices);
         ASSERT_EQ(1U, indices.size());
 
-        DistinctParams params;
-        params.descriptor = indices[0];
-        ASSERT_TRUE(params.descriptor);
+        DistinctParams params{&_opCtx, *indices[0]};
 
-        params.direction = 1;
+        params.scanDirection = 1;
         params.fieldNo = 1;
         params.bounds.isSimpleRange = false;
 
@@ -283,7 +279,7 @@ public:
         params.bounds.fields.push_back(bOil);
 
         WorkingSet ws;
-        DistinctScan distinct(&_opCtx, params, &ws);
+        DistinctScan distinct(&_opCtx, std::move(params), &ws);
 
         WorkingSetID wsid;
         PlanStage::StageState state;
author	Bernard Gorman <bernard.gorman@gmail.com>	2018-09-12 15:30:56 +0100
committer	Bernard Gorman <bernard.gorman@gmail.com>	2018-10-11 01:23:07 +0100
commit	8dab2485c1badc2fd09b84667b4dcfc8b53fef63 (patch)
tree	3479da286c74a91a75b1703f422aa886e59bd33d
parent	e01cdb673978f6d3a9d53bb3a6ba35b34c281b0e (diff)
download	mongo-8dab2485c1badc2fd09b84667b4dcfc8b53fef63.tar.gz