SERVER-31698 Move wildcard_index integration tests to /core/

author: Bernard Gorman <bernard.gorman@gmail.com> 2018-10-06 11:28:49 +0100
committer: Bernard Gorman <bernard.gorman@gmail.com> 2018-10-10 16:03:27 +0100
commit: 1b51df8420ef3c57afb2d753d7188814e90eee51 (patch)
tree: cdb3247e7ea95521c5b4ac1330341a13b9713622 /jstests/core/wildcard_index_multikey.js
parent: 549616025c9d471767bc7ede342a9919cb3e6f94 (diff)
download: mongo-1b51df8420ef3c57afb2d753d7188814e90eee51.tar.gz
1 files changed, 241 insertions, 0 deletions
diff --git a/jstests/core/wildcard_index_multikey.js b/jstests/core/wildcard_index_multikey.js
new file mode 100644
index 00000000000..93e9492d6de
--- /dev/null
+++ b/jstests/core/wildcard_index_multikey.js
@@ -0,0 +1,241 @@
+/**
+ * Tests that queries using a multikey $** index, return correct results.
+ */
+(function() {
+    "use strict";
+
+    load("jstests/aggregation/extras/utils.js");  // For arrayEq.
+    load("jstests/libs/analyze_plan.js");         // For getPlanStages.
+
+    const assertArrayEq = (l, r) => assert(arrayEq(l, r), tojson(l) + " != " + tojson(r));
+
+    const coll = db.wildcard_multikey_index;
+    coll.drop();
+
+    // Template document which defines the 'schema' of the documents in the test collection.
+    const templateDoc = {a: [], b: {c: [], d: [{e: 0}]}};
+    const pathList = ["a", "b.c", "b.d.e"];
+
+    // Insert a set of documents into the collection, based on the template document and populated
+    // with an increasing sequence of values. This is to ensure that the range of values present for
+    // each field in the dataset is not entirely homogeneous.
+    for (let i = 0; i < 50; i++) {
+        (function populateDoc(doc, value) {
+            for (let key in doc) {
+                if (typeof doc[key] === "object") {
+                    if (Array.isArray(doc[key])) {
+                        if (typeof doc[key][0] === "object") {
+                            value = populateDoc(doc[key][0], value);
+                        } else {
+                            doc[key] = [++value, ++value];
+                        }
+                    } else {
+                        value = populateDoc(doc[key], value);
+                    }
+                } else {
+                    doc[key] = ++value;
+                }
+            }
+            return value;
+        })(templateDoc, i);
+        assert.commandWorked(coll.insert(templateDoc));
+    }
+
+    // Set of operations which will be applied to each field in the index in turn.
+    const operationList = [
+        {expression: {$gte: 10}},
+        {expression: {$gt: 10}},
+        {expression: {$lt: 40}},
+        {expression: {$lte: 40}},
+        {expression: {$gt: 10, $lt: 40}},
+        {expression: {$eq: 25}},
+        {expression: {$in: [5, 15, 35, 40]}},
+        {expression: {$elemMatch: {$gte: 10, $lte: 40}}},
+    ];
+
+    // Given a keyPattern and (optional) pathProjection, this function builds a $** index on the
+    // collection and then tests each of the match expression in the 'operationList' on each indexed
+    // field in turn. The 'expectedPaths' argument lists the set of paths which we expect to have
+    // been indexed based on the spec; this function will confirm that only the appropriate paths
+    // are present in the $** index.
+    function runWildcardIndexTest(keyPattern, pathProjection, expectedPaths) {
+        assert.commandWorked(coll.dropIndexes());
+        assert.commandWorked(coll.createIndex(
+            keyPattern, pathProjection ? {wildcardProjection: pathProjection} : {}));
+        assert(expectedPaths);
+        // Verify the expected behaviour for every combination of path and operator.
+        for (let op of operationList) {
+            for (let path of pathList) {
+                const query = {[path]: op.expression};
+                assertWildcardQuery(query, expectedPaths.includes(path) ? path : null);
+            }
+        }
+    }
+
+    // Runs a single wildcard query test. If 'expectedPath' is non-null, verifies that there is an
+    // indexed solution that uses the $** index with the given path string. If 'expectedPath' is
+    // null, verifies that no indexed solution was found. If 'explainStats' is non-empty, verifies
+    // that the query's explain output reflects the given stats.
+    function assertWildcardQuery(query, expectedPath, explainStats = {}) {
+        // Explain the query, and determine whether an indexed solution is available.
+        const explainOutput = coll.find(query).explain("executionStats");
+        // If we expect the current path to have been excluded based on the $** keyPattern
+        // or projection, confirm that no indexed solution was found.
+        if (!expectedPath) {
+            assert.gt(getPlanStages(explainOutput.queryPlanner.winningPlan, "COLLSCAN").length, 0);
+            return;
+        }
+        // Verify that the winning plan uses the $** index with the expected path.
+        const ixScans = getPlanStages(explainOutput.queryPlanner.winningPlan, "IXSCAN");
+        assert.eq(ixScans.length, FixtureHelpers.numberOfShardsForCollection(coll));
+        assert.docEq(ixScans[0].keyPattern, {"$_path": 1, [expectedPath]: 1});
+        // Verify that the results obtained from the $** index are identical to a COLLSCAN.
+        assertArrayEq(coll.find(query).toArray(), coll.find(query).hint({$natural: 1}).toArray());
+        // Verify that the explain output reflects the given 'explainStats'.
+        for (let stat in explainStats) {
+            assert.eq(explainStats[stat],
+                      stat.split('.').reduce((obj, i) => obj[i], explainOutput),
+                      explainOutput);
+        }
+    }
+
+    // Test a $** index that indexes the entire document.
+    runWildcardIndexTest({'$**': 1}, null, ['a', 'b.c', 'b.d.e']);
+    // Test a $** index on a single subtree.
+    runWildcardIndexTest({'a.$**': 1}, null, ['a']);
+    runWildcardIndexTest({'b.$**': 1}, null, ['b.c', 'b.d.e']);
+    runWildcardIndexTest({'b.c.$**': 1}, null, ['b.c']);
+    runWildcardIndexTest({'b.d.$**': 1}, null, ['b.d.e']);
+    // Test a $** index which includes a subset of paths.
+    runWildcardIndexTest({'$**': 1}, {a: 1}, ['a']);
+    runWildcardIndexTest({'$**': 1}, {b: 1}, ['b.c', 'b.d.e']);
+    runWildcardIndexTest({'$**': 1}, {'b.d': 1}, ['b.d.e']);
+    runWildcardIndexTest({'$**': 1}, {a: 1, 'b.d': 1}, ['a', 'b.d.e']);
+    // Test a $** index which excludes a subset of paths.
+    runWildcardIndexTest({'$**': 1}, {a: 0}, ['b.c', 'b.d.e']);
+    runWildcardIndexTest({'$**': 1}, {b: 0}, ['a']);
+    runWildcardIndexTest({'$**': 1}, {'b.c': 0}, ['a', 'b.d.e']);
+    runWildcardIndexTest({'$**': 1}, {a: 0, 'b.c': 0}, ['b.d.e']);
+
+    // Sanity check that a few queries which need to be planned specially in the multikey case
+    // return the correct results.
+    coll.drop();
+    assert.commandWorked(coll.createIndex({"$**": 1}));
+    assert.commandWorked(coll.insert({a: [-5, 15]}));
+    assert.eq(1, coll.find({a: {$gt: 0, $lt: 9}}).itcount());
+    assert.eq(1, coll.find({a: {$gt: 0, $lt: 9}}).hint({$natural: 1}).itcount());
+    assert.eq(0, coll.find({a: {$elemMatch: {$gt: 0, $lt: 9}}}).itcount());
+    assert.eq(0, coll.find({a: {$elemMatch: {$gt: 0, $lt: 9}}}).hint({$natural: 1}).itcount());
+
+    assert.commandWorked(coll.insert({b: {c: {d: [{e: {f: -5}}, {e: {f: 15}}]}}}));
+    assert.eq(1, coll.find({"b.c.d.e.f": {$gt: 0, $lt: 9}}).itcount());
+    assert.eq(1, coll.find({"b.c.d.e.f": {$gt: 0, $lt: 9}}).hint({$natural: 1}).itcount());
+    assert.eq(0, coll.find({"b.c.d": {$elemMatch: {"e.f": {$gt: 0, $lt: 9}}}}).itcount());
+    assert.eq(0,
+              coll.find({"b.c.d": {$elemMatch: {"e.f": {$gt: 0, $lt: 9}}}})
+                  .hint({$natural: 1})
+                  .itcount());
+
+    // Fieldname-or-array-index query tests.
+    assert(coll.drop());
+    assert.commandWorked(coll.createIndex({"$**": 1}));
+
+    // Insert some documents that exhibit a mix of numeric fieldnames and array indices.
+    assert.commandWorked(coll.insert({_id: 1, a: [{b: [{c: 1}]}]}));
+    assert.commandWorked(coll.insert({_id: 2, a: [{b: [{c: 0}, {c: 1}]}]}));
+    assert.commandWorked(coll.insert({_id: 3, a: {'0': [{b: {'1': {c: 1}}}, {d: 1}]}}));
+    assert.commandWorked(coll.insert({_id: 4, a: [{b: [{1: {c: 1}}]}]}));
+    assert.commandWorked(
+        coll.insert({_id: 5, a: [{b: [{'1': {c: {'2': {d: [0, 1, 2, 3, {e: 1}]}}}}]}]}));
+
+    /*
+     * Multikey Metadata Keys:
+     * {'': 1, '': 'a'}
+     * {'': 1, '': 'a.0'}
+     * {'': 1, '': 'a.b'}
+     * {'': 1, '': 'a.b.1.c.2.d'}
+     * Keys:
+     * {'': 'a.b.c', '': 1}         // _id: 1, a,b multikey
+     * {'': 'a.b.c', '': 0}         // _id: 2, a,b multikey
+     * {'': 'a.b.c', '': 1}         // _id: 2, a,b multikey
+     * {'': 'a.0.b.1.c', '': 1}     // _id: 3, '0, 1' are fieldnames, a.0 multikey
+     * {'': 'a.0.d', '': 1}         // _id: 3, '0' is fieldname, a.0 multikey
+     * {'': 'a.b.1.c', '': 1}       // _id: 4, '1' is fieldname, a,b multikey
+     * {'': 'a.b.1.c.2.d', '': 0}   // _id: 5, a,b,a.b.1.c.2.d multikey, '1' is fieldname
+     * {'': 'a.b.1.c.2.d', '': 1}   // _id: 5
+     * {'': 'a.b.1.c.2.d', '': 2}   // _id: 5
+     * {'': 'a.b.1.c.2.d', '': 3}   // _id: 5
+     * {'': 'a.b.1.c.2.d.e', '': 1} // _id: 5
+     */
+
+    // Test that a query with multiple numeric path components returns all relevant documents,
+    // whether the numeric path component refers to a fieldname or array index in each doc:
+    //
+    // _id:1 will be captured by the special fieldname-or-array-index bounds 'a.b.c', but will be
+    // filtered out by the INEXACT_FETCH since it has no array index or fieldname 'b.1'.
+    // _id:2 will match both 'a.0' and 'b.1' by array index.
+    // _id:3 will match both 'a.0' and 'b.1' by fieldname.
+    // _id:4 will match 'a.0' by array index and 'b.1' by fieldname.
+    // _id:5 is not captured by the special fieldname-or-array-index bounds.
+    //
+    // We examine the solution's 'nReturned' versus 'totalDocsExamined' to confirm this.
+    // totalDocsExamined: [_id:1, _id:2, _id:3, _id:4], nReturned: [_id:2, _id:3, _id:4]
+    assertWildcardQuery({'a.0.b.1.c': 1},
+                        'a.0.b.1.c',
+                        {'executionStats.nReturned': 3, 'executionStats.totalDocsExamined': 4});
+
+    // Test that we can query a specific field of an array whose fieldname is itself numeric.
+    assertWildcardQuery({'a.0.1.d': 1},
+                        'a.0.1.d',
+                        {'executionStats.nReturned': 1, 'executionStats.totalDocsExamined': 1});
+
+    // Test that we can query a primitive value at a specific array index.
+    assertWildcardQuery({'a.0.b.1.c.2.d.3': 3},
+                        'a.0.b.1.c.2.d.3',
+                        {'executionStats.nReturned': 1, 'executionStats.totalDocsExamined': 1});
+
+    // Test that a $** index can't be used for a query through more than 8 nested array indices.
+    assert.commandWorked(
+        coll.insert({_id: 6, a: [{b: [{c: [{d: [{e: [{f: [{g: [{h: [{i: [1]}]}]}]}]}]}]}]}]}));
+    // We can query up to a depth of 8 arrays via specific indices, but not through 9 or more.
+    assertWildcardQuery({'a.0.b.0.c.0.d.0.e.0.f.0.g.0.h.0.i': 1},
+                        'a.0.b.0.c.0.d.0.e.0.f.0.g.0.h.0.i');
+    assertWildcardQuery({'a.0.b.0.c.0.d.0.e.0.f.0.g.0.h.0.i.0': 1}, null);
+
+    // Test that fieldname-or-array-index queries do not inappropriately trim predicates; that is,
+    // all predicates on the field are added to a FETCH filter above the IXSCAN.
+    assert(coll.drop());
+    assert.commandWorked(coll.createIndex({"$**": 1}));
+
+    assert.commandWorked(coll.insert({_id: 1, a: [0, 1, 2]}));
+    assert.commandWorked(coll.insert({_id: 2, a: [1, 2, 3]}));
+    assert.commandWorked(coll.insert({_id: 3, a: [2, 3, 4], b: [5, 6, 7]}));
+    assert.commandWorked(coll.insert({_id: 4, a: [3, 4, 5], b: [6, 7, 8], c: {'0': 9}}));
+    assert.commandWorked(coll.insert({_id: 5, a: [4, 5, 6], b: [7, 8, 9], c: {'0': 10}}));
+    assert.commandWorked(coll.insert({_id: 6, a: [5, 6, 7], b: [8, 9, 10], c: {'0': 11}}));
+
+    assertWildcardQuery({"a.0": {$gt: 1, $lt: 4}}, 'a.0', {'executionStats.nReturned': 2});
+    assertWildcardQuery({"a.1": {$gte: 1, $lte: 4}}, 'a.1', {'executionStats.nReturned': 4});
+    assertWildcardQuery({"b.2": {$in: [5, 9]}}, 'b.2', {'executionStats.nReturned': 1});
+    assertWildcardQuery({"c.0": {$in: [10, 11]}}, 'c.0', {'executionStats.nReturned': 2});
+
+    // Test that the $** index doesn't trim predicates when planning across multiple nested $and/$or
+    // expressions on various fieldname-or-array-index paths.
+    const trimTestQuery = {
+        $or: [
+            {"a.0": {$gte: 0, $lt: 3}, "a.1": {$in: [2, 3, 4]}},
+            {"b.1": {$gt: 6, $lte: 9}, "c.0": {$gt: 9, $lt: 12}}
+        ]
+    };
+    const trimTestExplain = coll.find(trimTestQuery).explain("executionStats");
+    // Verify that the expected number of documents were matched, and the $** index was used.
+    // Matched documents: [_id:2, _id:3, _id:5, _id:6]
+    assert.eq(trimTestExplain.executionStats.nReturned, 4);
+    const trimTestIxScans = getPlanStages(trimTestExplain.queryPlanner.winningPlan, "IXSCAN");
+    for (let ixScan of trimTestIxScans) {
+        assert.eq(ixScan.keyPattern["$_path"], 1);
+    }
+    // Finally, confirm that a collection scan produces the same results.
+    assertArrayEq(coll.find(trimTestQuery).toArray(),
+                  coll.find(trimTestQuery).hint({$natural: 1}).toArray());
+})();
author	Bernard Gorman <bernard.gorman@gmail.com>	2018-10-06 11:28:49 +0100
committer	Bernard Gorman <bernard.gorman@gmail.com>	2018-10-10 16:03:27 +0100
commit	1b51df8420ef3c57afb2d753d7188814e90eee51 (patch)
tree	cdb3247e7ea95521c5b4ac1330341a13b9713622 /jstests/core/wildcard_index_multikey.js
parent	549616025c9d471767bc7ede342a9919cb3e6f94 (diff)
download	mongo-1b51df8420ef3c57afb2d753d7188814e90eee51.tar.gz