author    David Percy <david.percy@mongodb.com>  2022-09-26 15:16:37 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com>  2022-10-26 19:48:19 +0000
commit    136d831a284a5aa7277839c390588ff8f20ab179 (patch)
tree      1a7b6fcf32f003fc614fa2de08f65431e3875d6d /jstests
parent    73fa6daf31440329b55d0971998e8c0359f08392 (diff)
download  mongo-136d831a284a5aa7277839c390588ff8f20ab179.tar.gz
SERVER-68596 [CQF] Convert dotted $elemMatch to SargableNode
The main change is to allow paths like Traverse (ComposeM ...) to be Sargable. We add a
Traverse to each conjunct as if the original path were ComposeM (Traverse ...) (Traverse ...).
This is an over-approximation, so we mark it perf-only and keep the original predicate.

A separate but related improvement: we now make use of more precise index metadata to remove
Traverse nodes. An index on a dotted path such as {'a.b': 1} may have metadata telling us that
'b' is never an array, even if 'a' is multikey.

Also, slightly improve lowerPartialSchemaRequirement's ability to turn ranges back into
PathArr / PathObj. This rewrite belongs in the PartialSchemaReqLowerTransport so that we
recognize these intervals no matter where they occur in the BoolExpr.
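
An illustrative shell sketch of the query shape this change targets (the collection name is
hypothetical and a CQF-enabled server is assumed; this is not part of the commit):

    // A conjunction under a dotted $elemMatch; illustrative only.
    const example = db.cqf_dotted_elemmatch_example;  // hypothetical collection
    example.drop();
    assert.commandWorked(example.createIndex({'a.b': 1}));
    assert.commandWorked(example.insert([{a: {b: [1, 3]}}, {a: {b: [2]}}, {a: {b: 2}}]));
    // The predicate's path is roughly Get "a" Traverse Get "b" Traverse (ComposeM [$gt 1] [$lt 3]).
    // It is over-approximated as ComposeM (Traverse ...) (Traverse ...) so the index on 'a.b' can
    // supply bounds, while the original predicate is kept as a residual (perf-only) filter.
    const explain = example.explain().aggregate([{$match: {'a.b': {$elemMatch: {$gt: 1, $lt: 3}}}}]);
    printjson(explain.queryPlanner.winningPlan.optimizerPlan);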
Diffstat (limited to 'jstests')
-rw-r--r--  jstests/cqf/elemmatch_bounds.js                           | 216
-rw-r--r--  jstests/libs/optimizer_utils.js                           | 125
-rw-r--r--  jstests/query_golden/expected_output/non_multikey_paths   |  51
-rw-r--r--  jstests/query_golden/non_multikey_paths.js                |  59
4 files changed, 445 insertions, 6 deletions
diff --git a/jstests/cqf/elemmatch_bounds.js b/jstests/cqf/elemmatch_bounds.js
new file mode 100644
index 00000000000..45665f2197f
--- /dev/null
+++ b/jstests/cqf/elemmatch_bounds.js
@@ -0,0 +1,216 @@
+/**
+ * Test that $elemMatch can generate good index bounds. In particular, test different
+ * ways that $elemMatch can lead to a conjunction inside a traverse.
+ *
+ * @tags: [
+ * # Includes plan skeleton from explain in the output.
+ * requires_cqf,
+ * ]
+ */
+(function() {
+"use strict";
+
+load("jstests/libs/optimizer_utils.js"); // For getPlanSkeleton.
+
+const coll = db.cqf_elemmatch_bounds;
+coll.drop();
+let id = 0;
+let docs = [];
+const numDuplicates = 100;
+for (let i = 0; i < numDuplicates; ++i) {
+ docs = docs.concat([
+ // Each 'a' is an object, not an array.
+ // Each 'c' is the same as 'a.b', which is a mixture of scalars/arrays.
+ {_id: id++, a: {b: [1, 2, 3]}, c: [1, 2, 3]},
+ {_id: id++, a: {b: 1}, c: 1},
+ {_id: id++, a: {b: 2}, c: 2},
+ {_id: id++, a: {b: 3}, c: 3},
+ {_id: id++, a: {b: [1]}, c: [1]},
+ {_id: id++, a: {b: [2]}, c: [2]},
+ {_id: id++, a: {b: [3]}, c: [3]},
+ // [1, 3] is interesting because it satisfies {$gt: 1} and {$lt: 3},
+ // but no element satisfies both.
+ {_id: id++, a: {b: [1, 3]}, c: [1, 3]},
+
+ // Examples with nested arrays: an array element is immediately another array.
+ {_id: id++, c: [[1, 2, 3]]},
+ {_id: id++, c: [[1, 3]]},
+ {_id: id++, c: [[1], [2], [3]]},
+ {_id: id++, c: [[1], [3]]},
+ {_id: id++, c: [[1]]},
+ {_id: id++, c: [[2]]},
+ {_id: id++, c: [[3]]},
+ ]);
+}
+for (let i = 0; i < numDuplicates; ++i) {
+    // Generate more docs that satisfy 'Get c Traverse Traverse Eq 2' but not 'Get c Traverse PathArr'.
+ for (let j = 0; j < 10; ++j)
+ docs = docs.concat([
+ {_id: id++, c: 2},
+ ]);
+}
+for (let i = 0; i < numDuplicates; ++i) {
+ // Generate non-matching docs to discourage collection scan.
+ assert.commandWorked(coll.insert(Array.from({length: 100}, () => ({_id: id++}))));
+}
+assert.commandWorked(coll.insert(docs));
+
+assert.commandWorked(coll.createIndex({'a.b': 1}));
+assert.commandWorked(coll.createIndex({'c': 1}));
+
+// Run the pipeline and assert it uses the expected plan.
+// Return the results with the _id field excluded.
+function run({pipeline, plan: expectedPlan}) {
+ const explain = coll.explain().aggregate(pipeline);
+ const plan = getPlanSkeleton(explain.queryPlanner.winningPlan.optimizerPlan, {
+ extraKeepKeys: ['indexDefName', 'interval', 'filter'],
+ });
+ assert.eq(plan, expectedPlan, plan);
+
+ let result = coll.aggregate(pipeline).toArray();
+ for (let doc of result) {
+ delete doc._id;
+ }
+ return result;
+}
+// Assert that 'doc' occurs exactly 'num' times in 'result'.
+function assertCount(result, num, doc) {
+ let matching = result.filter(d => friendlyEqual(d, doc));
+
+ let msg = `Expected ${num} occurrences of ${tojsononeline(doc)} but got ${matching.length}.`;
+ if (matching.length > 0) {
+ msg += ' For example:\n ' + matching.slice(0, 5).map(d => tojsononeline(d)).join("\n ");
+ }
+ assert.eq(matching.length, num, msg);
+}
+
+let result;
+
+// Test multikey non-$elemMatch $eq predicate includes non-arrays.
+result = run({
+ pipeline: [{$match: {'a.b': {$eq: 2}}}],
+ plan: {
+ "nodeType": "Root",
+ "child": {
+ "nodeType": "BinaryJoin",
+ "leftChild": {"nodeType": "IndexScan", "indexDefName": "a.b_1", "interval": "[ 2, 2 ]"},
+ "rightChild": {"nodeType": "LimitSkip", "child": {"nodeType": "Seek"}}
+ }
+ },
+});
+assertCount(result, numDuplicates, {a: {b: [1, 2, 3]}, c: [1, 2, 3]});
+assertCount(result, numDuplicates, {a: {b: [2]}, c: [2]});
+assertCount(result, numDuplicates, {a: {b: 2}, c: 2});
+assert.eq(result.length, numDuplicates * 3);
+
+// Test $elemMatch only matches arrays (unlike the previous query).
+result = run({
+ pipeline: [{$match: {'a.b': {$elemMatch: {$eq: 2}}}}],
+ plan: {
+ "nodeType": "Root",
+ "child": {
+ "nodeType": "Filter",
+ "filter":
+ "fillEmpty(traverseF(getField(scan_0, \"a\"), (inputGet_6 -> let inputComposeM_1 = getField(inputGet_6, \"b\") in if fillEmpty(traverseF(inputComposeM_1, (valCmp_2 -> (valCmp_2 == 2)), false), false) then isArray(inputComposeM_1) else false), false), false)",
+ "child": {
+ "nodeType": "BinaryJoin",
+ "leftChild":
+ {"nodeType": "IndexScan", "indexDefName": "a.b_1", "interval": "[ 2, 2 ]"},
+ "rightChild": {"nodeType": "LimitSkip", "child": {"nodeType": "Seek"}}
+ }
+ }
+ },
+});
+assertCount(result, numDuplicates, {a: {b: [1, 2, 3]}, c: [1, 2, 3]});
+assertCount(result, numDuplicates, {a: {b: [2]}, c: [2]});
+assertCount(result, 0, {a: {b: 2}, c: 2}); // Expect zero non-arrays.
+assert.eq(result.length, numDuplicates * 2);
+
+// Test conjunction inside a top-level elemMatch.
+result = run({
+ pipeline: [{$match: {c: {$elemMatch: {$gt: 1, $lt: 3}}}}],
+ plan: {
+ "nodeType": "Root",
+ "child": {
+ "nodeType": "BinaryJoin",
+ "leftChild": {
+ "nodeType": "Unique",
+ "child": {"nodeType": "IndexScan", "indexDefName": "c_1", "interval": "( 1, 3 )"}
+ },
+ "rightChild": {
+ "nodeType": "Filter",
+ "filter": "fillEmpty(isArray(evalTemp_4), false)",
+ "child": {"nodeType": "LimitSkip", "child": {"nodeType": "Seek"}}
+ }
+ }
+ },
+});
+assertCount(result, numDuplicates, {a: {b: [1, 2, 3]}, c: [1, 2, 3]});
+assertCount(result, numDuplicates, {a: {b: [2]}, c: [2]});
+assert.eq(result.length, numDuplicates * 2);
+
+// Test conjunction inside a dotted elemMatch.
+result = run({
+ pipeline: [{$match: {'a.b': {$elemMatch: {$gt: 1, $lt: 3}}}}],
+ plan: {
+ "nodeType": "Root",
+ "child": {
+ "nodeType": "Filter",
+ "filter":
+ "fillEmpty(traverseF(getField(scan_0, \"a\"), (inputGet_6 -> let inputComposeM_8 = getField(inputGet_6, \"b\") in if fillEmpty(traverseF(inputComposeM_8, (inputComposeM_7 -> if fillEmpty(if fillEmpty(((inputComposeM_7 <=> 3) < NumberLong(0)), false) then ((inputComposeM_7 <=> NaN) >= NumberLong(0)) else false, false) then if fillEmpty(((inputComposeM_7 <=> 1) > NumberLong(0)), false) then ((inputComposeM_7 <=> \"\") < NumberLong(0)) else false else false), false), false) then isArray(inputComposeM_8) else false), false), false)",
+ "child": {
+ "nodeType": "BinaryJoin",
+ "leftChild": {
+ "nodeType": "Unique",
+ "child":
+ {"nodeType": "IndexScan", "indexDefName": "a.b_1", "interval": "( 1, 3 )"}
+ },
+ "rightChild": {"nodeType": "LimitSkip", "child": {"nodeType": "Seek"}}
+ }
+ }
+ },
+});
+assertCount(result, numDuplicates, {a: {b: [1, 2, 3]}, c: [1, 2, 3]});
+assertCount(result, numDuplicates, {a: {b: [2]}, c: [2]});
+assert.eq(result.length, numDuplicates * 2);
+
+// Nested $elemMatch matches nested arrays, but the bounds only handle PathArr.
+// Multikey indexes don't recursively unwind nested arrays, so the scalars inside a nested array
+// don't get separate index entries, which means we can't generate a tight interval like [2, 2].
+result = run({
+ pipeline: [{$match: {c: {$elemMatch: {$elemMatch: {$eq: 2}}}}}],
+ plan: {
+ "nodeType": "Root",
+ "child": {
+ "nodeType": "Filter",
+ "filter":
+ "fillEmpty(traverseF(getField(scan_0, \"c\"), (inputComposeM_1 -> if fillEmpty(traverseF(inputComposeM_1, (valCmp_3 -> (valCmp_3 == 2)), false), false) then isArray(inputComposeM_1) else false), false), false)",
+ "child": {
+ "nodeType": "BinaryJoin",
+ "leftChild": {
+ "nodeType": "Unique",
+ "child": {
+ "nodeType": "Filter",
+ "filter":
+ "fillEmpty(traverseF(evalTemp_6, (valCmp_2 -> (valCmp_2 == 2)), false), false)",
+ "child": {
+ "nodeType": "IndexScan",
+ "indexDefName": "c_1",
+ "interval": "[ [ ], BinData(0,\"\") )"
+ }
+ }
+ },
+ "rightChild": {
+ "nodeType": "Filter",
+ "filter": "fillEmpty(isArray(evalTemp_2), false)",
+ "child": {"nodeType": "LimitSkip", "child": {"nodeType": "Seek"}}
+ }
+ }
+ }
+ },
+});
+assertCount(result, numDuplicates, {c: [[1, 2, 3]]});
+assertCount(result, numDuplicates, {c: [[1], [2], [3]]});
+assertCount(result, numDuplicates, {c: [[2]]});
+assert.eq(result.length, numDuplicates * 3);
+})();
\ No newline at end of file
diff --git a/jstests/libs/optimizer_utils.js b/jstests/libs/optimizer_utils.js
index 22483624ad9..bd66520a875 100644
--- a/jstests/libs/optimizer_utils.js
+++ b/jstests/libs/optimizer_utils.js
@@ -53,7 +53,9 @@ function leftmostLeafStage(node) {
/**
* Get a very simplified version of a plan, which only includes nodeType and nesting structure.
*/
-function getPlanSkeleton(node, recursiveKeepKeys = [], addToKeepKeys = []) {
+function getPlanSkeleton(node, options = {}) {
+ const {extraKeepKeys = [], keepKeysDeep = []} = options;
+
const keepKeys = [
'nodeType',
@@ -64,26 +66,137 @@ function getPlanSkeleton(node, recursiveKeepKeys = [], addToKeepKeys = []) {
'children',
'leftChild',
'rightChild',
- ].concat(addToKeepKeys);
+ ].concat(extraKeepKeys);
if (Array.isArray(node)) {
- return node.map(n => getPlanSkeleton(n));
+ return node.map(n => getPlanSkeleton(n, options));
} else if (node === null || typeof node !== 'object') {
return node;
} else {
return Object.fromEntries(
Object.keys(node)
- .filter(key => (keepKeys.includes(key) || recursiveKeepKeys.includes(key)))
+ .filter(key => (keepKeys.includes(key) || keepKeysDeep.includes(key)))
.map(key => {
- if (recursiveKeepKeys.includes(key)) {
+ if (keepKeysDeep.includes(key)) {
return [key, node[key]];
+ } else if (key === 'interval') {
+ return [key, prettyInterval(node[key])];
+ } else if (key === 'filter') {
+ return [key, prettyExpression(node[key])];
} else {
- return [key, getPlanSkeleton(node[key], recursiveKeepKeys, addToKeepKeys)];
+ return [key, getPlanSkeleton(node[key], options)];
}
}));
}
}
+function prettyInterval(compoundInterval) {
+ // Takes an array of intervals, each one applying to one component of a compound index key.
+ // Try to format it as a string.
+ // If either bound is not Constant, return the original JSON unchanged.
+ let result = '';
+ for (const {lowBound, highBound} of compoundInterval) {
+ if (lowBound.bound.nodeType !== 'Const' || highBound.bound.nodeType !== 'Const') {
+ return compoundInterval;
+ }
+
+ const lowInclusive = lowBound.inclusive;
+ const highInclusive = highBound.inclusive;
+ assert.eq(typeof lowInclusive, 'boolean');
+ assert.eq(typeof highInclusive, 'boolean');
+
+ result += ' ';
+ result += lowInclusive ? '[ ' : '( ';
+ result += tojson(lowBound.bound.value);
+ result += ', ';
+ result += tojson(highBound.bound.value);
+ result += highInclusive ? ' ]' : ' )';
+ }
+ return result.trim();
+}
+
+function prettyExpression(expr) {
+ switch (expr.nodeType) {
+ case 'Variable':
+ return expr.name;
+ case 'Const':
+ return tojson(expr.value);
+ case 'FunctionCall':
+ return `${expr.name}(${expr.arguments.map(a => prettyExpression(a)).join(', ')})`;
+ case 'If': {
+ const if_ = prettyExpression(expr.condition);
+ const then_ = prettyExpression(expr.then);
+ const else_ = prettyExpression(expr.else);
+ return `if ${if_} then ${then_} else ${else_}`;
+ }
+ case 'Let': {
+ const x = expr.variable;
+ const b = prettyExpression(expr.bind);
+ const e = prettyExpression(expr.expression);
+ return `let ${x} = ${b} in ${e}`;
+ }
+ case 'LambdaAbstraction': {
+ return `(${expr.variable} -> ${prettyExpression(expr.input)})`;
+ }
+ case 'BinaryOp': {
+ const left = prettyExpression(expr.left);
+ const right = prettyExpression(expr.right);
+ const op = prettyOp(expr.op);
+ return `(${left} ${op} ${right})`;
+ }
+ default:
+ return tojson(expr);
+ }
+}
+
+function prettyOp(op) {
+ // See src/mongo/db/query/optimizer/syntax/syntax.h, PATHSYNTAX_OPNAMES.
+ switch (op) {
+ /* comparison operations */
+ case 'Eq':
+ return '==';
+ case 'EqMember':
+ return 'in';
+ case 'Neq':
+ return '!=';
+ case 'Gt':
+ return '>';
+ case 'Gte':
+ return '>=';
+ case 'Lt':
+ return '<';
+ case 'Lte':
+ return '<=';
+ case 'Cmp3w':
+ return '<=>';
+
+ /* binary operations */
+ case 'Add':
+ return '+';
+ case 'Sub':
+ return '-';
+ case 'Mult':
+ return '*';
+ case 'Div':
+ return '/';
+
+ /* unary operations */
+ case 'Neg':
+ return '-';
+
+ /* logical operations */
+ case 'And':
+ return 'and';
+ case 'Or':
+ return 'or';
+ case 'Not':
+ return 'not';
+
+ default:
+ return op;
+ }
+}
+
function navigateToPath(doc, path) {
let result;
let field;
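
A usage sketch for the updated helper (hypothetical collection; assumes a CQF explain output),
showing the options-object call shape used by the elemmatch_bounds.js test above:

    load("jstests/libs/optimizer_utils.js");  // For getPlanSkeleton.
    const explain = db.example_coll.explain().aggregate([{$match: {'a.b': {$eq: 2}}}]);
    const skeleton = getPlanSkeleton(explain.queryPlanner.winningPlan.optimizerPlan, {
        extraKeepKeys: ['indexDefName', 'interval', 'filter'],  // kept at every level; intervals/filters pretty-printed
        keepKeysDeep: [],  // keys listed here are kept verbatim, without reducing the subtree to a skeleton
    });
    printjson(skeleton);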
diff --git a/jstests/query_golden/expected_output/non_multikey_paths b/jstests/query_golden/expected_output/non_multikey_paths
new file mode 100644
index 00000000000..2d6fa83adb8
--- /dev/null
+++ b/jstests/query_golden/expected_output/non_multikey_paths
@@ -0,0 +1,51 @@
+
+
+[jsTest] ----
+[jsTest] Query: [ { "$match" : { "one.one.one.one" : 2 } } ]
+[jsTest] ----
+
+Leaf stage: {
+ "nodeType" : "IndexScan",
+ "indexDefName" : "one.one.one.one_1",
+ "interval" : "[ 2, 2 ]"
+}
+
+[jsTest] ----
+[jsTest] Query: [ { "$match" : { "one.one.one.many" : 2 } } ]
+[jsTest] ----
+
+Leaf stage: {
+ "nodeType" : "IndexScan",
+ "indexDefName" : "one.one.one.many_1",
+ "interval" : "[ 2, 2 ]"
+}
+
+[jsTest] ----
+[jsTest] Query: [ { "$match" : { "many.one.one.one" : 2 } } ]
+[jsTest] ----
+
+Leaf stage: {
+ "nodeType" : "IndexScan",
+ "indexDefName" : "many.one.one.one_1",
+ "interval" : "[ 2, 2 ]"
+}
+
+[jsTest] ----
+[jsTest] Query: [ { "$match" : { "many.one.one.many" : 2 } } ]
+[jsTest] ----
+
+Leaf stage: {
+ "nodeType" : "IndexScan",
+ "indexDefName" : "many.one.one.many_1",
+ "interval" : "[ 2, 2 ]"
+}
+
+[jsTest] ----
+[jsTest] Query: [ { "$match" : { "many.many.many.many" : 2 } } ]
+[jsTest] ----
+
+Leaf stage: {
+ "nodeType" : "IndexScan",
+ "indexDefName" : "many.many.many.many_1",
+ "interval" : "[ 2, 2 ]"
+}
\ No newline at end of file
diff --git a/jstests/query_golden/non_multikey_paths.js b/jstests/query_golden/non_multikey_paths.js
new file mode 100644
index 00000000000..f0abdf78249
--- /dev/null
+++ b/jstests/query_golden/non_multikey_paths.js
@@ -0,0 +1,59 @@
+/**
+ * Test indexes with long paths, where some components are multikey and some are not.
+ * Make sure queries can use these indexes, with good bounds.
+ *
+ * @tags: [
+ * # Checks explain.
+ * requires_cqf,
+ * ]
+ */
+(function() {
+"use strict";
+
+load("jstests/libs/optimizer_utils.js"); // For leftmostLeafStage
+
+const coll = db.cqf_non_multikey_paths;
+coll.drop();
+assert.commandWorked(coll.createIndex({'one.one.one.one': 1}));
+assert.commandWorked(coll.createIndex({'one.one.one.many': 1}));
+assert.commandWorked(coll.createIndex({'many.one.one.one': 1}));
+assert.commandWorked(coll.createIndex({'many.one.one.many': 1}));
+assert.commandWorked(coll.createIndex({'many.many.many.many': 1}));
+let i = 0;
+assert.commandWorked(coll.insert([
+ {_id: ++i, one: {one: {one: {one: 2}}}},
+ {_id: ++i, one: {one: {one: {many: [1, 2, 3]}}}},
+ {
+ _id: ++i,
+ many: [
+ {one: {one: {one: 1}}},
+ {one: {one: {one: 2}}},
+ {one: {one: {one: 3}}},
+ ]
+ },
+ {
+ _id: ++i,
+ many: [
+ {one: {one: {many: [1, 2, 3]}}},
+ {one: {one: {many: [4, 5]}}},
+ ],
+ },
+ {_id: ++i, many: [{many: [{many: [{many: [1, 2, 3]}]}]}]},
+]));
+// Generate enough documents for the index to be preferable.
+assert.commandWorked(coll.insert(Array.from({length: 100}, (_, i) => ({_id: i + 1000}))));
+
+function run(pipeline) {
+ jsTestLog(`Query: ${tojsononeline(pipeline)}`);
+ const explain = coll.explain().aggregate(pipeline);
+ print(`Leaf stage: `);
+ const {nodeType, indexDefName, interval} = leftmostLeafStage(explain);
+ printjson({nodeType, indexDefName, interval: prettyInterval(interval)});
+}
+
+run([{$match: {'one.one.one.one': 2}}]);
+run([{$match: {'one.one.one.many': 2}}]);
+run([{$match: {'many.one.one.one': 2}}]);
+run([{$match: {'many.one.one.many': 2}}]);
+run([{$match: {'many.many.many.many': 2}}]);
+})();
\ No newline at end of file