diff options
26 files changed, 1402 insertions, 326 deletions
diff --git a/buildscripts/resmokeconfig/suites/multi_shard_local_read_write_multi_stmt_txn_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/multi_shard_local_read_write_multi_stmt_txn_jscore_passthrough.yml index 84da723937d..8a5ef97c70c 100644 --- a/buildscripts/resmokeconfig/suites/multi_shard_local_read_write_multi_stmt_txn_jscore_passthrough.yml +++ b/buildscripts/resmokeconfig/suites/multi_shard_local_read_write_multi_stmt_txn_jscore_passthrough.yml @@ -178,6 +178,7 @@ selector: - jstests/core/update_dbref.js - jstests/core/updatel.js - jstests/core/write_result.js + - jstests/core/positional_projection.js # Trick for bypassing mongo shell validation in the test doesn't work because txn_override # retry logic will hit the shell validation. diff --git a/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_jscore_passthrough.yml index fe38c374f14..9d6a5a2d31f 100644 --- a/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_jscore_passthrough.yml +++ b/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_jscore_passthrough.yml @@ -193,6 +193,7 @@ selector: - jstests/core/update_dbref.js - jstests/core/updatel.js - jstests/core/write_result.js + - jstests/core/positional_projection.js # Trick for bypassing mongo shell validation in the test doesn't work because txn_override # retry logic will hit the shell validation. diff --git a/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_kill_primary_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_kill_primary_jscore_passthrough.yml index 4bca862d979..c7773fc20a6 100644 --- a/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_kill_primary_jscore_passthrough.yml +++ b/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_kill_primary_jscore_passthrough.yml @@ -189,6 +189,7 @@ selector: - jstests/core/update_dbref.js - jstests/core/updatel.js - jstests/core/write_result.js + - jstests/core/positional_projection.js # Trick for bypassing mongo shell validation in the test doesn't work because txn_override # retry logic will hit the shell validation. diff --git a/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_stepdown_primary_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_stepdown_primary_jscore_passthrough.yml index 58834b14157..b821a28e165 100644 --- a/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_stepdown_primary_jscore_passthrough.yml +++ b/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_stepdown_primary_jscore_passthrough.yml @@ -189,6 +189,7 @@ selector: - jstests/core/update_dbref.js - jstests/core/updatel.js - jstests/core/write_result.js + - jstests/core/positional_projection.js # Trick for bypassing mongo shell validation in the test doesn't work because txn_override # retry logic will hit the shell validation. diff --git a/buildscripts/resmokeconfig/suites/multi_stmt_txn_jscore_passthrough_with_migration.yml b/buildscripts/resmokeconfig/suites/multi_stmt_txn_jscore_passthrough_with_migration.yml index 8b3b0da75d2..1509b9c42e3 100644 --- a/buildscripts/resmokeconfig/suites/multi_stmt_txn_jscore_passthrough_with_migration.yml +++ b/buildscripts/resmokeconfig/suites/multi_stmt_txn_jscore_passthrough_with_migration.yml @@ -201,6 +201,7 @@ selector: - jstests/core/update_dbref.js - jstests/core/updatel.js - jstests/core/write_result.js + - jstests/core/positional_projection.js # Trick for bypassing mongo shell validation in the test doesn't work because txn_override # retry logic will hit the shell validation. diff --git a/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_jscore_passthrough.yml index 22b4d8a55d1..fee526f5b80 100644 --- a/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_jscore_passthrough.yml +++ b/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_jscore_passthrough.yml @@ -128,6 +128,7 @@ selector: - jstests/core/update_dbref.js - jstests/core/updatel.js - jstests/core/write_result.js + - jstests/core/positional_projection.js # Trick for bypassing mongo shell validation in the test doesn't work because txn_override # retry logic will hit the shell validation. diff --git a/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_kill_primary_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_kill_primary_jscore_passthrough.yml index fb0535caf22..6a5467de08c 100644 --- a/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_kill_primary_jscore_passthrough.yml +++ b/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_kill_primary_jscore_passthrough.yml @@ -121,6 +121,7 @@ selector: - jstests/core/update_dbref.js - jstests/core/updatel.js - jstests/core/write_result.js + - jstests/core/positional_projection.js # Trick for bypassing mongo shell validation in the test doesn't work because txn_override # retry logic will hit the shell validation. diff --git a/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_stepdown_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_stepdown_jscore_passthrough.yml index 34f856bfe8d..3c3ca190b55 100644 --- a/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_stepdown_jscore_passthrough.yml +++ b/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_stepdown_jscore_passthrough.yml @@ -121,6 +121,7 @@ selector: - jstests/core/update_dbref.js - jstests/core/updatel.js - jstests/core/write_result.js + - jstests/core/positional_projection.js # Trick for bypassing mongo shell validation in the test doesn't work because txn_override # retry logic will hit the shell validation. diff --git a/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_terminate_primary_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_terminate_primary_jscore_passthrough.yml index 815a7ccd853..5e2b1f08091 100644 --- a/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_terminate_primary_jscore_passthrough.yml +++ b/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_terminate_primary_jscore_passthrough.yml @@ -118,6 +118,7 @@ selector: - jstests/core/update_dbref.js - jstests/core/updatel.js - jstests/core/write_result.js + - jstests/core/positional_projection.js # Trick for bypassing mongo shell validation in the test doesn't work because txn_override # retry logic will hit the shell validation. diff --git a/buildscripts/resmokeconfig/suites/sharded_multi_stmt_txn_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/sharded_multi_stmt_txn_jscore_passthrough.yml index 66ad99b80f5..0d63e7d2932 100644 --- a/buildscripts/resmokeconfig/suites/sharded_multi_stmt_txn_jscore_passthrough.yml +++ b/buildscripts/resmokeconfig/suites/sharded_multi_stmt_txn_jscore_passthrough.yml @@ -159,6 +159,7 @@ selector: - jstests/core/update_dbref.js - jstests/core/updatel.js - jstests/core/write_result.js + - jstests/core/positional_projection.js # Trick for bypassing mongo shell validation in the test doesn't work because txn_override # retry logic will hit the shell validation. diff --git a/jstests/core/dbref3.js b/jstests/core/dbref3.js index 41e29b36c94..6a91a56da2a 100644 --- a/jstests/core/dbref3.js +++ b/jstests/core/dbref3.js @@ -1,9 +1,6 @@ // Make sure we only make a DBRef object for objects where the first field is a string named $ref // and the second field is $id with any type. Only the first two fields matter for deciding if it // is a DBRef. See http://docs.mongodb.org/manual/reference/database-references/#dbrefs. -// @tags: [ -// sbe_incompatible, -// ] var t = db.dbref3; diff --git a/jstests/core/positional_projection.js b/jstests/core/positional_projection.js index 04c1c28d16d..56308338af0 100644 --- a/jstests/core/positional_projection.js +++ b/jstests/core/positional_projection.js @@ -1,23 +1,22 @@ // Tests for $ projection operator. // @tags: [ // requires_getmore, -// sbe_incompatible, // requires_fcv_49 // ] (function() { "use strict"; +load("jstests/libs/sbe_assert_error_override.js"); // Override error-code-checking APIs. + const coll = db.positional_projection; coll.drop(); -function testSingleDocument(query, projection, input, expectedOutput, dropCollection = true) { +function testSingleDocument(query, projection, input, expectedOutput) { assert.commandWorked(coll.insert(input)); const actualOutput = coll.findOne(query, projection); delete actualOutput._id; assert.eq(actualOutput, expectedOutput); - if (dropCollection) { - assert(coll.drop()); - } + assert(coll.drop()); } // Single object match. @@ -47,12 +46,6 @@ testSingleDocument({'y.dd': 5}, }, {y: [{aa: 1, dd: 5}]}); -// $elemMatch with positional projection. -testSingleDocument({x: {$elemMatch: {a: 1, b: 2}}}, - {'x.$': 1}, - {x: [{a: 1, b: 2}, {a: 2, c: 3}, {a: 1, d: 5}]}, - {x: [{a: 1, b: 2}]}); - // Nested paths. testSingleDocument( {'x.y.a': 1}, {'x.y.$': 1}, {x: {y: [{a: 1, b: 1}, {a: 1, b: 2}]}}, {x: {y: [{a: 1, b: 1}]}}); @@ -66,6 +59,100 @@ testSingleDocument({}, }, {}); +testSingleDocument({'a.b': 1}, {'g.$': 1}, {a: {b: 1}}, {}); + +// Usually, if query predicate does not find matching array element in the input document, +// positional projection throws. But if positional projection cannot find specified path, query +// should not throw. +testSingleDocument({x: 1}, {'a.$': 1}, {x: 1}, {}); + +// Test that if positional projection operator cannot find specified path, existing part of it is +// returned. +testSingleDocument({x: {$gt: 0}}, + {'a.b.c.$': 1}, + {x: [-1, 1, 2], a: {b: {d: [1, 2, 3], f: 456}, e: 123}}, + {a: {b: {}}}); + +// Test that only relevant part of specified path is extracted even in case of multiple nested +// arrays. +testSingleDocument( + {x: 1}, {'a.b.c.$': 1}, {x: [1], a: [{b: [[[{c: 1, d: 2}]]]}]}, {a: [{b: [[[{c: 1}]]]}]}); + +// Test that if path specified for positional projection operator does not contain array, it is +// returned unchanged. +testSingleDocument({x: {$gt: 0}}, {'a.b.c.$': 1}, {x: [-1, 1, 2], a: {b: {c: {d: {e: 1}}}}}, { + a: {b: {c: {d: {e: 1}}}} +}); + +testSingleDocument( + {x: {$gt: 0}}, {'a.b.c.$': 1}, {x: [-1, 1, 2], a: {b: {c: 123}}}, {a: {b: {c: 123}}}); + +// Test that positional projection is applied to the first array on the dotted path. +// NOTE: Even though the positional projection is specified for the path 'a.b.c', it is applied to +// the first array it meets along this path. For instance, if value on a path 'a' or 'a.b' is an +// array, positional projection operator is applied only for this array. +testSingleDocument( + {'x.y.z': 1}, + {'a.b.c.$': 1}, + {x: {y: {z: [0, 1, 2]}}, a: [{b: {c: [1, 2, 3]}}, {b: {c: [4, 5, 6]}}, {b: {c: [7, 8, 9]}}]}, + {a: [{b: {c: [4, 5, 6]}}]}); + +testSingleDocument( + {'x.y.z': 1}, + {'a.b.c.$': 1}, + {x: {y: {z: [0, 1, 2]}}, a: {b: [{c: [1, 2, 3]}, {c: [4, 5, 6]}, {c: [7, 8, 9]}]}}, + {a: {b: [{c: [4, 5, 6]}]}}); + +testSingleDocument({'x.y.z': 1}, {'a.b.c.$': 1}, {x: {y: {z: [0, 1, 2]}}, a: {b: {c: [1, 2, 3]}}}, { + a: {b: {c: [2]}} +}); + +// Test $elemMatch in query document with positional projection. +testSingleDocument({x: {$elemMatch: {$gt: 1, $lt: 3}}}, + {'x.$': 1}, + { + x: [1, 2, 3], + }, + {x: [2]}); + +testSingleDocument({x: {$elemMatch: {y: {$gt: 1}}}}, + {'x.$': 1}, + { + x: [{y: 1}, {y: 2}, {y: 3}], + }, + {x: [{y: 2}]}); + +testSingleDocument({x: {$elemMatch: {a: 1, b: 2}}}, + {'x.$': 1}, + {x: [{a: 1, b: 2}, {a: 2, c: 3}, {a: 1, d: 5}]}, + {x: [{a: 1, b: 2}]}); + +// Test nested $elemMatch in the query document. +testSingleDocument({a: {$elemMatch: {$elemMatch: {$eq: 3}}}}, + {"b.$": 1}, + {a: [[1, 2], [3, 4]], b: [[5, 6], [7, 8]]}, + {b: [[7, 8]]}); + +testSingleDocument({a: {$elemMatch: {p: {$elemMatch: {$eq: 2}}}}}, + {'b.$': 1}, + {a: [{p: [1, 2], q: [3, 4]}, {p: [5, 6], q: [7, 8]}], b: [11, 12]}, + {b: [11]}); + +testSingleDocument({a: {$elemMatch: {p: {$elemMatch: {$eq: 6}}}}}, + {'b.$': 1}, + {a: [{p: [1, 2], q: [3, 4]}, {p: [5, 6], q: [7, 8]}], b: [11, 12]}, + {b: [12]}); + +testSingleDocument({a: {$elemMatch: {q: {$elemMatch: {$eq: 3}}}}}, + {'b.$': 1}, + {a: [{p: [1, 2], q: [3, 4]}, {p: [5, 6], q: [7, 8]}], b: [11, 12]}, + {b: [11]}); + +testSingleDocument({a: {$elemMatch: {q: {$elemMatch: {$eq: 7}}}}}, + {'b.$': 1}, + {a: [{p: [1, 2], q: [3, 4]}, {p: [5, 6], q: [7, 8]}], b: [11, 12]}, + {b: [12]}); + // Regular index test. assert.commandWorked(coll.createIndex({'y.d': 1})); testSingleDocument({'y.d': 4}, @@ -83,6 +170,13 @@ testSingleDocument({'covered.dd': 5}, }, {covered: [{aa: 1, dd: 5}]}); +// Positional projection must use the index from the last child of $and operator. +testSingleDocument( + {$and: [{a: 1}, {a: 2}, {a: 3}]}, {'b.$': 1}, {a: [1, 2, 3], b: [4, 5, 6]}, {b: [6]}); + +testSingleDocument( + {$and: [{a: 3}, {a: 2}, {a: 1}]}, {'b.$': 1}, {a: [1, 2, 3], b: [4, 5, 6]}, {b: [4]}); + // Tests involving getMore. Test the $-positional operator across multiple batches. assert.commandWorked(coll.insert([ { @@ -102,14 +196,47 @@ while (it.hasNext()) { assert(coll.drop()); +// Multiple array fields in the query document with positional projection may result in "undefined +// behaviour" according to the documentation. Here we test that at least there is no error/segfault +// for such queries. +assert.commandWorked(coll.insert({a: [1, 2, 3], b: [4, 5, 6], c: [7, 8, 9]})); +assert.doesNotThrow(() => coll.find({a: 2, b: 5}, {"c.$": 1})); +assert(coll.drop()); + // Tests with invalid positional projection operator. assert.commandWorked(coll.insert([{x: [{a: 1, b: 2}, {a: 2, c: 3}, {a: 1, d: 5}], y: [{aa: 1}]}])); + +// Positional projection cannot be used with $slice. let err = assert.throws(() => coll.find({x: 1}, {'x.$': {'$slice': 1}}).toArray()); assert.commandFailedWithCode(err, 31271); -assert.throws(function() { - coll.find({'x.a': 2}, {'x.$': 1, y: 0}).sort({x: 1}).toArray(); -}, []); +// Positional projection cannot be used with exclusion projection. +err = assert.throws(() => coll.find({'x.a': 2}, {'x.$': 1, y: 0}).sort({x: 1}).toArray()); +assert.commandFailedWithCode(err, 31254); + +// There can be only one positional projection in the query. +err = assert.throws(() => coll.find({'x.a': 1, 'y.aa': 1}, {'x.$': 1, 'y.$': 1}).toArray()); +assert.commandFailedWithCode(err, 31276); + +// Test queries where no array index for positional projection is recorded. +err = assert.throws(() => coll.find({}, {'x.$': 1}).toArray()); +assert.commandFailedWithCode(err, 51246); + +assert(coll.drop()); +assert.commandWorked(coll.insert({a: [1, 2, 3], b: [4, 5, 6], c: [7, 8, 9]})); + +err = assert.throws(() => coll.find({b: [4, 5, 6]}, {"c.$": 1}).toArray()); +assert.commandFailedWithCode(err, 51246); + +// $or, $nor and $not operators disable positional projection index recording for its children. +err = assert.throws(() => coll.find({$or: [{a: 1}, {a: 0}]}, {'a.$': 1}).toArray()); +assert.commandFailedWithCode(err, 51246); + +err = assert.throws(() => coll.find({$or: [{a: 0}, {a: 2}]}, {'a.$': 1}).toArray()); +assert.commandFailedWithCode(err, 51246); + +err = assert.throws(() => coll.find({$nor: [{a: -1}, {a: -2}]}, {'a.$': 1}).toArray()); +assert.commandFailedWithCode(err, 51246); assert.throws(function() { coll.find({'x.a': 1, 'y.aa': 1}, {'x.$': 1, 'y.$': 1}).toArray(); @@ -119,4 +246,8 @@ err = assert.throws(function() { coll.find({}, {".$": 1}).toArray(); }, []); assert.commandFailedWithCode(err, 5392900); + +err = assert.throws( + () => coll.find({a: {$not: {$not: {$elemMatch: {$eq: 1}}}}}, {'a.$': 1}).toArray()); +assert.commandFailedWithCode(err, 51246); }()); diff --git a/jstests/core/positional_projection_multiple_array_fields.js b/jstests/core/positional_projection_multiple_array_fields.js index 63e4f0a3b85..635461944ea 100644 --- a/jstests/core/positional_projection_multiple_array_fields.js +++ b/jstests/core/positional_projection_multiple_array_fields.js @@ -4,45 +4,50 @@ * * Note that the user's query/filter document may only contain _ONE_ array field for positional * projection to work correctly. - * @tags: [ - * sbe_incompatible, - * ] */ (function() { "use strict"; +load("jstests/aggregation/extras/utils.js"); // For documentEq. +load("jstests/libs/sbe_assert_error_override.js"); // Override error-code-checking APIs. + const coll = db.positional_projection_multiple_array_fields; coll.drop(); +function testSingleDocument(query, projection, expected) { + assert(documentEq(coll.findOne(query, projection), expected)); +} + // Check that slice and positional (on different array fields) works correctly. assert.commandWorked(coll.insert({_id: 0, a: [1, 2, 3], z: [11, 12, 13]})); -assert.eq(coll.find({z: 13}, {a: {$slice: 2}}).toArray(), [{_id: 0, a: [1, 2], z: [11, 12, 13]}]); -assert.eq(coll.find({z: 13}, {a: {$slice: 2}, "z.$": 1}).toArray(), [{_id: 0, a: [1, 2], z: [13]}]); -coll.drop(); +testSingleDocument({z: 13}, {a: {$slice: 2}}, {_id: 0, a: [1, 2], z: [11, 12, 13]}); +testSingleDocument({z: 13}, {a: {$slice: 2}, "z.$": 1}, {_id: 0, a: [1, 2], z: [13]}); +assert(coll.drop()); coll.insert({_id: 0, importing: [{foo: "a"}, {foo: "b"}], "jobs": [{num: 1}, {num: 2}, {num: 3}]}); -assert.eq(coll.find({"importing.foo": "b"}, {jobs: {'$slice': 2}, 'importing.$': 1}).toArray(), - [{_id: 0, importing: [{foo: "b"}], jobs: [{num: 1}, {num: 2}]}]); -assert.eq(coll.find({"importing.foo": "b"}, {jobs: {'$slice': -1}, 'importing.$': 1}).toArray(), - [{_id: 0, importing: [{foo: "b"}], jobs: [{num: 3}]}]); +testSingleDocument({"importing.foo": "b"}, + {jobs: {'$slice': 2}, 'importing.$': 1}, + {_id: 0, importing: [{foo: "b"}], jobs: [{num: 1}, {num: 2}]}); +testSingleDocument({"importing.foo": "b"}, + {jobs: {'$slice': -1}, 'importing.$': 1}, + {_id: 0, importing: [{foo: "b"}], jobs: [{num: 3}]}); -coll.drop(); +assert(coll.drop()); assert.commandWorked(coll.insert({_id: 1, a: [{b: 1, c: 2}, {b: 3, c: 4}], z: [11, 12, 13]})); -assert.eq(coll.find({z: 12}, {"a.b": 1}).toArray(), [{_id: 1, a: [{"b": 1}, {"b": 3}]}]); +testSingleDocument({z: 12}, {"a.b": 1}, {_id: 1, a: [{"b": 1}, {"b": 3}]}); // The positional projection on 'z' which limits it to one element shouldn't be applied to 'a' as // well. -assert.eq(coll.find({z: 12}, {"a.b": 1, "z.$": 1}).toArray(), - [{_id: 1, a: [{b: 1}, {b: 3}], z: [12]}]); +testSingleDocument({z: 12}, {"a.b": 1, "z.$": 1}, {_id: 1, a: [{b: 1}, {b: 3}], z: [12]}); // Test that the positional projection can be applied to a "parallel" array. -coll.drop(); +assert(coll.drop()); assert.commandWorked(coll.insert({_id: 1, a: [1, 2, 3], b: ["one", "two", "three"]})); -assert.eq(coll.find({a: 2}, {"b.$": 1}).toArray(), [{_id: 1, b: ["two"]}]); +testSingleDocument({a: 2}, {"b.$": 1}, {_id: 1, b: ["two"]}); // Similar test, but try a parallel array which is on a dotted path. assert.commandWorked(coll.insert({_id: 2, a: {b: [1, 2, 3]}, c: {d: ["one", "two", "three"]}})); -assert.eq(coll.find({"a.b": 2}, {"c.d.$": 1}).toArray(), [{_id: 2, c: {d: ["two"]}}]); +testSingleDocument({"a.b": 2}, {"c.d.$": 1}, {_id: 2, c: {d: ["two"]}}); // Attempting to apply it to a parallel array which is smaller. assert.commandWorked(coll.insert({_id: 3, a: [4, 5, 6], b: ["four", "five"]})); diff --git a/jstests/core/slice1.js b/jstests/core/slice1.js index ac1773f0f0a..72e60698975 100644 --- a/jstests/core/slice1.js +++ b/jstests/core/slice1.js @@ -263,6 +263,12 @@ testSingleDocument({'a.b.c': {$slice: 1}, d: 0}, {a: [[[{b: [[[{c: [1, 2, 3]}]]]}]]], d: 456}, {a: [[[{b: [[[{c: [1, 2, 3]}]]]}]]]}); +// Test that even though $slice cannot be applied to deep arrays, fields are still filtered in the +// objects inside these deep arrays for inclusion projections. +testSingleDocument({e: 1, 'a.b': {$slice: 1}}, + {e: 123, a: [[[{b: [1, 2, 3], d: 4, f: 5}]]]}, + {e: 123, a: [[[{b: [1, 2, 3]}]]]}); + // Test $slice with an inclusion/exclusion projection for _id field. testSingleDocument({_id: 1, a: {$slice: [1, 1]}}, {_id: 123, a: [1, 2, 3]}, diff --git a/jstests/libs/sbe_assert_error_override.js b/jstests/libs/sbe_assert_error_override.js index 7a1cc4af428..a5ca98ad84b 100644 --- a/jstests/libs/sbe_assert_error_override.js +++ b/jstests/libs/sbe_assert_error_override.js @@ -67,6 +67,8 @@ const equivalentErrorCodesList = [ [5166403, 5166603], [5166404, 5166604], [5166405, 5166606], + [51246, 5291401], + [51247, 5291402], [51744, 5154400], [51745, 5154400], [51746, 5154400], @@ -104,6 +106,10 @@ const equivalentErrorCodesMap = function() { }(); const lookupEquivalentErrorCodes = function(errorCodes) { + if (errorCodes === assert._kAnyErrorCode) { + return errorCodes; + } + if (!Array.isArray(errorCodes)) { errorCodes = [errorCodes]; } diff --git a/src/mongo/db/exec/sbe/values/value.cpp b/src/mongo/db/exec/sbe/values/value.cpp index df357de7117..bad51c67045 100644 --- a/src/mongo/db/exec/sbe/values/value.cpp +++ b/src/mongo/db/exec/sbe/values/value.cpp @@ -319,7 +319,7 @@ void writeValueToStream(T& stream, TypeTags tag, Value val) { break; } case TypeTags::Nothing: - stream << "---===*** NOTHING ***===---"; + stream << "Nothing"; break; case TypeTags::MinKey: stream << "minKey"; @@ -387,9 +387,11 @@ void writeValueToStream(T& stream, TypeTags tag, Value val) { } break; } - case TypeTags::bsonObjectId: - stream << "---===*** bsonObjectId ***===---"; + case TypeTags::bsonObjectId: { + auto objId = getRawPointerView(val); + stream << "bsonObjectId(\"" << OID::from(objId).toString() << "\")"; break; + } case TypeTags::bsonBinData: { auto data = reinterpret_cast<const char*>(getBSONBinDataCompat(TypeTags::bsonBinData, val)); diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp index 28f58517e9a..9c9312b2c5f 100644 --- a/src/mongo/db/query/sbe_stage_builder.cpp +++ b/src/mongo/db/query/sbe_stage_builder.cpp @@ -354,15 +354,16 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder relevantSlots = sbe::makeSV(); outputs.forEachSlot(forwardingReqs, [&](auto&& slot) { relevantSlots.push_back(slot); }); - stage = generateFilter(_opCtx, - fn->filter.get(), - std::move(stage), - &_slotIdGenerator, - &_frameIdGenerator, - outputs.get(kResult), - _data.env, - std::move(relevantSlots), - root->nodeId()); + + std::tie(std::ignore, stage) = generateFilter(_opCtx, + fn->filter.get(), + std::move(stage), + &_slotIdGenerator, + &_frameIdGenerator, + outputs.get(kResult), + _data.env, + std::move(relevantSlots), + root->nodeId()); } return {std::move(stage), std::move(outputs)}; @@ -731,6 +732,9 @@ SlotBasedStageBuilder::buildProjectionDefault(const QuerySolutionNode* root, auto childReqs = reqs.copy().set(kResult); auto [inputStage, outputs] = build(pn->children[0], childReqs); + auto relevantSlots = sbe::makeSV(); + outputs.forEachSlot(reqs, [&](auto&& slot) { relevantSlots.push_back(slot); }); + auto [slot, stage] = generateProjection(_opCtx, &pn->proj, std::move(inputStage), @@ -738,6 +742,7 @@ SlotBasedStageBuilder::buildProjectionDefault(const QuerySolutionNode* root, &_frameIdGenerator, outputs.get(kResult), _data.env, + std::move(relevantSlots), root->nodeId()); outputs.set(kResult, slot); @@ -788,15 +793,15 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder auto forwardingReqs = reqs.copy().clear(kResult); outputs.forEachSlot(forwardingReqs, [&](auto&& slot) { relevantSlots.push_back(slot); }); - stage = generateFilter(_opCtx, - orn->filter.get(), - std::move(stage), - &_slotIdGenerator, - &_frameIdGenerator, - outputs.get(kResult), - _data.env, - std::move(relevantSlots), - root->nodeId()); + std::tie(std::ignore, stage) = generateFilter(_opCtx, + orn->filter.get(), + std::move(stage), + &_slotIdGenerator, + &_frameIdGenerator, + outputs.get(kResult), + _data.env, + std::move(relevantSlots), + root->nodeId()); } return {std::move(stage), std::move(outputs)}; diff --git a/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp b/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp index 34435afaa51..3e8a7f573b1 100644 --- a/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp +++ b/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp @@ -226,15 +226,16 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateOptimizedOplo relevantSlots.push_back(*tsSlot); } - stage = generateFilter(opCtx, - csn->filter.get(), - std::move(stage), - slotIdGenerator, - frameIdGenerator, - resultSlot, - env, - std::move(relevantSlots), - csn->nodeId()); + auto [_, filterStage] = generateFilter(opCtx, + csn->filter.get(), + std::move(stage), + slotIdGenerator, + frameIdGenerator, + resultSlot, + env, + std::move(relevantSlots), + csn->nodeId()); + stage = std::move(filterStage); // We may be requested to stop applying the filter after the first match. This can happen // if the query is just a lower bound on 'ts' on a forward scan. In this case every document @@ -436,15 +437,16 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateGenericCollSc relevantSlots.push_back(*tsSlot); } - stage = generateFilter(opCtx, - csn->filter.get(), - std::move(stage), - slotIdGenerator, - frameIdGenerator, - resultSlot, - env, - std::move(relevantSlots), - csn->nodeId()); + auto [_, filterStage] = generateFilter(opCtx, + csn->filter.get(), + std::move(stage), + slotIdGenerator, + frameIdGenerator, + resultSlot, + env, + std::move(relevantSlots), + csn->nodeId()); + stage = std::move(filterStage); } PlanStageSlots outputs; diff --git a/src/mongo/db/query/sbe_stage_builder_expression.cpp b/src/mongo/db/query/sbe_stage_builder_expression.cpp index cdbb8c8f826..e1ff40f035d 100644 --- a/src/mongo/db/query/sbe_stage_builder_expression.cpp +++ b/src/mongo/db/query/sbe_stage_builder_expression.cpp @@ -2605,8 +2605,11 @@ private: } std::reverse(branches.begin(), branches.end()); - auto [resultExpr, opStage] = generateShortCircuitingLogicalOp( - logicOp, std::move(branches), _context->planNodeId, _context->slotIdGenerator); + auto [resultExpr, opStage] = generateShortCircuitingLogicalOp(logicOp, + std::move(branches), + _context->planNodeId, + _context->slotIdGenerator, + BooleanStateHelper{}); auto loopJoinStage = makeLoopJoin(_context->extractCurrentEvalStage(), std::move(opStage), diff --git a/src/mongo/db/query/sbe_stage_builder_filter.cpp b/src/mongo/db/query/sbe_stage_builder_filter.cpp index 3afce4adfa1..ebea2839c1e 100644 --- a/src/mongo/db/query/sbe_stage_builder_filter.cpp +++ b/src/mongo/db/query/sbe_stage_builder_filter.cpp @@ -31,6 +31,8 @@ #include "mongo/db/query/sbe_stage_builder_filter.h" +#include <functional> + #include "mongo/db/exec/sbe/stages/co_scan.h" #include "mongo/db/exec/sbe/stages/filter.h" #include "mongo/db/exec/sbe/stages/limit_skip.h" @@ -77,6 +79,25 @@ namespace mongo::stage_builder { namespace { +struct MatchExpressionVisitorContext; + +/** + * Output of the tree can come from two places: + * - If there is an expression on the evaluation stack in the end of tree construction, then this + * is the output for the whole tree. This is checked in the 'MatchExpressionVisitorContext::done' + * method. + * - If we apply top-level AND optimization, then in the end of tree construction the evaluation + * stack will be empty. This happens because expressions which normally would reside on the stack + * are popped and inserted directly into the filter stage for each branch. + * + * So, we need to record output in both the 'MatchExpressionVisitorContext::done' method and builder + * for top-level AND. + * + * This function takes the current expression, projects it into a separate slot and stores this slot + * as an output for the current frame. + */ +void projectCurrentExprToOutputSlot(MatchExpressionVisitorContext* context); + /** * The various flavors of PathMatchExpressions require the same skeleton of traverse operators in * order to perform implicit path traversal, but may translate differently to an SBE expression that @@ -99,14 +120,16 @@ struct MatchExpressionVisitorContext { sbe::value::SlotId inputSlot, const MatchExpression* root, sbe::RuntimeEnvironment* env, - PlanNodeId planNodeId) + PlanNodeId planNodeId, + const FilterStateHelper& stateHelper) : opCtx{opCtx}, inputSlot{inputSlot}, slotIdGenerator{slotIdGenerator}, frameIdGenerator{frameIdGenerator}, topLevelAnd{nullptr}, env{env}, - planNodeId{planNodeId} { + planNodeId{planNodeId}, + stateHelper{stateHelper} { // Set up the top-level EvalFrame. evalStack.emplaceFrame(std::move(inputStage), inputSlot); @@ -117,25 +140,39 @@ struct MatchExpressionVisitorContext { } } - EvalStage done() { + std::pair<boost::optional<sbe::value::SlotId>, EvalStage> done() { invariant(evalStack.framesCount() == 1); auto& frame = evalStack.topFrame(); if (frame.exprsCount() > 0) { + if (stateHelper.stateContainsValue()) { + projectCurrentExprToOutputSlot(this); + } invariant(frame.exprsCount() == 1); - frame.setStage( - makeFilter<false>(frame.extractStage(), frame.popExpr().extractExpr(), planNodeId)); + frame.setStage(makeFilter<false>(frame.extractStage(), + stateHelper.getBool(frame.popExpr().extractExpr()), + planNodeId)); } - return frame.extractStage(); + if (outputSlot && stateHelper.stateContainsValue()) { + // In case 'outputSlot' is defined and state contains a value, we need to extract this + // value into a separate slot and return it. The resulting value depends on the state + // type, see the implementation of specific state helper for details. + return stateHelper.projectValueCombinator( + *outputSlot, frame.extractStage(), planNodeId, slotIdGenerator, frameIdGenerator); + } + + return {boost::none, frame.extractStage()}; } - struct InputSlotFrameData { + struct FrameData { sbe::value::SlotId inputSlot; + + FrameData(sbe::value::SlotId inputSlot) : inputSlot{inputSlot} {} }; OperationContext* opCtx; - EvalStack<InputSlotFrameData> evalStack; + EvalStack<FrameData> evalStack; sbe::value::SlotId inputSlot; sbe::value::SlotIdGenerator* slotIdGenerator; sbe::value::FrameIdGenerator* frameIdGenerator; @@ -145,8 +182,28 @@ struct MatchExpressionVisitorContext { // The id of the 'QuerySolutionNode' which houses the match expression that we are converting to // SBE. const PlanNodeId planNodeId; + + // Helper for managing the internal state of the filter tree. See 'FilterStateHelper' definition + // for details. + const FilterStateHelper& stateHelper; + + // Trees for some queries can have something to output. For instance, if we use + // 'IndexStateHelper' for managing internal state, this output is the index of the array element + // that matched our query predicate. This field stores the slot id containing the output of the + // tree. + boost::optional<sbe::value::SlotId> outputSlot; }; +void projectCurrentExprToOutputSlot(MatchExpressionVisitorContext* context) { + tassert(5291405, "Output slot is not empty", !context->outputSlot); + auto& frame = context->evalStack.topFrame(); + auto [projectedExprSlot, stage] = projectEvalExpr( + frame.popExpr(), frame.extractStage(), context->planNodeId, context->slotIdGenerator); + context->outputSlot = projectedExprSlot; + frame.pushExpr(projectedExprSlot); + frame.setStage(std::move(stage)); +} + enum class LeafTraversalMode { // Don't generate a TraverseStage for the leaf. kDoNotTraverseLeaf = 0, @@ -160,45 +217,45 @@ enum class LeafTraversalMode { /** * This function generates a path traversal plan stage at the given nested 'level' of the traversal - * path. For example, for a dotted path expression {'a.b': 2}, the traversal sub-tree will look like - * this: + * path. For example, for a dotted path expression {'a.b': 2}, the traversal sub-tree built with + * 'BooleanStateHelper' will look like this: * * traverse - * outputSlot1 // the traversal result - * innerSlot1 // the result coming from the 'in' branch - * fieldSlot1 // field 'a' projected in the 'from' branch, this is the field we will be + * outputSlot1 // the traversal result + * innerSlot1 // the result coming from the 'in' branch + * fieldSlot1 // field 'a' projected in the 'from' branch, this is the field we will be * // traversing - * {outputSlot1 || innerSlot1} // the folding expression - combining - * // results for each element - * {outputSlot1} // final (early out) expression - when we hit the 'true' value, - * // we don't have to traverse the whole array - * in - * project [innerSlot1 = // if getField(fieldSlot1,'b') - * fillEmpty(outputSlot2, false) || // returns an array, compare the - * (fillEmpty(isArray(fieldSlot), false) && // array itself to 2 as well - * fillEmpty(fieldSlot2==2, false))] - * traverse // nested traversal - * outputSlot2 // the traversal result - * innerSlot2 // the result coming from the 'in' branch - * fieldSlot2 // field 'b' projected in the 'from' branch, this is the field we will be + * {outputSlot1 || innerSlot1} // the folding expression - combining results for each + * // element + * {outputSlot1} // final (early out) expression - when we hit the 'true' value, we don't + * // have to traverse the whole array + * from + * project [fieldSlot1 = getField(inputSlot, "a")] // project field 'a' from the document + * // bound to 'inputSlot' + * <inputStage> // e.g. collection scan + * in + * project [innerSlot1 = // if getField(fieldSlot1,'b') + * fillEmpty(outputSlot2, false) || // returns an array, compare the + * (fillEmpty(isArray(fieldSlot2), false) && // array itself to 2 as well + * fillEmpty(fieldSlot2 == 2, false))] + * traverse // nested traversal + * outputSlot2 // the traversal result + * innerSlot2 // the result coming from the 'in' branch + * fieldSlot2 // field 'b' projected in the 'from' branch, this is the field we will be * // traversing - * {outputSlot2 || innerSlot2} // the folding expression - * {outputSlot2} // final (early out) expression - * in - * project [innerSlot2 = // compare the field 'b' to 2 and store - * fillEmpty(fieldSlot2==2, false)] // the bool result in innerSlot2 - * limit 1 - * coscan - * from - * project [fieldSlot2 = getField(fieldSlot1, 'b')] // project field 'b' from the + * {outputSlot2 || innerSlot2} // the folding expression + * {outputSlot2} // final (early out) expression + * from + * project [fieldSlot2 = getField(fieldSlot1, "b")] // project field 'b' from the * // document bound to 'fieldSlot1', * // which is field 'a' - * limit 1 - * coscan - * from - * project [fieldSlot1 = getField(inputSlot, 'a')] // project field 'a' from the document - * // bound to 'inputSlot' - * <inputStage> // e.g., COLLSCAN + * limit 1 + * coscan + * in + * project [innerSlot2 = // compare the field 'b' to 2 and + * fillEmpty(fieldSlot2 == 2, false)] // store the result in innerSlot2 + * limit 1 + * coscan */ EvalExprStagePair generatePathTraversal(EvalStage inputStage, sbe::value::SlotId inputSlot, @@ -206,8 +263,10 @@ EvalExprStagePair generatePathTraversal(EvalStage inputStage, size_t level, PlanNodeId planNodeId, sbe::value::SlotIdGenerator* slotIdGenerator, + sbe::value::FrameIdGenerator* frameIdGenerator, const MakePredicateFn& makePredicate, - LeafTraversalMode mode) { + LeafTraversalMode mode, + const FilterStateHelper& stateHelper) { using namespace std::literals; invariant(level < fp.getPathLength()); @@ -227,13 +286,16 @@ EvalExprStagePair generatePathTraversal(EvalStage inputStage, sbe::makeE<sbe::EConstant>(fieldName)))); if (isLeafField && mode == LeafTraversalMode::kDoNotTraverseLeaf) { + // 'makePredicate' in this mode must return valid state, not just plain boolean value. So + // there is no need to wrap it in '_context->stateHelper.makePredicateCombinator'. return makePredicate(fieldSlot, std::move(fromBranch)); } // Generate the 'in' branch for the TraverseStage that we're about to construct. auto [innerExpr, innerBranch] = isLeafField - // Base case: Evaluate the predicate. - ? makePredicate(fieldSlot, EvalStage{}) + // Base case: Evaluate the predicate. Predicate returns boolean value, we need to convert it + // to state using '_context->stateHelper.makePredicateCombinator'. + ? stateHelper.makePredicateCombinator(makePredicate(fieldSlot, EvalStage{})) // Recursive case. : generatePathTraversal(EvalStage{}, fieldSlot, @@ -241,32 +303,64 @@ EvalExprStagePair generatePathTraversal(EvalStage inputStage, level + 1, planNodeId, slotIdGenerator, + frameIdGenerator, makePredicate, - mode); - - sbe::value::SlotId innerSlot; - std::tie(innerSlot, innerBranch) = - projectEvalExpr(std::move(innerExpr), std::move(innerBranch), planNodeId, slotIdGenerator); - - // Generate the traverse stage for the current nested level. + mode, + stateHelper); + + if (stateHelper.stateContainsValue()) { + auto isInputArray = slotIdGenerator->generate(); + fromBranch = makeProject(std::move(fromBranch), + planNodeId, + isInputArray, + makeFunction("isArray"sv, sbe::makeE<sbe::EVariable>(fieldSlot))); + + // The expression below checks if input is an array. In this case it returns initial state. + // This value will be the first one to be stored in 'traverseOutputSlot'. On the subsequent + // iterations 'traverseOutputSlot' is updated according to fold expression. + // If input is not array, expression below simply assigns state from the predicate to the + // 'innerResultSlot'. + // If state does not containy any value apart from boolean, we do not need to perform this + // check. + innerExpr = + makeLocalBind(frameIdGenerator, + [&](sbe::EVariable state) { + return sbe::makeE<sbe::EIf>( + sbe::makeE<sbe::EVariable>(isInputArray), + stateHelper.makeInitialState(stateHelper.getBool(state.clone())), + state.clone()); + }, + innerExpr.extractExpr()); + } + + auto innerResultSlot = slotIdGenerator->generate(); + innerBranch = + makeProject(std::move(innerBranch), planNodeId, innerResultSlot, innerExpr.extractExpr()); + + // Generate the traverse stage for the current nested level. There are several cases covered + // during this phase: + // 1. If input is not an array, value from 'in' branch is returned (see comment for the 'in' + // branch construction). + // 2. If input is an array of size 1, fold expression is never executed. 'in' branch returns + // initial state, paired with false value if predicate evaluates to false and true value + // otherwise. + // 3. If input is an array of size larger than 1 and predicate does not evaluate to true on the + // first array element, fold expression is executed at least once. See comments for + // respective implementation of 'FilterStateHelper::makeTraverseCombinator' for details. auto traverseOutputSlot = slotIdGenerator->generate(); - auto outputStage = makeTraverse(std::move(fromBranch), - std::move(innerBranch), // NOLINT(bugprone-use-after-move) - fieldSlot, - traverseOutputSlot, - innerSlot, - makeBinaryOp(sbe::EPrimBinary::logicOr, - sbe::makeE<sbe::EVariable>(traverseOutputSlot), - sbe::makeE<sbe::EVariable>(innerSlot)), - sbe::makeE<sbe::EVariable>(traverseOutputSlot), - planNodeId, - 1); - - auto outputSlot = slotIdGenerator->generate(); - outputStage = makeProject(std::move(outputStage), - planNodeId, - outputSlot, - makeFillEmptyFalse(sbe::makeE<sbe::EVariable>(traverseOutputSlot))); + auto outputStage = stateHelper.makeTraverseCombinator( + std::move(fromBranch), + std::move(innerBranch), // NOLINT(bugprone-use-after-move) + fieldSlot, + traverseOutputSlot, + innerResultSlot, + planNodeId, + frameIdGenerator); + + // If traverse stage was not executed at all (empty input array), 'traverseOutputSlot' contains + // Nothing. In this case we have not found matching element, so we simply return false value. + auto resultExpr = makeFunction( + "fillEmpty", sbe::makeE<sbe::EVariable>(traverseOutputSlot), stateHelper.makeState(false)); if (isLeafField && mode == LeafTraversalMode::kArrayAndItsElements) { // For the last level, if 'mode' == kArrayAndItsElements and getField() returns an array we @@ -278,18 +372,19 @@ EvalExprStagePair generatePathTraversal(EvalStage inputStage, EvalExpr outputExpr; std::tie(outputExpr, outputStage) = makePredicate(fieldSlot, std::move(outputStage)); - outputExpr = makeBinaryOp( - sbe::EPrimBinary::logicOr, - sbe::makeE<sbe::EVariable>(outputSlot), - makeBinaryOp( + // If during an array traversal we have found matching element, simply return 'outputSlot'. + // Otherwise, we must check if the whole array matches the predicate. + resultExpr = stateHelper.mergeStates( + std::move(resultExpr), + stateHelper.makeState(sbe::makeE<sbe::EPrimBinary>( sbe::EPrimBinary::logicAnd, - makeFillEmptyFalse(makeFunction("isArray", sbe::makeE<sbe::EVariable>(fieldSlot))), - outputExpr.extractExpr())); - - return {std::move(outputExpr), std::move(outputStage)}; // NOLINT(bugprone-use-after-move) - } else { - return {outputSlot, std::move(outputStage)}; + makeFillEmptyFalse(sbe::makeE<sbe::EFunction>( + "isArray", sbe::makeEs(sbe::makeE<sbe::EVariable>(fieldSlot)))), + outputExpr.extractExpr())), + frameIdGenerator); } + + return {std::move(resultExpr), std::move(outputStage)}; } /** @@ -303,7 +398,8 @@ EvalExprStagePair generatePathTraversal(EvalStage inputStage, void generatePredicate(MatchExpressionVisitorContext* context, StringData path, MakePredicateFn makePredicate, - LeafTraversalMode mode = LeafTraversalMode::kArrayAndItsElements) { + LeafTraversalMode mode = LeafTraversalMode::kArrayAndItsElements, + bool useCombinator = true) { auto& frame = context->evalStack.topFrame(); auto&& [expr, stage] = [&]() { if (!path.empty()) { @@ -313,14 +409,20 @@ void generatePredicate(MatchExpressionVisitorContext* context, 0, context->planNodeId, context->slotIdGenerator, + context->frameIdGenerator, makePredicate, - mode); + mode, + context->stateHelper); } else { // If matchExpr's parent is a ElemMatchValueMatchExpression, then matchExpr()->path() // will be empty. In this case, 'inputSlot' will be a "correlated slot" that holds the // value of the ElemMatchValueMatchExpression's field path, and we should apply the // predicate directly on 'inputSlot' without array traversal. - return makePredicate(frame.data().inputSlot, frame.extractStage()); + auto result = makePredicate(frame.data().inputSlot, frame.extractStage()); + if (useCombinator) { + return context->stateHelper.makePredicateCombinator(std::move(result)); + } + return result; } }(); @@ -385,11 +487,12 @@ void generateArraySize(MatchExpressionVisitorContext* context, auto [opOutput, opStage] = generateShortCircuitingLogicalOp(sbe::EPrimBinary::logicAnd, std::move(branches), context->planNodeId, - context->slotIdGenerator); + context->slotIdGenerator, + BooleanStateHelper{}); inputStage = makeLoopJoin(std::move(inputStage), std::move(opStage), context->planNodeId); - return {std::move(opOutput), std::move(inputStage)}; + return {context->stateHelper.makeState(opOutput.extractExpr()), std::move(inputStage)}; }; generatePredicate(context, @@ -489,8 +592,7 @@ void generateComparison(MatchExpressionVisitorContext* context, */ void generateAlwaysBoolean(MatchExpressionVisitorContext* context, bool value) { auto& frame = context->evalStack.topFrame(); - frame.pushExpr(sbe::makeE<sbe::EConstant>(sbe::value::TypeTags::Boolean, - sbe::value::bitcastFrom<bool>(value))); + frame.pushExpr(context->stateHelper.makeState(value)); } /** @@ -612,8 +714,11 @@ void buildLogicalExpression(sbe::EPrimBinary::Op op, std::reverse(branches.begin(), branches.end()); auto& frame = context->evalStack.topFrame(); - auto&& [expr, opStage] = generateShortCircuitingLogicalOp( - op, std::move(branches), context->planNodeId, context->slotIdGenerator); + auto&& [expr, opStage] = generateShortCircuitingLogicalOp(op, + std::move(branches), + context->planNodeId, + context->slotIdGenerator, + context->stateHelper); frame.pushExpr(std::move(expr)); // Join frame.stage with opStage. @@ -621,6 +726,59 @@ void buildLogicalExpression(sbe::EPrimBinary::Op op, } /** + * Helper to use for 'makePredicate' argument of 'generatePredicate' function for $elemMatch + * expressions. + */ +EvalExprStagePair elemMatchMakePredicate(MatchExpressionVisitorContext* context, + sbe::value::SlotId filterSlot, + EvalStage& filterStage, + sbe::value::SlotId childInputSlot, + sbe::value::SlotId inputSlot, + EvalStage inputStage) { + // The 'filterStage' subtree was generated to read from 'childInputSlot', based on + // the assumption that 'childInputSlot' is some correlated slot that will be made + // available by childStages's parent. We add a projection here to 'inputStage' to + // feed 'inputSlot' into 'childInputSlot'. + auto isInputArray = context->slotIdGenerator->generate(); + auto fromBranch = makeProject(std::move(inputStage), + context->planNodeId, + childInputSlot, + sbe::makeE<sbe::EVariable>(inputSlot), + isInputArray, + makeFunction("isArray", sbe::makeE<sbe::EVariable>(inputSlot))); + + auto [innerResultSlot, innerBranch] = [&]() -> std::pair<sbe::value::SlotId, EvalStage> { + if (!context->stateHelper.stateContainsValue()) { + return {filterSlot, std::move(filterStage)}; + } + + auto resultSlot = context->slotIdGenerator->generate(); + return {resultSlot, + makeProject(std::move(filterStage), + context->planNodeId, + resultSlot, + context->stateHelper.makeInitialState( + context->stateHelper.getBool(filterSlot)))}; + }(); + + innerBranch = makeFilter<true>( + std::move(innerBranch), sbe::makeE<sbe::EVariable>(isInputArray), context->planNodeId); + + // Generate the traverse. + auto traverseSlot = context->slotIdGenerator->generate(); + auto traverseStage = context->stateHelper.makeTraverseCombinator( + std::move(fromBranch), + std::move(innerBranch), // NOLINT(bugprone-use-after-move) + childInputSlot, + traverseSlot, + innerResultSlot, + context->planNodeId, + context->frameIdGenerator); + + return {traverseSlot, std::move(traverseStage)}; +} + +/** * A match expression pre-visitor used for maintaining nested logical expressions while traversing * the match expression tree. */ @@ -827,10 +985,16 @@ public: // with at least one, we evaluate each child within the current EvalFrame. if (expr->numChildren() >= 1) { // Process the output of the last child. + if (_context->stateHelper.stateContainsValue()) { + projectCurrentExprToOutputSlot(_context); + } + auto& frame = _context->evalStack.topFrame(); invariant(frame.exprsCount() > 0); - frame.setStage(makeFilter<false>( - frame.extractStage(), frame.popExpr().extractExpr(), _context->planNodeId)); + frame.setStage( + makeFilter<false>(frame.extractStage(), + _context->stateHelper.getBool(frame.popExpr().extractExpr()), + _context->planNodeId)); } return; } @@ -855,6 +1019,7 @@ public: } void visit(const ElemMatchObjectMatchExpression* matchExpr) final { + using namespace std::placeholders; // ElemMatchObjectMatchExpression is guaranteed to always have exactly 1 child invariant(matchExpr->numChildren() == 1); @@ -876,46 +1041,27 @@ public: }(); // We're using 'kDoNotTraverseLeaf' traverse mode, so we're guaranteed that 'makePredcate' - // will only be called once, so it's safe to capture and pass in the 'filterStage' subtree + // will only be called once, so it's safe to bind the reference to 'filterStage' subtree // here. - auto makePredicate = [&, filterSlot = filterSlot, &filterStage = filterStage]( - sbe::value::SlotId inputSlot, - EvalStage inputStage) -> EvalExprStagePair { - // Generate the traverse. - auto traverseSlot = _context->slotIdGenerator->generate(); - auto traverseStage = makeTraverse( - // The 'filterStage' subtree was generated to read from 'childInputSlot', based on - // the assumption that 'childInputSlot' is some correlated slot that will be made - // available by childStages's parent. We add a projection here to 'inputStage' to - // feed 'inputSlot' into 'childInputSlot'. - makeProject(std::move(inputStage), - _context->planNodeId, - childInputSlot, - sbe::makeE<sbe::EVariable>(inputSlot)), - makeFilter<true>(std::move(filterStage), - sbe::makeE<sbe::EFunction>( - "isArray", sbe::makeEs(sbe::makeE<sbe::EVariable>(inputSlot))), - _context->planNodeId), - childInputSlot, - traverseSlot, - filterSlot, - makeBinaryOp(sbe::EPrimBinary::logicOr, - sbe::makeE<sbe::EVariable>(traverseSlot), - sbe::makeE<sbe::EVariable>(filterSlot)), - sbe::makeE<sbe::EVariable>(traverseSlot), - _context->planNodeId, - 1); - - return {traverseSlot, std::move(traverseStage)}; - }; - + auto makePredicate = std::bind(&elemMatchMakePredicate, + _context, + filterSlot, + std::ref(filterStage), + childInputSlot, + _1, + _2); + + // 'makePredicate' defined above returns a state instead of plain boolean value, so there is + // no need to use combinator for it. generatePredicate(_context, matchExpr->path(), std::move(makePredicate), - LeafTraversalMode::kDoNotTraverseLeaf); + LeafTraversalMode::kDoNotTraverseLeaf, + false /* useCombinator */); } void visit(const ElemMatchValueMatchExpression* matchExpr) final { + using namespace std::placeholders; auto numChildren = matchExpr->numChildren(); invariant(numChildren >= 1); @@ -934,7 +1080,8 @@ public: generateShortCircuitingLogicalOp(sbe::EPrimBinary::logicAnd, std::move(childStages), _context->planNodeId, - _context->slotIdGenerator); + _context->slotIdGenerator, + _context->stateHelper); sbe::value::SlotId filterSlot; std::tie(filterSlot, filterStage) = projectEvalExpr(std::move(filterExpr), @@ -943,47 +1090,23 @@ public: _context->slotIdGenerator); // We're using 'kDoNotTraverseLeaf' traverse mode, so we're guaranteed that 'makePredcate' - // will only be called once, so it's safe to capture and pass in the 'filterStage' subtree + // will only be called once, so it's safe to bind the reference to 'filterStage' subtree // here. - auto makePredicate = [&, - filterSlot = filterSlot, - &filterStage = filterStage]( // NOLINT(bugprone-use-after-move) - sbe::value::SlotId inputSlot, - EvalStage inputStage) -> EvalExprStagePair { - invariant(filterStage.stage); - - // Generate the traverse. - auto traverseSlot = _context->slotIdGenerator->generate(); - auto traverseStage = makeTraverse( - // The 'childStage' subtree was generated to read from 'childInputSlot', based - // on the assumption that 'childInputSlot' is some correlated slot that will be - // made available by childStages's parent. We add a projection here to 'inputStage' - // to feed 'inputSlot' into 'childInputSlot'. - makeProject(std::move(inputStage), - _context->planNodeId, - childInputSlot, - sbe::makeE<sbe::EVariable>(inputSlot)), - makeFilter<true>(std::move(filterStage), - sbe::makeE<sbe::EFunction>( - "isArray", sbe::makeEs(sbe::makeE<sbe::EVariable>(inputSlot))), - _context->planNodeId), - childInputSlot, - traverseSlot, - filterSlot, - makeBinaryOp(sbe::EPrimBinary::logicOr, - sbe::makeE<sbe::EVariable>(traverseSlot), - sbe::makeE<sbe::EVariable>(filterSlot)), - sbe::makeE<sbe::EVariable>(traverseSlot), - _context->planNodeId, - 1); - - return {traverseSlot, std::move(traverseStage)}; - }; - + auto makePredicate = std::bind(&elemMatchMakePredicate, + _context, + filterSlot, + std::ref(filterStage), + childInputSlot, + _1, + _2); + + // 'makePredicate' defined above returns a state instead of plain boolean value, so there is + // no need to use combinator for it. generatePredicate(_context, matchExpr->path(), std::move(makePredicate), - LeafTraversalMode::kDoNotTraverseLeaf); + LeafTraversalMode::kDoNotTraverseLeaf, + false /* useCombinator */); } void visit(const EqualityMatchExpression* expr) final { @@ -1025,8 +1148,10 @@ public: // expression which does exactly that. auto logicExpr = generateCoerceToBoolExpression(sbe::EVariable{frameId, 0}); - frame.pushExpr(sbe::makeE<sbe::ELocalBind>( - frameId, sbe::makeEs(std::move(expr)), std::move(logicExpr))); + auto localBindExpr = sbe::makeE<sbe::ELocalBind>( + frameId, sbe::makeEs(std::move(expr)), std::move(logicExpr)); + + frame.pushExpr(_context->stateHelper.makeState(std::move(localBindExpr))); frame.setStage(EvalStage{std::move(stage), std::move(currentStage.outSlots)}); } @@ -1161,7 +1286,8 @@ public: generateShortCircuitingLogicalOp(sbe::EPrimBinary::logicOr, std::move(branches), _context->planNodeId, - _context->slotIdGenerator); + _context->slotIdGenerator, + BooleanStateHelper{}); inputStage = makeLoopJoin(std::move(inputStage), // NOLINT(bugprone-use-after-move) @@ -1262,15 +1388,23 @@ public: buildLogicalExpression(sbe::EPrimBinary::logicOr, expr->numChildren(), _context); // Second step is to negate the result of $or expression. + // Here we discard the index value of the state even if it was set by expressions below NOR. + // This matches the behaviour of classic engine, which does not pass 'MatchDetails' object + // to children of NOR and thus does not get any information on 'elemMatchKey' from them. auto& frame = _context->evalStack.topFrame(); - frame.pushExpr(makeNot(frame.popExpr().extractExpr())); + frame.pushExpr(_context->stateHelper.makeState( + makeNot(_context->stateHelper.getBool(frame.popExpr().extractExpr())))); } void visit(const NotMatchExpression* expr) final { auto& frame = _context->evalStack.topFrame(); // Negate the result of $not's child. - frame.pushExpr(makeNot(frame.popExpr().extractExpr())); + // Here we discard the index value of the state even if it was set by expressions below NOT. + // This matches the behaviour of classic engine, which does not pass 'MatchDetails' object + // to children of NOT and thus does not get any information on 'elemMatchKey' from them. + frame.pushExpr(_context->stateHelper.makeState( + makeNot(_context->stateHelper.getBool(frame.popExpr().extractExpr())))); } void visit(const OrMatchExpression* expr) final { @@ -1351,11 +1485,12 @@ public: void visit(const AndMatchExpression* expr) final { if (expr == _context->topLevelAnd) { // For a top-level $and, we evaluate each child within the current EvalFrame. - // Process the output of the most recently evaluated child. auto& frame = _context->evalStack.topFrame(); invariant(frame.exprsCount() > 0); - frame.setStage(makeFilter<false>( - frame.extractStage(), frame.popExpr().extractExpr(), _context->planNodeId)); + frame.setStage( + makeFilter<false>(frame.extractStage(), + _context->stateHelper.getBool(frame.popExpr().extractExpr()), + _context->planNodeId)); return; } @@ -1448,19 +1583,21 @@ private: }; } // namespace -std::unique_ptr<sbe::PlanStage> generateFilter(OperationContext* opCtx, - const MatchExpression* root, - std::unique_ptr<sbe::PlanStage> stage, - sbe::value::SlotIdGenerator* slotIdGenerator, - sbe::value::FrameIdGenerator* frameIdGenerator, - sbe::value::SlotId inputSlot, - sbe::RuntimeEnvironment* env, - sbe::value::SlotVector relevantSlots, - PlanNodeId planNodeId) { +std::pair<boost::optional<sbe::value::SlotId>, std::unique_ptr<sbe::PlanStage>> generateFilter( + OperationContext* opCtx, + const MatchExpression* root, + std::unique_ptr<sbe::PlanStage> stage, + sbe::value::SlotIdGenerator* slotIdGenerator, + sbe::value::FrameIdGenerator* frameIdGenerator, + sbe::value::SlotId inputSlot, + sbe::RuntimeEnvironment* env, + sbe::value::SlotVector relevantSlots, + PlanNodeId planNodeId, + bool trackIndex) { // The planner adds an $and expression without the operands if the query was empty. We can bail // out early without generating the filter plan stage if this is the case. if (root->matchType() == MatchExpression::AND && root->numChildren() == 0) { - return stage; + return {boost::none, std::move(stage)}; } // If 'inputSlot' is not present within 'relevantSlots', add it now. @@ -1468,6 +1605,7 @@ std::unique_ptr<sbe::PlanStage> generateFilter(OperationContext* opCtx, relevantSlots.push_back(inputSlot); } + auto stateHelper = makeFilterStateHelper(trackIndex); MatchExpressionVisitorContext context{opCtx, slotIdGenerator, frameIdGenerator, @@ -1475,12 +1613,15 @@ std::unique_ptr<sbe::PlanStage> generateFilter(OperationContext* opCtx, inputSlot, root, env, - planNodeId}; + planNodeId, + *stateHelper}; MatchExpressionPreVisitor preVisitor{&context}; MatchExpressionInVisitor inVisitor{&context}; MatchExpressionPostVisitor postVisitor{&context}; MatchExpressionWalker walker{&preVisitor, &inVisitor, &postVisitor}; tree_walker::walk<true, MatchExpression>(root, &walker); - return context.done().stage; + + auto [resultSlot, resultStage] = context.done(); + return {resultSlot, std::move(resultStage.stage)}; } } // namespace mongo::stage_builder diff --git a/src/mongo/db/query/sbe_stage_builder_filter.h b/src/mongo/db/query/sbe_stage_builder_filter.h index 46c4320bd10..8e4dc230422 100644 --- a/src/mongo/db/query/sbe_stage_builder_filter.h +++ b/src/mongo/db/query/sbe_stage_builder_filter.h @@ -41,15 +41,24 @@ namespace mongo::stage_builder { * parameter specifies the input slot the filter should use. The 'relevantSlotsIn' parameter * specifies the slots produced by the 'stage' subtree that must remain visible to consumers of * the tree returned by this function. + * Optional slot returned by this function stores index of array element that matches the 'root' + * match expression. The role of this slot is to be a replacement of 'MatchDetails::elemMatchKey()'. + * If 'trackIndex' is true and 'root' contains match expression with array semantics (there are + * certain predicates that do not, such as '{}'), valid slot id is returned. This slot is pointing + * to an optional value of type int32. Otherwise, 'boost::none' is returned. + * If match expression found matching array element, value behind slot id is an int32 array index. + * Otherwise, it is Nothing. */ -std::unique_ptr<sbe::PlanStage> generateFilter(OperationContext* opCtx, - const MatchExpression* root, - std::unique_ptr<sbe::PlanStage> stage, - sbe::value::SlotIdGenerator* slotIdGenerator, - sbe::value::FrameIdGenerator* frameIdGenerator, - sbe::value::SlotId inputSlotIn, - sbe::RuntimeEnvironment* env, - sbe::value::SlotVector relevantSlotsIn, - PlanNodeId planNodeId); +std::pair<boost::optional<sbe::value::SlotId>, std::unique_ptr<sbe::PlanStage>> generateFilter( + OperationContext* opCtx, + const MatchExpression* root, + std::unique_ptr<sbe::PlanStage> stage, + sbe::value::SlotIdGenerator* slotIdGenerator, + sbe::value::FrameIdGenerator* frameIdGenerator, + sbe::value::SlotId inputSlotIn, + sbe::RuntimeEnvironment* env, + sbe::value::SlotVector relevantSlotsIn, + PlanNodeId planNodeId, + bool trackIndex = false); } // namespace mongo::stage_builder diff --git a/src/mongo/db/query/sbe_stage_builder_helpers.cpp b/src/mongo/db/query/sbe_stage_builder_helpers.cpp index d6a0b2defc2..305324a2be2 100644 --- a/src/mongo/db/query/sbe_stage_builder_helpers.cpp +++ b/src/mongo/db/query/sbe_stage_builder_helpers.cpp @@ -411,9 +411,19 @@ EvalExprStagePair generateSingleResultUnion(std::vector<EvalExprStagePair> branc EvalExprStagePair generateShortCircuitingLogicalOp(sbe::EPrimBinary::Op logicOp, std::vector<EvalExprStagePair> branches, PlanNodeId planNodeId, - sbe::value::SlotIdGenerator* slotIdGenerator) { + sbe::value::SlotIdGenerator* slotIdGenerator, + const FilterStateHelper& stateHelper) { invariant(logicOp == sbe::EPrimBinary::logicAnd || logicOp == sbe::EPrimBinary::logicOr); + if (!branches.empty() && logicOp == sbe::EPrimBinary::logicOr) { + // OR does not support index tracking, so we must ensure that state from the last branch + // holds only boolean value. + // NOTE: There is no technical reason for that. We could support index tracking for OR + // expression, but this would differ from the existing behaviour. + auto& [expr, _] = branches.back(); + expr = stateHelper.makeState(stateHelper.getBool(expr.extractExpr())); + } + // For AND and OR, if 'branches' only has one element, we can just return branches[0]. if (branches.size() == 1) { return std::move(branches[0]); @@ -426,28 +436,28 @@ EvalExprStagePair generateShortCircuitingLogicalOp(sbe::EPrimBinary::Op logicOp, // be evaluated. In other words, the evaluation process will "short-circuit". If a branch's // filter condition is false, the branch will not produce a value and the evaluation process // will continue. The last branch doesn't have a FilterStage and will always produce a value. - auto branchFn = [logicOp](EvalExpr expr, - EvalStage stage, - PlanNodeId planNodeId, - sbe::value::SlotIdGenerator* slotIdGenerator) { + auto branchFn = [logicOp, &stateHelper](EvalExpr expr, + EvalStage stage, + PlanNodeId planNodeId, + sbe::value::SlotIdGenerator* slotIdGenerator) { // Create a FilterStage for each branch (except the last one). If a branch's filter // condition is true, it will "short-circuit" the evaluation process. For AND, short- // circuiting should happen if an operand evalautes to false. For OR, short-circuiting // should happen if an operand evaluates to true. - auto filterExpr = (logicOp == sbe::EPrimBinary::logicAnd) ? makeNot(expr.extractExpr()) - : expr.extractExpr(); - - stage = makeFilter<false>(std::move(stage), std::move(filterExpr), planNodeId); - // Set up an output value to be returned if short-circuiting occurs. For AND, when // short-circuiting occurs, the output returned should be false. For OR, when short- // circuiting occurs, the output returned should be true. - auto shortCircuitVal = sbe::makeE<sbe::EConstant>(sbe::value::TypeTags::Boolean, - (logicOp == sbe::EPrimBinary::logicOr)); - auto slot = slotIdGenerator->generate(); - auto resultStage = - makeProject(std::move(stage), planNodeId, slot, std::move(shortCircuitVal)); - return std::make_pair(slot, std::move(resultStage)); + auto filterExpr = stateHelper.getBool(expr.extractExpr()); + if (logicOp == sbe::EPrimBinary::logicAnd) { + filterExpr = makeNot(std::move(filterExpr)); + } + stage = makeFilter<false>(std::move(stage), std::move(filterExpr), planNodeId); + + auto resultSlot = slotIdGenerator->generate(); + auto resultValue = stateHelper.makeState(logicOp == sbe::EPrimBinary::logicOr); + stage = makeProject(std::move(stage), planNodeId, resultSlot, std::move(resultValue)); + + return std::make_pair(resultSlot, std::move(stage)); }; return generateSingleResultUnion(std::move(branches), branchFn, planNodeId, slotIdGenerator); @@ -524,4 +534,64 @@ uint32_t dateTypeMask() { getBSONTypeMask(sbe::value::TypeTags::ObjectId) | getBSONTypeMask(sbe::value::TypeTags::bsonObjectId)); } + +EvalStage IndexStateHelper::makeTraverseCombinator( + EvalStage outer, + EvalStage inner, + sbe::value::SlotId inputSlot, + sbe::value::SlotId outputSlot, + sbe::value::SlotId innerOutputSlot, + PlanNodeId planNodeId, + sbe::value::FrameIdGenerator* frameIdGenerator) const { + // Fold expression is executed only when array has more then 1 element. It increments index + // value on each iteration. During this process index is paired with false value. Once the + // predicate evaluates to true, false value of index is changed to true. Final expression of + // traverse stage detects that now index is paired with true value and it means that we have + // found an index of array element where predicate evaluates to true. + // + // First step is to increment index. Fold expression is always executed when index stored in + // 'outputSlot' is encoded as a false value. This means that to increment index, we should + // subtract 1 from it. + auto frameId = frameIdGenerator->generate(); + auto advancedIndex = sbe::makeE<sbe::EPrimBinary>( + sbe::EPrimBinary::sub, sbe::makeE<sbe::EVariable>(outputSlot), makeConstant(ValueType, 1)); + auto binds = sbe::makeEs(std::move(advancedIndex)); + sbe::EVariable advancedIndexVar{frameId, 0}; + + // In case the predicate in the inner branch of traverse returns true, we want pair + // incremented index with true value. This will tell final expression of traverse that we + // have found a matching element and iteration can be stopped. + // The expression below express the following function: f(x) = abs(x) - 1. This function + // converts false value to a true value because f(- index - 2) = index + 1 (take a look at + // the comment for the 'IndexStateHelper' class for encoding description). + auto indexWithTrueValue = + sbe::makeE<sbe::EPrimBinary>(sbe::EPrimBinary::sub, + makeFunction("abs", advancedIndexVar.clone()), + makeConstant(ValueType, 1)); + + // Finally, we check if the predicate in the inner branch returned true. If that's the case, + // we pair incremented index with true value. Otherwise, it stays paired with false value. + auto foldExpr = sbe::makeE<sbe::EIf>(FilterStateHelper::getBool(innerOutputSlot), + std::move(indexWithTrueValue), + advancedIndexVar.clone()); + + foldExpr = sbe::makeE<sbe::ELocalBind>(frameId, std::move(binds), std::move(foldExpr)); + + return makeTraverse(std::move(outer), + std::move(inner), + inputSlot, + outputSlot, + innerOutputSlot, + std::move(foldExpr), + FilterStateHelper::getBool(outputSlot), + planNodeId, + 1); +} + +std::unique_ptr<FilterStateHelper> makeFilterStateHelper(bool trackIndex) { + if (trackIndex) { + return std::make_unique<IndexStateHelper>(); + } + return std::make_unique<BooleanStateHelper>(); +} } // namespace mongo::stage_builder diff --git a/src/mongo/db/query/sbe_stage_builder_helpers.h b/src/mongo/db/query/sbe_stage_builder_helpers.h index d1ecffdac74..36da900927f 100644 --- a/src/mongo/db/query/sbe_stage_builder_helpers.h +++ b/src/mongo/db/query/sbe_stage_builder_helpers.h @@ -186,8 +186,8 @@ inline std::unique_ptr<sbe::EExpression> makeFunction(std::string_view name, Arg } template <typename T> -inline auto makeConstant(sbe::value::TypeTags tag, T&& value) { - return sbe::makeE<sbe::EConstant>(tag, sbe::value::bitcastFrom<T>(std::forward<T>(value))); +inline auto makeConstant(sbe::value::TypeTags tag, T value) { + return sbe::makeE<sbe::EConstant>(tag, sbe::value::bitcastFrom<T>(value)); } inline auto makeConstant(std::string_view str) { @@ -333,15 +333,6 @@ EvalExprStagePair generateSingleResultUnion(std::vector<EvalExprStagePair> branc PlanNodeId planNodeId, sbe::value::SlotIdGenerator* slotIdGenerator); -/** - * Creates tree with short-circuiting for OR and AND. Each element in 'braches' argument represents - * logical expression and sub-tree generated for it. - */ -EvalExprStagePair generateShortCircuitingLogicalOp(sbe::EPrimBinary::Op logicOp, - std::vector<EvalExprStagePair> branches, - PlanNodeId planNodeId, - sbe::value::SlotIdGenerator* slotIdGenerator); - /** This helper takes an SBE SlotIdGenerator and an SBE Array and returns an output slot and a * unwind/project/limit/coscan subtree that streams out the elements of the array one at a time via * the output slot over a series of calls to getNext(), mimicking the output of a collection scan or @@ -372,4 +363,321 @@ std::pair<sbe::value::TypeTags, sbe::value::Value> makeValue(const BSONArray& ba * Returns a BSON type mask of all data types coercible to date. */ uint32_t dateTypeMask(); + +/** + * Constructs local binding with inner expression built by 'innerExprFunc' and variables assigned + * to expressions from 'bindings'. + * Example usage: + * + * makeLocalBind( + * _context->frameIdGenerator, + * [](sbe::EVariable inputArray, sbe::EVariable index) { + * return <expression using inputArray and index>; + * }, + * <expression to assign to inputArray>, + * <expression to assign to index> + * ); + */ +template <typename... Bindings, + typename InnerExprFunc, + typename = std::enable_if_t< + std::conjunction_v<std::is_same<std::unique_ptr<sbe::EExpression>, Bindings>...>>> +std::unique_ptr<sbe::EExpression> makeLocalBind(sbe::value::FrameIdGenerator* frameIdGenerator, + InnerExprFunc innerExprFunc, + Bindings... bindings) { + auto frameId = frameIdGenerator->generate(); + auto binds = sbe::makeEs(); + binds.reserve(sizeof...(Bindings)); + sbe::value::SlotId lastIndex = 0; + auto convertToVariable = [&](std::unique_ptr<sbe::EExpression> expr) { + binds.emplace_back(std::move(expr)); + auto currentIndex = lastIndex; + lastIndex++; + return sbe::EVariable{frameId, currentIndex}; + }; + auto innerExpr = innerExprFunc(convertToVariable(std::move(bindings))...); + return sbe::makeE<sbe::ELocalBind>(frameId, std::move(binds), std::move(innerExpr)); +} + +/** + * Trees generated by 'generateFilter' maintain state during execution. There are two types of state + * that can be maintained: + * - Boolean. The state is just boolean value, indicating if the document matches the predicate. + * - Index. The state stores a tuple of (boolean, optional int32). + * + * Depending on the query type, one of state types can be selected to use in the tree. + * 'FilterStateHelper' class and it's descendants aim to provide unified interface to operate with + * this two types of states. + */ +class FilterStateHelper { +public: + using Expression = std::unique_ptr<sbe::EExpression>; + + virtual ~FilterStateHelper() = default; + + /** + * Returns true if state contains a value along with boolean and false otherwise. + */ + virtual bool stateContainsValue() const = 0; + + /** + * Creates a constant holding state with given boolean 'value'. Index part of the constructed + * state is empty. + */ + virtual Expression makeState(bool value) const = 0; + + /** + * Creates an expression that constructs state from 'expr'. 'expr' must evaluate to a boolean + * value. Index part of the constructed state is empty. + */ + virtual Expression makeState(Expression expr) const = 0; + + /** + * Creates an expression that constructs an initial state from 'expr'. 'expr' must evaluate to a + * boolean value. + * Initial state is used as an output value for the inner branch passed to + * 'makeTraverseCombinator'. + */ + virtual Expression makeInitialState(Expression expr) const = 0; + + /** + * Creates an expression that extracts boolean value from the state evaluated from 'expr'. + */ + virtual Expression getBool(Expression expr) const = 0; + + Expression getBool(sbe::value::SlotId slotId) const { + return getBool(sbe::makeE<sbe::EVariable>(slotId)); + } + + /** + * Implements Elvis operator. If state from 'left' expression represents true boolean value, + * returns 'left'. Otherwise, returns 'right'. + */ + virtual Expression mergeStates(Expression left, + Expression right, + sbe::value::FrameIdGenerator* frameIdGenerator) const = 0; + + /** + * Extracts index value from the state and projects it into a separate slot. If state does not + * contain index value, slot contains Nothing. + * If state does not support storing index value, this function does nothing. + */ + virtual std::pair<boost::optional<sbe::value::SlotId>, EvalStage> projectValueCombinator( + sbe::value::SlotId stateSlot, + EvalStage stage, + PlanNodeId planNodeId, + sbe::value::SlotIdGenerator* slotIdGenerator, + sbe::value::FrameIdGenerator* frameIdGenerator) const = 0; + + /** + * Uses an expression from 'EvalExprStagePair' to construct state. Expresion must evaluate to + * boolean value. + */ + virtual EvalExprStagePair makePredicateCombinator(EvalExprStagePair pair) const = 0; + + /** + * Creates traverse stage with fold and final expressions tuned to maintain consistent state. + * If state does support index value, records the index of a first array element for which inner + * branch returns true value. + */ + virtual EvalStage makeTraverseCombinator( + EvalStage outer, + EvalStage inner, + sbe::value::SlotId inputSlot, + sbe::value::SlotId outputSlot, + sbe::value::SlotId innerOutputSlot, + PlanNodeId planNodeId, + sbe::value::FrameIdGenerator* frameIdGenerator) const = 0; +}; + +/** + * This class implements 'FilterStateHelper' interface for a state which can be represented as a + * tuple of (boolean, optional int32). Such tuple is encoded as a single int64 value. + * + * While we could represent such tuple as an SBE array, this approach would cost us additional + * allocations and the need to call expensive builtins such as 'getElement'. Integer operations are + * much simpler, faster and do not require any allocations. + * + * The following encoding is implemented: + * - [False, Nothing] -> -1 + * - [True, Nothing] -> 0 + * - [False, value] -> - value - 2 + * - [True, value] -> value + 1 + * + * Such encoding allows us to easily extract boolean value (just compare resulting int64 with 0) and + * requires only a few arithmetical operations to extract the index value. Furthemore, we can + * increment/decrement index value simply by incrementing/decrementing the decoded value. + */ +class IndexStateHelper : public FilterStateHelper { +public: + static constexpr sbe::value::TypeTags ValueType = sbe::value::TypeTags::NumberInt64; + + bool stateContainsValue() const override { + return true; + } + + Expression makeState(bool value) const override { + return makeConstant(ValueType, value ? 0 : -1); + } + + Expression makeState(Expression expr) const override { + return sbe::makeE<sbe::EIf>(std::move(expr), makeState(true), makeState(false)); + } + + Expression makeInitialState(Expression expr) const override { + return sbe::makeE<sbe::EIf>( + std::move(expr), makeConstant(ValueType, 1), makeConstant(ValueType, -2)); + } + + Expression getBool(Expression expr) const override { + return sbe::makeE<sbe::EPrimBinary>( + sbe::EPrimBinary::greaterEq, std::move(expr), makeConstant(ValueType, 0)); + } + + Expression mergeStates(Expression left, + Expression right, + sbe::value::FrameIdGenerator* frameIdGenerator) const override { + return makeLocalBind(frameIdGenerator, + [&](sbe::EVariable left) { + return sbe::makeE<sbe::EIf>( + getBool(left.clone()), left.clone(), std::move(right)); + }, + std::move(left)); + } + + std::pair<boost::optional<sbe::value::SlotId>, EvalStage> projectValueCombinator( + sbe::value::SlotId stateSlot, + EvalStage stage, + PlanNodeId planNodeId, + sbe::value::SlotIdGenerator* slotIdGenerator, + sbe::value::FrameIdGenerator* frameIdGenerator) const override { + sbe::EVariable stateVar{stateSlot}; + auto indexSlot = slotIdGenerator->generate(); + + auto indexInt64 = sbe::makeE<sbe::EPrimBinary>( + sbe::EPrimBinary::sub, stateVar.clone(), makeConstant(ValueType, 1)); + + auto indexInt32 = makeLocalBind( + frameIdGenerator, + [&](sbe::EVariable convertedIndex) { + return sbe::makeE<sbe::EIf>( + makeFunction("exists", convertedIndex.clone()), + convertedIndex.clone(), + sbe::makeE<sbe::EFail>(ErrorCodes::Error{5291403}, + "Cannot convert array index into int32 number")); + }, + sbe::makeE<sbe::ENumericConvert>(std::move(indexInt64), + sbe::value::TypeTags::NumberInt32)); + + auto resultStage = makeProject( + std::move(stage), + planNodeId, + indexSlot, + sbe::makeE<sbe::EIf>(sbe::makeE<sbe::EPrimBinary>(sbe::EPrimBinary::greater, + stateVar.clone(), + makeConstant(ValueType, 0)), + std::move(indexInt32), + makeConstant(sbe::value::TypeTags::Nothing, 0))); + return {indexSlot, std::move(resultStage)}; + } + + EvalExprStagePair makePredicateCombinator(EvalExprStagePair pair) const override { + auto [expr, stage] = std::move(pair); + return {makeState(expr.extractExpr()), std::move(stage)}; + } + + EvalStage makeTraverseCombinator(EvalStage outer, + EvalStage inner, + sbe::value::SlotId inputSlot, + sbe::value::SlotId outputSlot, + sbe::value::SlotId innerOutputSlot, + PlanNodeId planNodeId, + sbe::value::FrameIdGenerator* frameIdGenerator) const override; +}; + +/** + * This class implements 'FilterStateHelper' interface for a plain boolean state, without index + * part. + */ +class BooleanStateHelper : public FilterStateHelper { +public: + bool stateContainsValue() const override { + return false; + } + + Expression makeState(bool value) const override { + return makeConstant(sbe::value::TypeTags::Boolean, value); + } + + Expression makeState(Expression expr) const override { + return expr; + } + + Expression makeInitialState(Expression expr) const override { + return expr; + } + + Expression getBool(Expression expr) const override { + return expr; + } + + Expression mergeStates(Expression left, + Expression right, + sbe::value::FrameIdGenerator* frameIdGenerator) const override { + return sbe::makeE<sbe::EPrimBinary>( + sbe::EPrimBinary::logicOr, std::move(left), std::move(right)); + } + + std::pair<boost::optional<sbe::value::SlotId>, EvalStage> projectValueCombinator( + sbe::value::SlotId stateSlot, + EvalStage stage, + PlanNodeId planNodeId, + sbe::value::SlotIdGenerator* slotIdGenerator, + sbe::value::FrameIdGenerator* frameIdGenerator) const override { + return {stateSlot, std::move(stage)}; + } + + EvalExprStagePair makePredicateCombinator(EvalExprStagePair pair) const override { + return pair; + } + + EvalStage makeTraverseCombinator( + EvalStage outer, + EvalStage inner, + sbe::value::SlotId inputSlot, + sbe::value::SlotId outputSlot, + sbe::value::SlotId innerOutputSlot, + PlanNodeId planNodeId, + sbe::value::FrameIdGenerator* frameIdGenerator) const override { + return makeTraverse( + std::move(outer), + std::move(inner), + inputSlot, + outputSlot, + innerOutputSlot, + sbe::makeE<sbe::EPrimBinary>(sbe::EPrimBinary::logicOr, + sbe::makeE<sbe::EVariable>(outputSlot), + sbe::makeE<sbe::EVariable>(innerOutputSlot)), + sbe::makeE<sbe::EVariable>(outputSlot), + planNodeId, + 1); + } +}; + +/** + * Helper function to create respective 'FilterStateHelper' implementation. If 'trackIndex' is true, + * returns 'IndexStateHelper'. Otherwise, returns 'BooleanStateHelper'. + */ +std::unique_ptr<FilterStateHelper> makeFilterStateHelper(bool trackIndex); + +/** + * Creates tree with short-circuiting for OR and AND. Each element in 'braches' argument represents + * logical expression and sub-tree generated for it. + */ +EvalExprStagePair generateShortCircuitingLogicalOp(sbe::EPrimBinary::Op logicOp, + std::vector<EvalExprStagePair> branches, + PlanNodeId planNodeId, + sbe::value::SlotIdGenerator* slotIdGenerator, + const FilterStateHelper& stateHelper); + } // namespace mongo::stage_builder diff --git a/src/mongo/db/query/sbe_stage_builder_projection.cpp b/src/mongo/db/query/sbe_stage_builder_projection.cpp index e16fe2e9572..6dfa198785c 100644 --- a/src/mongo/db/query/sbe_stage_builder_projection.cpp +++ b/src/mongo/db/query/sbe_stage_builder_projection.cpp @@ -36,15 +36,18 @@ #include "mongo/db/exec/sbe/stages/co_scan.h" #include "mongo/db/exec/sbe/stages/filter.h" #include "mongo/db/exec/sbe/stages/limit_skip.h" +#include "mongo/db/exec/sbe/stages/loop_join.h" #include "mongo/db/exec/sbe/stages/makeobj.h" #include "mongo/db/exec/sbe/stages/project.h" #include "mongo/db/exec/sbe/stages/traverse.h" +#include "mongo/db/exec/sbe/stages/union.h" #include "mongo/db/exec/sbe/values/bson.h" #include "mongo/db/matcher/expression_array.h" #include "mongo/db/query/sbe_stage_builder_expression.h" #include "mongo/db/query/sbe_stage_builder_filter.h" #include "mongo/db/query/sbe_stage_builder_helpers.h" #include "mongo/db/query/tree_walker.h" +#include "mongo/db/query/util/make_data_structure.h" #include "mongo/util/str.h" #include "mongo/util/visit_helper.h" @@ -99,6 +102,11 @@ private: EvalMode _mode; }; +struct PositionalProjectionData { + std::vector<std::string> fieldPath; + CopyableMatchExpression matchExpression; +}; + /** * Stores context across calls to visit() in the projection traversal visitors. */ @@ -178,14 +186,18 @@ struct ProjectionTraversalVisitorContext { sbe::value::FrameIdGenerator* frameIdGenerator, PlanStageType inputStage, sbe::value::SlotId inputSlot, - sbe::RuntimeEnvironment* env) + sbe::value::SlotId preImageSlot, + sbe::RuntimeEnvironment* env, + sbe::value::SlotVector relevantSlots) : opCtx(opCtx), planNodeId(planNodeId), projectType(projectType), slotIdGenerator(slotIdGenerator), frameIdGenerator(frameIdGenerator), inputSlot(inputSlot), - env(env) { + preImageSlot(preImageSlot), + env(env), + relevantSlots(std::move(relevantSlots)) { pushLevel({}); topLevel().evalStage = std::move(inputStage); } @@ -201,16 +213,25 @@ struct ProjectionTraversalVisitorContext { // The slot to read a root document from. sbe::value::SlotId inputSlot; + sbe::value::SlotId preImageSlot; sbe::RuntimeEnvironment* env; std::stack<NestedLevel> levels; - // See the comment above the generateExpression() declaration for an explanation of the - // 'relevantSlots' list. + // These are slots that must be accessible from the root of the resulting tree. sbe::value::SlotVector relevantSlots; + // See the comment above the 'generateExpression()' declaration for an explanation of the + // 'expressionsRelevantSlots' vector. + sbe::value::SlotVector expressionsRelevantSlots; + // Flag indicating if $slice operator is used in the projection. bool hasSliceProjection = false; + + // Vector containing field names for current field path. + std::vector<std::string> currentFieldPath; + + boost::optional<PositionalProjectionData> positionalProjectionData; }; /** @@ -225,11 +246,10 @@ public: void visit(const projection_ast::ProjectionPathASTNode* node) final { _context->pushLevel({node->fieldNames().begin(), node->fieldNames().end()}); + _context->currentFieldPath.push_back(_context->topFrontField()); } - void visit(const projection_ast::ProjectionPositionalASTNode* node) final { - uasserted(4822885, str::stream() << "Positional projection is not supported in SBE"); - } + void visit(const projection_ast::ProjectionPositionalASTNode* node) final {} void visit(const projection_ast::ProjectionSliceASTNode* node) final {} @@ -257,6 +277,8 @@ public: void visit(const projection_ast::ProjectionPathASTNode* node) final { _context->popFrontField(); + _context->currentFieldPath.pop_back(); + _context->currentFieldPath.push_back(_context->topFrontField()); } void visit(const projection_ast::ProjectionPositionalASTNode* node) final {} @@ -376,7 +398,7 @@ public: _context->inputSlot, _context->env, _context->planNodeId, - &_context->relevantSlots); + &_context->expressionsRelevantSlots); _context->topLevelEvals().emplace_back(outputSlot, std::move(expr)); _context->topLevel().evalStage = std::move(stage); } @@ -386,6 +408,7 @@ public: // Remove the last field name from context and ensure that there are no more left. _context->popFrontField(); + _context->currentFieldPath.pop_back(); invariant(_context->topLevel().fields.empty()); auto [projectSlots, projectFields, restrictFields, keepFields, childLevelStage] = @@ -456,18 +479,36 @@ public: _context->topLevelEvals().emplace_back(parentLevelResultSlot, nullptr); } - void visit(const projection_ast::ProjectionPositionalASTNode* node) final {} + void visit(const projection_ast::ProjectionPositionalASTNode* node) final { + // NOTE: Positional projection operator has it's own path traversal semantics implemented in + // 'generatePositionalProjection'. But before these semantics are applied, path is extracted + // from the input object according to path traversal semantics of 'BooleanConstantASTNode'. + // This is why we add 'KeepField' to evals in this visitor. + tassert(5291404, + "positional projection cannot be used with exclusion", + _context->projectType == projection_ast::ProjectType::kInclusion); + _context->topLevelEvals().emplace_back(EvalMode::KeepField); + + const auto& children = node->children(); + invariant(children.size() == 1); + auto matchExpression = + exact_pointer_cast<projection_ast::MatchExpressionASTNode*>(children[0].get()); + invariant(matchExpression); + _context->positionalProjectionData = PositionalProjectionData{ + _context->currentFieldPath, matchExpression->matchExpression()}; + } void visit(const projection_ast::ProjectionSliceASTNode* node) final { + // NOTE: $slice projection operator has it's own path traversal semantics implemented in + // 'SliceProjectionTraversalPostVisitor'. But before these semantics are applied, path is + // extracted from the input object according to path traversal semantics of + // 'BooleanConstantASTNode'. This is why we add 'KeepField' and 'IgnoreField' to evals in + // this visitor. using namespace std::literals; auto& evals = _context->topLevelEvals(); if (_context->projectType == projection_ast::ProjectType::kInclusion) { - evals.emplace_back( - _context->slotIdGenerator->generate(), - makeFunction("getField"sv, - sbe::makeE<sbe::EVariable>(_context->topLevel().inputSlot), - makeConstant(_context->topFrontField()))); + evals.emplace_back(EvalMode::KeepField); } else { // For exclusion projection we do need to project current field manually, it will be // included in the input document anyway. @@ -510,7 +551,7 @@ public: invariant(elemMatchObject); invariant(elemMatchObject->numChildren() == 1); auto elemMatchPredicate = elemMatchObject->getChild(0); - auto elemMatchPredicateTree = + auto [_, elemMatchPredicateTree] = generateFilter(_context->opCtx, elemMatchPredicate, makeLimitCoScanTree(_context->planNodeId), @@ -540,15 +581,16 @@ public: auto clonedChild = elemMatchValue->getChild(i)->shallowClone(); topLevelAnd->add(std::move(clonedChild)); } - return generateFilter(_context->opCtx, - topLevelAnd.get(), - makeLimitCoScanTree(_context->planNodeId), - _context->slotIdGenerator, - _context->frameIdGenerator, - inputArraySlot, - _context->env, - sbe::makeSV(), - _context->planNodeId); + auto [_, stage] = generateFilter(_context->opCtx, + topLevelAnd.get(), + makeLimitCoScanTree(_context->planNodeId), + _context->slotIdGenerator, + _context->frameIdGenerator, + inputArraySlot, + _context->env, + sbe::makeSV(), + _context->planNodeId); + return std::move(stage); } else { MONGO_UNREACHABLE; } @@ -659,6 +701,7 @@ public: // Remove the last field name from context and ensure that there are no more left. _context->popFrontField(); + _context->currentFieldPath.pop_back(); invariant(_context->topLevel().fields.empty()); // All field paths without $slice operator are marked using 'EvalMode::IgnoreField' (see @@ -776,7 +819,12 @@ public: _context->topLevelEvals().emplace_back(parentLevelResultSlot, nullptr); } - void visit(const projection_ast::ProjectionPositionalASTNode* node) final {} + void visit(const projection_ast::ProjectionPositionalASTNode* node) final { + // This expression is already built in the 'ProjectionTraversalPostVisitor'. We push an + // empty eval to match the size of 'evals' vector on the current level with the count of + // fields. + _context->topLevelEvals().emplace_back(EvalMode::IgnoreField); + } void visit(const projection_ast::ProjectionSliceASTNode* node) final { using namespace std::literals; @@ -859,6 +907,309 @@ private: projection_ast::ProjectionASTConstVisitor* _inVisitor; projection_ast::ProjectionASTConstVisitor* _postVisitor; }; + +/** + * Generates expression that applies positional projection operator to the array stored in the + * 'inputSlot' using optional index from 'maybeIndexSlot'. + * If 'maybeIndexSlot' is boost::none, generates expression that always returns error. Otherwise, + * generates expression that looks like this: + * + * if isArray(inputSlot) { + * if exists(indexSlot) { + * let [subArray = extractSubArray(inputArray, 1, indexSlot)] + * if isArrayEmpty(subArray) { + * fail() + * } else { + * return subArray + * } + * } else { + * fail() + * } + * } else { + * return Nothing + * } + */ +ExpressionType generateApplyPositionalProjectionExpr( + boost::optional<sbe::value::SlotId> maybeIndexSlot, + sbe::value::SlotId inputSlot, + sbe::value::FrameIdGenerator* frameIdGenerator) { + auto indexIsNotDefinedError = sbe::makeE<sbe::EFail>( + ErrorCodes::Error{5291401}, + "positional operator '.$' couldn't find a matching element in the array"); + if (!maybeIndexSlot) { + return indexIsNotDefinedError; + } + + sbe::EVariable indexVar{*maybeIndexSlot}; + sbe::EVariable inputArray{inputSlot}; + auto subArrayWithElement = makeFunction("extractSubArray", + inputArray.clone(), + makeConstant(sbe::value::TypeTags::NumberInt32, 1), + indexVar.clone()); + + auto checkSubArrayEmpty = + makeLocalBind(frameIdGenerator, + [&](sbe::EVariable subArrayWithElement) { + return sbe::makeE<sbe::EIf>( + makeFunction("isArrayEmpty", subArrayWithElement.clone()), + sbe::makeE<sbe::EFail>(ErrorCodes::Error{5291402}, + "positional operator '.$' element mismatch"), + subArrayWithElement.clone()); + }, + std::move(subArrayWithElement)); + + auto checkIndex = sbe::makeE<sbe::EIf>(makeFunction("exists", indexVar.clone()), + std::move(checkSubArrayEmpty), + std::move(indexIsNotDefinedError)); + + return sbe::makeE<sbe::EIf>(makeFunction("isArray", inputArray.clone()), + std::move(checkIndex), + makeConstant(sbe::value::TypeTags::Nothing, 0)); +} + +/** + * Generates tree that does path traversal according to positional projection operator semantics. + */ +std::pair<sbe::value::SlotId, PlanStageType> generatePositionalProjection( + PlanStageType inputStage, + const PositionalProjectionData& data, + OperationContext* opCtx, + PlanNodeId planNodeId, + sbe::value::SlotIdGenerator* slotIdGenerator, + sbe::value::FrameIdGenerator* frameIdGenerator, + sbe::value::SlotId postImageSlot, + sbe::value::SlotId preImageSlot, + sbe::RuntimeEnvironment* env, + sbe::value::SlotVector relevantSlots) { + // First step is to generate filter tree that will record an array index for positional + // projection. + auto [maybeIndexSlot, indexStage] = generateFilter(opCtx, + &*data.matchExpression, + makeLimitCoScanTree(planNodeId), + slotIdGenerator, + frameIdGenerator, + preImageSlot, + env, + sbe::makeSV(), + planNodeId, + true /* trackIndex */); + + // The index slot is optional because there are certain queries that do not support index + // tracking (see 'generateFilter' declaration). For such queries we do not want to include + // stages generated by this function since we will not use any output from them. + // If index slot is defined, we join 'indexStage' with 'inputStage' using loop-join below. + // Otherwise, we do not use 'indexStage' at all. + if (maybeIndexSlot) { + auto outerProjectedSlots = relevantSlots; + outerProjectedSlots.push_back(postImageSlot); + inputStage = sbe::makeS<sbe::LoopJoinStage>(std::move(inputStage), + std::move(indexStage), + std::move(outerProjectedSlots), + sbe::makeSV(preImageSlot), + nullptr, + planNodeId); + } + + // Second step is to implement path traversal semantics for positional projection operator. The + // general idea is that for each of the components in field path we: + // - Extract respective field + // - If extracted value is not an object and not an array, we return it unchanged + // - If extracted value is an object, we pass it to the next component of the field path + // - If extracted value is an array, we apply positional projection operator to it and return + // the result + // + // For each component there are four main slots: + // - 'inputDocumentSlot'. This slot stores document containing current field. + // - 'extractedValueSlot'. The value corresponding to the current field is stored in this slot. + // - 'nextFieldResultSlot'. This is the result from the next field. If there is a field path + // 'a.b.c.$' and the current field is 'b', 'nextFieldResultSlot' stores result from + // evaluating field 'c'. Note that the loop below goes from field 'c' to field 'a', backwards + // - 'currentFieldResultSlot'. This slot stores result from evaluating the current field. + auto extractedValueSlot = slotIdGenerator->generate(); + sbe::value::SlotId nextFieldResultSlot; + PlanStageType resultStage; + const auto& fieldPath = data.fieldPath; + for (auto it = fieldPath.rbegin(); it != fieldPath.rend(); it++) { + const auto fieldName = *it; + // First and last terminology is applied to reading field paths from left to right. In + // the field path 'a.b.c.$', 'a' is a first field and 'c' is the last one. + const bool isFirstField = std::next(it) == fieldPath.rend(); + const bool isLastField = it == fieldPath.rbegin(); + + sbe::value::SlotId inputDocumentSlot; + PlanStageType fromBranch; + if (isFirstField) { + // For the first field the input document is the post-image document itself. + inputDocumentSlot = postImageSlot; + fromBranch = std::move(inputStage); + } else { + // For all other fields input document will be extracted manually. + inputDocumentSlot = slotIdGenerator->generate(); + fromBranch = makeLimitCoScanTree(planNodeId); + } + + // Construct 'from' branch of the loop-join stage below. Simply extract current field value + // from the input document. + fromBranch = + sbe::makeProjectStage(std::move(fromBranch), + planNodeId, + extractedValueSlot, + makeFunction("getField", + sbe::makeE<sbe::EVariable>(inputDocumentSlot), + makeConstant(fieldName))); + + // Construct 'in' branch of the loop-join stage below. This branch is responsible for what + // we do with the extracted value: apply positional projection, go deeper into the object + // or return the value unchanged. + auto projectionResultSlot = slotIdGenerator->generate(); + auto inBranch = + sbe::makeProjectStage(makeLimitCoScanTree(planNodeId), + planNodeId, + projectionResultSlot, + generateApplyPositionalProjectionExpr( + maybeIndexSlot, extractedValueSlot, frameIdGenerator)); + + sbe::value::SlotId fieldValueSlot = projectionResultSlot; + if (!isLastField) { + // All fields except the last one have the option to pass the extracted value to the + // next field. Branch stage below checks the type of the extracted value. If it is an + // array, we apply positional projection operator. Otherwise, we pass the value to the + // next field. + invariant(resultStage); + fieldValueSlot = slotIdGenerator->generate(); + inBranch = sbe::makeS<sbe::BranchStage>( + std::move(inBranch), + std::move(resultStage), + makeFunction("isArray", sbe::makeE<sbe::EVariable>(extractedValueSlot)), + sbe::makeSV(projectionResultSlot), + sbe::makeSV(nextFieldResultSlot), + sbe::makeSV(fieldValueSlot), + planNodeId); + } + + // After we have computed a new field value (either by applying positional projection or by + // getting result from the next field), we construct a new object where current field has + // this new value. + auto modifiedObjectSlot = slotIdGenerator->generate(); + inBranch = sbe::makeS<sbe::MakeBsonObjStage>(std::move(inBranch), + modifiedObjectSlot, + inputDocumentSlot, + sbe::MakeBsonObjStage::FieldBehavior::drop, + std::vector<std::string>{}, + std::vector<std::string>{fieldName}, + sbe::makeSV(fieldValueSlot), + false, + false, + planNodeId); + + // Top branch stage is constructed differently for the last field and others. + // For the last field, 'inBranch' is containing 'mkobj / project' stages at this point, + // expecting an array to be stored in 'extractedValueSlot'. This means that top branch must + // check if 'extractedValueSlot' is actually an array and return the value unchanged + // otherwise. + // For all other fields, 'inBranch' is containing 'mkobj / branch / project' stages at this + // point, expecting an array or object to be stored in 'extractedValueSlot'. In this case, + // top branch must check if 'extractedValueSlot' is actually an array or object and return + // the value unchanged otherwise. + auto applyProjectionCondition = + makeFunction("isArray", sbe::makeE<sbe::EVariable>(extractedValueSlot)); + if (!isLastField) { + applyProjectionCondition = sbe::makeE<sbe::EPrimBinary>( + sbe::EPrimBinary::logicOr, + std::move(applyProjectionCondition), + makeFunction("isObject", sbe::makeE<sbe::EVariable>(extractedValueSlot))); + } + + // We should also check that current field exists in the 'inputDocumentSlot' and return the + // value unchanged if not. + applyProjectionCondition = sbe::makeE<sbe::EPrimBinary>( + sbe::EPrimBinary::logicAnd, + makeFunction("exists", sbe::makeE<sbe::EVariable>(extractedValueSlot)), + std::move(applyProjectionCondition)); + + // Finally, we construct the top stage of the 'in' branch for the loop-join stage below. + // This branch stage checks the condition constructed above and returns the + // 'inputDocumentSlot' unchanged if this condition is false. + auto currentFieldResultSlot = slotIdGenerator->generate(); + inBranch = sbe::makeS<sbe::BranchStage>(std::move(inBranch), + makeLimitCoScanTree(planNodeId), + std::move(applyProjectionCondition), + sbe::makeSV(modifiedObjectSlot), + sbe::makeSV(inputDocumentSlot), + sbe::makeSV(currentFieldResultSlot), + planNodeId); + + // Construct the loop-join stage. + // Final tree for the last field looks like this: + // + // nlj correlatedSlots = [extractedValueSlot, inputDocumentSlot] + // left + // project extractedValueSlot = getField(inputDocumentSlot, fieldName) + // <limit-1/coscan or stage constructed by 'generateFilter' or 'inputStage'> + // right + // branch + // condition = exists(extractedValueSlot) && isArray(extractedValueSlot), + // result = currentFieldResultSlot + // [modifiedObjectSlot] then + // mkbson fieldName = projectionResultSlot + // project projectionResultSlot = <position projecton expr> + // limit 1 + // coscan + // [inputDocumentSlot] else + // limit 1 + // coscan + // + // Final tree for all other fields looks like this: + // + // nlj correlatedSlots = [extractedValueSlot, inputDocumentSlot] + // left + // project extractedValueSlot = getField(inputDocumentSlot, fieldName) + // <limit-1/coscan or stage constructed by 'generateFilter' or 'inputStage'> + // right + // branch + // condition = exists(extractedValueSlot) && isArrayOrObject(extractedValueSlot) + // result = currentFieldResultSlot + // [modifiedObjectSlot] then + // mkbson fieldName = fieldValueSlot + // branch condition = isArray(extractedValueSlot)} + // [projectionResultSlot] then + // project projectionResultSlot = <position projecton expr> + // limit 1 + // coscan + // [nextFieldResultSlot] else + // <resultStage> + // [inputDocumentSlot] else + // limit 1 + // coscan + auto outerProjectedSlots = sbe::makeSV(); + auto correlatedSlots = sbe::makeSV(extractedValueSlot, inputDocumentSlot); + if (isFirstField) { + // Loop-join stage constructed for the first field will be the top stage of the whole + // resulting tree. It needs to respect relevant slots and propagate index slot if it is + // defined. + outerProjectedSlots = relevantSlots; + if (maybeIndexSlot) { + correlatedSlots.push_back(*maybeIndexSlot); + } + } + resultStage = sbe::makeS<sbe::LoopJoinStage>(std::move(fromBranch), + std::move(inBranch), + std::move(outerProjectedSlots), + std::move(correlatedSlots), + nullptr, + planNodeId); + + // Exchange slots to hold the invariant. The field on the next iteration is located to the + // left of the current one, it can be considered previous to the current one. This previous + // field should extract it's field value into the 'inputDocumentSlot' for the current field. + // Also, from the previous field perspective current field is the next one, so we should + // store 'currentFieldResultSlot' in 'nextFieldResultSlot'. + extractedValueSlot = inputDocumentSlot; + nextFieldResultSlot = currentFieldResultSlot; + } + + return {nextFieldResultSlot, std::move(resultStage)}; +} } // namespace std::pair<sbe::value::SlotId, PlanStageType> generateProjection( @@ -869,6 +1220,7 @@ std::pair<sbe::value::SlotId, PlanStageType> generateProjection( sbe::value::FrameIdGenerator* frameIdGenerator, sbe::value::SlotId inputVar, sbe::RuntimeEnvironment* env, + sbe::value::SlotVector relevantSlots, PlanNodeId planNodeId) { ProjectionTraversalVisitorContext context{opCtx, planNodeId, @@ -877,7 +1229,9 @@ std::pair<sbe::value::SlotId, PlanStageType> generateProjection( frameIdGenerator, std::move(stage), inputVar, - env}; + inputVar, + env, + relevantSlots}; ProjectionTraversalPreVisitor preVisitor{&context}; ProjectionTraversalInVisitor inVisitor{&context}; ProjectionTraversalPostVisitor postVisitor{&context}; @@ -898,7 +1252,9 @@ std::pair<sbe::value::SlotId, PlanStageType> generateProjection( frameIdGenerator, std::move(resultStage), resultSlot, - env}; + inputVar, + env, + relevantSlots}; ProjectionTraversalPreVisitor slicePreVisitor{&sliceContext}; ProjectionTraversalInVisitor sliceInVisitor{&sliceContext}; SliceProjectionTraversalPostVisitor slicePostVisitor{&sliceContext}; @@ -907,6 +1263,29 @@ std::pair<sbe::value::SlotId, PlanStageType> generateProjection( std::tie(resultSlot, resultStage) = sliceContext.done(); } + if (context.positionalProjectionData) { + // Positional projection operator has different path traversal semantics compared to other + // operators. It goes along the path until it meets an array. Once the array is detected, it + // extracts the array element using index recorded by query predicate. Path traversal is + // stopped after this. + // To implement these semantics we build another tree on top of the existing one. This tree + // applies positional projection operator to the post-image object. + // Existing visitor pattern is not suitable for this operator because it has a different + // evaluation model. Positional projection must be applied to the first array it meets on + // the path, while other operators are applied only to the leaf path node. + std::tie(resultSlot, resultStage) = + generatePositionalProjection(std::move(resultStage), + *context.positionalProjectionData, + opCtx, + planNodeId, + slotIdGenerator, + frameIdGenerator, + resultSlot, /* postImageSlot */ + inputVar, /* preImageSlot */ + env, + relevantSlots); + } + return {resultSlot, std::move(resultStage)}; } } // namespace mongo::stage_builder diff --git a/src/mongo/db/query/sbe_stage_builder_projection.h b/src/mongo/db/query/sbe_stage_builder_projection.h index 6feeb3d7c1d..1c9decf686a 100644 --- a/src/mongo/db/query/sbe_stage_builder_projection.h +++ b/src/mongo/db/query/sbe_stage_builder_projection.h @@ -49,6 +49,7 @@ std::pair<sbe::value::SlotId, std::unique_ptr<sbe::PlanStage>> generateProjectio sbe::value::FrameIdGenerator* frameIdGenerator, sbe::value::SlotId inputVar, sbe::RuntimeEnvironment* env, + sbe::value::SlotVector relevantSlots, PlanNodeId planNodeId); } // namespace mongo::stage_builder diff --git a/src/mongo/shell/assert.js b/src/mongo/shell/assert.js index 4cf065304ba..e4d74097830 100644 --- a/src/mongo/shell/assert.js +++ b/src/mongo/shell/assert.js @@ -729,12 +729,13 @@ assert = (function() { return res; } - const kAnyErrorCode = Object.create(null); + assert._kAnyErrorCode = Object.create(null); + function _assertCommandFailed(res, expectedCode, msg) { _validateAssertionMessage(msg); _validateCommandResponse(res, "commandFailed"); - if (expectedCode !== kAnyErrorCode && !Array.isArray(expectedCode)) { + if (expectedCode !== assert._kAnyErrorCode && !Array.isArray(expectedCode)) { expectedCode = [expectedCode]; } @@ -745,7 +746,7 @@ assert = (function() { }; const makeFailCodeMsg = () => { - return (expectedCode !== kAnyErrorCode) + return (expectedCode !== assert._kAnyErrorCode) ? _buildAssertionMessage(msg, "command did not fail with any of the following codes " + tojson(expectedCode) + " " + tojson(res)) @@ -759,7 +760,7 @@ assert = (function() { // A WriteCommandError implies ok:0. // Error objects may have a `code` property added (e.g. // DBCollection.prototype.mapReduce) without a `ok` property. - if (expectedCode !== kAnyErrorCode) { + if (expectedCode !== assert._kAnyErrorCode) { if (!res.hasOwnProperty("code") || !expectedCode.includes(res.code)) { doassert(makeFailCodeMsg(), res); } @@ -771,7 +772,7 @@ assert = (function() { doassert(makeFailMsg(), res); } - if (expectedCode !== kAnyErrorCode) { + if (expectedCode !== assert._kAnyErrorCode) { let foundCode = false; if (res.hasOwnProperty("code") && expectedCode.includes(res.code)) { foundCode = true; @@ -824,7 +825,7 @@ assert = (function() { }; assert.commandFailed = function(res, msg) { - return _assertCommandFailed(res, kAnyErrorCode, msg); + return _assertCommandFailed(res, assert._kAnyErrorCode, msg); }; // expectedCode can be an array of possible codes. @@ -866,7 +867,7 @@ assert = (function() { }; assert.writeError = function(res, msg) { - return assert.writeErrorWithCode(res, kAnyErrorCode, msg); + return assert.writeErrorWithCode(res, assert._kAnyErrorCode, msg); }; // If expectedCode is an array then this asserts that the found code is one of the codes in @@ -908,7 +909,7 @@ assert = (function() { } } - if (!errMsg && expectedCode !== kAnyErrorCode) { + if (!errMsg && expectedCode !== assert._kAnyErrorCode) { if (!Array.isArray(expectedCode)) { expectedCode = [expectedCode]; } |