summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIan Boros <ian.boros@mongodb.com>2023-02-17 18:43:37 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2023-03-10 23:23:36 +0000
commit3ff59aaf07bcc3e44de3bdd6a4d7b8be6b4f115e (patch)
tree0cfc867008eeca940d9f48b552d5dfc0a7dddbbf
parent5034e23d2044179ff56b6853ae0b398e88d3e651 (diff)
downloadmongo-3ff59aaf07bcc3e44de3bdd6a4d7b8be6b4f115e.tar.gz
SERVER-71037 SBE top-k sort improvements
-rw-r--r--jstests/core/query/sort/sort5.js57
-rw-r--r--jstests/core/query/sort/sort_dotted_paths.js168
-rw-r--r--jstests/core/query/sort/sort_dotted_paths_collation.js149
-rw-r--r--src/mongo/db/exec/field_name_bloom_filter.h75
-rw-r--r--src/mongo/db/exec/sbe/expressions/expression.cpp6
-rw-r--r--src/mongo/db/exec/sbe/stages/scan.cpp6
-rw-r--r--src/mongo/db/exec/sbe/stages/scan.h3
-rw-r--r--src/mongo/db/exec/sbe/values/sort_spec.h56
-rw-r--r--src/mongo/db/exec/sbe/values/value.cpp64
-rw-r--r--src/mongo/db/exec/sbe/values/value.h15
-rw-r--r--src/mongo/db/exec/sbe/values/value_printer.cpp10
-rw-r--r--src/mongo/db/exec/sbe/vm/vm.cpp62
-rw-r--r--src/mongo/db/exec/sbe/vm/vm.h7
-rw-r--r--src/mongo/db/index/sort_key_generator.cpp168
-rw-r--r--src/mongo/db/index/sort_key_generator.h89
-rw-r--r--src/mongo/db/query/sbe_stage_builder.cpp51
16 files changed, 752 insertions, 234 deletions
diff --git a/jstests/core/query/sort/sort5.js b/jstests/core/query/sort/sort5.js
index 01a267bf6b6..36b949bb807 100644
--- a/jstests/core/query/sort/sort5.js
+++ b/jstests/core/query/sort/sort5.js
@@ -1,36 +1,43 @@
+(function() {
// test compound sorting
-var t = db.sort5;
+const t = db.sort5;
t.drop();
-t.save({_id: 5, x: 1, y: {a: 5, b: 4}});
-t.save({_id: 7, x: 2, y: {a: 7, b: 3}});
-t.save({_id: 2, x: 3, y: {a: 2, b: 3}});
-t.save({_id: 9, x: 4, y: {a: 9, b: 3}});
+assert.commandWorked(t.insert({_id: 5, x: 1, y: {a: 5, b: 4}}));
+assert.commandWorked(t.insert({_id: 7, x: 2, y: {a: 7, b: 3}}));
+assert.commandWorked(t.insert({_id: 2, x: 3, y: {a: 2, b: 3}}));
+assert.commandWorked(t.insert({_id: 9, x: 4, y: {a: 9, b: 3}}));
-assert.eq([4, 2, 3, 1],
- t.find().sort({"y.b": 1, "y.a": -1}).map(function(z) {
- return z.x;
- }),
- "A no index");
+function testSortAndSortWithLimit(expected, sortPattern, description) {
+ // Test sort and sort with limit.
+ assert.eq(expected,
+ t.find().sort(sortPattern).map(function(z) {
+ return z.x;
+ }),
+ description);
+
+ assert.eq(expected,
+ t.find().sort(sortPattern).limit(500).map(function(z) {
+ return z.x;
+ }),
+ description);
+}
+
+testSortAndSortWithLimit([4, 2, 3, 1], {"y.b": 1, "y.a": -1}, "A no index");
t.createIndex({"y.b": 1, "y.a": -1});
-assert.eq([4, 2, 3, 1],
- t.find().sort({"y.b": 1, "y.a": -1}).map(function(z) {
- return z.x;
- }),
- "A index");
+testSortAndSortWithLimit([4, 2, 3, 1], {"y.b": 1, "y.a": -1}, "A index");
assert(t.validate().valid, "A valid");
// test sorting on compound key involving _id
-assert.eq([4, 2, 3, 1],
- t.find().sort({"y.b": 1, _id: -1}).map(function(z) {
- return z.x;
- }),
- "B no index");
+testSortAndSortWithLimit([4, 2, 3, 1], {"y.b": 1, _id: -1}, "B no index");
t.createIndex({"y.b": 1, "_id": -1});
-assert.eq([4, 2, 3, 1],
- t.find().sort({"y.b": 1, _id: -1}).map(function(z) {
- return z.x;
- }),
- "B index");
+testSortAndSortWithLimit([4, 2, 3, 1], {"y.b": 1, _id: -1}, "B index");
assert(t.validate().valid, "B valid");
+
+function testWithProj(proj, sort, expected) {
+ assert.eq(t.find({}, proj).sort(sort).map((doc) => doc._id), expected);
+ assert.eq(t.find({}, proj).sort(sort).limit(500).map((doc) => doc._id), expected);
+}
+testWithProj({"y.a": 1, "y.b": 1, _id: 1}, {"y.a": 1, "y.b": 1, _id: 1}, [2, 5, 7, 9]);
+})();
diff --git a/jstests/core/query/sort/sort_dotted_paths.js b/jstests/core/query/sort/sort_dotted_paths.js
index 1ce7ad2533e..2db3ba5683c 100644
--- a/jstests/core/query/sort/sort_dotted_paths.js
+++ b/jstests/core/query/sort/sort_dotted_paths.js
@@ -15,6 +15,11 @@
const coll = db.sort_dotted_paths;
coll.drop();
+function testSortAndSortWithLimit(sortPattern, expectedIds) {
+ assert.eq(expectedIds, coll.find({}, {_id: 1}).sort(sortPattern).toArray());
+ assert.eq(expectedIds, coll.find({}, {_id: 1}).sort(sortPattern).limit(500).toArray());
+}
+
// Basic tests to verify that sorting deals with undefined, null, missing fields, and nested arrays
// as expected.
assert.commandWorked(coll.insert([
@@ -29,20 +34,20 @@ assert.commandWorked(coll.insert([
{_id: 9, a: [undefined]}
]));
-assert.eq(
- coll.find({}, {_id: 1}).sort({a: 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {a: 1, _id: 1},
[{_id: 2}, {_id: 5}, {_id: 9}, {_id: 3}, {_id: 8}, {_id: 1}, {_id: 6}, {_id: 4}, {_id: 7}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({a: -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {a: -1, _id: 1},
[{_id: 7}, {_id: 4}, {_id: 1}, {_id: 6}, {_id: 3}, {_id: 8}, {_id: 2}, {_id: 5}, {_id: 9}]);
// Test out sort({"a.0":1}) on a collection of documents where field 'a' is a mix of different
// types (arrays of varying size for some documents, non-arrays for other documents).
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.0": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.0": 1, _id: 1},
[{_id: 9}, {_id: 1}, {_id: 2}, {_id: 3}, {_id: 4}, {_id: 5}, {_id: 8}, {_id: 6}, {_id: 7}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.0": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.0": -1, _id: 1},
[{_id: 7}, {_id: 6}, {_id: 1}, {_id: 2}, {_id: 3}, {_id: 4}, {_id: 5}, {_id: 8}, {_id: 9}]);
// Test out sort({a:1,b:1}) on a collection where a is an array for some documents and b is an array
@@ -60,17 +65,17 @@ assert.commandWorked(coll.insert([
{_id: 9, a: 2, b: 3}
]));
-assert.eq(
- coll.find({}, {_id: 1}).sort({a: 1, b: 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {a: 1, b: 1, _id: 1},
[{_id: 3}, {_id: 7}, {_id: 1}, {_id: 8}, {_id: 4}, {_id: 2}, {_id: 5}, {_id: 6}, {_id: 9}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({a: 1, b: -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {a: 1, b: -1, _id: 1},
[{_id: 3}, {_id: 4}, {_id: 7}, {_id: 8}, {_id: 1}, {_id: 6}, {_id: 9}, {_id: 5}, {_id: 2}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({a: -1, b: 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {a: -1, b: 1, _id: 1},
[{_id: 5}, {_id: 2}, {_id: 6}, {_id: 8}, {_id: 9}, {_id: 7}, {_id: 1}, {_id: 4}, {_id: 3}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({a: -1, b: -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {a: -1, b: -1, _id: 1},
[{_id: 5}, {_id: 6}, {_id: 8}, {_id: 9}, {_id: 2}, {_id: 4}, {_id: 7}, {_id: 1}, {_id: 3}]);
// Verify that sort({a:1,b:1}) fails with a "parallel arrays" error when there is at least one
@@ -113,17 +118,17 @@ assert.commandWorked(coll.insert([
{_id: 9, a: [{b: []}, {b: [1, 3]}]}
]));
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b": 1, _id: 1},
[{_id: 2}, {_id: 9}, {_id: 4}, {_id: 5}, {_id: 8}, {_id: 1}, {_id: 3}, {_id: 7}, {_id: 6}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b": 1, _id: -1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b": 1, _id: -1},
[{_id: 9}, {_id: 2}, {_id: 5}, {_id: 4}, {_id: 8}, {_id: 3}, {_id: 1}, {_id: 7}, {_id: 6}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b": -1, _id: 1},
[{_id: 7}, {_id: 6}, {_id: 8}, {_id: 3}, {_id: 9}, {_id: 1}, {_id: 4}, {_id: 5}, {_id: 2}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b": -1, _id: -1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b": -1, _id: -1},
[{_id: 7}, {_id: 8}, {_id: 6}, {_id: 9}, {_id: 3}, {_id: 1}, {_id: 5}, {_id: 4}, {_id: 2}]);
// Basic tests for a sort pattern that contains two paths of length 2 with a common prefix.
@@ -140,33 +145,32 @@ assert.commandWorked(coll.insert([
{_id: 9, a: [{b: 8, c: 2}, {b: 6, c: 3}]},
]));
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.c": 1, "a.b": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.c": 1, "a.b": 1, _id: 1},
[{_id: 2}, {_id: 5}, {_id: 1}, {_id: 4}, {_id: 6}, {_id: 3}, {_id: 8}, {_id: 7}, {_id: 9}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.c": 1, "a.b": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.c": 1, "a.b": -1, _id: 1},
[{_id: 3}, {_id: 6}, {_id: 1}, {_id: 4}, {_id: 2}, {_id: 5}, {_id: 9}, {_id: 7}, {_id: 8}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.c": -1, "a.b": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.c": -1, "a.b": 1, _id: 1},
[{_id: 7}, {_id: 6}, {_id: 4}, {_id: 9}, {_id: 5}, {_id: 8}, {_id: 1}, {_id: 2}, {_id: 3}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.c": -1, "a.b": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.c": -1, "a.b": -1, _id: 1},
[{_id: 8}, {_id: 5}, {_id: 9}, {_id: 4}, {_id: 6}, {_id: 7}, {_id: 3}, {_id: 2}, {_id: 1}]);
// Basic tests for a sort pattern that contains a path of length 2 and a path of length 1, where
// one path is a prefix of the other path.
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.c": 1, a: 1, _id: 1}).toArray(),
- [{_id: 2}, {_id: 5}, {_id: 1}, {_id: 4}, {_id: 6}, {_id: 3}, {_id: 8}, {_id: 7}, {_id: 9}],
- () => tojson(coll.find({}, {_id: 1}).sort({"a.c": 1, a: 1, _id: 1}).explain()));
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.c": 1, a: -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.c": 1, a: 1, _id: 1},
+ [{_id: 2}, {_id: 5}, {_id: 1}, {_id: 4}, {_id: 6}, {_id: 3}, {_id: 8}, {_id: 7}, {_id: 9}]);
+testSortAndSortWithLimit(
+ {"a.c": 1, a: -1, _id: 1},
[{_id: 3}, {_id: 6}, {_id: 1}, {_id: 4}, {_id: 2}, {_id: 5}, {_id: 9}, {_id: 7}, {_id: 8}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.c": -1, a: 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.c": -1, a: 1, _id: 1},
[{_id: 7}, {_id: 6}, {_id: 4}, {_id: 9}, {_id: 5}, {_id: 8}, {_id: 1}, {_id: 2}, {_id: 3}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.c": -1, a: -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.c": -1, a: -1, _id: 1},
[{_id: 8}, {_id: 5}, {_id: 9}, {_id: 4}, {_id: 6}, {_id: 7}, {_id: 3}, {_id: 2}, {_id: 1}]);
// Verify that sort({"a.b":1,"a.c":1}) fails with a "parallel arrays" error when there is at least
@@ -199,47 +203,47 @@ assert.commandWorked(coll.insert([
{_id: 9, a: [{b: [8], c: 2}, {b: [6], c: 3}]},
]));
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.c": 1, "a.b": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.c": 1, "a.b": 1, _id: 1},
[{_id: 6}, {_id: 1}, {_id: 2}, {_id: 5}, {_id: 3}, {_id: 4}, {_id: 7}, {_id: 8}, {_id: 9}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.c": 1, "a.b": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.c": 1, "a.b": -1, _id: 1},
[{_id: 6}, {_id: 3}, {_id: 1}, {_id: 4}, {_id: 5}, {_id: 2}, {_id: 9}, {_id: 8}, {_id: 7}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.c": -1, "a.b": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.c": -1, "a.b": 1, _id: 1},
[{_id: 2}, {_id: 7}, {_id: 6}, {_id: 4}, {_id: 5}, {_id: 9}, {_id: 8}, {_id: 1}, {_id: 3}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.c": -1, "a.b": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.c": -1, "a.b": -1, _id: 1},
[{_id: 2}, {_id: 8}, {_id: 5}, {_id: 9}, {_id: 4}, {_id: 6}, {_id: 7}, {_id: 3}, {_id: 1}]);
// More tests for a sort pattern that contains a path of length 2 and a path of length 1, where
// one path is a prefix of the other path.
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b": 1, a: 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b": 1, a: 1, _id: 1},
[{_id: 7}, {_id: 1}, {_id: 2}, {_id: 5}, {_id: 6}, {_id: 8}, {_id: 4}, {_id: 3}, {_id: 9}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b": 1, a: -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b": 1, a: -1, _id: 1},
[{_id: 7}, {_id: 1}, {_id: 5}, {_id: 2}, {_id: 8}, {_id: 6}, {_id: 3}, {_id: 4}, {_id: 9}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b": -1, a: 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b": -1, a: 1, _id: 1},
[{_id: 3}, {_id: 8}, {_id: 9}, {_id: 5}, {_id: 6}, {_id: 4}, {_id: 1}, {_id: 2}, {_id: 7}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b": -1, a: -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b": -1, a: -1, _id: 1},
[{_id: 3}, {_id: 9}, {_id: 8}, {_id: 5}, {_id: 1}, {_id: 4}, {_id: 6}, {_id: 2}, {_id: 7}]);
// Test out sort({"a.0.b":1}) on a collection of documents where field "a" and sub-field "b" are
// a mix of different types.
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.0.b": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.0.b": 1, _id: 1},
[{_id: 7}, {_id: 1}, {_id: 2}, {_id: 5}, {_id: 8}, {_id: 3}, {_id: 4}, {_id: 6}, {_id: 9}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.0.b": 1, _id: -1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.0.b": 1, _id: -1},
[{_id: 7}, {_id: 1}, {_id: 5}, {_id: 2}, {_id: 8}, {_id: 4}, {_id: 3}, {_id: 6}, {_id: 9}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.0.b": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.0.b": -1, _id: 1},
[{_id: 9}, {_id: 3}, {_id: 6}, {_id: 1}, {_id: 4}, {_id: 5}, {_id: 8}, {_id: 2}, {_id: 7}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.0.b": -1, _id: -1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.0.b": -1, _id: -1},
[{_id: 9}, {_id: 3}, {_id: 6}, {_id: 4}, {_id: 1}, {_id: 8}, {_id: 5}, {_id: 2}, {_id: 7}]);
// Tests for a sort pattern that contains two paths of length 2 that do not have a common prefix.
@@ -256,17 +260,17 @@ assert.commandWorked(coll.insert([
{_id: 9, a: {b: 3}, c: [{d: [8]}, {d: [6]}]},
]));
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b": 1, "c.d": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b": 1, "c.d": 1, _id: 1},
[{_id: 6}, {_id: 1}, {_id: 2}, {_id: 5}, {_id: 3}, {_id: 7}, {_id: 8}, {_id: 4}, {_id: 9}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b": 1, "c.d": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b": 1, "c.d": -1, _id: 1},
[{_id: 6}, {_id: 3}, {_id: 5}, {_id: 1}, {_id: 2}, {_id: 4}, {_id: 8}, {_id: 7}, {_id: 9}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b": -1, "c.d": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b": -1, "c.d": 1, _id: 1},
[{_id: 2}, {_id: 8}, {_id: 4}, {_id: 6}, {_id: 9}, {_id: 7}, {_id: 1}, {_id: 5}, {_id: 3}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b": -1, "c.d": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b": -1, "c.d": -1, _id: 1},
[{_id: 2}, {_id: 9}, {_id: 4}, {_id: 6}, {_id: 8}, {_id: 7}, {_id: 3}, {_id: 5}, {_id: 1}]);
// Basic tests for a sort pattern that contains a path of length 3.
@@ -283,16 +287,16 @@ assert.commandWorked(coll.insert([
{_id: 9, a: [{b: [{c: []}, {c: [1, 3]}]}]}
]));
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b.c": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b.c": 1, _id: 1},
[{_id: 2}, {_id: 3}, {_id: 9}, {_id: 5}, {_id: 7}, {_id: 4}, {_id: 8}, {_id: 1}, {_id: 6}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b.c": 1, _id: -1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b.c": 1, _id: -1},
[{_id: 9}, {_id: 3}, {_id: 2}, {_id: 7}, {_id: 5}, {_id: 8}, {_id: 4}, {_id: 1}, {_id: 6}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b.c": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b.c": -1, _id: 1},
[{_id: 6}, {_id: 8}, {_id: 5}, {_id: 9}, {_id: 1}, {_id: 4}, {_id: 3}, {_id: 7}, {_id: 2}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b.c": -1, _id: -1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b.c": -1, _id: -1},
[{_id: 8}, {_id: 6}, {_id: 9}, {_id: 5}, {_id: 1}, {_id: 4}, {_id: 7}, {_id: 3}, {_id: 2}]);
})();
diff --git a/jstests/core/query/sort/sort_dotted_paths_collation.js b/jstests/core/query/sort/sort_dotted_paths_collation.js
index 10111a95441..b0b91f7126d 100644
--- a/jstests/core/query/sort/sort_dotted_paths_collation.js
+++ b/jstests/core/query/sort/sort_dotted_paths_collation.js
@@ -25,6 +25,11 @@ const caseInsensitive = {
};
assert.commandWorked(db.createCollection(coll.getName(), caseInsensitive));
+function testSortAndSortWithLimit(sortPattern, expectedIds) {
+ assert.eq(expectedIds, coll.find({}, {_id: 1}).sort(sortPattern).toArray());
+ assert.eq(expectedIds, coll.find({}, {_id: 1}).sort(sortPattern).limit(500).toArray());
+}
+
// Test out sort({a:1,b:1}) on a collection where a is an array for some documents and b is an array
// for other documents.
assert.commandWorked(coll.insert([
@@ -39,17 +44,17 @@ assert.commandWorked(coll.insert([
{_id: 9, a: "b", b: "C"}
]));
-assert.eq(
- coll.find({}, {_id: 1}).sort({a: 1, b: 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {a: 1, b: 1, _id: 1},
[{_id: 3}, {_id: 7}, {_id: 1}, {_id: 8}, {_id: 4}, {_id: 2}, {_id: 5}, {_id: 6}, {_id: 9}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({a: 1, b: -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {a: 1, b: -1, _id: 1},
[{_id: 3}, {_id: 4}, {_id: 7}, {_id: 8}, {_id: 1}, {_id: 6}, {_id: 9}, {_id: 5}, {_id: 2}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({a: -1, b: 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {a: -1, b: 1, _id: 1},
[{_id: 5}, {_id: 2}, {_id: 6}, {_id: 8}, {_id: 9}, {_id: 7}, {_id: 1}, {_id: 4}, {_id: 3}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({a: -1, b: -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {a: -1, b: -1, _id: 1},
[{_id: 5}, {_id: 6}, {_id: 8}, {_id: 9}, {_id: 2}, {_id: 4}, {_id: 7}, {_id: 1}, {_id: 3}]);
// Verify that sort({a:1,b:1}) fails with a "parallel arrays" error when there is at least one
@@ -94,17 +99,17 @@ assert.commandWorked(coll.insert([
{_id: 9, a: [{b: []}, {b: ["a", "C"]}]}
]));
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b": 1, _id: 1},
[{_id: 2}, {_id: 9}, {_id: 4}, {_id: 5}, {_id: 8}, {_id: 1}, {_id: 3}, {_id: 7}, {_id: 6}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b": 1, _id: -1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b": 1, _id: -1},
[{_id: 9}, {_id: 2}, {_id: 5}, {_id: 4}, {_id: 8}, {_id: 3}, {_id: 1}, {_id: 7}, {_id: 6}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b": -1, _id: 1},
[{_id: 7}, {_id: 6}, {_id: 8}, {_id: 3}, {_id: 9}, {_id: 1}, {_id: 4}, {_id: 5}, {_id: 2}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b": -1, _id: -1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b": -1, _id: -1},
[{_id: 7}, {_id: 8}, {_id: 6}, {_id: 9}, {_id: 3}, {_id: 1}, {_id: 5}, {_id: 4}, {_id: 2}]);
// Basic tests for a sort pattern that contains two paths of length 2 with a common prefix.
@@ -122,32 +127,32 @@ assert.commandWorked(coll.insert([
{_id: 9, a: [{b: "h", c: "b"}, {b: "F", c: "C"}]},
]));
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.c": 1, "a.b": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.c": 1, "a.b": 1, _id: 1},
[{_id: 2}, {_id: 5}, {_id: 1}, {_id: 4}, {_id: 6}, {_id: 3}, {_id: 8}, {_id: 7}, {_id: 9}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.c": 1, "a.b": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.c": 1, "a.b": -1, _id: 1},
[{_id: 3}, {_id: 6}, {_id: 1}, {_id: 4}, {_id: 2}, {_id: 5}, {_id: 9}, {_id: 7}, {_id: 8}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.c": -1, "a.b": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.c": -1, "a.b": 1, _id: 1},
[{_id: 7}, {_id: 6}, {_id: 4}, {_id: 9}, {_id: 5}, {_id: 8}, {_id: 1}, {_id: 2}, {_id: 3}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.c": -1, "a.b": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.c": -1, "a.b": -1, _id: 1},
[{_id: 8}, {_id: 5}, {_id: 9}, {_id: 4}, {_id: 6}, {_id: 7}, {_id: 3}, {_id: 2}, {_id: 1}]);
// Basic tests for a sort pattern that contains a path of length 2 and a path of length 1, where
// one path is a prefix of the other path.
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.c": 1, a: 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.c": 1, a: 1, _id: 1},
[{_id: 2}, {_id: 5}, {_id: 1}, {_id: 4}, {_id: 6}, {_id: 3}, {_id: 8}, {_id: 7}, {_id: 9}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.c": 1, a: -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.c": 1, a: -1, _id: 1},
[{_id: 3}, {_id: 6}, {_id: 1}, {_id: 4}, {_id: 2}, {_id: 5}, {_id: 9}, {_id: 7}, {_id: 8}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.c": -1, a: 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.c": -1, a: 1, _id: 1},
[{_id: 7}, {_id: 6}, {_id: 4}, {_id: 9}, {_id: 5}, {_id: 8}, {_id: 1}, {_id: 2}, {_id: 3}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.c": -1, a: -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.c": -1, a: -1, _id: 1},
[{_id: 8}, {_id: 5}, {_id: 9}, {_id: 4}, {_id: 6}, {_id: 7}, {_id: 3}, {_id: 2}, {_id: 1}]);
// Verify that sort({"a.b":1,"a.c":1}) fails with a "parallel arrays" error when there is at least
@@ -183,47 +188,47 @@ assert.commandWorked(coll.insert([
{_id: 9, a: [{b: ["h"], c: "b"}, {b: ["F"], c: "C"}]},
]));
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.c": 1, "a.b": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.c": 1, "a.b": 1, _id: 1},
[{_id: 6}, {_id: 1}, {_id: 2}, {_id: 5}, {_id: 3}, {_id: 4}, {_id: 7}, {_id: 8}, {_id: 9}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.c": 1, "a.b": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.c": 1, "a.b": -1, _id: 1},
[{_id: 6}, {_id: 3}, {_id: 1}, {_id: 4}, {_id: 5}, {_id: 2}, {_id: 9}, {_id: 8}, {_id: 7}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.c": -1, "a.b": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.c": -1, "a.b": 1, _id: 1},
[{_id: 2}, {_id: 7}, {_id: 6}, {_id: 4}, {_id: 5}, {_id: 9}, {_id: 8}, {_id: 1}, {_id: 3}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.c": -1, "a.b": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.c": -1, "a.b": -1, _id: 1},
[{_id: 2}, {_id: 8}, {_id: 5}, {_id: 9}, {_id: 4}, {_id: 6}, {_id: 7}, {_id: 3}, {_id: 1}]);
// More tests for a sort pattern that contains a path of length 2 and a path of length 1, where
// one path is a prefix of the other path.
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b": 1, a: 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b": 1, a: 1, _id: 1},
[{_id: 7}, {_id: 1}, {_id: 2}, {_id: 5}, {_id: 6}, {_id: 8}, {_id: 4}, {_id: 3}, {_id: 9}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b": 1, a: -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b": 1, a: -1, _id: 1},
[{_id: 7}, {_id: 1}, {_id: 5}, {_id: 2}, {_id: 8}, {_id: 6}, {_id: 3}, {_id: 4}, {_id: 9}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b": -1, a: 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b": -1, a: 1, _id: 1},
[{_id: 3}, {_id: 8}, {_id: 9}, {_id: 5}, {_id: 6}, {_id: 4}, {_id: 1}, {_id: 2}, {_id: 7}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b": -1, a: -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b": -1, a: -1, _id: 1},
[{_id: 3}, {_id: 9}, {_id: 8}, {_id: 5}, {_id: 1}, {_id: 4}, {_id: 6}, {_id: 2}, {_id: 7}]);
// Test out sort({"a.0.b":1}) on a collection of documents where field "a" and sub-field "b" are
// a mix of different types.
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.0.b": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.0.b": 1, _id: 1},
[{_id: 7}, {_id: 1}, {_id: 2}, {_id: 5}, {_id: 8}, {_id: 3}, {_id: 4}, {_id: 6}, {_id: 9}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.0.b": 1, _id: -1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.0.b": 1, _id: -1},
[{_id: 7}, {_id: 1}, {_id: 5}, {_id: 2}, {_id: 8}, {_id: 4}, {_id: 3}, {_id: 6}, {_id: 9}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.0.b": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.0.b": -1, _id: 1},
[{_id: 9}, {_id: 3}, {_id: 6}, {_id: 1}, {_id: 4}, {_id: 5}, {_id: 8}, {_id: 2}, {_id: 7}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.0.b": -1, _id: -1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.0.b": -1, _id: -1},
[{_id: 9}, {_id: 3}, {_id: 6}, {_id: 4}, {_id: 1}, {_id: 8}, {_id: 5}, {_id: 2}, {_id: 7}]);
// Tests for a sort pattern that contains two paths of length 2 that do not have a common prefix.
@@ -241,17 +246,17 @@ assert.commandWorked(coll.insert([
{_id: 9, a: {b: "C"}, c: [{d: ["h"]}, {d: ["F"]}]},
]));
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b": 1, "c.d": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b": 1, "c.d": 1, _id: 1},
[{_id: 6}, {_id: 1}, {_id: 2}, {_id: 5}, {_id: 3}, {_id: 7}, {_id: 8}, {_id: 4}, {_id: 9}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b": 1, "c.d": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b": 1, "c.d": -1, _id: 1},
[{_id: 6}, {_id: 3}, {_id: 5}, {_id: 1}, {_id: 2}, {_id: 4}, {_id: 8}, {_id: 7}, {_id: 9}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b": -1, "c.d": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b": -1, "c.d": 1, _id: 1},
[{_id: 2}, {_id: 8}, {_id: 4}, {_id: 6}, {_id: 9}, {_id: 7}, {_id: 1}, {_id: 5}, {_id: 3}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b": -1, "c.d": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b": -1, "c.d": -1, _id: 1},
[{_id: 2}, {_id: 9}, {_id: 4}, {_id: 6}, {_id: 8}, {_id: 7}, {_id: 3}, {_id: 5}, {_id: 1}]);
// Basic tests for a sort pattern that contains a path of length 3.
@@ -269,16 +274,16 @@ assert.commandWorked(coll.insert([
{_id: 9, a: [{b: [{c: []}, {c: ["a", "C"]}]}]}
]));
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b.c": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b.c": 1, _id: 1},
[{_id: 2}, {_id: 3}, {_id: 9}, {_id: 5}, {_id: 7}, {_id: 4}, {_id: 8}, {_id: 1}, {_id: 6}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b.c": 1, _id: -1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b.c": 1, _id: -1},
[{_id: 9}, {_id: 3}, {_id: 2}, {_id: 7}, {_id: 5}, {_id: 8}, {_id: 4}, {_id: 1}, {_id: 6}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b.c": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b.c": -1, _id: 1},
[{_id: 6}, {_id: 8}, {_id: 5}, {_id: 9}, {_id: 1}, {_id: 4}, {_id: 3}, {_id: 7}, {_id: 2}]);
-assert.eq(
- coll.find({}, {_id: 1}).sort({"a.b.c": -1, _id: -1}).toArray(),
+testSortAndSortWithLimit(
+ {"a.b.c": -1, _id: -1},
[{_id: 8}, {_id: 6}, {_id: 9}, {_id: 5}, {_id: 1}, {_id: 4}, {_id: 7}, {_id: 3}, {_id: 2}]);
})();
diff --git a/src/mongo/db/exec/field_name_bloom_filter.h b/src/mongo/db/exec/field_name_bloom_filter.h
new file mode 100644
index 00000000000..e53978806d0
--- /dev/null
+++ b/src/mongo/db/exec/field_name_bloom_filter.h
@@ -0,0 +1,75 @@
+/**
+ * Copyright (C) 2023-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+
+namespace mongo {
+typedef uint64_t (*FieldNameBloomFilterHasher)(const char*, size_t);
+inline uint64_t fieldNameBloomFilterDefaultHash(const char* str, size_t len) {
+ const auto shift = static_cast<unsigned char>(str[len / 2]) & 63u;
+ return uint64_t{1} << shift;
+}
+
+/*
+ * Tiny bloom filter intended to be used when scanning a list of [key, value]
+ * pairs (i.e. bson).
+ */
+template <FieldNameBloomFilterHasher HashFn = fieldNameBloomFilterDefaultHash>
+class FieldNameBloomFilter {
+public:
+ /**
+ * Inserts a string to the bloom filter.
+ */
+ void insert(const char* str, size_t len) {
+ _data = _data | HashFn(str, len);
+ }
+
+ /**
+ * If false is returned, the given string was definitely not inserted.
+ * If true is returned, the given string may have been inserted.
+ */
+ bool maybeContains(const char* str, size_t len) const {
+ return static_cast<bool>(_data & HashFn(str, len));
+ }
+
+ /**
+ * Similar to maybeContains() but the caller can compute a hash themselves and check
+ * whether anything with that hash was maybe-inserted.
+ */
+ bool maybeContainsHash(uint64_t hash) {
+ return _data & hash;
+ }
+
+private:
+ uint64_t _data = 0;
+};
+} // namespace mongo
diff --git a/src/mongo/db/exec/sbe/expressions/expression.cpp b/src/mongo/db/exec/sbe/expressions/expression.cpp
index e25df50a847..43833ecb1b1 100644
--- a/src/mongo/db/exec/sbe/expressions/expression.cpp
+++ b/src/mongo/db/exec/sbe/expressions/expression.cpp
@@ -715,6 +715,12 @@ static stdx::unordered_map<std::string, BuiltinFn> kBuiltinFunctions = {
{"ftsMatch", BuiltinFn{[](size_t n) { return n == 2; }, vm::Builtin::ftsMatch, false}},
{"generateSortKey",
BuiltinFn{[](size_t n) { return n == 2 || n == 3; }, vm::Builtin::generateSortKey, false}},
+ {"generateCheapSortKey",
+ BuiltinFn{
+ [](size_t n) { return n == 2 || n == 3; }, vm::Builtin::generateCheapSortKey, false}},
+ {"sortKeyComponentVectorGetElement",
+ BuiltinFn{
+ [](size_t n) { return n == 2; }, vm::Builtin::sortKeyComponentVectorGetElement, false}},
{"tsSecond", BuiltinFn{[](size_t n) { return n == 1; }, vm::Builtin::tsSecond, false}},
{"tsIncrement", BuiltinFn{[](size_t n) { return n == 1; }, vm::Builtin::tsIncrement, false}},
{"typeMatch", BuiltinFn{[](size_t n) { return n == 2; }, vm::Builtin::typeMatch, false}},
diff --git a/src/mongo/db/exec/sbe/stages/scan.cpp b/src/mongo/db/exec/sbe/stages/scan.cpp
index 56ec29cfe13..5175529f99a 100644
--- a/src/mongo/db/exec/sbe/stages/scan.cpp
+++ b/src/mongo/db/exec/sbe/stages/scan.cpp
@@ -85,12 +85,10 @@ ScanStage::ScanStage(UUID collectionUuid,
_fields.end()));
// We cannot use a random cursor if we are seeking or requesting a reverse scan.
invariant(!_useRandomCursor || (!_seekKeySlot && _forward));
- // Initialize _fieldsBloomFilter.
- _fieldsBloomFilter = 0;
for (size_t idx = 0; idx < _fields.size(); ++idx) {
const char* str = _fields[idx].c_str();
auto len = _fields[idx].size();
- _fieldsBloomFilter = _fieldsBloomFilter | computeFieldMask(str, len);
+ _fieldsBloomFilter.insert(str, len);
}
}
@@ -512,7 +510,7 @@ PlanState ScanStage::getNext() {
// the field.
auto field = bson::fieldNameAndLength(bsonElement);
const size_t offset = computeFieldMaskOffset(field.rawData(), field.size());
- if (!(_fieldsBloomFilter & computeFieldMask(offset))) {
+ if (!(_fieldsBloomFilter.maybeContainsHash(computeFieldMask(offset)))) {
bsonElement = bson::advance(bsonElement, field.size());
continue;
}
diff --git a/src/mongo/db/exec/sbe/stages/scan.h b/src/mongo/db/exec/sbe/stages/scan.h
index 4c358efdd75..6642c4ce1e8 100644
--- a/src/mongo/db/exec/sbe/stages/scan.h
+++ b/src/mongo/db/exec/sbe/stages/scan.h
@@ -30,6 +30,7 @@
#pragma once
#include "mongo/config.h"
+#include "mongo/db/exec/field_name_bloom_filter.h"
#include "mongo/db/exec/sbe/expressions/expression.h"
#include "mongo/db/exec/sbe/expressions/runtime_environment.h"
#include "mongo/db/exec/sbe/stages/collection_helpers.h"
@@ -217,7 +218,7 @@ private:
// the accessor quickly rather than having to look it up in the _fieldAccessors hashtable.
value::SlotAccessor* _tsFieldAccessor{nullptr};
- uint64_t _fieldsBloomFilter{0};
+ FieldNameBloomFilter<computeFieldMask> _fieldsBloomFilter;
RecordId _recordId;
diff --git a/src/mongo/db/exec/sbe/values/sort_spec.h b/src/mongo/db/exec/sbe/values/sort_spec.h
index af48415c539..fad29a4f09f 100644
--- a/src/mongo/db/exec/sbe/values/sort_spec.h
+++ b/src/mongo/db/exec/sbe/values/sort_spec.h
@@ -29,26 +29,56 @@
#pragma once
+#include "mongo/bson/ordering.h"
#include "mongo/db/exec/sbe/values/value.h"
#include "mongo/db/index/btree_key_generator.h"
+#include "mongo/db/index/sort_key_generator.h"
+#include "mongo/db/query/sort_pattern.h"
namespace mongo::sbe::value {
/**
* SortSpec is a wrapper around a BSONObj giving a sort pattern (encoded as a BSONObj), a collator,
- * and a BtreeKeyGenerator object.
+ * and a SortKeyGenerator object.
*/
class SortSpec {
public:
- SortSpec(const BSONObj& sortPattern)
- : _sortPattern(sortPattern.getOwned()), _keyGen(initKeyGen()) {}
- SortSpec(const SortSpec& other) : _sortPattern(other._sortPattern), _keyGen(initKeyGen()) {}
+ SortSpec(const BSONObj& sortPatternBson)
+ : _sortPatternBson(sortPatternBson.getOwned()),
+ _sortPattern(_sortPatternBson, nullptr /* expCtx, needed for meta sorts */),
+ _sortKeyGen(_sortPattern, nullptr /* collator */) {
+ _localBsonEltStorage.resize(_sortPattern.size());
+ _localSortKeyComponentStorage.elts.resize(_sortPattern.size());
+ }
+ SortSpec(const SortSpec& other)
+ : _sortPatternBson(other._sortPatternBson),
+ _sortPattern(other._sortPattern),
+ _sortKeyGen(_sortPattern, nullptr /* collator */) {
+ _localBsonEltStorage.resize(_sortPattern.size());
+ _localSortKeyComponentStorage.elts.resize(_sortPattern.size());
+ }
SortSpec& operator=(const SortSpec&) = delete;
KeyString::Value generateSortKey(const BSONObj& obj, const CollatorInterface* collator);
+
+ /*
+ * Creates a sort key that's cheaper to generate but more expensive to compare.
+ *
+ * The underlying memory for the returned SortKeyComponentVector is owned by the SortSpec
+ * itself. It is the caller's responsibility to ensure the SortSpec remains alive while the
+ * return value of this function is used. The return value is valid until the next call to
+ * generateSortKeyComponentVector().
+ *
+ * If the passed in 'obj' is owned, this class takes ownership of it. If it is not owned,
+ * the passed in obj must remain alive as long as the return value from this function may
+ * be used.
+ */
+ value::SortKeyComponentVector* generateSortKeyComponentVector(
+ FastTuple<bool, value::TypeTags, value::Value> obj, const CollatorInterface* collator);
+
const BSONObj& getPattern() const {
- return _sortPattern;
+ return _sortPatternBson;
}
size_t getApproximateSize() const;
@@ -56,7 +86,19 @@ public:
private:
BtreeKeyGenerator initKeyGen() const;
- const BSONObj _sortPattern;
- const BtreeKeyGenerator _keyGen;
+ const BSONObj _sortPatternBson;
+
+ const SortPattern _sortPattern;
+ SortKeyGenerator _sortKeyGen;
+
+ // Storage for the sort key component vector returned by generateSortKeyComponentVector().
+ std::vector<BSONElement> _localBsonEltStorage;
+ value::SortKeyComponentVector _localSortKeyComponentStorage;
+
+ // These members store objects that may be held by the key generator. For example, if the
+ // caller generates keys using an object that is temporary, it will get stashed here so that it
+ // remains alive while the sort keys can be used.
+ BSONObj _tempObj;
+ boost::optional<ValueGuard> _tempVal;
};
} // namespace mongo::sbe::value
diff --git a/src/mongo/db/exec/sbe/values/value.cpp b/src/mongo/db/exec/sbe/values/value.cpp
index a0089938d8a..4e6fec83b7f 100644
--- a/src/mongo/db/exec/sbe/values/value.cpp
+++ b/src/mongo/db/exec/sbe/values/value.cpp
@@ -149,27 +149,59 @@ std::pair<TypeTags, Value> makeCopyPcreRegex(const pcre::Regex& regex) {
}
KeyString::Value SortSpec::generateSortKey(const BSONObj& obj, const CollatorInterface* collator) {
- KeyStringSet keySet;
- SharedBufferFragmentBuilder allocator(KeyString::HeapBuilder::kHeapAllocatorDefaultBytes);
- const bool skipMultikey = false;
- MultikeyPaths* multikeyPaths = nullptr;
- _keyGen.getKeys(allocator, obj, skipMultikey, &keySet, multikeyPaths, collator);
+ _sortKeyGen.setCollator(collator);
+ return _sortKeyGen.computeSortKeyString(obj);
+}
+
+value::SortKeyComponentVector* SortSpec::generateSortKeyComponentVector(
+ FastTuple<bool, value::TypeTags, value::Value> obj, const CollatorInterface* collator) {
+ auto [objOwned, objTag, objVal] = obj;
+ ValueGuard guard(objOwned, objTag, objVal);
+
+ // While this function accepts any type of object, for now we simply convert everything
+ // to BSON. In the future, we may change this function to avoid the conversion.
+ auto bsonObj = [&, objTag = objTag, objVal = objVal, objOwned = objOwned]() {
+ if (objTag == value::TypeTags::bsonObject) {
+ if (objOwned) {
+ // Take ownership of the temporary object here.
+ _tempVal.emplace(objTag, objVal);
+ guard.reset();
+ }
+ return BSONObj{value::bitcastTo<const char*>(objVal)};
+ } else if (objTag == value::TypeTags::Object) {
+ BSONObjBuilder objBuilder;
+ bson::convertToBsonObj(objBuilder, value::getObjectView(objVal));
+ _tempObj = objBuilder.obj();
+ return _tempObj;
+ } else {
+ MONGO_UNREACHABLE_TASSERT(7103703);
+ }
+ }();
+
- // When 'isSparse' is false, BtreeKeyGenerator::getKeys() is guaranteed to insert at least
- // one key into 'keySet', so this assertion should always be true.
- tassert(5037000, "BtreeKeyGenerator failed to generate key", !keySet.empty());
+ _sortKeyGen.setCollator(collator);
+ // Use the generic API for getting an array of bson elements representing the
+ // sort key.
+ _sortKeyGen.generateSortKeyComponentVector(bsonObj, &_localBsonEltStorage);
- // Return the first KeyString in the set.
- return std::move(*keySet.extract_sequence().begin());
+ // Convert this array of BSONElements into the SBE SortKeyComponentVector type.
+ {
+ size_t i = 0;
+ for (auto& elt : _localBsonEltStorage) {
+ _localSortKeyComponentStorage.elts[i++] = bson::convertFrom<true>(elt);
+ }
+ }
+ return &_localSortKeyComponentStorage;
}
BtreeKeyGenerator SortSpec::initKeyGen() const {
- tassert(
- 5037003, "SortSpec should not be passed an empty sort pattern", !_sortPattern.isEmpty());
+ tassert(5037003,
+ "SortSpec should not be passed an empty sort pattern",
+ !_sortPatternBson.isEmpty());
std::vector<const char*> fields;
std::vector<BSONElement> fixed;
- for (auto&& elem : _sortPattern) {
+ for (auto&& elem : _sortPatternBson) {
fields.push_back(elem.fieldName());
// BtreeKeyGenerator's constructor's first parameter (the 'fields' vector) and second
@@ -183,15 +215,15 @@ BtreeKeyGenerator SortSpec::initKeyGen() const {
const bool isSparse = false;
auto version = KeyString::Version::kLatestVersion;
- auto ordering = Ordering::make(_sortPattern);
+ auto ordering = Ordering::make(_sortPatternBson);
return {std::move(fields), std::move(fixed), isSparse, version, ordering};
}
size_t SortSpec::getApproximateSize() const {
auto size = sizeof(SortSpec);
- size += _sortPattern.isOwned() ? _sortPattern.objsize() : 0;
- size += _keyGen.getApproximateSize() - sizeof(_keyGen);
+ size += _sortKeyGen.getApproximateSize();
+ size += _sortPatternBson.isOwned() ? _sortPatternBson.objsize() : 0;
return size;
}
diff --git a/src/mongo/db/exec/sbe/values/value.h b/src/mongo/db/exec/sbe/values/value.h
index 8332008796e..01aee0de8ff 100644
--- a/src/mongo/db/exec/sbe/values/value.h
+++ b/src/mongo/db/exec/sbe/values/value.h
@@ -126,6 +126,10 @@ enum class TypeTags : uint8_t {
MinKey,
MaxKey,
+ // Pointer to sort key component vector. This type is always owned within a SortSpec,
+ // and is never created, copied, or destroyed by SBE.
+ sortKeyComponentVector,
+
// Pointer to a struct with data necessary to read values from a columnstore index cell. The
// values of this type are fully owned by the column_scan stage and are never created, cloned or
// destroyed by SBE.
@@ -919,6 +923,13 @@ private:
ValueSetType _values;
};
+/*
+ * A vector of values representing a sort key. The values are NOT owned by this object.
+ */
+struct SortKeyComponentVector {
+ std::vector<std::pair<value::TypeTags, value::Value>> elts;
+};
+
bool operator==(const ArraySet& lhs, const ArraySet& rhs);
bool operator!=(const ArraySet& lhs, const ArraySet& rhs);
@@ -1260,6 +1271,10 @@ inline IndexBounds* getIndexBoundsView(Value val) noexcept {
return reinterpret_cast<IndexBounds*>(val);
}
+inline SortKeyComponentVector* getSortKeyComponentVectorView(Value v) noexcept {
+ return reinterpret_cast<SortKeyComponentVector*>(v);
+}
+
inline sbe::value::CsiCell* getCsiCellView(Value val) noexcept {
return reinterpret_cast<sbe::value::CsiCell*>(val);
}
diff --git a/src/mongo/db/exec/sbe/values/value_printer.cpp b/src/mongo/db/exec/sbe/values/value_printer.cpp
index 2405f698f3f..8a78037c0ff 100644
--- a/src/mongo/db/exec/sbe/values/value_printer.cpp
+++ b/src/mongo/db/exec/sbe/values/value_printer.cpp
@@ -166,6 +166,9 @@ void ValuePrinter<T>::writeTagToStream(TypeTags tag) {
case TypeTags::csiCell:
stream << "csiCell";
break;
+ case TypeTags::sortKeyComponentVector:
+ stream << "SortKeyComponentVector";
+ break;
default:
stream << "unknown tag";
break;
@@ -539,6 +542,13 @@ void ValuePrinter<T>::writeValueToStream(TypeTags tag, Value val, size_t depth)
case TypeTags::csiCell:
stream << "CsiCell(" << getCsiCellView(val) << ")";
break;
+ case TypeTags::sortKeyComponentVector:
+ stream << "SortKeyComponentVector(";
+ for (auto elt : getSortKeyComponentVectorView(val)->elts) {
+ stream << elt << ", ";
+ }
+ stream << ")";
+ break;
default:
MONGO_UNREACHABLE;
}
diff --git a/src/mongo/db/exec/sbe/vm/vm.cpp b/src/mongo/db/exec/sbe/vm/vm.cpp
index 860a106668b..fd8a9561c2e 100644
--- a/src/mongo/db/exec/sbe/vm/vm.cpp
+++ b/src/mongo/db/exec/sbe/vm/vm.cpp
@@ -4937,27 +4937,52 @@ FastTuple<bool, value::TypeTags, value::Value> ByteCode::builtinGetRegexFlags(Ar
return {true, strType, strValue};
}
-FastTuple<bool, value::TypeTags, value::Value> ByteCode::builtinGenerateSortKey(ArityType arity) {
+std::pair<value::SortSpec*, CollatorInterface*> ByteCode::generateSortKeyHelper(ArityType arity) {
invariant(arity == 2 || arity == 3);
auto [ssOwned, ssTag, ssVal] = getFromStack(0);
auto [objOwned, objTag, objVal] = getFromStack(1);
if (ssTag != value::TypeTags::sortSpec || !value::isObject(objTag)) {
- return {false, value::TypeTags::Nothing, 0};
+ return {nullptr, nullptr};
}
CollatorInterface* collator{nullptr};
if (arity == 3) {
auto [collatorOwned, collatorTag, collatorVal] = getFromStack(2);
if (collatorTag != value::TypeTags::collator) {
- return {false, value::TypeTags::Nothing, 0};
+ return {nullptr, nullptr};
}
collator = value::getCollatorView(collatorVal);
}
auto ss = value::getSortSpecView(ssVal);
+ return {ss, collator};
+}
+
+FastTuple<bool, value::TypeTags, value::Value> ByteCode::builtinGenerateCheapSortKey(
+ ArityType arity) {
+ auto [sortSpec, collator] = generateSortKeyHelper(arity);
+ if (!sortSpec) {
+ return {false, value::TypeTags::Nothing, 0};
+ }
+
+ // We "move" the object argument into the sort spec.
+ auto sortKeyComponentVector =
+ sortSpec->generateSortKeyComponentVector(moveFromStack(1), collator);
+
+ return {false,
+ value::TypeTags::sortKeyComponentVector,
+ value::bitcastFrom<value::SortKeyComponentVector*>(sortKeyComponentVector)};
+}
+
+FastTuple<bool, value::TypeTags, value::Value> ByteCode::builtinGenerateSortKey(ArityType arity) {
+ auto [sortSpec, collator] = generateSortKeyHelper(arity);
+ if (!sortSpec) {
+ return {false, value::TypeTags::Nothing, 0};
+ }
- auto obj = [objTag = objTag, objVal = objVal]() {
+ auto [objOwned, objTag, objVal] = getFromStack(1);
+ auto bsonObj = [objTag = objTag, objVal = objVal]() {
if (objTag == value::TypeTags::bsonObject) {
return BSONObj{value::bitcastTo<const char*>(objVal)};
} else if (objTag == value::TypeTags::Object) {
@@ -4972,7 +4997,26 @@ FastTuple<bool, value::TypeTags, value::Value> ByteCode::builtinGenerateSortKey(
return {true,
value::TypeTags::ksValue,
value::bitcastFrom<KeyString::Value*>(
- new KeyString::Value(ss->generateSortKey(obj, collator)))};
+ new KeyString::Value(sortSpec->generateSortKey(bsonObj, collator)))};
+}
+
+FastTuple<bool, value::TypeTags, value::Value> ByteCode::builtinSortKeyComponentVectorGetElement(
+ ArityType arity) {
+ invariant(arity == 2);
+
+ auto [sortVecOwned, sortVecTag, sortVecVal] = getFromStack(0);
+ auto [idxOwned, idxTag, idxVal] = getFromStack(1);
+ if (sortVecTag != value::TypeTags::sortKeyComponentVector ||
+ idxTag != value::TypeTags::NumberInt32) {
+ return {false, value::TypeTags::Nothing, 0};
+ }
+
+ auto* sortObj = value::getSortKeyComponentVectorView(sortVecVal);
+ const auto idxInt32 = value::bitcastTo<int32_t>(idxVal);
+
+ invariant(idxInt32 >= 0 && static_cast<size_t>(idxInt32) < sortObj->elts.size());
+ auto [outTag, outVal] = sortObj->elts[idxInt32];
+ return {false, outTag, outVal};
}
FastTuple<bool, value::TypeTags, value::Value> ByteCode::builtinMakeBsonObj(ArityType arity) {
@@ -5538,6 +5582,10 @@ FastTuple<bool, value::TypeTags, value::Value> ByteCode::dispatchBuiltin(Builtin
return builtinFtsMatch(arity);
case Builtin::generateSortKey:
return builtinGenerateSortKey(arity);
+ case Builtin::generateCheapSortKey:
+ return builtinGenerateCheapSortKey(arity);
+ case Builtin::sortKeyComponentVectorGetElement:
+ return builtinSortKeyComponentVectorGetElement(arity);
case Builtin::makeBsonObj:
return builtinMakeBsonObj(arity);
case Builtin::tsSecond:
@@ -5773,6 +5821,10 @@ std::string builtinToString(Builtin b) {
return "ftsMatch";
case Builtin::generateSortKey:
return "generateSortKey";
+ case Builtin::generateCheapSortKey:
+ return "generateCheapSortKey";
+ case Builtin::sortKeyComponentVectorGetElement:
+ return "sortKeyComponentVectorGetElement";
case Builtin::makeBsonObj:
return "makeBsonObj";
case Builtin::tsSecond:
diff --git a/src/mongo/db/exec/sbe/vm/vm.h b/src/mongo/db/exec/sbe/vm/vm.h
index 20dd1983f2f..ab5465221e9 100644
--- a/src/mongo/db/exec/sbe/vm/vm.h
+++ b/src/mongo/db/exec/sbe/vm/vm.h
@@ -726,6 +726,9 @@ enum class Builtin : uint8_t {
hash,
ftsMatch,
generateSortKey,
+ generateCheapSortKey,
+ sortKeyComponentVectorGetElement,
+
makeBsonObj,
tsSecond,
tsIncrement,
@@ -1378,7 +1381,11 @@ private:
FastTuple<bool, value::TypeTags, value::Value> builtinGetRegexFlags(ArityType arity);
FastTuple<bool, value::TypeTags, value::Value> builtinHash(ArityType arity);
FastTuple<bool, value::TypeTags, value::Value> builtinFtsMatch(ArityType arity);
+ std::pair<value::SortSpec*, CollatorInterface*> generateSortKeyHelper(ArityType arity);
FastTuple<bool, value::TypeTags, value::Value> builtinGenerateSortKey(ArityType arity);
+ FastTuple<bool, value::TypeTags, value::Value> builtinGenerateCheapSortKey(ArityType arity);
+ FastTuple<bool, value::TypeTags, value::Value> builtinSortKeyComponentVectorGetElement(
+ ArityType arity);
FastTuple<bool, value::TypeTags, value::Value> builtinMakeBsonObj(ArityType arity);
FastTuple<bool, value::TypeTags, value::Value> builtinTsSecond(ArityType arity);
FastTuple<bool, value::TypeTags, value::Value> builtinTsIncrement(ArityType arity);
diff --git a/src/mongo/db/index/sort_key_generator.cpp b/src/mongo/db/index/sort_key_generator.cpp
index 34bfbb7f61a..31c8fe4758c 100644
--- a/src/mongo/db/index/sort_key_generator.cpp
+++ b/src/mongo/db/index/sort_key_generator.cpp
@@ -36,27 +36,29 @@
#include "mongo/util/overloaded_visitor.h"
namespace mongo {
-
-SortKeyGenerator::SortKeyGenerator(SortPattern sortPattern, const CollatorInterface* collator)
- : _collator(collator), _sortPattern(std::move(sortPattern)) {
+namespace {
+BSONObj createSortSpecWithoutMeta(const SortPattern& sortPattern) {
BSONObjBuilder btreeBob;
- size_t nFields = 0;
-
- for (auto&& part : _sortPattern) {
+ for (auto&& part : sortPattern) {
if (part.fieldPath) {
btreeBob.append(part.fieldPath->fullPath(), part.isAscending ? 1 : -1);
- ++nFields;
}
}
+ return btreeBob.obj();
+}
+} // namespace
- // The fake index key pattern used to generate Btree keys.
- _sortSpecWithoutMeta = btreeBob.obj();
- _sortHasMeta = nFields < _sortPattern.size();
-
+SortKeyGenerator::SortKeyGenerator(SortPattern sortPattern, const CollatorInterface* collator)
+ : _collator(collator),
+ _sortPattern(std::move(sortPattern)),
+ _sortSpecWithoutMeta(createSortSpecWithoutMeta(_sortPattern)),
+ _ordering(Ordering::make(_sortSpecWithoutMeta)),
+ _sortHasMeta(static_cast<size_t>(_sortSpecWithoutMeta.nFields()) < _sortPattern.size()) {
// If we're just sorting by meta, don't bother creating an index key generator.
if (_sortSpecWithoutMeta.isEmpty()) {
return;
}
+ const auto nFields = _sortSpecWithoutMeta.nFields();
// We'll need to treat arrays as if we were to create an index over them. that is, we may need
// to unnest the first level and consider each array element to decide the sort order. In order
@@ -69,11 +71,24 @@ SortKeyGenerator::SortKeyGenerator(SortPattern sortPattern, const CollatorInterf
}
constexpr bool isSparse = false;
- _indexKeyGen = std::make_unique<BtreeKeyGenerator>(fieldNames,
- fixed,
- isSparse,
- KeyString::Version::kLatestVersion,
- Ordering::make(_sortSpecWithoutMeta));
+ _indexKeyGen = std::make_unique<BtreeKeyGenerator>(
+ fieldNames, fixed, isSparse, KeyString::Version::kLatestVersion, _ordering);
+
+ {
+ // TODO SERVER-74725: Remove this.
+ std::set<std::string> fieldNameSet;
+ for (auto& fn : fieldNames) {
+ fieldNameSet.insert(fn);
+ }
+ _sortHasRepeatKey = (fieldNameSet.size() != fieldNames.size());
+ }
+
+ if (!_sortHasMeta && !_sortHasRepeatKey) {
+ size_t i = 0;
+ for (auto&& keyPart : _sortPattern) {
+ _sortKeyTreeRoot.addSortPatternPart(&keyPart, 0, i++);
+ }
+ }
}
Value SortKeyGenerator::computeSortKey(const WorkingSetMember& wsm) const {
@@ -84,6 +99,21 @@ Value SortKeyGenerator::computeSortKey(const WorkingSetMember& wsm) const {
return computeSortKeyFromIndexKey(wsm);
}
+KeyString::Value SortKeyGenerator::computeSortKeyString(const BSONObj& obj) const {
+ // This function could be optimized to have a fast path for the non-array case.
+
+ KeyStringSet keySet;
+ SharedBufferFragmentBuilder allocator(KeyString::HeapBuilder::kHeapAllocatorDefaultBytes);
+ const bool skipMultikey = false;
+ MultikeyPaths* multikeyPaths = nullptr;
+ _indexKeyGen->getKeys(allocator, obj, skipMultikey, &keySet, multikeyPaths, _collator);
+
+ // When 'isSparse' is false, BtreeKeyGenerator::getKeys() is guaranteed to insert at least
+ // one key into 'keySet', so this assertion should always be true.
+ tassert(5037000, "BtreeKeyGenerator failed to generate key", !keySet.empty());
+ return std::move(*keySet.extract_sequence().begin());
+}
+
Value SortKeyGenerator::computeSortKeyFromIndexKey(const WorkingSetMember& member) const {
invariant(member.getState() == WorkingSetMember::RID_AND_IDX);
invariant(!_sortHasMeta);
@@ -314,4 +344,110 @@ Value SortKeyGenerator::computeSortKeyFromDocument(const Document& doc,
extractKeyWithArray(doc, metadata));
}
+
+bool SortKeyGenerator::fastFillOutSortKeyParts(const BSONObj& bson,
+ const SortKeyGenerator::SortKeyTreeNode& tree,
+ std::vector<BSONElement>* out) {
+ // Walk the BSON once and fill out the sort key parts.
+ // Returns false if array is encountered and fallback is needed.
+ BSONObjIterator it(bson);
+
+ size_t childrenVisited = 0;
+ while (it.more()) {
+ auto elt = it.next();
+
+ const SortKeyGenerator::SortKeyTreeNode* childNode = nullptr;
+ for (auto& child : tree.children) {
+ auto fieldNameSd = elt.fieldNameStringData();
+ if (tree.bloomFilter.maybeContains(fieldNameSd.rawData(), fieldNameSd.size())) {
+ // Could use a hash table, but sort patterns are small so brute force search is good
+ // enough.
+ if (child->name == fieldNameSd) {
+ childNode = child.get();
+ break;
+ }
+ }
+ }
+
+ if (childNode) {
+ if (elt.type() == BSONType::Array) {
+ // Slow path needed.
+ return false;
+ }
+
+ if (childNode->part) {
+ (*out)[childNode->partIdx] = elt;
+ }
+
+ if (elt.type() == BSONType::Object && !childNode->children.empty()) {
+ if (!fastFillOutSortKeyParts(elt.embeddedObject(), *childNode, out)) {
+ return false;
+ }
+ }
+
+ ++childrenVisited;
+ if (childrenVisited == tree.children.size()) {
+ return true;
+ }
+ }
+ }
+
+ return true;
+}
+
+void SortKeyGenerator::generateSortKeyComponentVector(const BSONObj& bson,
+ std::vector<BSONElement>* eltsOut) {
+ tassert(7103704, "Sort cannot have meta", !_sortHasMeta);
+ tassert(7103701, "Sort cannot have repeat keys", !_sortHasRepeatKey);
+ tassert(7103702, "Cannot pass null as eltsOut", eltsOut);
+ const static BSONObj kBsonWithNull = BSON("" << NullLabeler{});
+ const static BSONElement kNullElement = kBsonWithNull.firstElement();
+
+ for (size_t i = 0; i < eltsOut->size(); ++i) {
+ // In MQL missing sort keys are substituted with JS null.
+ (*eltsOut)[i] = kNullElement;
+ }
+
+ const bool fastPathSucceeded = fastFillOutSortKeyParts(bson, _sortKeyTreeRoot, eltsOut);
+
+ if (fastPathSucceeded) {
+ if (_collator) {
+ // Eventually we could reuse the buffer from _localObjStorage here.
+ BSONObjBuilder bob;
+ for (auto elt : *eltsOut) {
+ CollationIndexKey::collationAwareIndexKeyAppend(elt, _collator, &bob);
+ }
+ _localObjStorage = bob.obj();
+
+ {
+ size_t i = 0;
+ for (auto elt : _localObjStorage) {
+ (*eltsOut)[i++] = elt;
+ }
+ }
+ }
+
+ // Fast path succeeded, we're done.
+ return;
+ }
+
+ // Slow, generic path, used when an array was encountered.
+ Document doc(bson);
+ DocumentMetadataFields meta;
+
+ Value sortKeyVal = computeSortKeyFromDocument(doc, meta);
+
+ Document outDoc(std::vector<std::pair<StringData, Value>>{{""_sd, sortKeyVal}});
+ _localObjStorage = outDoc.toBson();
+ if (_localObjStorage.firstElement().type() == BSONType::Array) {
+ size_t i = 0;
+ for (auto& elt : _localObjStorage.firstElement().embeddedObject()) {
+ (*eltsOut)[i++] = elt;
+ }
+ } else {
+ (*eltsOut)[0] = _localObjStorage.firstElement();
+ }
+}
+
+
} // namespace mongo
diff --git a/src/mongo/db/index/sort_key_generator.h b/src/mongo/db/index/sort_key_generator.h
index 40e270dac21..e2fe5ed0dcf 100644
--- a/src/mongo/db/index/sort_key_generator.h
+++ b/src/mongo/db/index/sort_key_generator.h
@@ -30,11 +30,14 @@
#pragma once
#include "mongo/bson/bsonobj.h"
+#include "mongo/db/exec/field_name_bloom_filter.h"
#include "mongo/db/exec/working_set.h"
#include "mongo/db/index/btree_key_generator.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/query/collation/collator_interface.h"
#include "mongo/db/query/sort_pattern.h"
+#include "mongo/db/storage/key_string.h"
+#include "mongo/util/assert_util.h"
namespace mongo {
@@ -58,6 +61,18 @@ public:
Value computeSortKey(const WorkingSetMember&) const;
/**
+ * Computes a KeyString that can be used as the sort key for this object.
+ */
+ KeyString::Value computeSortKeyString(const BSONObj& bson) const;
+
+ /**
+ * Determines all of the portions of the sort key for the given document and populates the
+ * output vector with their positions. The results in the output vector are valid until the
+ * next call.
+ */
+ void generateSortKeyComponentVector(const BSONObj& obj, std::vector<BSONElement>* out);
+
+ /**
* Returns the sort key for the input 'doc' as a Value or throws if no key could be generated.
* When the sort pattern has multiple components, the resulting sort key is an Array-typed Value
* with one element for each component. For sort patterns with just one component, the sort key
@@ -79,7 +94,68 @@ public:
return _sortPattern;
}
+ void setCollator(const CollatorInterface* c) {
+ _collator = c;
+ }
+
+ size_t getApproximateSize() const {
+ return sizeof(this) + (_indexKeyGen ? _indexKeyGen->getApproximateSize() : 0) +
+ _sortKeyTreeRoot.getApproximateSize() + _sortSpecWithoutMeta.objsize() +
+ _localObjStorage.objsize();
+ }
+
private:
+ /* Tree representation of the sort key pattern. E.g. {a.b:1, a.x: 1}
+ * a
+ * / \
+ * b x
+ * This is used for the fast path where the sort key is generated in one pass over the bson.
+ * This is only used when the sort pattern does not include a $meta.
+ */
+ struct SortKeyTreeNode {
+ std::string name;
+ const SortPattern::SortPatternPart* part = nullptr; // Pointer into the SortPattern.
+ std::vector<std::unique_ptr<SortKeyTreeNode>> children;
+ size_t partIdx = 0;
+
+ // Tracks field names of the children. We use this when scanning the bson to quickly skip
+ // over fields irrelevant to the sort pattern.
+ FieldNameBloomFilter<> bloomFilter;
+
+ // Adds a new component of the sort pattern to the tree.
+ void addSortPatternPart(const SortPattern::SortPatternPart* part,
+ const size_t pathIdx,
+ const size_t partIdx) {
+ if (pathIdx == part->fieldPath->getPathLength()) {
+ tassert(7103700, "Invalid sort tree", !this->part);
+ this->part = part;
+ this->partIdx = partIdx;
+ return;
+ }
+
+ for (auto& c : children) {
+ // Check if we already have a child with the same prefix.
+ if (c->name == part->fieldPath->getFieldName(pathIdx)) {
+ c->addSortPatternPart(part, pathIdx + 1, partIdx);
+ return;
+ }
+ }
+
+ children.push_back(std::make_unique<SortKeyTreeNode>());
+ children.back()->name = part->fieldPath->getFieldName(pathIdx).toString();
+ children.back()->addSortPatternPart(part, pathIdx + 1, partIdx);
+ bloomFilter.insert(children.back()->name.c_str(), children.back()->name.size());
+ }
+
+ size_t getApproximateSize() const {
+ size_t size = sizeof(this) + name.size();
+ for (auto& c : children) {
+ size += c->getApproximateSize();
+ }
+ return size;
+ }
+ };
+
// Returns the sort key for the input 'doc' as a Value.
//
// Note that this function will ignore any metadata (e.g., textScore, randVal), in 'doc' but
@@ -135,6 +211,12 @@ private:
// should always be sorted with the simple (i.e. binary) collation.
Value getCollationComparisonKey(const Value& val) const;
+ // Fast path for reading a BSON obj and computing the sort key. Returns true on success, false
+ // when an array is encountered and the slow path needs to be used instead.
+ bool fastFillOutSortKeyParts(const BSONObj& bson,
+ const SortKeyGenerator::SortKeyTreeNode& tree,
+ std::vector<BSONElement>* out);
+
const CollatorInterface* _collator = nullptr;
SortPattern _sortPattern;
@@ -142,11 +224,18 @@ private:
// The sort pattern with any $meta sort components stripped out, since the underlying index key
// generator does not understand $meta sort.
BSONObj _sortSpecWithoutMeta;
+ Ordering _ordering;
// If we're not sorting with a $meta value we can short-cut some work.
bool _sortHasMeta = false;
+ bool _sortHasRepeatKey = false;
+
std::unique_ptr<BtreeKeyGenerator> _indexKeyGen;
+
+ // Used for fastFillOutSortKeyParts()/extractSortKeyParts().
+ SortKeyTreeNode _sortKeyTreeRoot;
+ BSONObj _localObjStorage;
};
} // namespace mongo
diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp
index a63b0e7b516..9178d70e863 100644
--- a/src/mongo/db/query/sbe_stage_builder.cpp
+++ b/src/mongo/db/query/sbe_stage_builder.cpp
@@ -1364,27 +1364,66 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder
std::move(stage), std::move(sortExpressions), root->nodeId());
} else {
- // Handle the case where two or more parts of the sort pattern have a common prefix.
- orderBy = _slotIdGenerator.generateMultiple(1);
- direction = {sbe::value::SortDirection::Ascending};
+ // When there's no limit on the sort, the dominating factor is number of comparisons
+ // (nlogn). A sort with a limit of k requires only nlogk comparisons. When k is small, the
+ // number of key generations (n) can actually dominate the runtime. So for all top-k sorts
+ // we use a "cheap" sort key: it's cheaper to construct but more expensive to compare. The
+ // assumption here is that k << n.
+
+ StringData sortKeyGenerator = sn->limit ? "generateCheapSortKey" : "generateSortKey";
auto sortSpec = std::make_unique<sbe::value::SortSpec>(sn->pattern);
auto sortSpecExpr =
makeConstant(sbe::value::TypeTags::sortSpec,
sbe::value::bitcastFrom<sbe::value::SortSpec*>(sortSpec.release()));
+ const auto fullSortKeySlot = _slotIdGenerator.generate();
+
// generateSortKey() will handle the parallel arrays check and sort key traversal for us,
// so we don't need to generate our own sort key traversal logic in the SBE plan.
stage = sbe::makeProjectStage(std::move(stage),
root->nodeId(),
- orderBy[0],
- collatorSlot ? makeFunction("generateSortKey",
+ fullSortKeySlot,
+ collatorSlot ? makeFunction(sortKeyGenerator,
std::move(sortSpecExpr),
makeVariable(outputSlotId),
makeVariable(*collatorSlot))
- : makeFunction("generateSortKey",
+ : makeFunction(sortKeyGenerator,
std::move(sortSpecExpr),
makeVariable(outputSlotId)));
+
+ if (sortKeyGenerator == "generateSortKey") {
+ // In this case generateSortKey() produces a mem-comparable KeyString so we use for
+ // the comparison. We always sort in ascending order because the KeyString takes the
+ // ordering into account.
+ orderBy = {fullSortKeySlot};
+ direction = {sbe::value::SortDirection::Ascending};
+ } else {
+ // Generate the cheap sort key represented as an array then extract each component into
+ // a slot:
+ //
+ // sort [s1, s2] [asc, dsc] ...
+ // project s1=getElement(fullSortKey,0), s2=getElement(fullSortKey,1)
+ // project fullSortKey=generateSortKeyCheap(bson)
+ sbe::value::SlotMap<std::unique_ptr<sbe::EExpression>> prjSlotToExprMap;
+
+ int i = 0;
+ for (const auto& part : sortPattern) {
+ auto sortKeySlot = _slotIdGenerator.generate();
+
+ orderBy.push_back(sortKeySlot);
+ direction.push_back(part.isAscending ? sbe::value::SortDirection::Ascending
+ : sbe::value::SortDirection::Descending);
+
+ prjSlotToExprMap[sortKeySlot] =
+ makeFunction("sortKeyComponentVectorGetElement",
+ makeVariable(fullSortKeySlot),
+ makeConstant(sbe::value::TypeTags::NumberInt32, i));
+ ++i;
+ }
+ stage = sbe::makeS<sbe::ProjectStage>(
+ std::move(stage), std::move(prjSlotToExprMap), root->nodeId());
+ }
}
// Slots for sort stage to forward to parent stage. Values in these slots are not used during