SERVER-71037 SBE top-k sort improvements

author: Ian Boros <ian.boros@mongodb.com> 2023-02-17 18:43:37 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2023-03-10 23:23:36 +0000
commit: 3ff59aaf07bcc3e44de3bdd6a4d7b8be6b4f115e (patch)
tree: 0cfc867008eeca940d9f48b552d5dfc0a7dddbbf
parent: 5034e23d2044179ff56b6853ae0b398e88d3e651 (diff)
download: mongo-3ff59aaf07bcc3e44de3bdd6a4d7b8be6b4f115e.tar.gz
16 files changed, 752 insertions, 234 deletions
diff --git a/jstests/core/query/sort/sort5.js b/jstests/core/query/sort/sort5.js
index 01a267bf6b6..36b949bb807 100644
--- a/jstests/core/query/sort/sort5.js
+++ b/jstests/core/query/sort/sort5.js
@@ -1,36 +1,43 @@
+(function() {
 // test compound sorting
-var t = db.sort5;
+const t = db.sort5;
 t.drop();
 
-t.save({_id: 5, x: 1, y: {a: 5, b: 4}});
-t.save({_id: 7, x: 2, y: {a: 7, b: 3}});
-t.save({_id: 2, x: 3, y: {a: 2, b: 3}});
-t.save({_id: 9, x: 4, y: {a: 9, b: 3}});
+assert.commandWorked(t.insert({_id: 5, x: 1, y: {a: 5, b: 4}}));
+assert.commandWorked(t.insert({_id: 7, x: 2, y: {a: 7, b: 3}}));
+assert.commandWorked(t.insert({_id: 2, x: 3, y: {a: 2, b: 3}}));
+assert.commandWorked(t.insert({_id: 9, x: 4, y: {a: 9, b: 3}}));
 
-assert.eq([4, 2, 3, 1],
-          t.find().sort({"y.b": 1, "y.a": -1}).map(function(z) {
-              return z.x;
-          }),
-          "A no index");
+function testSortAndSortWithLimit(expected, sortPattern, description) {
+    // Test sort and sort with limit.
+    assert.eq(expected,
+              t.find().sort(sortPattern).map(function(z) {
+                  return z.x;
+              }),
+              description);
+
+    assert.eq(expected,
+              t.find().sort(sortPattern).limit(500).map(function(z) {
+                  return z.x;
+              }),
+              description);
+}
+
+testSortAndSortWithLimit([4, 2, 3, 1], {"y.b": 1, "y.a": -1}, "A no index");
 t.createIndex({"y.b": 1, "y.a": -1});
-assert.eq([4, 2, 3, 1],
-          t.find().sort({"y.b": 1, "y.a": -1}).map(function(z) {
-              return z.x;
-          }),
-          "A index");
+testSortAndSortWithLimit([4, 2, 3, 1], {"y.b": 1, "y.a": -1}, "A index");
 assert(t.validate().valid, "A valid");
 
 // test sorting on compound key involving _id
 
-assert.eq([4, 2, 3, 1],
-          t.find().sort({"y.b": 1, _id: -1}).map(function(z) {
-              return z.x;
-          }),
-          "B no index");
+testSortAndSortWithLimit([4, 2, 3, 1], {"y.b": 1, _id: -1}, "B no index");
 t.createIndex({"y.b": 1, "_id": -1});
-assert.eq([4, 2, 3, 1],
-          t.find().sort({"y.b": 1, _id: -1}).map(function(z) {
-              return z.x;
-          }),
-          "B index");
+testSortAndSortWithLimit([4, 2, 3, 1], {"y.b": 1, _id: -1}, "B index");
 assert(t.validate().valid, "B valid");
+
+function testWithProj(proj, sort, expected) {
+    assert.eq(t.find({}, proj).sort(sort).map((doc) => doc._id), expected);
+    assert.eq(t.find({}, proj).sort(sort).limit(500).map((doc) => doc._id), expected);
+}
+testWithProj({"y.a": 1, "y.b": 1, _id: 1}, {"y.a": 1, "y.b": 1, _id: 1}, [2, 5, 7, 9]);
+})();
diff --git a/jstests/core/query/sort/sort_dotted_paths.js b/jstests/core/query/sort/sort_dotted_paths.js
index 1ce7ad2533e..2db3ba5683c 100644
--- a/jstests/core/query/sort/sort_dotted_paths.js
+++ b/jstests/core/query/sort/sort_dotted_paths.js
@@ -15,6 +15,11 @@
 const coll = db.sort_dotted_paths;
 coll.drop();
 
+function testSortAndSortWithLimit(sortPattern, expectedIds) {
+    assert.eq(expectedIds, coll.find({}, {_id: 1}).sort(sortPattern).toArray());
+    assert.eq(expectedIds, coll.find({}, {_id: 1}).sort(sortPattern).limit(500).toArray());
+}
+
 // Basic tests to verify that sorting deals with undefined, null, missing fields, and nested arrays
 // as expected.
 assert.commandWorked(coll.insert([
@@ -29,20 +34,20 @@ assert.commandWorked(coll.insert([
     {_id: 9, a: [undefined]}
 ]));
 
-assert.eq(
-    coll.find({}, {_id: 1}).sort({a: 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {a: 1, _id: 1},
     [{_id: 2}, {_id: 5}, {_id: 9}, {_id: 3}, {_id: 8}, {_id: 1}, {_id: 6}, {_id: 4}, {_id: 7}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({a: -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {a: -1, _id: 1},
     [{_id: 7}, {_id: 4}, {_id: 1}, {_id: 6}, {_id: 3}, {_id: 8}, {_id: 2}, {_id: 5}, {_id: 9}]);
 
 // Test out sort({"a.0":1}) on a collection of documents where field 'a' is a mix of different
 // types (arrays of varying size for some documents, non-arrays for other documents).
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.0": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.0": 1, _id: 1},
     [{_id: 9}, {_id: 1}, {_id: 2}, {_id: 3}, {_id: 4}, {_id: 5}, {_id: 8}, {_id: 6}, {_id: 7}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.0": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.0": -1, _id: 1},
     [{_id: 7}, {_id: 6}, {_id: 1}, {_id: 2}, {_id: 3}, {_id: 4}, {_id: 5}, {_id: 8}, {_id: 9}]);
 
 // Test out sort({a:1,b:1}) on a collection where a is an array for some documents and b is an array
@@ -60,17 +65,17 @@ assert.commandWorked(coll.insert([
     {_id: 9, a: 2, b: 3}
 ]));
 
-assert.eq(
-    coll.find({}, {_id: 1}).sort({a: 1, b: 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {a: 1, b: 1, _id: 1},
     [{_id: 3}, {_id: 7}, {_id: 1}, {_id: 8}, {_id: 4}, {_id: 2}, {_id: 5}, {_id: 6}, {_id: 9}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({a: 1, b: -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {a: 1, b: -1, _id: 1},
     [{_id: 3}, {_id: 4}, {_id: 7}, {_id: 8}, {_id: 1}, {_id: 6}, {_id: 9}, {_id: 5}, {_id: 2}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({a: -1, b: 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {a: -1, b: 1, _id: 1},
     [{_id: 5}, {_id: 2}, {_id: 6}, {_id: 8}, {_id: 9}, {_id: 7}, {_id: 1}, {_id: 4}, {_id: 3}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({a: -1, b: -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {a: -1, b: -1, _id: 1},
     [{_id: 5}, {_id: 6}, {_id: 8}, {_id: 9}, {_id: 2}, {_id: 4}, {_id: 7}, {_id: 1}, {_id: 3}]);
 
 // Verify that sort({a:1,b:1}) fails with a "parallel arrays" error when there is at least one
@@ -113,17 +118,17 @@ assert.commandWorked(coll.insert([
     {_id: 9, a: [{b: []}, {b: [1, 3]}]}
 ]));
 
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b": 1, _id: 1},
     [{_id: 2}, {_id: 9}, {_id: 4}, {_id: 5}, {_id: 8}, {_id: 1}, {_id: 3}, {_id: 7}, {_id: 6}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b": 1, _id: -1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b": 1, _id: -1},
     [{_id: 9}, {_id: 2}, {_id: 5}, {_id: 4}, {_id: 8}, {_id: 3}, {_id: 1}, {_id: 7}, {_id: 6}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b": -1, _id: 1},
     [{_id: 7}, {_id: 6}, {_id: 8}, {_id: 3}, {_id: 9}, {_id: 1}, {_id: 4}, {_id: 5}, {_id: 2}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b": -1, _id: -1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b": -1, _id: -1},
     [{_id: 7}, {_id: 8}, {_id: 6}, {_id: 9}, {_id: 3}, {_id: 1}, {_id: 5}, {_id: 4}, {_id: 2}]);
 
 // Basic tests for a sort pattern that contains two paths of length 2 with a common prefix.
@@ -140,33 +145,32 @@ assert.commandWorked(coll.insert([
     {_id: 9, a: [{b: 8, c: 2}, {b: 6, c: 3}]},
 ]));
 
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.c": 1, "a.b": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.c": 1, "a.b": 1, _id: 1},
     [{_id: 2}, {_id: 5}, {_id: 1}, {_id: 4}, {_id: 6}, {_id: 3}, {_id: 8}, {_id: 7}, {_id: 9}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.c": 1, "a.b": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.c": 1, "a.b": -1, _id: 1},
     [{_id: 3}, {_id: 6}, {_id: 1}, {_id: 4}, {_id: 2}, {_id: 5}, {_id: 9}, {_id: 7}, {_id: 8}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.c": -1, "a.b": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.c": -1, "a.b": 1, _id: 1},
     [{_id: 7}, {_id: 6}, {_id: 4}, {_id: 9}, {_id: 5}, {_id: 8}, {_id: 1}, {_id: 2}, {_id: 3}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.c": -1, "a.b": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.c": -1, "a.b": -1, _id: 1},
     [{_id: 8}, {_id: 5}, {_id: 9}, {_id: 4}, {_id: 6}, {_id: 7}, {_id: 3}, {_id: 2}, {_id: 1}]);
 
 // Basic tests for a sort pattern that contains a path of length 2 and a path of length 1, where
 // one path is a prefix of the other path.
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.c": 1, a: 1, _id: 1}).toArray(),
-    [{_id: 2}, {_id: 5}, {_id: 1}, {_id: 4}, {_id: 6}, {_id: 3}, {_id: 8}, {_id: 7}, {_id: 9}],
-    () => tojson(coll.find({}, {_id: 1}).sort({"a.c": 1, a: 1, _id: 1}).explain()));
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.c": 1, a: -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.c": 1, a: 1, _id: 1},
+    [{_id: 2}, {_id: 5}, {_id: 1}, {_id: 4}, {_id: 6}, {_id: 3}, {_id: 8}, {_id: 7}, {_id: 9}]);
+testSortAndSortWithLimit(
+    {"a.c": 1, a: -1, _id: 1},
     [{_id: 3}, {_id: 6}, {_id: 1}, {_id: 4}, {_id: 2}, {_id: 5}, {_id: 9}, {_id: 7}, {_id: 8}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.c": -1, a: 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.c": -1, a: 1, _id: 1},
     [{_id: 7}, {_id: 6}, {_id: 4}, {_id: 9}, {_id: 5}, {_id: 8}, {_id: 1}, {_id: 2}, {_id: 3}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.c": -1, a: -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.c": -1, a: -1, _id: 1},
     [{_id: 8}, {_id: 5}, {_id: 9}, {_id: 4}, {_id: 6}, {_id: 7}, {_id: 3}, {_id: 2}, {_id: 1}]);
 
 // Verify that sort({"a.b":1,"a.c":1}) fails with a "parallel arrays" error when there is at least
@@ -199,47 +203,47 @@ assert.commandWorked(coll.insert([
     {_id: 9, a: [{b: [8], c: 2}, {b: [6], c: 3}]},
 ]));
 
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.c": 1, "a.b": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.c": 1, "a.b": 1, _id: 1},
     [{_id: 6}, {_id: 1}, {_id: 2}, {_id: 5}, {_id: 3}, {_id: 4}, {_id: 7}, {_id: 8}, {_id: 9}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.c": 1, "a.b": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.c": 1, "a.b": -1, _id: 1},
     [{_id: 6}, {_id: 3}, {_id: 1}, {_id: 4}, {_id: 5}, {_id: 2}, {_id: 9}, {_id: 8}, {_id: 7}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.c": -1, "a.b": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.c": -1, "a.b": 1, _id: 1},
     [{_id: 2}, {_id: 7}, {_id: 6}, {_id: 4}, {_id: 5}, {_id: 9}, {_id: 8}, {_id: 1}, {_id: 3}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.c": -1, "a.b": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.c": -1, "a.b": -1, _id: 1},
     [{_id: 2}, {_id: 8}, {_id: 5}, {_id: 9}, {_id: 4}, {_id: 6}, {_id: 7}, {_id: 3}, {_id: 1}]);
 
 // More tests for a sort pattern that contains a path of length 2 and a path of length 1, where
 // one path is a prefix of the other path.
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b": 1, a: 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b": 1, a: 1, _id: 1},
     [{_id: 7}, {_id: 1}, {_id: 2}, {_id: 5}, {_id: 6}, {_id: 8}, {_id: 4}, {_id: 3}, {_id: 9}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b": 1, a: -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b": 1, a: -1, _id: 1},
     [{_id: 7}, {_id: 1}, {_id: 5}, {_id: 2}, {_id: 8}, {_id: 6}, {_id: 3}, {_id: 4}, {_id: 9}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b": -1, a: 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b": -1, a: 1, _id: 1},
     [{_id: 3}, {_id: 8}, {_id: 9}, {_id: 5}, {_id: 6}, {_id: 4}, {_id: 1}, {_id: 2}, {_id: 7}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b": -1, a: -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b": -1, a: -1, _id: 1},
     [{_id: 3}, {_id: 9}, {_id: 8}, {_id: 5}, {_id: 1}, {_id: 4}, {_id: 6}, {_id: 2}, {_id: 7}]);
 
 // Test out sort({"a.0.b":1}) on a collection of documents where field "a" and sub-field "b" are
 // a mix of different types.
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.0.b": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.0.b": 1, _id: 1},
     [{_id: 7}, {_id: 1}, {_id: 2}, {_id: 5}, {_id: 8}, {_id: 3}, {_id: 4}, {_id: 6}, {_id: 9}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.0.b": 1, _id: -1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.0.b": 1, _id: -1},
     [{_id: 7}, {_id: 1}, {_id: 5}, {_id: 2}, {_id: 8}, {_id: 4}, {_id: 3}, {_id: 6}, {_id: 9}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.0.b": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.0.b": -1, _id: 1},
     [{_id: 9}, {_id: 3}, {_id: 6}, {_id: 1}, {_id: 4}, {_id: 5}, {_id: 8}, {_id: 2}, {_id: 7}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.0.b": -1, _id: -1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.0.b": -1, _id: -1},
     [{_id: 9}, {_id: 3}, {_id: 6}, {_id: 4}, {_id: 1}, {_id: 8}, {_id: 5}, {_id: 2}, {_id: 7}]);
 
 // Tests for a sort pattern that contains two paths of length 2 that do not have a common prefix.
@@ -256,17 +260,17 @@ assert.commandWorked(coll.insert([
     {_id: 9, a: {b: 3}, c: [{d: [8]}, {d: [6]}]},
 ]));
 
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b": 1, "c.d": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b": 1, "c.d": 1, _id: 1},
     [{_id: 6}, {_id: 1}, {_id: 2}, {_id: 5}, {_id: 3}, {_id: 7}, {_id: 8}, {_id: 4}, {_id: 9}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b": 1, "c.d": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b": 1, "c.d": -1, _id: 1},
     [{_id: 6}, {_id: 3}, {_id: 5}, {_id: 1}, {_id: 2}, {_id: 4}, {_id: 8}, {_id: 7}, {_id: 9}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b": -1, "c.d": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b": -1, "c.d": 1, _id: 1},
     [{_id: 2}, {_id: 8}, {_id: 4}, {_id: 6}, {_id: 9}, {_id: 7}, {_id: 1}, {_id: 5}, {_id: 3}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b": -1, "c.d": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b": -1, "c.d": -1, _id: 1},
     [{_id: 2}, {_id: 9}, {_id: 4}, {_id: 6}, {_id: 8}, {_id: 7}, {_id: 3}, {_id: 5}, {_id: 1}]);
 
 // Basic tests for a sort pattern that contains a path of length 3.
@@ -283,16 +287,16 @@ assert.commandWorked(coll.insert([
     {_id: 9, a: [{b: [{c: []}, {c: [1, 3]}]}]}
 ]));
 
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b.c": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b.c": 1, _id: 1},
     [{_id: 2}, {_id: 3}, {_id: 9}, {_id: 5}, {_id: 7}, {_id: 4}, {_id: 8}, {_id: 1}, {_id: 6}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b.c": 1, _id: -1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b.c": 1, _id: -1},
     [{_id: 9}, {_id: 3}, {_id: 2}, {_id: 7}, {_id: 5}, {_id: 8}, {_id: 4}, {_id: 1}, {_id: 6}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b.c": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b.c": -1, _id: 1},
     [{_id: 6}, {_id: 8}, {_id: 5}, {_id: 9}, {_id: 1}, {_id: 4}, {_id: 3}, {_id: 7}, {_id: 2}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b.c": -1, _id: -1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b.c": -1, _id: -1},
     [{_id: 8}, {_id: 6}, {_id: 9}, {_id: 5}, {_id: 1}, {_id: 4}, {_id: 7}, {_id: 3}, {_id: 2}]);
 })();
diff --git a/jstests/core/query/sort/sort_dotted_paths_collation.js b/jstests/core/query/sort/sort_dotted_paths_collation.js
index 10111a95441..b0b91f7126d 100644
--- a/jstests/core/query/sort/sort_dotted_paths_collation.js
+++ b/jstests/core/query/sort/sort_dotted_paths_collation.js
@@ -25,6 +25,11 @@ const caseInsensitive = {
 };
 assert.commandWorked(db.createCollection(coll.getName(), caseInsensitive));
 
+function testSortAndSortWithLimit(sortPattern, expectedIds) {
+    assert.eq(expectedIds, coll.find({}, {_id: 1}).sort(sortPattern).toArray());
+    assert.eq(expectedIds, coll.find({}, {_id: 1}).sort(sortPattern).limit(500).toArray());
+}
+
 // Test out sort({a:1,b:1}) on a collection where a is an array for some documents and b is an array
 // for other documents.
 assert.commandWorked(coll.insert([
@@ -39,17 +44,17 @@ assert.commandWorked(coll.insert([
     {_id: 9, a: "b", b: "C"}
 ]));
 
-assert.eq(
-    coll.find({}, {_id: 1}).sort({a: 1, b: 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {a: 1, b: 1, _id: 1},
     [{_id: 3}, {_id: 7}, {_id: 1}, {_id: 8}, {_id: 4}, {_id: 2}, {_id: 5}, {_id: 6}, {_id: 9}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({a: 1, b: -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {a: 1, b: -1, _id: 1},
     [{_id: 3}, {_id: 4}, {_id: 7}, {_id: 8}, {_id: 1}, {_id: 6}, {_id: 9}, {_id: 5}, {_id: 2}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({a: -1, b: 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {a: -1, b: 1, _id: 1},
     [{_id: 5}, {_id: 2}, {_id: 6}, {_id: 8}, {_id: 9}, {_id: 7}, {_id: 1}, {_id: 4}, {_id: 3}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({a: -1, b: -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {a: -1, b: -1, _id: 1},
     [{_id: 5}, {_id: 6}, {_id: 8}, {_id: 9}, {_id: 2}, {_id: 4}, {_id: 7}, {_id: 1}, {_id: 3}]);
 
 // Verify that sort({a:1,b:1}) fails with a "parallel arrays" error when there is at least one
@@ -94,17 +99,17 @@ assert.commandWorked(coll.insert([
     {_id: 9, a: [{b: []}, {b: ["a", "C"]}]}
 ]));
 
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b": 1, _id: 1},
     [{_id: 2}, {_id: 9}, {_id: 4}, {_id: 5}, {_id: 8}, {_id: 1}, {_id: 3}, {_id: 7}, {_id: 6}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b": 1, _id: -1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b": 1, _id: -1},
     [{_id: 9}, {_id: 2}, {_id: 5}, {_id: 4}, {_id: 8}, {_id: 3}, {_id: 1}, {_id: 7}, {_id: 6}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b": -1, _id: 1},
     [{_id: 7}, {_id: 6}, {_id: 8}, {_id: 3}, {_id: 9}, {_id: 1}, {_id: 4}, {_id: 5}, {_id: 2}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b": -1, _id: -1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b": -1, _id: -1},
     [{_id: 7}, {_id: 8}, {_id: 6}, {_id: 9}, {_id: 3}, {_id: 1}, {_id: 5}, {_id: 4}, {_id: 2}]);
 
 // Basic tests for a sort pattern that contains two paths of length 2 with a common prefix.
@@ -122,32 +127,32 @@ assert.commandWorked(coll.insert([
     {_id: 9, a: [{b: "h", c: "b"}, {b: "F", c: "C"}]},
 ]));
 
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.c": 1, "a.b": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.c": 1, "a.b": 1, _id: 1},
     [{_id: 2}, {_id: 5}, {_id: 1}, {_id: 4}, {_id: 6}, {_id: 3}, {_id: 8}, {_id: 7}, {_id: 9}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.c": 1, "a.b": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.c": 1, "a.b": -1, _id: 1},
     [{_id: 3}, {_id: 6}, {_id: 1}, {_id: 4}, {_id: 2}, {_id: 5}, {_id: 9}, {_id: 7}, {_id: 8}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.c": -1, "a.b": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.c": -1, "a.b": 1, _id: 1},
     [{_id: 7}, {_id: 6}, {_id: 4}, {_id: 9}, {_id: 5}, {_id: 8}, {_id: 1}, {_id: 2}, {_id: 3}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.c": -1, "a.b": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.c": -1, "a.b": -1, _id: 1},
     [{_id: 8}, {_id: 5}, {_id: 9}, {_id: 4}, {_id: 6}, {_id: 7}, {_id: 3}, {_id: 2}, {_id: 1}]);
 
 // Basic tests for a sort pattern that contains a path of length 2 and a path of length 1, where
 // one path is a prefix of the other path.
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.c": 1, a: 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.c": 1, a: 1, _id: 1},
     [{_id: 2}, {_id: 5}, {_id: 1}, {_id: 4}, {_id: 6}, {_id: 3}, {_id: 8}, {_id: 7}, {_id: 9}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.c": 1, a: -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.c": 1, a: -1, _id: 1},
     [{_id: 3}, {_id: 6}, {_id: 1}, {_id: 4}, {_id: 2}, {_id: 5}, {_id: 9}, {_id: 7}, {_id: 8}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.c": -1, a: 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.c": -1, a: 1, _id: 1},
     [{_id: 7}, {_id: 6}, {_id: 4}, {_id: 9}, {_id: 5}, {_id: 8}, {_id: 1}, {_id: 2}, {_id: 3}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.c": -1, a: -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.c": -1, a: -1, _id: 1},
     [{_id: 8}, {_id: 5}, {_id: 9}, {_id: 4}, {_id: 6}, {_id: 7}, {_id: 3}, {_id: 2}, {_id: 1}]);
 
 // Verify that sort({"a.b":1,"a.c":1}) fails with a "parallel arrays" error when there is at least
@@ -183,47 +188,47 @@ assert.commandWorked(coll.insert([
     {_id: 9, a: [{b: ["h"], c: "b"}, {b: ["F"], c: "C"}]},
 ]));
 
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.c": 1, "a.b": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.c": 1, "a.b": 1, _id: 1},
     [{_id: 6}, {_id: 1}, {_id: 2}, {_id: 5}, {_id: 3}, {_id: 4}, {_id: 7}, {_id: 8}, {_id: 9}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.c": 1, "a.b": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.c": 1, "a.b": -1, _id: 1},
     [{_id: 6}, {_id: 3}, {_id: 1}, {_id: 4}, {_id: 5}, {_id: 2}, {_id: 9}, {_id: 8}, {_id: 7}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.c": -1, "a.b": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.c": -1, "a.b": 1, _id: 1},
     [{_id: 2}, {_id: 7}, {_id: 6}, {_id: 4}, {_id: 5}, {_id: 9}, {_id: 8}, {_id: 1}, {_id: 3}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.c": -1, "a.b": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.c": -1, "a.b": -1, _id: 1},
     [{_id: 2}, {_id: 8}, {_id: 5}, {_id: 9}, {_id: 4}, {_id: 6}, {_id: 7}, {_id: 3}, {_id: 1}]);
 
 // More tests for a sort pattern that contains a path of length 2 and a path of length 1, where
 // one path is a prefix of the other path.
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b": 1, a: 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b": 1, a: 1, _id: 1},
     [{_id: 7}, {_id: 1}, {_id: 2}, {_id: 5}, {_id: 6}, {_id: 8}, {_id: 4}, {_id: 3}, {_id: 9}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b": 1, a: -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b": 1, a: -1, _id: 1},
     [{_id: 7}, {_id: 1}, {_id: 5}, {_id: 2}, {_id: 8}, {_id: 6}, {_id: 3}, {_id: 4}, {_id: 9}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b": -1, a: 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b": -1, a: 1, _id: 1},
     [{_id: 3}, {_id: 8}, {_id: 9}, {_id: 5}, {_id: 6}, {_id: 4}, {_id: 1}, {_id: 2}, {_id: 7}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b": -1, a: -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b": -1, a: -1, _id: 1},
     [{_id: 3}, {_id: 9}, {_id: 8}, {_id: 5}, {_id: 1}, {_id: 4}, {_id: 6}, {_id: 2}, {_id: 7}]);
 
 // Test out sort({"a.0.b":1}) on a collection of documents where field "a" and sub-field "b" are
 // a mix of different types.
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.0.b": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.0.b": 1, _id: 1},
     [{_id: 7}, {_id: 1}, {_id: 2}, {_id: 5}, {_id: 8}, {_id: 3}, {_id: 4}, {_id: 6}, {_id: 9}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.0.b": 1, _id: -1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.0.b": 1, _id: -1},
     [{_id: 7}, {_id: 1}, {_id: 5}, {_id: 2}, {_id: 8}, {_id: 4}, {_id: 3}, {_id: 6}, {_id: 9}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.0.b": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.0.b": -1, _id: 1},
     [{_id: 9}, {_id: 3}, {_id: 6}, {_id: 1}, {_id: 4}, {_id: 5}, {_id: 8}, {_id: 2}, {_id: 7}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.0.b": -1, _id: -1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.0.b": -1, _id: -1},
     [{_id: 9}, {_id: 3}, {_id: 6}, {_id: 4}, {_id: 1}, {_id: 8}, {_id: 5}, {_id: 2}, {_id: 7}]);
 
 // Tests for a sort pattern that contains two paths of length 2 that do not have a common prefix.
@@ -241,17 +246,17 @@ assert.commandWorked(coll.insert([
     {_id: 9, a: {b: "C"}, c: [{d: ["h"]}, {d: ["F"]}]},
 ]));
 
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b": 1, "c.d": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b": 1, "c.d": 1, _id: 1},
     [{_id: 6}, {_id: 1}, {_id: 2}, {_id: 5}, {_id: 3}, {_id: 7}, {_id: 8}, {_id: 4}, {_id: 9}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b": 1, "c.d": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b": 1, "c.d": -1, _id: 1},
     [{_id: 6}, {_id: 3}, {_id: 5}, {_id: 1}, {_id: 2}, {_id: 4}, {_id: 8}, {_id: 7}, {_id: 9}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b": -1, "c.d": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b": -1, "c.d": 1, _id: 1},
     [{_id: 2}, {_id: 8}, {_id: 4}, {_id: 6}, {_id: 9}, {_id: 7}, {_id: 1}, {_id: 5}, {_id: 3}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b": -1, "c.d": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b": -1, "c.d": -1, _id: 1},
     [{_id: 2}, {_id: 9}, {_id: 4}, {_id: 6}, {_id: 8}, {_id: 7}, {_id: 3}, {_id: 5}, {_id: 1}]);
 
 // Basic tests for a sort pattern that contains a path of length 3.
@@ -269,16 +274,16 @@ assert.commandWorked(coll.insert([
     {_id: 9, a: [{b: [{c: []}, {c: ["a", "C"]}]}]}
 ]));
 
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b.c": 1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b.c": 1, _id: 1},
     [{_id: 2}, {_id: 3}, {_id: 9}, {_id: 5}, {_id: 7}, {_id: 4}, {_id: 8}, {_id: 1}, {_id: 6}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b.c": 1, _id: -1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b.c": 1, _id: -1},
     [{_id: 9}, {_id: 3}, {_id: 2}, {_id: 7}, {_id: 5}, {_id: 8}, {_id: 4}, {_id: 1}, {_id: 6}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b.c": -1, _id: 1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b.c": -1, _id: 1},
     [{_id: 6}, {_id: 8}, {_id: 5}, {_id: 9}, {_id: 1}, {_id: 4}, {_id: 3}, {_id: 7}, {_id: 2}]);
-assert.eq(
-    coll.find({}, {_id: 1}).sort({"a.b.c": -1, _id: -1}).toArray(),
+testSortAndSortWithLimit(
+    {"a.b.c": -1, _id: -1},
     [{_id: 8}, {_id: 6}, {_id: 9}, {_id: 5}, {_id: 1}, {_id: 4}, {_id: 7}, {_id: 3}, {_id: 2}]);
 })();
diff --git a/src/mongo/db/exec/field_name_bloom_filter.h b/src/mongo/db/exec/field_name_bloom_filter.h
new file mode 100644
index 00000000000..e53978806d0
--- /dev/null
+++ b/src/mongo/db/exec/field_name_bloom_filter.h
@@ -0,0 +1,75 @@
+/**
+ *    Copyright (C) 2023-present MongoDB, Inc.
+ *
+ *    This program is free software: you can redistribute it and/or modify
+ *    it under the terms of the Server Side Public License, version 1,
+ *    as published by MongoDB, Inc.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    Server Side Public License for more details.
+ *
+ *    You should have received a copy of the Server Side Public License
+ *    along with this program. If not, see
+ *    <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ *    As a special exception, the copyright holders give permission to link the
+ *    code of portions of this program with the OpenSSL library under certain
+ *    conditions as described in each individual source file and distribute
+ *    linked combinations including the program with the OpenSSL library. You
+ *    must comply with the Server Side Public License in all respects for
+ *    all of the code used other than as permitted herein. If you modify file(s)
+ *    with this exception, you may extend this exception to your version of the
+ *    file(s), but you are not obligated to do so. If you do not wish to do so,
+ *    delete this exception statement from your version. If you delete this
+ *    exception statement from all source files in the program, then also delete
+ *    it in the license file.
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+
+namespace mongo {
+typedef uint64_t (*FieldNameBloomFilterHasher)(const char*, size_t);
+inline uint64_t fieldNameBloomFilterDefaultHash(const char* str, size_t len) {
+    const auto shift = static_cast<unsigned char>(str[len / 2]) & 63u;
+    return uint64_t{1} << shift;
+}
+
+/*
+ * Tiny bloom filter intended to be used when scanning a list of [key, value]
+ * pairs (i.e. bson).
+ */
+template <FieldNameBloomFilterHasher HashFn = fieldNameBloomFilterDefaultHash>
+class FieldNameBloomFilter {
+public:
+    /**
+     * Inserts a string to the bloom filter.
+     */
+    void insert(const char* str, size_t len) {
+        _data = _data | HashFn(str, len);
+    }
+
+    /**
+     * If false is returned, the given string was definitely not inserted.
+     * If true is returned, the given string may have been inserted.
+     */
+    bool maybeContains(const char* str, size_t len) const {
+        return static_cast<bool>(_data & HashFn(str, len));
+    }
+
+    /**
+     * Similar to maybeContains() but the caller can compute a hash themselves and check
+     * whether anything with that hash was maybe-inserted.
+     */
+    bool maybeContainsHash(uint64_t hash) {
+        return _data & hash;
+    }
+
+private:
+    uint64_t _data = 0;
+};
+}  // namespace mongo
diff --git a/src/mongo/db/exec/sbe/expressions/expression.cpp b/src/mongo/db/exec/sbe/expressions/expression.cpp
index e25df50a847..43833ecb1b1 100644
--- a/src/mongo/db/exec/sbe/expressions/expression.cpp
+++ b/src/mongo/db/exec/sbe/expressions/expression.cpp
@@ -715,6 +715,12 @@ static stdx::unordered_map<std::string, BuiltinFn> kBuiltinFunctions = {
     {"ftsMatch", BuiltinFn{[](size_t n) { return n == 2; }, vm::Builtin::ftsMatch, false}},
     {"generateSortKey",
      BuiltinFn{[](size_t n) { return n == 2 || n == 3; }, vm::Builtin::generateSortKey, false}},
+    {"generateCheapSortKey",
+     BuiltinFn{
+         [](size_t n) { return n == 2 || n == 3; }, vm::Builtin::generateCheapSortKey, false}},
+    {"sortKeyComponentVectorGetElement",
+     BuiltinFn{
+         [](size_t n) { return n == 2; }, vm::Builtin::sortKeyComponentVectorGetElement, false}},
     {"tsSecond", BuiltinFn{[](size_t n) { return n == 1; }, vm::Builtin::tsSecond, false}},
     {"tsIncrement", BuiltinFn{[](size_t n) { return n == 1; }, vm::Builtin::tsIncrement, false}},
     {"typeMatch", BuiltinFn{[](size_t n) { return n == 2; }, vm::Builtin::typeMatch, false}},
diff --git a/src/mongo/db/exec/sbe/stages/scan.cpp b/src/mongo/db/exec/sbe/stages/scan.cpp
index 56ec29cfe13..5175529f99a 100644
--- a/src/mongo/db/exec/sbe/stages/scan.cpp
+++ b/src/mongo/db/exec/sbe/stages/scan.cpp
@@ -85,12 +85,10 @@ ScanStage::ScanStage(UUID collectionUuid,
                  _fields.end()));
     // We cannot use a random cursor if we are seeking or requesting a reverse scan.
     invariant(!_useRandomCursor || (!_seekKeySlot && _forward));
-    // Initialize _fieldsBloomFilter.
-    _fieldsBloomFilter = 0;
     for (size_t idx = 0; idx < _fields.size(); ++idx) {
         const char* str = _fields[idx].c_str();
         auto len = _fields[idx].size();
-        _fieldsBloomFilter = _fieldsBloomFilter | computeFieldMask(str, len);
+        _fieldsBloomFilter.insert(str, len);
     }
 }
 
@@ -512,7 +510,7 @@ PlanState ScanStage::getNext() {
                 // the field.
                 auto field = bson::fieldNameAndLength(bsonElement);
                 const size_t offset = computeFieldMaskOffset(field.rawData(), field.size());
-                if (!(_fieldsBloomFilter & computeFieldMask(offset))) {
+                if (!(_fieldsBloomFilter.maybeContainsHash(computeFieldMask(offset)))) {
                     bsonElement = bson::advance(bsonElement, field.size());
                     continue;
                 }
diff --git a/src/mongo/db/exec/sbe/stages/scan.h b/src/mongo/db/exec/sbe/stages/scan.h
index 4c358efdd75..6642c4ce1e8 100644
--- a/src/mongo/db/exec/sbe/stages/scan.h
+++ b/src/mongo/db/exec/sbe/stages/scan.h
@@ -30,6 +30,7 @@
 #pragma once
 
 #include "mongo/config.h"
+#include "mongo/db/exec/field_name_bloom_filter.h"
 #include "mongo/db/exec/sbe/expressions/expression.h"
 #include "mongo/db/exec/sbe/expressions/runtime_environment.h"
 #include "mongo/db/exec/sbe/stages/collection_helpers.h"
@@ -217,7 +218,7 @@ private:
     // the accessor quickly rather than having to look it up in the _fieldAccessors hashtable.
     value::SlotAccessor* _tsFieldAccessor{nullptr};
 
-    uint64_t _fieldsBloomFilter{0};
+    FieldNameBloomFilter<computeFieldMask> _fieldsBloomFilter;
 
     RecordId _recordId;
 
diff --git a/src/mongo/db/exec/sbe/values/sort_spec.h b/src/mongo/db/exec/sbe/values/sort_spec.h
index af48415c539..fad29a4f09f 100644
--- a/src/mongo/db/exec/sbe/values/sort_spec.h
+++ b/src/mongo/db/exec/sbe/values/sort_spec.h
@@ -29,26 +29,56 @@
 
 #pragma once
 
+#include "mongo/bson/ordering.h"
 #include "mongo/db/exec/sbe/values/value.h"
 #include "mongo/db/index/btree_key_generator.h"
+#include "mongo/db/index/sort_key_generator.h"
+#include "mongo/db/query/sort_pattern.h"
 
 namespace mongo::sbe::value {
 /**
  * SortSpec is a wrapper around a BSONObj giving a sort pattern (encoded as a BSONObj), a collator,
- * and a BtreeKeyGenerator object.
+ * and a SortKeyGenerator object.
  */
 class SortSpec {
 public:
-    SortSpec(const BSONObj& sortPattern)
-        : _sortPattern(sortPattern.getOwned()), _keyGen(initKeyGen()) {}
-    SortSpec(const SortSpec& other) : _sortPattern(other._sortPattern), _keyGen(initKeyGen()) {}
+    SortSpec(const BSONObj& sortPatternBson)
+        : _sortPatternBson(sortPatternBson.getOwned()),
+          _sortPattern(_sortPatternBson, nullptr /* expCtx, needed for meta sorts */),
+          _sortKeyGen(_sortPattern, nullptr /* collator */) {
+        _localBsonEltStorage.resize(_sortPattern.size());
+        _localSortKeyComponentStorage.elts.resize(_sortPattern.size());
+    }
+    SortSpec(const SortSpec& other)
+        : _sortPatternBson(other._sortPatternBson),
+          _sortPattern(other._sortPattern),
+          _sortKeyGen(_sortPattern, nullptr /* collator */) {
+        _localBsonEltStorage.resize(_sortPattern.size());
+        _localSortKeyComponentStorage.elts.resize(_sortPattern.size());
+    }
 
     SortSpec& operator=(const SortSpec&) = delete;
 
     KeyString::Value generateSortKey(const BSONObj& obj, const CollatorInterface* collator);
 
+
+    /*
+     * Creates a sort key that's cheaper to generate but more expensive to compare.
+     *
+     * The underlying memory for the returned SortKeyComponentVector is owned by the SortSpec
+     * itself. It is the caller's responsibility to ensure the SortSpec remains alive while the
+     * return value of this function is used. The return value is valid until the next call to
+     * generateSortKeyComponentVector().
+     *
+     * If the passed in 'obj' is owned, this class takes ownership of it. If it is not owned,
+     * the passed in obj must remain alive as long as the return value from this function may
+     * be used.
+     */
+    value::SortKeyComponentVector* generateSortKeyComponentVector(
+        FastTuple<bool, value::TypeTags, value::Value> obj, const CollatorInterface* collator);
+
     const BSONObj& getPattern() const {
-        return _sortPattern;
+        return _sortPatternBson;
     }
 
     size_t getApproximateSize() const;
@@ -56,7 +86,19 @@ public:
 private:
     BtreeKeyGenerator initKeyGen() const;
 
-    const BSONObj _sortPattern;
-    const BtreeKeyGenerator _keyGen;
+    const BSONObj _sortPatternBson;
+
+    const SortPattern _sortPattern;
+    SortKeyGenerator _sortKeyGen;
+
+    // Storage for the sort key component vector returned by generateSortKeyComponentVector().
+    std::vector<BSONElement> _localBsonEltStorage;
+    value::SortKeyComponentVector _localSortKeyComponentStorage;
+
+    // These members store objects that may be held by the key generator. For example, if the
+    // caller generates keys using an object that is temporary, it will get stashed here so that it
+    // remains alive while the sort keys can be used.
+    BSONObj _tempObj;
+    boost::optional<ValueGuard> _tempVal;
 };
 }  // namespace mongo::sbe::value
diff --git a/src/mongo/db/exec/sbe/values/value.cpp b/src/mongo/db/exec/sbe/values/value.cpp
index a0089938d8a..4e6fec83b7f 100644
--- a/src/mongo/db/exec/sbe/values/value.cpp
+++ b/src/mongo/db/exec/sbe/values/value.cpp
@@ -149,27 +149,59 @@ std::pair<TypeTags, Value> makeCopyPcreRegex(const pcre::Regex& regex) {
 }
 
 KeyString::Value SortSpec::generateSortKey(const BSONObj& obj, const CollatorInterface* collator) {
-    KeyStringSet keySet;
-    SharedBufferFragmentBuilder allocator(KeyString::HeapBuilder::kHeapAllocatorDefaultBytes);
-    const bool skipMultikey = false;
-    MultikeyPaths* multikeyPaths = nullptr;
-    _keyGen.getKeys(allocator, obj, skipMultikey, &keySet, multikeyPaths, collator);
+    _sortKeyGen.setCollator(collator);
+    return _sortKeyGen.computeSortKeyString(obj);
+}
+
+value::SortKeyComponentVector* SortSpec::generateSortKeyComponentVector(
+    FastTuple<bool, value::TypeTags, value::Value> obj, const CollatorInterface* collator) {
+    auto [objOwned, objTag, objVal] = obj;
+    ValueGuard guard(objOwned, objTag, objVal);
+
+    // While this function accepts any type of object, for now we simply convert everything
+    // to BSON. In the future, we may change this function to avoid the conversion.
+    auto bsonObj = [&, objTag = objTag, objVal = objVal, objOwned = objOwned]() {
+        if (objTag == value::TypeTags::bsonObject) {
+            if (objOwned) {
+                // Take ownership of the temporary object here.
+                _tempVal.emplace(objTag, objVal);
+                guard.reset();
+            }
+            return BSONObj{value::bitcastTo<const char*>(objVal)};
+        } else if (objTag == value::TypeTags::Object) {
+            BSONObjBuilder objBuilder;
+            bson::convertToBsonObj(objBuilder, value::getObjectView(objVal));
+            _tempObj = objBuilder.obj();
+            return _tempObj;
+        } else {
+            MONGO_UNREACHABLE_TASSERT(7103703);
+        }
+    }();
+
 
-    // When 'isSparse' is false, BtreeKeyGenerator::getKeys() is guaranteed to insert at least
-    // one key into 'keySet', so this assertion should always be true.
-    tassert(5037000, "BtreeKeyGenerator failed to generate key", !keySet.empty());
+    _sortKeyGen.setCollator(collator);
+    // Use the generic API for getting an array of bson elements representing the
+    // sort key.
+    _sortKeyGen.generateSortKeyComponentVector(bsonObj, &_localBsonEltStorage);
 
-    // Return the first KeyString in the set.
-    return std::move(*keySet.extract_sequence().begin());
+    // Convert this array of BSONElements into the SBE SortKeyComponentVector type.
+    {
+        size_t i = 0;
+        for (auto& elt : _localBsonEltStorage) {
+            _localSortKeyComponentStorage.elts[i++] = bson::convertFrom<true>(elt);
+        }
+    }
+    return &_localSortKeyComponentStorage;
 }
 
 BtreeKeyGenerator SortSpec::initKeyGen() const {
-    tassert(
-        5037003, "SortSpec should not be passed an empty sort pattern", !_sortPattern.isEmpty());
+    tassert(5037003,
+            "SortSpec should not be passed an empty sort pattern",
+            !_sortPatternBson.isEmpty());
 
     std::vector<const char*> fields;
     std::vector<BSONElement> fixed;
-    for (auto&& elem : _sortPattern) {
+    for (auto&& elem : _sortPatternBson) {
         fields.push_back(elem.fieldName());
 
         // BtreeKeyGenerator's constructor's first parameter (the 'fields' vector) and second
@@ -183,15 +215,15 @@ BtreeKeyGenerator SortSpec::initKeyGen() const {
 
     const bool isSparse = false;
     auto version = KeyString::Version::kLatestVersion;
-    auto ordering = Ordering::make(_sortPattern);
+    auto ordering = Ordering::make(_sortPatternBson);
 
     return {std::move(fields), std::move(fixed), isSparse, version, ordering};
 }
 
 size_t SortSpec::getApproximateSize() const {
     auto size = sizeof(SortSpec);
-    size += _sortPattern.isOwned() ? _sortPattern.objsize() : 0;
-    size += _keyGen.getApproximateSize() - sizeof(_keyGen);
+    size += _sortKeyGen.getApproximateSize();
+    size += _sortPatternBson.isOwned() ? _sortPatternBson.objsize() : 0;
     return size;
 }
 
diff --git a/src/mongo/db/exec/sbe/values/value.h b/src/mongo/db/exec/sbe/values/value.h
index 8332008796e..01aee0de8ff 100644
--- a/src/mongo/db/exec/sbe/values/value.h
+++ b/src/mongo/db/exec/sbe/values/value.h
@@ -126,6 +126,10 @@ enum class TypeTags : uint8_t {
     MinKey,
     MaxKey,
 
+    // Pointer to sort key component vector. This type is always owned within a SortSpec,
+    // and is never created, copied, or destroyed by SBE.
+    sortKeyComponentVector,
+
     // Pointer to a struct with data necessary to read values from a columnstore index cell. The
     // values of this type are fully owned by the column_scan stage and are never created, cloned or
     // destroyed by SBE.
@@ -919,6 +923,13 @@ private:
     ValueSetType _values;
 };
 
+/*
+ * A vector of values representing a sort key. The values are NOT owned by this object.
+ */
+struct SortKeyComponentVector {
+    std::vector<std::pair<value::TypeTags, value::Value>> elts;
+};
+
 bool operator==(const ArraySet& lhs, const ArraySet& rhs);
 bool operator!=(const ArraySet& lhs, const ArraySet& rhs);
 
@@ -1260,6 +1271,10 @@ inline IndexBounds* getIndexBoundsView(Value val) noexcept {
     return reinterpret_cast<IndexBounds*>(val);
 }
 
+inline SortKeyComponentVector* getSortKeyComponentVectorView(Value v) noexcept {
+    return reinterpret_cast<SortKeyComponentVector*>(v);
+}
+
 inline sbe::value::CsiCell* getCsiCellView(Value val) noexcept {
     return reinterpret_cast<sbe::value::CsiCell*>(val);
 }
diff --git a/src/mongo/db/exec/sbe/values/value_printer.cpp b/src/mongo/db/exec/sbe/values/value_printer.cpp
index 2405f698f3f..8a78037c0ff 100644
--- a/src/mongo/db/exec/sbe/values/value_printer.cpp
+++ b/src/mongo/db/exec/sbe/values/value_printer.cpp
@@ -166,6 +166,9 @@ void ValuePrinter<T>::writeTagToStream(TypeTags tag) {
         case TypeTags::csiCell:
             stream << "csiCell";
             break;
+        case TypeTags::sortKeyComponentVector:
+            stream << "SortKeyComponentVector";
+            break;
         default:
             stream << "unknown tag";
             break;
@@ -539,6 +542,13 @@ void ValuePrinter<T>::writeValueToStream(TypeTags tag, Value val, size_t depth)
         case TypeTags::csiCell:
             stream << "CsiCell(" << getCsiCellView(val) << ")";
             break;
+        case TypeTags::sortKeyComponentVector:
+            stream << "SortKeyComponentVector(";
+            for (auto elt : getSortKeyComponentVectorView(val)->elts) {
+                stream << elt << ", ";
+            }
+            stream << ")";
+            break;
         default:
             MONGO_UNREACHABLE;
     }
diff --git a/src/mongo/db/exec/sbe/vm/vm.cpp b/src/mongo/db/exec/sbe/vm/vm.cpp
index 860a106668b..fd8a9561c2e 100644
--- a/src/mongo/db/exec/sbe/vm/vm.cpp
+++ b/src/mongo/db/exec/sbe/vm/vm.cpp
@@ -4937,27 +4937,52 @@ FastTuple<bool, value::TypeTags, value::Value> ByteCode::builtinGetRegexFlags(Ar
     return {true, strType, strValue};
 }
 
-FastTuple<bool, value::TypeTags, value::Value> ByteCode::builtinGenerateSortKey(ArityType arity) {
+std::pair<value::SortSpec*, CollatorInterface*> ByteCode::generateSortKeyHelper(ArityType arity) {
     invariant(arity == 2 || arity == 3);
 
     auto [ssOwned, ssTag, ssVal] = getFromStack(0);
     auto [objOwned, objTag, objVal] = getFromStack(1);
     if (ssTag != value::TypeTags::sortSpec || !value::isObject(objTag)) {
-        return {false, value::TypeTags::Nothing, 0};
+        return {nullptr, nullptr};
     }
 
     CollatorInterface* collator{nullptr};
     if (arity == 3) {
         auto [collatorOwned, collatorTag, collatorVal] = getFromStack(2);
         if (collatorTag != value::TypeTags::collator) {
-            return {false, value::TypeTags::Nothing, 0};
+            return {nullptr, nullptr};
         }
         collator = value::getCollatorView(collatorVal);
     }
 
     auto ss = value::getSortSpecView(ssVal);
+    return {ss, collator};
+}
+
+FastTuple<bool, value::TypeTags, value::Value> ByteCode::builtinGenerateCheapSortKey(
+    ArityType arity) {
+    auto [sortSpec, collator] = generateSortKeyHelper(arity);
+    if (!sortSpec) {
+        return {false, value::TypeTags::Nothing, 0};
+    }
+
+    // We "move" the object argument into the sort spec.
+    auto sortKeyComponentVector =
+        sortSpec->generateSortKeyComponentVector(moveFromStack(1), collator);
+
+    return {false,
+            value::TypeTags::sortKeyComponentVector,
+            value::bitcastFrom<value::SortKeyComponentVector*>(sortKeyComponentVector)};
+}
+
+FastTuple<bool, value::TypeTags, value::Value> ByteCode::builtinGenerateSortKey(ArityType arity) {
+    auto [sortSpec, collator] = generateSortKeyHelper(arity);
+    if (!sortSpec) {
+        return {false, value::TypeTags::Nothing, 0};
+    }
 
-    auto obj = [objTag = objTag, objVal = objVal]() {
+    auto [objOwned, objTag, objVal] = getFromStack(1);
+    auto bsonObj = [objTag = objTag, objVal = objVal]() {
         if (objTag == value::TypeTags::bsonObject) {
             return BSONObj{value::bitcastTo<const char*>(objVal)};
         } else if (objTag == value::TypeTags::Object) {
@@ -4972,7 +4997,26 @@ FastTuple<bool, value::TypeTags, value::Value> ByteCode::builtinGenerateSortKey(
     return {true,
             value::TypeTags::ksValue,
             value::bitcastFrom<KeyString::Value*>(
-                new KeyString::Value(ss->generateSortKey(obj, collator)))};
+                new KeyString::Value(sortSpec->generateSortKey(bsonObj, collator)))};
+}
+
+FastTuple<bool, value::TypeTags, value::Value> ByteCode::builtinSortKeyComponentVectorGetElement(
+    ArityType arity) {
+    invariant(arity == 2);
+
+    auto [sortVecOwned, sortVecTag, sortVecVal] = getFromStack(0);
+    auto [idxOwned, idxTag, idxVal] = getFromStack(1);
+    if (sortVecTag != value::TypeTags::sortKeyComponentVector ||
+        idxTag != value::TypeTags::NumberInt32) {
+        return {false, value::TypeTags::Nothing, 0};
+    }
+
+    auto* sortObj = value::getSortKeyComponentVectorView(sortVecVal);
+    const auto idxInt32 = value::bitcastTo<int32_t>(idxVal);
+
+    invariant(idxInt32 >= 0 && static_cast<size_t>(idxInt32) < sortObj->elts.size());
+    auto [outTag, outVal] = sortObj->elts[idxInt32];
+    return {false, outTag, outVal};
 }
 
 FastTuple<bool, value::TypeTags, value::Value> ByteCode::builtinMakeBsonObj(ArityType arity) {
@@ -5538,6 +5582,10 @@ FastTuple<bool, value::TypeTags, value::Value> ByteCode::dispatchBuiltin(Builtin
             return builtinFtsMatch(arity);
         case Builtin::generateSortKey:
             return builtinGenerateSortKey(arity);
+        case Builtin::generateCheapSortKey:
+            return builtinGenerateCheapSortKey(arity);
+        case Builtin::sortKeyComponentVectorGetElement:
+            return builtinSortKeyComponentVectorGetElement(arity);
         case Builtin::makeBsonObj:
             return builtinMakeBsonObj(arity);
         case Builtin::tsSecond:
@@ -5773,6 +5821,10 @@ std::string builtinToString(Builtin b) {
             return "ftsMatch";
         case Builtin::generateSortKey:
             return "generateSortKey";
+        case Builtin::generateCheapSortKey:
+            return "generateCheapSortKey";
+        case Builtin::sortKeyComponentVectorGetElement:
+            return "sortKeyComponentVectorGetElement";
         case Builtin::makeBsonObj:
             return "makeBsonObj";
         case Builtin::tsSecond:
diff --git a/src/mongo/db/exec/sbe/vm/vm.h b/src/mongo/db/exec/sbe/vm/vm.h
index 20dd1983f2f..ab5465221e9 100644
--- a/src/mongo/db/exec/sbe/vm/vm.h
+++ b/src/mongo/db/exec/sbe/vm/vm.h
@@ -726,6 +726,9 @@ enum class Builtin : uint8_t {
     hash,
     ftsMatch,
     generateSortKey,
+    generateCheapSortKey,
+    sortKeyComponentVectorGetElement,
+
     makeBsonObj,
     tsSecond,
     tsIncrement,
@@ -1378,7 +1381,11 @@ private:
     FastTuple<bool, value::TypeTags, value::Value> builtinGetRegexFlags(ArityType arity);
     FastTuple<bool, value::TypeTags, value::Value> builtinHash(ArityType arity);
     FastTuple<bool, value::TypeTags, value::Value> builtinFtsMatch(ArityType arity);
+    std::pair<value::SortSpec*, CollatorInterface*> generateSortKeyHelper(ArityType arity);
     FastTuple<bool, value::TypeTags, value::Value> builtinGenerateSortKey(ArityType arity);
+    FastTuple<bool, value::TypeTags, value::Value> builtinGenerateCheapSortKey(ArityType arity);
+    FastTuple<bool, value::TypeTags, value::Value> builtinSortKeyComponentVectorGetElement(
+        ArityType arity);
     FastTuple<bool, value::TypeTags, value::Value> builtinMakeBsonObj(ArityType arity);
     FastTuple<bool, value::TypeTags, value::Value> builtinTsSecond(ArityType arity);
     FastTuple<bool, value::TypeTags, value::Value> builtinTsIncrement(ArityType arity);
diff --git a/src/mongo/db/index/sort_key_generator.cpp b/src/mongo/db/index/sort_key_generator.cpp
index 34bfbb7f61a..31c8fe4758c 100644
--- a/src/mongo/db/index/sort_key_generator.cpp
+++ b/src/mongo/db/index/sort_key_generator.cpp
@@ -36,27 +36,29 @@
 #include "mongo/util/overloaded_visitor.h"
 
 namespace mongo {
-
-SortKeyGenerator::SortKeyGenerator(SortPattern sortPattern, const CollatorInterface* collator)
-    : _collator(collator), _sortPattern(std::move(sortPattern)) {
+namespace {
+BSONObj createSortSpecWithoutMeta(const SortPattern& sortPattern) {
     BSONObjBuilder btreeBob;
-    size_t nFields = 0;
-
-    for (auto&& part : _sortPattern) {
+    for (auto&& part : sortPattern) {
         if (part.fieldPath) {
             btreeBob.append(part.fieldPath->fullPath(), part.isAscending ? 1 : -1);
-            ++nFields;
         }
     }
+    return btreeBob.obj();
+}
+}  // namespace
 
-    // The fake index key pattern used to generate Btree keys.
-    _sortSpecWithoutMeta = btreeBob.obj();
-    _sortHasMeta = nFields < _sortPattern.size();
-
+SortKeyGenerator::SortKeyGenerator(SortPattern sortPattern, const CollatorInterface* collator)
+    : _collator(collator),
+      _sortPattern(std::move(sortPattern)),
+      _sortSpecWithoutMeta(createSortSpecWithoutMeta(_sortPattern)),
+      _ordering(Ordering::make(_sortSpecWithoutMeta)),
+      _sortHasMeta(static_cast<size_t>(_sortSpecWithoutMeta.nFields()) < _sortPattern.size()) {
     // If we're just sorting by meta, don't bother creating an index key generator.
     if (_sortSpecWithoutMeta.isEmpty()) {
         return;
     }
+    const auto nFields = _sortSpecWithoutMeta.nFields();
 
     // We'll need to treat arrays as if we were to create an index over them. that is, we may need
     // to unnest the first level and consider each array element to decide the sort order. In order
@@ -69,11 +71,24 @@ SortKeyGenerator::SortKeyGenerator(SortPattern sortPattern, const CollatorInterf
     }
 
     constexpr bool isSparse = false;
-    _indexKeyGen = std::make_unique<BtreeKeyGenerator>(fieldNames,
-                                                       fixed,
-                                                       isSparse,
-                                                       KeyString::Version::kLatestVersion,
-                                                       Ordering::make(_sortSpecWithoutMeta));
+    _indexKeyGen = std::make_unique<BtreeKeyGenerator>(
+        fieldNames, fixed, isSparse, KeyString::Version::kLatestVersion, _ordering);
+
+    {
+        // TODO SERVER-74725: Remove this.
+        std::set<std::string> fieldNameSet;
+        for (auto& fn : fieldNames) {
+            fieldNameSet.insert(fn);
+        }
+        _sortHasRepeatKey = (fieldNameSet.size() != fieldNames.size());
+    }
+
+    if (!_sortHasMeta && !_sortHasRepeatKey) {
+        size_t i = 0;
+        for (auto&& keyPart : _sortPattern) {
+            _sortKeyTreeRoot.addSortPatternPart(&keyPart, 0, i++);
+        }
+    }
 }
 
 Value SortKeyGenerator::computeSortKey(const WorkingSetMember& wsm) const {
@@ -84,6 +99,21 @@ Value SortKeyGenerator::computeSortKey(const WorkingSetMember& wsm) const {
     return computeSortKeyFromIndexKey(wsm);
 }
 
+KeyString::Value SortKeyGenerator::computeSortKeyString(const BSONObj& obj) const {
+    // This function could be optimized to have a fast path for the non-array case.
+
+    KeyStringSet keySet;
+    SharedBufferFragmentBuilder allocator(KeyString::HeapBuilder::kHeapAllocatorDefaultBytes);
+    const bool skipMultikey = false;
+    MultikeyPaths* multikeyPaths = nullptr;
+    _indexKeyGen->getKeys(allocator, obj, skipMultikey, &keySet, multikeyPaths, _collator);
+
+    // When 'isSparse' is false, BtreeKeyGenerator::getKeys() is guaranteed to insert at least
+    // one key into 'keySet', so this assertion should always be true.
+    tassert(5037000, "BtreeKeyGenerator failed to generate key", !keySet.empty());
+    return std::move(*keySet.extract_sequence().begin());
+}
+
 Value SortKeyGenerator::computeSortKeyFromIndexKey(const WorkingSetMember& member) const {
     invariant(member.getState() == WorkingSetMember::RID_AND_IDX);
     invariant(!_sortHasMeta);
@@ -314,4 +344,110 @@ Value SortKeyGenerator::computeSortKeyFromDocument(const Document& doc,
                                                       extractKeyWithArray(doc, metadata));
 }
 
+
+bool SortKeyGenerator::fastFillOutSortKeyParts(const BSONObj& bson,
+                                               const SortKeyGenerator::SortKeyTreeNode& tree,
+                                               std::vector<BSONElement>* out) {
+    // Walk the BSON once and fill out the sort key parts.
+    // Returns false if array is encountered and fallback is needed.
+    BSONObjIterator it(bson);
+
+    size_t childrenVisited = 0;
+    while (it.more()) {
+        auto elt = it.next();
+
+        const SortKeyGenerator::SortKeyTreeNode* childNode = nullptr;
+        for (auto& child : tree.children) {
+            auto fieldNameSd = elt.fieldNameStringData();
+            if (tree.bloomFilter.maybeContains(fieldNameSd.rawData(), fieldNameSd.size())) {
+                // Could use a hash table, but sort patterns are small so brute force search is good
+                // enough.
+                if (child->name == fieldNameSd) {
+                    childNode = child.get();
+                    break;
+                }
+            }
+        }
+
+        if (childNode) {
+            if (elt.type() == BSONType::Array) {
+                // Slow path needed.
+                return false;
+            }
+
+            if (childNode->part) {
+                (*out)[childNode->partIdx] = elt;
+            }
+
+            if (elt.type() == BSONType::Object && !childNode->children.empty()) {
+                if (!fastFillOutSortKeyParts(elt.embeddedObject(), *childNode, out)) {
+                    return false;
+                }
+            }
+
+            ++childrenVisited;
+            if (childrenVisited == tree.children.size()) {
+                return true;
+            }
+        }
+    }
+
+    return true;
+}
+
+void SortKeyGenerator::generateSortKeyComponentVector(const BSONObj& bson,
+                                                      std::vector<BSONElement>* eltsOut) {
+    tassert(7103704, "Sort cannot have meta", !_sortHasMeta);
+    tassert(7103701, "Sort cannot have repeat keys", !_sortHasRepeatKey);
+    tassert(7103702, "Cannot pass null as eltsOut", eltsOut);
+    const static BSONObj kBsonWithNull = BSON("" << NullLabeler{});
+    const static BSONElement kNullElement = kBsonWithNull.firstElement();
+
+    for (size_t i = 0; i < eltsOut->size(); ++i) {
+        // In MQL missing sort keys are substituted with JS null.
+        (*eltsOut)[i] = kNullElement;
+    }
+
+    const bool fastPathSucceeded = fastFillOutSortKeyParts(bson, _sortKeyTreeRoot, eltsOut);
+
+    if (fastPathSucceeded) {
+        if (_collator) {
+            // Eventually we could reuse the buffer from _localObjStorage here.
+            BSONObjBuilder bob;
+            for (auto elt : *eltsOut) {
+                CollationIndexKey::collationAwareIndexKeyAppend(elt, _collator, &bob);
+            }
+            _localObjStorage = bob.obj();
+
+            {
+                size_t i = 0;
+                for (auto elt : _localObjStorage) {
+                    (*eltsOut)[i++] = elt;
+                }
+            }
+        }
+
+        // Fast path succeeded, we're done.
+        return;
+    }
+
+    // Slow, generic path, used when an array was encountered.
+    Document doc(bson);
+    DocumentMetadataFields meta;
+
+    Value sortKeyVal = computeSortKeyFromDocument(doc, meta);
+
+    Document outDoc(std::vector<std::pair<StringData, Value>>{{""_sd, sortKeyVal}});
+    _localObjStorage = outDoc.toBson();
+    if (_localObjStorage.firstElement().type() == BSONType::Array) {
+        size_t i = 0;
+        for (auto& elt : _localObjStorage.firstElement().embeddedObject()) {
+            (*eltsOut)[i++] = elt;
+        }
+    } else {
+        (*eltsOut)[0] = _localObjStorage.firstElement();
+    }
+}
+
+
 }  // namespace mongo
diff --git a/src/mongo/db/index/sort_key_generator.h b/src/mongo/db/index/sort_key_generator.h
index 40e270dac21..e2fe5ed0dcf 100644
--- a/src/mongo/db/index/sort_key_generator.h
+++ b/src/mongo/db/index/sort_key_generator.h
@@ -30,11 +30,14 @@
 #pragma once
 
 #include "mongo/bson/bsonobj.h"
+#include "mongo/db/exec/field_name_bloom_filter.h"
 #include "mongo/db/exec/working_set.h"
 #include "mongo/db/index/btree_key_generator.h"
 #include "mongo/db/operation_context.h"
 #include "mongo/db/query/collation/collator_interface.h"
 #include "mongo/db/query/sort_pattern.h"
+#include "mongo/db/storage/key_string.h"
+#include "mongo/util/assert_util.h"
 
 namespace mongo {
 
@@ -58,6 +61,18 @@ public:
     Value computeSortKey(const WorkingSetMember&) const;
 
     /**
+     * Computes a KeyString that can be used as the sort key for this object.
+     */
+    KeyString::Value computeSortKeyString(const BSONObj& bson) const;
+
+    /**
+     * Determines all of the portions of the sort key for the given document and populates the
+     * output vector with their positions. The results in the output vector are valid until the
+     * next call.
+     */
+    void generateSortKeyComponentVector(const BSONObj& obj, std::vector<BSONElement>* out);
+
+    /**
      * Returns the sort key for the input 'doc' as a Value or throws if no key could be generated.
      * When the sort pattern has multiple components, the resulting sort key is an Array-typed Value
      * with one element for each component. For sort patterns with just one component, the sort key
@@ -79,7 +94,68 @@ public:
         return _sortPattern;
     }
 
+    void setCollator(const CollatorInterface* c) {
+        _collator = c;
+    }
+
+    size_t getApproximateSize() const {
+        return sizeof(this) + (_indexKeyGen ? _indexKeyGen->getApproximateSize() : 0) +
+            _sortKeyTreeRoot.getApproximateSize() + _sortSpecWithoutMeta.objsize() +
+            _localObjStorage.objsize();
+    }
+
 private:
+    /* Tree representation of the sort key pattern. E.g. {a.b:1, a.x: 1}
+     *      a
+     *    /  \
+     *   b    x
+     * This is used for the fast path where the sort key is generated in one pass over the bson.
+     * This is only used when the sort pattern does not include a $meta.
+     */
+    struct SortKeyTreeNode {
+        std::string name;
+        const SortPattern::SortPatternPart* part = nullptr;  // Pointer into the SortPattern.
+        std::vector<std::unique_ptr<SortKeyTreeNode>> children;
+        size_t partIdx = 0;
+
+        // Tracks field names of the children. We use this when scanning the bson to quickly skip
+        // over fields irrelevant to the sort pattern.
+        FieldNameBloomFilter<> bloomFilter;
+
+        // Adds a new component of the sort pattern to the tree.
+        void addSortPatternPart(const SortPattern::SortPatternPart* part,
+                                const size_t pathIdx,
+                                const size_t partIdx) {
+            if (pathIdx == part->fieldPath->getPathLength()) {
+                tassert(7103700, "Invalid sort tree", !this->part);
+                this->part = part;
+                this->partIdx = partIdx;
+                return;
+            }
+
+            for (auto& c : children) {
+                // Check if we already have a child with the same prefix.
+                if (c->name == part->fieldPath->getFieldName(pathIdx)) {
+                    c->addSortPatternPart(part, pathIdx + 1, partIdx);
+                    return;
+                }
+            }
+
+            children.push_back(std::make_unique<SortKeyTreeNode>());
+            children.back()->name = part->fieldPath->getFieldName(pathIdx).toString();
+            children.back()->addSortPatternPart(part, pathIdx + 1, partIdx);
+            bloomFilter.insert(children.back()->name.c_str(), children.back()->name.size());
+        }
+
+        size_t getApproximateSize() const {
+            size_t size = sizeof(this) + name.size();
+            for (auto& c : children) {
+                size += c->getApproximateSize();
+            }
+            return size;
+        }
+    };
+
     // Returns the sort key for the input 'doc' as a Value.
     //
     // Note that this function will ignore any metadata (e.g., textScore, randVal), in 'doc' but
@@ -135,6 +211,12 @@ private:
     // should always be sorted with the simple (i.e. binary) collation.
     Value getCollationComparisonKey(const Value& val) const;
 
+    // Fast path for reading a BSON obj and computing the sort key. Returns true on success, false
+    // when an array is encountered and the slow path needs to be used instead.
+    bool fastFillOutSortKeyParts(const BSONObj& bson,
+                                 const SortKeyGenerator::SortKeyTreeNode& tree,
+                                 std::vector<BSONElement>* out);
+
     const CollatorInterface* _collator = nullptr;
 
     SortPattern _sortPattern;
@@ -142,11 +224,18 @@ private:
     // The sort pattern with any $meta sort components stripped out, since the underlying index key
     // generator does not understand $meta sort.
     BSONObj _sortSpecWithoutMeta;
+    Ordering _ordering;
 
     // If we're not sorting with a $meta value we can short-cut some work.
     bool _sortHasMeta = false;
 
+    bool _sortHasRepeatKey = false;
+
     std::unique_ptr<BtreeKeyGenerator> _indexKeyGen;
+
+    // Used for fastFillOutSortKeyParts()/extractSortKeyParts().
+    SortKeyTreeNode _sortKeyTreeRoot;
+    BSONObj _localObjStorage;
 };
 
 }  // namespace mongo
diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp
index a63b0e7b516..9178d70e863 100644
--- a/src/mongo/db/query/sbe_stage_builder.cpp
+++ b/src/mongo/db/query/sbe_stage_builder.cpp
@@ -1364,27 +1364,66 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder
             std::move(stage), std::move(sortExpressions), root->nodeId());
 
     } else {
-        // Handle the case where two or more parts of the sort pattern have a common prefix.
-        orderBy = _slotIdGenerator.generateMultiple(1);
-        direction = {sbe::value::SortDirection::Ascending};
+        // When there's no limit on the sort, the dominating factor is number of comparisons
+        // (nlogn). A sort with a limit of k requires only nlogk comparisons. When k is small, the
+        // number of key generations (n) can actually dominate the runtime. So for all top-k sorts
+        // we use a "cheap" sort key: it's cheaper to construct but more expensive to compare. The
+        // assumption here is that k << n.
+
+        StringData sortKeyGenerator = sn->limit ? "generateCheapSortKey" : "generateSortKey";
 
         auto sortSpec = std::make_unique<sbe::value::SortSpec>(sn->pattern);
         auto sortSpecExpr =
             makeConstant(sbe::value::TypeTags::sortSpec,
                          sbe::value::bitcastFrom<sbe::value::SortSpec*>(sortSpec.release()));
 
+        const auto fullSortKeySlot = _slotIdGenerator.generate();
+
         // generateSortKey() will handle the parallel arrays check and sort key traversal for us,
         // so we don't need to generate our own sort key traversal logic in the SBE plan.
         stage = sbe::makeProjectStage(std::move(stage),
                                       root->nodeId(),
-                                      orderBy[0],
-                                      collatorSlot ? makeFunction("generateSortKey",
+                                      fullSortKeySlot,
+                                      collatorSlot ? makeFunction(sortKeyGenerator,
                                                                   std::move(sortSpecExpr),
                                                                   makeVariable(outputSlotId),
                                                                   makeVariable(*collatorSlot))
-                                                   : makeFunction("generateSortKey",
+                                                   : makeFunction(sortKeyGenerator,
                                                                   std::move(sortSpecExpr),
                                                                   makeVariable(outputSlotId)));
+
+        if (sortKeyGenerator == "generateSortKey") {
+            // In this case generateSortKey() produces a mem-comparable KeyString so we use for
+            // the comparison. We always sort in ascending order because the KeyString takes the
+            // ordering into account.
+            orderBy = {fullSortKeySlot};
+            direction = {sbe::value::SortDirection::Ascending};
+        } else {
+            // Generate the cheap sort key represented as an array then extract each component into
+            // a slot:
+            //
+            // sort [s1, s2] [asc, dsc] ...
+            // project s1=getElement(fullSortKey,0), s2=getElement(fullSortKey,1)
+            // project fullSortKey=generateSortKeyCheap(bson)
+            sbe::value::SlotMap<std::unique_ptr<sbe::EExpression>> prjSlotToExprMap;
+
+            int i = 0;
+            for (const auto& part : sortPattern) {
+                auto sortKeySlot = _slotIdGenerator.generate();
+
+                orderBy.push_back(sortKeySlot);
+                direction.push_back(part.isAscending ? sbe::value::SortDirection::Ascending
+                                                     : sbe::value::SortDirection::Descending);
+
+                prjSlotToExprMap[sortKeySlot] =
+                    makeFunction("sortKeyComponentVectorGetElement",
+                                 makeVariable(fullSortKeySlot),
+                                 makeConstant(sbe::value::TypeTags::NumberInt32, i));
+                ++i;
+            }
+            stage = sbe::makeS<sbe::ProjectStage>(
+                std::move(stage), std::move(prjSlotToExprMap), root->nodeId());
+        }
     }
 
     // Slots for sort stage to forward to parent stage. Values in these slots are not used during
author	Ian Boros <ian.boros@mongodb.com>	2023-02-17 18:43:37 +0000
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2023-03-10 23:23:36 +0000
commit	3ff59aaf07bcc3e44de3bdd6a4d7b8be6b4f115e (patch)
tree	0cfc867008eeca940d9f48b552d5dfc0a7dddbbf
parent	5034e23d2044179ff56b6853ae0b398e88d3e651 (diff)
download	mongo-3ff59aaf07bcc3e44de3bdd6a4d7b8be6b4f115e.tar.gz