diff options
author | Ian Boros <ian.boros@mongodb.com> | 2023-02-17 18:43:37 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-03-10 23:23:36 +0000 |
commit | 3ff59aaf07bcc3e44de3bdd6a4d7b8be6b4f115e (patch) | |
tree | 0cfc867008eeca940d9f48b552d5dfc0a7dddbbf | |
parent | 5034e23d2044179ff56b6853ae0b398e88d3e651 (diff) | |
download | mongo-3ff59aaf07bcc3e44de3bdd6a4d7b8be6b4f115e.tar.gz |
SERVER-71037 SBE top-k sort improvements
-rw-r--r-- | jstests/core/query/sort/sort5.js | 57 | ||||
-rw-r--r-- | jstests/core/query/sort/sort_dotted_paths.js | 168 | ||||
-rw-r--r-- | jstests/core/query/sort/sort_dotted_paths_collation.js | 149 | ||||
-rw-r--r-- | src/mongo/db/exec/field_name_bloom_filter.h | 75 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/expressions/expression.cpp | 6 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/stages/scan.cpp | 6 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/stages/scan.h | 3 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/values/sort_spec.h | 56 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/values/value.cpp | 64 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/values/value.h | 15 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/values/value_printer.cpp | 10 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/vm/vm.cpp | 62 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/vm/vm.h | 7 | ||||
-rw-r--r-- | src/mongo/db/index/sort_key_generator.cpp | 168 | ||||
-rw-r--r-- | src/mongo/db/index/sort_key_generator.h | 89 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder.cpp | 51 |
16 files changed, 752 insertions, 234 deletions
diff --git a/jstests/core/query/sort/sort5.js b/jstests/core/query/sort/sort5.js index 01a267bf6b6..36b949bb807 100644 --- a/jstests/core/query/sort/sort5.js +++ b/jstests/core/query/sort/sort5.js @@ -1,36 +1,43 @@ +(function() { // test compound sorting -var t = db.sort5; +const t = db.sort5; t.drop(); -t.save({_id: 5, x: 1, y: {a: 5, b: 4}}); -t.save({_id: 7, x: 2, y: {a: 7, b: 3}}); -t.save({_id: 2, x: 3, y: {a: 2, b: 3}}); -t.save({_id: 9, x: 4, y: {a: 9, b: 3}}); +assert.commandWorked(t.insert({_id: 5, x: 1, y: {a: 5, b: 4}})); +assert.commandWorked(t.insert({_id: 7, x: 2, y: {a: 7, b: 3}})); +assert.commandWorked(t.insert({_id: 2, x: 3, y: {a: 2, b: 3}})); +assert.commandWorked(t.insert({_id: 9, x: 4, y: {a: 9, b: 3}})); -assert.eq([4, 2, 3, 1], - t.find().sort({"y.b": 1, "y.a": -1}).map(function(z) { - return z.x; - }), - "A no index"); +function testSortAndSortWithLimit(expected, sortPattern, description) { + // Test sort and sort with limit. + assert.eq(expected, + t.find().sort(sortPattern).map(function(z) { + return z.x; + }), + description); + + assert.eq(expected, + t.find().sort(sortPattern).limit(500).map(function(z) { + return z.x; + }), + description); +} + +testSortAndSortWithLimit([4, 2, 3, 1], {"y.b": 1, "y.a": -1}, "A no index"); t.createIndex({"y.b": 1, "y.a": -1}); -assert.eq([4, 2, 3, 1], - t.find().sort({"y.b": 1, "y.a": -1}).map(function(z) { - return z.x; - }), - "A index"); +testSortAndSortWithLimit([4, 2, 3, 1], {"y.b": 1, "y.a": -1}, "A index"); assert(t.validate().valid, "A valid"); // test sorting on compound key involving _id -assert.eq([4, 2, 3, 1], - t.find().sort({"y.b": 1, _id: -1}).map(function(z) { - return z.x; - }), - "B no index"); +testSortAndSortWithLimit([4, 2, 3, 1], {"y.b": 1, _id: -1}, "B no index"); t.createIndex({"y.b": 1, "_id": -1}); -assert.eq([4, 2, 3, 1], - t.find().sort({"y.b": 1, _id: -1}).map(function(z) { - return z.x; - }), - "B index"); +testSortAndSortWithLimit([4, 2, 3, 1], {"y.b": 1, _id: -1}, "B index"); assert(t.validate().valid, "B valid"); + +function testWithProj(proj, sort, expected) { + assert.eq(t.find({}, proj).sort(sort).map((doc) => doc._id), expected); + assert.eq(t.find({}, proj).sort(sort).limit(500).map((doc) => doc._id), expected); +} +testWithProj({"y.a": 1, "y.b": 1, _id: 1}, {"y.a": 1, "y.b": 1, _id: 1}, [2, 5, 7, 9]); +})(); diff --git a/jstests/core/query/sort/sort_dotted_paths.js b/jstests/core/query/sort/sort_dotted_paths.js index 1ce7ad2533e..2db3ba5683c 100644 --- a/jstests/core/query/sort/sort_dotted_paths.js +++ b/jstests/core/query/sort/sort_dotted_paths.js @@ -15,6 +15,11 @@ const coll = db.sort_dotted_paths; coll.drop(); +function testSortAndSortWithLimit(sortPattern, expectedIds) { + assert.eq(expectedIds, coll.find({}, {_id: 1}).sort(sortPattern).toArray()); + assert.eq(expectedIds, coll.find({}, {_id: 1}).sort(sortPattern).limit(500).toArray()); +} + // Basic tests to verify that sorting deals with undefined, null, missing fields, and nested arrays // as expected. assert.commandWorked(coll.insert([ @@ -29,20 +34,20 @@ assert.commandWorked(coll.insert([ {_id: 9, a: [undefined]} ])); -assert.eq( - coll.find({}, {_id: 1}).sort({a: 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {a: 1, _id: 1}, [{_id: 2}, {_id: 5}, {_id: 9}, {_id: 3}, {_id: 8}, {_id: 1}, {_id: 6}, {_id: 4}, {_id: 7}]); -assert.eq( - coll.find({}, {_id: 1}).sort({a: -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {a: -1, _id: 1}, [{_id: 7}, {_id: 4}, {_id: 1}, {_id: 6}, {_id: 3}, {_id: 8}, {_id: 2}, {_id: 5}, {_id: 9}]); // Test out sort({"a.0":1}) on a collection of documents where field 'a' is a mix of different // types (arrays of varying size for some documents, non-arrays for other documents). -assert.eq( - coll.find({}, {_id: 1}).sort({"a.0": 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.0": 1, _id: 1}, [{_id: 9}, {_id: 1}, {_id: 2}, {_id: 3}, {_id: 4}, {_id: 5}, {_id: 8}, {_id: 6}, {_id: 7}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.0": -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.0": -1, _id: 1}, [{_id: 7}, {_id: 6}, {_id: 1}, {_id: 2}, {_id: 3}, {_id: 4}, {_id: 5}, {_id: 8}, {_id: 9}]); // Test out sort({a:1,b:1}) on a collection where a is an array for some documents and b is an array @@ -60,17 +65,17 @@ assert.commandWorked(coll.insert([ {_id: 9, a: 2, b: 3} ])); -assert.eq( - coll.find({}, {_id: 1}).sort({a: 1, b: 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {a: 1, b: 1, _id: 1}, [{_id: 3}, {_id: 7}, {_id: 1}, {_id: 8}, {_id: 4}, {_id: 2}, {_id: 5}, {_id: 6}, {_id: 9}]); -assert.eq( - coll.find({}, {_id: 1}).sort({a: 1, b: -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {a: 1, b: -1, _id: 1}, [{_id: 3}, {_id: 4}, {_id: 7}, {_id: 8}, {_id: 1}, {_id: 6}, {_id: 9}, {_id: 5}, {_id: 2}]); -assert.eq( - coll.find({}, {_id: 1}).sort({a: -1, b: 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {a: -1, b: 1, _id: 1}, [{_id: 5}, {_id: 2}, {_id: 6}, {_id: 8}, {_id: 9}, {_id: 7}, {_id: 1}, {_id: 4}, {_id: 3}]); -assert.eq( - coll.find({}, {_id: 1}).sort({a: -1, b: -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {a: -1, b: -1, _id: 1}, [{_id: 5}, {_id: 6}, {_id: 8}, {_id: 9}, {_id: 2}, {_id: 4}, {_id: 7}, {_id: 1}, {_id: 3}]); // Verify that sort({a:1,b:1}) fails with a "parallel arrays" error when there is at least one @@ -113,17 +118,17 @@ assert.commandWorked(coll.insert([ {_id: 9, a: [{b: []}, {b: [1, 3]}]} ])); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b": 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.b": 1, _id: 1}, [{_id: 2}, {_id: 9}, {_id: 4}, {_id: 5}, {_id: 8}, {_id: 1}, {_id: 3}, {_id: 7}, {_id: 6}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b": 1, _id: -1}).toArray(), +testSortAndSortWithLimit( + {"a.b": 1, _id: -1}, [{_id: 9}, {_id: 2}, {_id: 5}, {_id: 4}, {_id: 8}, {_id: 3}, {_id: 1}, {_id: 7}, {_id: 6}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b": -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.b": -1, _id: 1}, [{_id: 7}, {_id: 6}, {_id: 8}, {_id: 3}, {_id: 9}, {_id: 1}, {_id: 4}, {_id: 5}, {_id: 2}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b": -1, _id: -1}).toArray(), +testSortAndSortWithLimit( + {"a.b": -1, _id: -1}, [{_id: 7}, {_id: 8}, {_id: 6}, {_id: 9}, {_id: 3}, {_id: 1}, {_id: 5}, {_id: 4}, {_id: 2}]); // Basic tests for a sort pattern that contains two paths of length 2 with a common prefix. @@ -140,33 +145,32 @@ assert.commandWorked(coll.insert([ {_id: 9, a: [{b: 8, c: 2}, {b: 6, c: 3}]}, ])); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.c": 1, "a.b": 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.c": 1, "a.b": 1, _id: 1}, [{_id: 2}, {_id: 5}, {_id: 1}, {_id: 4}, {_id: 6}, {_id: 3}, {_id: 8}, {_id: 7}, {_id: 9}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.c": 1, "a.b": -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.c": 1, "a.b": -1, _id: 1}, [{_id: 3}, {_id: 6}, {_id: 1}, {_id: 4}, {_id: 2}, {_id: 5}, {_id: 9}, {_id: 7}, {_id: 8}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.c": -1, "a.b": 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.c": -1, "a.b": 1, _id: 1}, [{_id: 7}, {_id: 6}, {_id: 4}, {_id: 9}, {_id: 5}, {_id: 8}, {_id: 1}, {_id: 2}, {_id: 3}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.c": -1, "a.b": -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.c": -1, "a.b": -1, _id: 1}, [{_id: 8}, {_id: 5}, {_id: 9}, {_id: 4}, {_id: 6}, {_id: 7}, {_id: 3}, {_id: 2}, {_id: 1}]); // Basic tests for a sort pattern that contains a path of length 2 and a path of length 1, where // one path is a prefix of the other path. -assert.eq( - coll.find({}, {_id: 1}).sort({"a.c": 1, a: 1, _id: 1}).toArray(), - [{_id: 2}, {_id: 5}, {_id: 1}, {_id: 4}, {_id: 6}, {_id: 3}, {_id: 8}, {_id: 7}, {_id: 9}], - () => tojson(coll.find({}, {_id: 1}).sort({"a.c": 1, a: 1, _id: 1}).explain())); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.c": 1, a: -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.c": 1, a: 1, _id: 1}, + [{_id: 2}, {_id: 5}, {_id: 1}, {_id: 4}, {_id: 6}, {_id: 3}, {_id: 8}, {_id: 7}, {_id: 9}]); +testSortAndSortWithLimit( + {"a.c": 1, a: -1, _id: 1}, [{_id: 3}, {_id: 6}, {_id: 1}, {_id: 4}, {_id: 2}, {_id: 5}, {_id: 9}, {_id: 7}, {_id: 8}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.c": -1, a: 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.c": -1, a: 1, _id: 1}, [{_id: 7}, {_id: 6}, {_id: 4}, {_id: 9}, {_id: 5}, {_id: 8}, {_id: 1}, {_id: 2}, {_id: 3}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.c": -1, a: -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.c": -1, a: -1, _id: 1}, [{_id: 8}, {_id: 5}, {_id: 9}, {_id: 4}, {_id: 6}, {_id: 7}, {_id: 3}, {_id: 2}, {_id: 1}]); // Verify that sort({"a.b":1,"a.c":1}) fails with a "parallel arrays" error when there is at least @@ -199,47 +203,47 @@ assert.commandWorked(coll.insert([ {_id: 9, a: [{b: [8], c: 2}, {b: [6], c: 3}]}, ])); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.c": 1, "a.b": 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.c": 1, "a.b": 1, _id: 1}, [{_id: 6}, {_id: 1}, {_id: 2}, {_id: 5}, {_id: 3}, {_id: 4}, {_id: 7}, {_id: 8}, {_id: 9}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.c": 1, "a.b": -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.c": 1, "a.b": -1, _id: 1}, [{_id: 6}, {_id: 3}, {_id: 1}, {_id: 4}, {_id: 5}, {_id: 2}, {_id: 9}, {_id: 8}, {_id: 7}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.c": -1, "a.b": 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.c": -1, "a.b": 1, _id: 1}, [{_id: 2}, {_id: 7}, {_id: 6}, {_id: 4}, {_id: 5}, {_id: 9}, {_id: 8}, {_id: 1}, {_id: 3}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.c": -1, "a.b": -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.c": -1, "a.b": -1, _id: 1}, [{_id: 2}, {_id: 8}, {_id: 5}, {_id: 9}, {_id: 4}, {_id: 6}, {_id: 7}, {_id: 3}, {_id: 1}]); // More tests for a sort pattern that contains a path of length 2 and a path of length 1, where // one path is a prefix of the other path. -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b": 1, a: 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.b": 1, a: 1, _id: 1}, [{_id: 7}, {_id: 1}, {_id: 2}, {_id: 5}, {_id: 6}, {_id: 8}, {_id: 4}, {_id: 3}, {_id: 9}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b": 1, a: -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.b": 1, a: -1, _id: 1}, [{_id: 7}, {_id: 1}, {_id: 5}, {_id: 2}, {_id: 8}, {_id: 6}, {_id: 3}, {_id: 4}, {_id: 9}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b": -1, a: 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.b": -1, a: 1, _id: 1}, [{_id: 3}, {_id: 8}, {_id: 9}, {_id: 5}, {_id: 6}, {_id: 4}, {_id: 1}, {_id: 2}, {_id: 7}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b": -1, a: -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.b": -1, a: -1, _id: 1}, [{_id: 3}, {_id: 9}, {_id: 8}, {_id: 5}, {_id: 1}, {_id: 4}, {_id: 6}, {_id: 2}, {_id: 7}]); // Test out sort({"a.0.b":1}) on a collection of documents where field "a" and sub-field "b" are // a mix of different types. -assert.eq( - coll.find({}, {_id: 1}).sort({"a.0.b": 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.0.b": 1, _id: 1}, [{_id: 7}, {_id: 1}, {_id: 2}, {_id: 5}, {_id: 8}, {_id: 3}, {_id: 4}, {_id: 6}, {_id: 9}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.0.b": 1, _id: -1}).toArray(), +testSortAndSortWithLimit( + {"a.0.b": 1, _id: -1}, [{_id: 7}, {_id: 1}, {_id: 5}, {_id: 2}, {_id: 8}, {_id: 4}, {_id: 3}, {_id: 6}, {_id: 9}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.0.b": -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.0.b": -1, _id: 1}, [{_id: 9}, {_id: 3}, {_id: 6}, {_id: 1}, {_id: 4}, {_id: 5}, {_id: 8}, {_id: 2}, {_id: 7}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.0.b": -1, _id: -1}).toArray(), +testSortAndSortWithLimit( + {"a.0.b": -1, _id: -1}, [{_id: 9}, {_id: 3}, {_id: 6}, {_id: 4}, {_id: 1}, {_id: 8}, {_id: 5}, {_id: 2}, {_id: 7}]); // Tests for a sort pattern that contains two paths of length 2 that do not have a common prefix. @@ -256,17 +260,17 @@ assert.commandWorked(coll.insert([ {_id: 9, a: {b: 3}, c: [{d: [8]}, {d: [6]}]}, ])); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b": 1, "c.d": 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.b": 1, "c.d": 1, _id: 1}, [{_id: 6}, {_id: 1}, {_id: 2}, {_id: 5}, {_id: 3}, {_id: 7}, {_id: 8}, {_id: 4}, {_id: 9}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b": 1, "c.d": -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.b": 1, "c.d": -1, _id: 1}, [{_id: 6}, {_id: 3}, {_id: 5}, {_id: 1}, {_id: 2}, {_id: 4}, {_id: 8}, {_id: 7}, {_id: 9}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b": -1, "c.d": 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.b": -1, "c.d": 1, _id: 1}, [{_id: 2}, {_id: 8}, {_id: 4}, {_id: 6}, {_id: 9}, {_id: 7}, {_id: 1}, {_id: 5}, {_id: 3}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b": -1, "c.d": -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.b": -1, "c.d": -1, _id: 1}, [{_id: 2}, {_id: 9}, {_id: 4}, {_id: 6}, {_id: 8}, {_id: 7}, {_id: 3}, {_id: 5}, {_id: 1}]); // Basic tests for a sort pattern that contains a path of length 3. @@ -283,16 +287,16 @@ assert.commandWorked(coll.insert([ {_id: 9, a: [{b: [{c: []}, {c: [1, 3]}]}]} ])); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b.c": 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.b.c": 1, _id: 1}, [{_id: 2}, {_id: 3}, {_id: 9}, {_id: 5}, {_id: 7}, {_id: 4}, {_id: 8}, {_id: 1}, {_id: 6}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b.c": 1, _id: -1}).toArray(), +testSortAndSortWithLimit( + {"a.b.c": 1, _id: -1}, [{_id: 9}, {_id: 3}, {_id: 2}, {_id: 7}, {_id: 5}, {_id: 8}, {_id: 4}, {_id: 1}, {_id: 6}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b.c": -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.b.c": -1, _id: 1}, [{_id: 6}, {_id: 8}, {_id: 5}, {_id: 9}, {_id: 1}, {_id: 4}, {_id: 3}, {_id: 7}, {_id: 2}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b.c": -1, _id: -1}).toArray(), +testSortAndSortWithLimit( + {"a.b.c": -1, _id: -1}, [{_id: 8}, {_id: 6}, {_id: 9}, {_id: 5}, {_id: 1}, {_id: 4}, {_id: 7}, {_id: 3}, {_id: 2}]); })(); diff --git a/jstests/core/query/sort/sort_dotted_paths_collation.js b/jstests/core/query/sort/sort_dotted_paths_collation.js index 10111a95441..b0b91f7126d 100644 --- a/jstests/core/query/sort/sort_dotted_paths_collation.js +++ b/jstests/core/query/sort/sort_dotted_paths_collation.js @@ -25,6 +25,11 @@ const caseInsensitive = { }; assert.commandWorked(db.createCollection(coll.getName(), caseInsensitive)); +function testSortAndSortWithLimit(sortPattern, expectedIds) { + assert.eq(expectedIds, coll.find({}, {_id: 1}).sort(sortPattern).toArray()); + assert.eq(expectedIds, coll.find({}, {_id: 1}).sort(sortPattern).limit(500).toArray()); +} + // Test out sort({a:1,b:1}) on a collection where a is an array for some documents and b is an array // for other documents. assert.commandWorked(coll.insert([ @@ -39,17 +44,17 @@ assert.commandWorked(coll.insert([ {_id: 9, a: "b", b: "C"} ])); -assert.eq( - coll.find({}, {_id: 1}).sort({a: 1, b: 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {a: 1, b: 1, _id: 1}, [{_id: 3}, {_id: 7}, {_id: 1}, {_id: 8}, {_id: 4}, {_id: 2}, {_id: 5}, {_id: 6}, {_id: 9}]); -assert.eq( - coll.find({}, {_id: 1}).sort({a: 1, b: -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {a: 1, b: -1, _id: 1}, [{_id: 3}, {_id: 4}, {_id: 7}, {_id: 8}, {_id: 1}, {_id: 6}, {_id: 9}, {_id: 5}, {_id: 2}]); -assert.eq( - coll.find({}, {_id: 1}).sort({a: -1, b: 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {a: -1, b: 1, _id: 1}, [{_id: 5}, {_id: 2}, {_id: 6}, {_id: 8}, {_id: 9}, {_id: 7}, {_id: 1}, {_id: 4}, {_id: 3}]); -assert.eq( - coll.find({}, {_id: 1}).sort({a: -1, b: -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {a: -1, b: -1, _id: 1}, [{_id: 5}, {_id: 6}, {_id: 8}, {_id: 9}, {_id: 2}, {_id: 4}, {_id: 7}, {_id: 1}, {_id: 3}]); // Verify that sort({a:1,b:1}) fails with a "parallel arrays" error when there is at least one @@ -94,17 +99,17 @@ assert.commandWorked(coll.insert([ {_id: 9, a: [{b: []}, {b: ["a", "C"]}]} ])); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b": 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.b": 1, _id: 1}, [{_id: 2}, {_id: 9}, {_id: 4}, {_id: 5}, {_id: 8}, {_id: 1}, {_id: 3}, {_id: 7}, {_id: 6}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b": 1, _id: -1}).toArray(), +testSortAndSortWithLimit( + {"a.b": 1, _id: -1}, [{_id: 9}, {_id: 2}, {_id: 5}, {_id: 4}, {_id: 8}, {_id: 3}, {_id: 1}, {_id: 7}, {_id: 6}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b": -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.b": -1, _id: 1}, [{_id: 7}, {_id: 6}, {_id: 8}, {_id: 3}, {_id: 9}, {_id: 1}, {_id: 4}, {_id: 5}, {_id: 2}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b": -1, _id: -1}).toArray(), +testSortAndSortWithLimit( + {"a.b": -1, _id: -1}, [{_id: 7}, {_id: 8}, {_id: 6}, {_id: 9}, {_id: 3}, {_id: 1}, {_id: 5}, {_id: 4}, {_id: 2}]); // Basic tests for a sort pattern that contains two paths of length 2 with a common prefix. @@ -122,32 +127,32 @@ assert.commandWorked(coll.insert([ {_id: 9, a: [{b: "h", c: "b"}, {b: "F", c: "C"}]}, ])); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.c": 1, "a.b": 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.c": 1, "a.b": 1, _id: 1}, [{_id: 2}, {_id: 5}, {_id: 1}, {_id: 4}, {_id: 6}, {_id: 3}, {_id: 8}, {_id: 7}, {_id: 9}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.c": 1, "a.b": -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.c": 1, "a.b": -1, _id: 1}, [{_id: 3}, {_id: 6}, {_id: 1}, {_id: 4}, {_id: 2}, {_id: 5}, {_id: 9}, {_id: 7}, {_id: 8}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.c": -1, "a.b": 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.c": -1, "a.b": 1, _id: 1}, [{_id: 7}, {_id: 6}, {_id: 4}, {_id: 9}, {_id: 5}, {_id: 8}, {_id: 1}, {_id: 2}, {_id: 3}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.c": -1, "a.b": -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.c": -1, "a.b": -1, _id: 1}, [{_id: 8}, {_id: 5}, {_id: 9}, {_id: 4}, {_id: 6}, {_id: 7}, {_id: 3}, {_id: 2}, {_id: 1}]); // Basic tests for a sort pattern that contains a path of length 2 and a path of length 1, where // one path is a prefix of the other path. -assert.eq( - coll.find({}, {_id: 1}).sort({"a.c": 1, a: 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.c": 1, a: 1, _id: 1}, [{_id: 2}, {_id: 5}, {_id: 1}, {_id: 4}, {_id: 6}, {_id: 3}, {_id: 8}, {_id: 7}, {_id: 9}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.c": 1, a: -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.c": 1, a: -1, _id: 1}, [{_id: 3}, {_id: 6}, {_id: 1}, {_id: 4}, {_id: 2}, {_id: 5}, {_id: 9}, {_id: 7}, {_id: 8}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.c": -1, a: 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.c": -1, a: 1, _id: 1}, [{_id: 7}, {_id: 6}, {_id: 4}, {_id: 9}, {_id: 5}, {_id: 8}, {_id: 1}, {_id: 2}, {_id: 3}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.c": -1, a: -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.c": -1, a: -1, _id: 1}, [{_id: 8}, {_id: 5}, {_id: 9}, {_id: 4}, {_id: 6}, {_id: 7}, {_id: 3}, {_id: 2}, {_id: 1}]); // Verify that sort({"a.b":1,"a.c":1}) fails with a "parallel arrays" error when there is at least @@ -183,47 +188,47 @@ assert.commandWorked(coll.insert([ {_id: 9, a: [{b: ["h"], c: "b"}, {b: ["F"], c: "C"}]}, ])); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.c": 1, "a.b": 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.c": 1, "a.b": 1, _id: 1}, [{_id: 6}, {_id: 1}, {_id: 2}, {_id: 5}, {_id: 3}, {_id: 4}, {_id: 7}, {_id: 8}, {_id: 9}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.c": 1, "a.b": -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.c": 1, "a.b": -1, _id: 1}, [{_id: 6}, {_id: 3}, {_id: 1}, {_id: 4}, {_id: 5}, {_id: 2}, {_id: 9}, {_id: 8}, {_id: 7}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.c": -1, "a.b": 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.c": -1, "a.b": 1, _id: 1}, [{_id: 2}, {_id: 7}, {_id: 6}, {_id: 4}, {_id: 5}, {_id: 9}, {_id: 8}, {_id: 1}, {_id: 3}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.c": -1, "a.b": -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.c": -1, "a.b": -1, _id: 1}, [{_id: 2}, {_id: 8}, {_id: 5}, {_id: 9}, {_id: 4}, {_id: 6}, {_id: 7}, {_id: 3}, {_id: 1}]); // More tests for a sort pattern that contains a path of length 2 and a path of length 1, where // one path is a prefix of the other path. -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b": 1, a: 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.b": 1, a: 1, _id: 1}, [{_id: 7}, {_id: 1}, {_id: 2}, {_id: 5}, {_id: 6}, {_id: 8}, {_id: 4}, {_id: 3}, {_id: 9}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b": 1, a: -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.b": 1, a: -1, _id: 1}, [{_id: 7}, {_id: 1}, {_id: 5}, {_id: 2}, {_id: 8}, {_id: 6}, {_id: 3}, {_id: 4}, {_id: 9}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b": -1, a: 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.b": -1, a: 1, _id: 1}, [{_id: 3}, {_id: 8}, {_id: 9}, {_id: 5}, {_id: 6}, {_id: 4}, {_id: 1}, {_id: 2}, {_id: 7}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b": -1, a: -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.b": -1, a: -1, _id: 1}, [{_id: 3}, {_id: 9}, {_id: 8}, {_id: 5}, {_id: 1}, {_id: 4}, {_id: 6}, {_id: 2}, {_id: 7}]); // Test out sort({"a.0.b":1}) on a collection of documents where field "a" and sub-field "b" are // a mix of different types. -assert.eq( - coll.find({}, {_id: 1}).sort({"a.0.b": 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.0.b": 1, _id: 1}, [{_id: 7}, {_id: 1}, {_id: 2}, {_id: 5}, {_id: 8}, {_id: 3}, {_id: 4}, {_id: 6}, {_id: 9}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.0.b": 1, _id: -1}).toArray(), +testSortAndSortWithLimit( + {"a.0.b": 1, _id: -1}, [{_id: 7}, {_id: 1}, {_id: 5}, {_id: 2}, {_id: 8}, {_id: 4}, {_id: 3}, {_id: 6}, {_id: 9}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.0.b": -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.0.b": -1, _id: 1}, [{_id: 9}, {_id: 3}, {_id: 6}, {_id: 1}, {_id: 4}, {_id: 5}, {_id: 8}, {_id: 2}, {_id: 7}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.0.b": -1, _id: -1}).toArray(), +testSortAndSortWithLimit( + {"a.0.b": -1, _id: -1}, [{_id: 9}, {_id: 3}, {_id: 6}, {_id: 4}, {_id: 1}, {_id: 8}, {_id: 5}, {_id: 2}, {_id: 7}]); // Tests for a sort pattern that contains two paths of length 2 that do not have a common prefix. @@ -241,17 +246,17 @@ assert.commandWorked(coll.insert([ {_id: 9, a: {b: "C"}, c: [{d: ["h"]}, {d: ["F"]}]}, ])); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b": 1, "c.d": 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.b": 1, "c.d": 1, _id: 1}, [{_id: 6}, {_id: 1}, {_id: 2}, {_id: 5}, {_id: 3}, {_id: 7}, {_id: 8}, {_id: 4}, {_id: 9}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b": 1, "c.d": -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.b": 1, "c.d": -1, _id: 1}, [{_id: 6}, {_id: 3}, {_id: 5}, {_id: 1}, {_id: 2}, {_id: 4}, {_id: 8}, {_id: 7}, {_id: 9}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b": -1, "c.d": 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.b": -1, "c.d": 1, _id: 1}, [{_id: 2}, {_id: 8}, {_id: 4}, {_id: 6}, {_id: 9}, {_id: 7}, {_id: 1}, {_id: 5}, {_id: 3}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b": -1, "c.d": -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.b": -1, "c.d": -1, _id: 1}, [{_id: 2}, {_id: 9}, {_id: 4}, {_id: 6}, {_id: 8}, {_id: 7}, {_id: 3}, {_id: 5}, {_id: 1}]); // Basic tests for a sort pattern that contains a path of length 3. @@ -269,16 +274,16 @@ assert.commandWorked(coll.insert([ {_id: 9, a: [{b: [{c: []}, {c: ["a", "C"]}]}]} ])); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b.c": 1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.b.c": 1, _id: 1}, [{_id: 2}, {_id: 3}, {_id: 9}, {_id: 5}, {_id: 7}, {_id: 4}, {_id: 8}, {_id: 1}, {_id: 6}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b.c": 1, _id: -1}).toArray(), +testSortAndSortWithLimit( + {"a.b.c": 1, _id: -1}, [{_id: 9}, {_id: 3}, {_id: 2}, {_id: 7}, {_id: 5}, {_id: 8}, {_id: 4}, {_id: 1}, {_id: 6}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b.c": -1, _id: 1}).toArray(), +testSortAndSortWithLimit( + {"a.b.c": -1, _id: 1}, [{_id: 6}, {_id: 8}, {_id: 5}, {_id: 9}, {_id: 1}, {_id: 4}, {_id: 3}, {_id: 7}, {_id: 2}]); -assert.eq( - coll.find({}, {_id: 1}).sort({"a.b.c": -1, _id: -1}).toArray(), +testSortAndSortWithLimit( + {"a.b.c": -1, _id: -1}, [{_id: 8}, {_id: 6}, {_id: 9}, {_id: 5}, {_id: 1}, {_id: 4}, {_id: 7}, {_id: 3}, {_id: 2}]); })(); diff --git a/src/mongo/db/exec/field_name_bloom_filter.h b/src/mongo/db/exec/field_name_bloom_filter.h new file mode 100644 index 00000000000..e53978806d0 --- /dev/null +++ b/src/mongo/db/exec/field_name_bloom_filter.h @@ -0,0 +1,75 @@ +/** + * Copyright (C) 2023-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <cstddef> +#include <cstdint> + +namespace mongo { +typedef uint64_t (*FieldNameBloomFilterHasher)(const char*, size_t); +inline uint64_t fieldNameBloomFilterDefaultHash(const char* str, size_t len) { + const auto shift = static_cast<unsigned char>(str[len / 2]) & 63u; + return uint64_t{1} << shift; +} + +/* + * Tiny bloom filter intended to be used when scanning a list of [key, value] + * pairs (i.e. bson). + */ +template <FieldNameBloomFilterHasher HashFn = fieldNameBloomFilterDefaultHash> +class FieldNameBloomFilter { +public: + /** + * Inserts a string to the bloom filter. + */ + void insert(const char* str, size_t len) { + _data = _data | HashFn(str, len); + } + + /** + * If false is returned, the given string was definitely not inserted. + * If true is returned, the given string may have been inserted. + */ + bool maybeContains(const char* str, size_t len) const { + return static_cast<bool>(_data & HashFn(str, len)); + } + + /** + * Similar to maybeContains() but the caller can compute a hash themselves and check + * whether anything with that hash was maybe-inserted. + */ + bool maybeContainsHash(uint64_t hash) { + return _data & hash; + } + +private: + uint64_t _data = 0; +}; +} // namespace mongo diff --git a/src/mongo/db/exec/sbe/expressions/expression.cpp b/src/mongo/db/exec/sbe/expressions/expression.cpp index e25df50a847..43833ecb1b1 100644 --- a/src/mongo/db/exec/sbe/expressions/expression.cpp +++ b/src/mongo/db/exec/sbe/expressions/expression.cpp @@ -715,6 +715,12 @@ static stdx::unordered_map<std::string, BuiltinFn> kBuiltinFunctions = { {"ftsMatch", BuiltinFn{[](size_t n) { return n == 2; }, vm::Builtin::ftsMatch, false}}, {"generateSortKey", BuiltinFn{[](size_t n) { return n == 2 || n == 3; }, vm::Builtin::generateSortKey, false}}, + {"generateCheapSortKey", + BuiltinFn{ + [](size_t n) { return n == 2 || n == 3; }, vm::Builtin::generateCheapSortKey, false}}, + {"sortKeyComponentVectorGetElement", + BuiltinFn{ + [](size_t n) { return n == 2; }, vm::Builtin::sortKeyComponentVectorGetElement, false}}, {"tsSecond", BuiltinFn{[](size_t n) { return n == 1; }, vm::Builtin::tsSecond, false}}, {"tsIncrement", BuiltinFn{[](size_t n) { return n == 1; }, vm::Builtin::tsIncrement, false}}, {"typeMatch", BuiltinFn{[](size_t n) { return n == 2; }, vm::Builtin::typeMatch, false}}, diff --git a/src/mongo/db/exec/sbe/stages/scan.cpp b/src/mongo/db/exec/sbe/stages/scan.cpp index 56ec29cfe13..5175529f99a 100644 --- a/src/mongo/db/exec/sbe/stages/scan.cpp +++ b/src/mongo/db/exec/sbe/stages/scan.cpp @@ -85,12 +85,10 @@ ScanStage::ScanStage(UUID collectionUuid, _fields.end())); // We cannot use a random cursor if we are seeking or requesting a reverse scan. invariant(!_useRandomCursor || (!_seekKeySlot && _forward)); - // Initialize _fieldsBloomFilter. - _fieldsBloomFilter = 0; for (size_t idx = 0; idx < _fields.size(); ++idx) { const char* str = _fields[idx].c_str(); auto len = _fields[idx].size(); - _fieldsBloomFilter = _fieldsBloomFilter | computeFieldMask(str, len); + _fieldsBloomFilter.insert(str, len); } } @@ -512,7 +510,7 @@ PlanState ScanStage::getNext() { // the field. auto field = bson::fieldNameAndLength(bsonElement); const size_t offset = computeFieldMaskOffset(field.rawData(), field.size()); - if (!(_fieldsBloomFilter & computeFieldMask(offset))) { + if (!(_fieldsBloomFilter.maybeContainsHash(computeFieldMask(offset)))) { bsonElement = bson::advance(bsonElement, field.size()); continue; } diff --git a/src/mongo/db/exec/sbe/stages/scan.h b/src/mongo/db/exec/sbe/stages/scan.h index 4c358efdd75..6642c4ce1e8 100644 --- a/src/mongo/db/exec/sbe/stages/scan.h +++ b/src/mongo/db/exec/sbe/stages/scan.h @@ -30,6 +30,7 @@ #pragma once #include "mongo/config.h" +#include "mongo/db/exec/field_name_bloom_filter.h" #include "mongo/db/exec/sbe/expressions/expression.h" #include "mongo/db/exec/sbe/expressions/runtime_environment.h" #include "mongo/db/exec/sbe/stages/collection_helpers.h" @@ -217,7 +218,7 @@ private: // the accessor quickly rather than having to look it up in the _fieldAccessors hashtable. value::SlotAccessor* _tsFieldAccessor{nullptr}; - uint64_t _fieldsBloomFilter{0}; + FieldNameBloomFilter<computeFieldMask> _fieldsBloomFilter; RecordId _recordId; diff --git a/src/mongo/db/exec/sbe/values/sort_spec.h b/src/mongo/db/exec/sbe/values/sort_spec.h index af48415c539..fad29a4f09f 100644 --- a/src/mongo/db/exec/sbe/values/sort_spec.h +++ b/src/mongo/db/exec/sbe/values/sort_spec.h @@ -29,26 +29,56 @@ #pragma once +#include "mongo/bson/ordering.h" #include "mongo/db/exec/sbe/values/value.h" #include "mongo/db/index/btree_key_generator.h" +#include "mongo/db/index/sort_key_generator.h" +#include "mongo/db/query/sort_pattern.h" namespace mongo::sbe::value { /** * SortSpec is a wrapper around a BSONObj giving a sort pattern (encoded as a BSONObj), a collator, - * and a BtreeKeyGenerator object. + * and a SortKeyGenerator object. */ class SortSpec { public: - SortSpec(const BSONObj& sortPattern) - : _sortPattern(sortPattern.getOwned()), _keyGen(initKeyGen()) {} - SortSpec(const SortSpec& other) : _sortPattern(other._sortPattern), _keyGen(initKeyGen()) {} + SortSpec(const BSONObj& sortPatternBson) + : _sortPatternBson(sortPatternBson.getOwned()), + _sortPattern(_sortPatternBson, nullptr /* expCtx, needed for meta sorts */), + _sortKeyGen(_sortPattern, nullptr /* collator */) { + _localBsonEltStorage.resize(_sortPattern.size()); + _localSortKeyComponentStorage.elts.resize(_sortPattern.size()); + } + SortSpec(const SortSpec& other) + : _sortPatternBson(other._sortPatternBson), + _sortPattern(other._sortPattern), + _sortKeyGen(_sortPattern, nullptr /* collator */) { + _localBsonEltStorage.resize(_sortPattern.size()); + _localSortKeyComponentStorage.elts.resize(_sortPattern.size()); + } SortSpec& operator=(const SortSpec&) = delete; KeyString::Value generateSortKey(const BSONObj& obj, const CollatorInterface* collator); + + /* + * Creates a sort key that's cheaper to generate but more expensive to compare. + * + * The underlying memory for the returned SortKeyComponentVector is owned by the SortSpec + * itself. It is the caller's responsibility to ensure the SortSpec remains alive while the + * return value of this function is used. The return value is valid until the next call to + * generateSortKeyComponentVector(). + * + * If the passed in 'obj' is owned, this class takes ownership of it. If it is not owned, + * the passed in obj must remain alive as long as the return value from this function may + * be used. + */ + value::SortKeyComponentVector* generateSortKeyComponentVector( + FastTuple<bool, value::TypeTags, value::Value> obj, const CollatorInterface* collator); + const BSONObj& getPattern() const { - return _sortPattern; + return _sortPatternBson; } size_t getApproximateSize() const; @@ -56,7 +86,19 @@ public: private: BtreeKeyGenerator initKeyGen() const; - const BSONObj _sortPattern; - const BtreeKeyGenerator _keyGen; + const BSONObj _sortPatternBson; + + const SortPattern _sortPattern; + SortKeyGenerator _sortKeyGen; + + // Storage for the sort key component vector returned by generateSortKeyComponentVector(). + std::vector<BSONElement> _localBsonEltStorage; + value::SortKeyComponentVector _localSortKeyComponentStorage; + + // These members store objects that may be held by the key generator. For example, if the + // caller generates keys using an object that is temporary, it will get stashed here so that it + // remains alive while the sort keys can be used. + BSONObj _tempObj; + boost::optional<ValueGuard> _tempVal; }; } // namespace mongo::sbe::value diff --git a/src/mongo/db/exec/sbe/values/value.cpp b/src/mongo/db/exec/sbe/values/value.cpp index a0089938d8a..4e6fec83b7f 100644 --- a/src/mongo/db/exec/sbe/values/value.cpp +++ b/src/mongo/db/exec/sbe/values/value.cpp @@ -149,27 +149,59 @@ std::pair<TypeTags, Value> makeCopyPcreRegex(const pcre::Regex& regex) { } KeyString::Value SortSpec::generateSortKey(const BSONObj& obj, const CollatorInterface* collator) { - KeyStringSet keySet; - SharedBufferFragmentBuilder allocator(KeyString::HeapBuilder::kHeapAllocatorDefaultBytes); - const bool skipMultikey = false; - MultikeyPaths* multikeyPaths = nullptr; - _keyGen.getKeys(allocator, obj, skipMultikey, &keySet, multikeyPaths, collator); + _sortKeyGen.setCollator(collator); + return _sortKeyGen.computeSortKeyString(obj); +} + +value::SortKeyComponentVector* SortSpec::generateSortKeyComponentVector( + FastTuple<bool, value::TypeTags, value::Value> obj, const CollatorInterface* collator) { + auto [objOwned, objTag, objVal] = obj; + ValueGuard guard(objOwned, objTag, objVal); + + // While this function accepts any type of object, for now we simply convert everything + // to BSON. In the future, we may change this function to avoid the conversion. + auto bsonObj = [&, objTag = objTag, objVal = objVal, objOwned = objOwned]() { + if (objTag == value::TypeTags::bsonObject) { + if (objOwned) { + // Take ownership of the temporary object here. + _tempVal.emplace(objTag, objVal); + guard.reset(); + } + return BSONObj{value::bitcastTo<const char*>(objVal)}; + } else if (objTag == value::TypeTags::Object) { + BSONObjBuilder objBuilder; + bson::convertToBsonObj(objBuilder, value::getObjectView(objVal)); + _tempObj = objBuilder.obj(); + return _tempObj; + } else { + MONGO_UNREACHABLE_TASSERT(7103703); + } + }(); + - // When 'isSparse' is false, BtreeKeyGenerator::getKeys() is guaranteed to insert at least - // one key into 'keySet', so this assertion should always be true. - tassert(5037000, "BtreeKeyGenerator failed to generate key", !keySet.empty()); + _sortKeyGen.setCollator(collator); + // Use the generic API for getting an array of bson elements representing the + // sort key. + _sortKeyGen.generateSortKeyComponentVector(bsonObj, &_localBsonEltStorage); - // Return the first KeyString in the set. - return std::move(*keySet.extract_sequence().begin()); + // Convert this array of BSONElements into the SBE SortKeyComponentVector type. + { + size_t i = 0; + for (auto& elt : _localBsonEltStorage) { + _localSortKeyComponentStorage.elts[i++] = bson::convertFrom<true>(elt); + } + } + return &_localSortKeyComponentStorage; } BtreeKeyGenerator SortSpec::initKeyGen() const { - tassert( - 5037003, "SortSpec should not be passed an empty sort pattern", !_sortPattern.isEmpty()); + tassert(5037003, + "SortSpec should not be passed an empty sort pattern", + !_sortPatternBson.isEmpty()); std::vector<const char*> fields; std::vector<BSONElement> fixed; - for (auto&& elem : _sortPattern) { + for (auto&& elem : _sortPatternBson) { fields.push_back(elem.fieldName()); // BtreeKeyGenerator's constructor's first parameter (the 'fields' vector) and second @@ -183,15 +215,15 @@ BtreeKeyGenerator SortSpec::initKeyGen() const { const bool isSparse = false; auto version = KeyString::Version::kLatestVersion; - auto ordering = Ordering::make(_sortPattern); + auto ordering = Ordering::make(_sortPatternBson); return {std::move(fields), std::move(fixed), isSparse, version, ordering}; } size_t SortSpec::getApproximateSize() const { auto size = sizeof(SortSpec); - size += _sortPattern.isOwned() ? _sortPattern.objsize() : 0; - size += _keyGen.getApproximateSize() - sizeof(_keyGen); + size += _sortKeyGen.getApproximateSize(); + size += _sortPatternBson.isOwned() ? _sortPatternBson.objsize() : 0; return size; } diff --git a/src/mongo/db/exec/sbe/values/value.h b/src/mongo/db/exec/sbe/values/value.h index 8332008796e..01aee0de8ff 100644 --- a/src/mongo/db/exec/sbe/values/value.h +++ b/src/mongo/db/exec/sbe/values/value.h @@ -126,6 +126,10 @@ enum class TypeTags : uint8_t { MinKey, MaxKey, + // Pointer to sort key component vector. This type is always owned within a SortSpec, + // and is never created, copied, or destroyed by SBE. + sortKeyComponentVector, + // Pointer to a struct with data necessary to read values from a columnstore index cell. The // values of this type are fully owned by the column_scan stage and are never created, cloned or // destroyed by SBE. @@ -919,6 +923,13 @@ private: ValueSetType _values; }; +/* + * A vector of values representing a sort key. The values are NOT owned by this object. + */ +struct SortKeyComponentVector { + std::vector<std::pair<value::TypeTags, value::Value>> elts; +}; + bool operator==(const ArraySet& lhs, const ArraySet& rhs); bool operator!=(const ArraySet& lhs, const ArraySet& rhs); @@ -1260,6 +1271,10 @@ inline IndexBounds* getIndexBoundsView(Value val) noexcept { return reinterpret_cast<IndexBounds*>(val); } +inline SortKeyComponentVector* getSortKeyComponentVectorView(Value v) noexcept { + return reinterpret_cast<SortKeyComponentVector*>(v); +} + inline sbe::value::CsiCell* getCsiCellView(Value val) noexcept { return reinterpret_cast<sbe::value::CsiCell*>(val); } diff --git a/src/mongo/db/exec/sbe/values/value_printer.cpp b/src/mongo/db/exec/sbe/values/value_printer.cpp index 2405f698f3f..8a78037c0ff 100644 --- a/src/mongo/db/exec/sbe/values/value_printer.cpp +++ b/src/mongo/db/exec/sbe/values/value_printer.cpp @@ -166,6 +166,9 @@ void ValuePrinter<T>::writeTagToStream(TypeTags tag) { case TypeTags::csiCell: stream << "csiCell"; break; + case TypeTags::sortKeyComponentVector: + stream << "SortKeyComponentVector"; + break; default: stream << "unknown tag"; break; @@ -539,6 +542,13 @@ void ValuePrinter<T>::writeValueToStream(TypeTags tag, Value val, size_t depth) case TypeTags::csiCell: stream << "CsiCell(" << getCsiCellView(val) << ")"; break; + case TypeTags::sortKeyComponentVector: + stream << "SortKeyComponentVector("; + for (auto elt : getSortKeyComponentVectorView(val)->elts) { + stream << elt << ", "; + } + stream << ")"; + break; default: MONGO_UNREACHABLE; } diff --git a/src/mongo/db/exec/sbe/vm/vm.cpp b/src/mongo/db/exec/sbe/vm/vm.cpp index 860a106668b..fd8a9561c2e 100644 --- a/src/mongo/db/exec/sbe/vm/vm.cpp +++ b/src/mongo/db/exec/sbe/vm/vm.cpp @@ -4937,27 +4937,52 @@ FastTuple<bool, value::TypeTags, value::Value> ByteCode::builtinGetRegexFlags(Ar return {true, strType, strValue}; } -FastTuple<bool, value::TypeTags, value::Value> ByteCode::builtinGenerateSortKey(ArityType arity) { +std::pair<value::SortSpec*, CollatorInterface*> ByteCode::generateSortKeyHelper(ArityType arity) { invariant(arity == 2 || arity == 3); auto [ssOwned, ssTag, ssVal] = getFromStack(0); auto [objOwned, objTag, objVal] = getFromStack(1); if (ssTag != value::TypeTags::sortSpec || !value::isObject(objTag)) { - return {false, value::TypeTags::Nothing, 0}; + return {nullptr, nullptr}; } CollatorInterface* collator{nullptr}; if (arity == 3) { auto [collatorOwned, collatorTag, collatorVal] = getFromStack(2); if (collatorTag != value::TypeTags::collator) { - return {false, value::TypeTags::Nothing, 0}; + return {nullptr, nullptr}; } collator = value::getCollatorView(collatorVal); } auto ss = value::getSortSpecView(ssVal); + return {ss, collator}; +} + +FastTuple<bool, value::TypeTags, value::Value> ByteCode::builtinGenerateCheapSortKey( + ArityType arity) { + auto [sortSpec, collator] = generateSortKeyHelper(arity); + if (!sortSpec) { + return {false, value::TypeTags::Nothing, 0}; + } + + // We "move" the object argument into the sort spec. + auto sortKeyComponentVector = + sortSpec->generateSortKeyComponentVector(moveFromStack(1), collator); + + return {false, + value::TypeTags::sortKeyComponentVector, + value::bitcastFrom<value::SortKeyComponentVector*>(sortKeyComponentVector)}; +} + +FastTuple<bool, value::TypeTags, value::Value> ByteCode::builtinGenerateSortKey(ArityType arity) { + auto [sortSpec, collator] = generateSortKeyHelper(arity); + if (!sortSpec) { + return {false, value::TypeTags::Nothing, 0}; + } - auto obj = [objTag = objTag, objVal = objVal]() { + auto [objOwned, objTag, objVal] = getFromStack(1); + auto bsonObj = [objTag = objTag, objVal = objVal]() { if (objTag == value::TypeTags::bsonObject) { return BSONObj{value::bitcastTo<const char*>(objVal)}; } else if (objTag == value::TypeTags::Object) { @@ -4972,7 +4997,26 @@ FastTuple<bool, value::TypeTags, value::Value> ByteCode::builtinGenerateSortKey( return {true, value::TypeTags::ksValue, value::bitcastFrom<KeyString::Value*>( - new KeyString::Value(ss->generateSortKey(obj, collator)))}; + new KeyString::Value(sortSpec->generateSortKey(bsonObj, collator)))}; +} + +FastTuple<bool, value::TypeTags, value::Value> ByteCode::builtinSortKeyComponentVectorGetElement( + ArityType arity) { + invariant(arity == 2); + + auto [sortVecOwned, sortVecTag, sortVecVal] = getFromStack(0); + auto [idxOwned, idxTag, idxVal] = getFromStack(1); + if (sortVecTag != value::TypeTags::sortKeyComponentVector || + idxTag != value::TypeTags::NumberInt32) { + return {false, value::TypeTags::Nothing, 0}; + } + + auto* sortObj = value::getSortKeyComponentVectorView(sortVecVal); + const auto idxInt32 = value::bitcastTo<int32_t>(idxVal); + + invariant(idxInt32 >= 0 && static_cast<size_t>(idxInt32) < sortObj->elts.size()); + auto [outTag, outVal] = sortObj->elts[idxInt32]; + return {false, outTag, outVal}; } FastTuple<bool, value::TypeTags, value::Value> ByteCode::builtinMakeBsonObj(ArityType arity) { @@ -5538,6 +5582,10 @@ FastTuple<bool, value::TypeTags, value::Value> ByteCode::dispatchBuiltin(Builtin return builtinFtsMatch(arity); case Builtin::generateSortKey: return builtinGenerateSortKey(arity); + case Builtin::generateCheapSortKey: + return builtinGenerateCheapSortKey(arity); + case Builtin::sortKeyComponentVectorGetElement: + return builtinSortKeyComponentVectorGetElement(arity); case Builtin::makeBsonObj: return builtinMakeBsonObj(arity); case Builtin::tsSecond: @@ -5773,6 +5821,10 @@ std::string builtinToString(Builtin b) { return "ftsMatch"; case Builtin::generateSortKey: return "generateSortKey"; + case Builtin::generateCheapSortKey: + return "generateCheapSortKey"; + case Builtin::sortKeyComponentVectorGetElement: + return "sortKeyComponentVectorGetElement"; case Builtin::makeBsonObj: return "makeBsonObj"; case Builtin::tsSecond: diff --git a/src/mongo/db/exec/sbe/vm/vm.h b/src/mongo/db/exec/sbe/vm/vm.h index 20dd1983f2f..ab5465221e9 100644 --- a/src/mongo/db/exec/sbe/vm/vm.h +++ b/src/mongo/db/exec/sbe/vm/vm.h @@ -726,6 +726,9 @@ enum class Builtin : uint8_t { hash, ftsMatch, generateSortKey, + generateCheapSortKey, + sortKeyComponentVectorGetElement, + makeBsonObj, tsSecond, tsIncrement, @@ -1378,7 +1381,11 @@ private: FastTuple<bool, value::TypeTags, value::Value> builtinGetRegexFlags(ArityType arity); FastTuple<bool, value::TypeTags, value::Value> builtinHash(ArityType arity); FastTuple<bool, value::TypeTags, value::Value> builtinFtsMatch(ArityType arity); + std::pair<value::SortSpec*, CollatorInterface*> generateSortKeyHelper(ArityType arity); FastTuple<bool, value::TypeTags, value::Value> builtinGenerateSortKey(ArityType arity); + FastTuple<bool, value::TypeTags, value::Value> builtinGenerateCheapSortKey(ArityType arity); + FastTuple<bool, value::TypeTags, value::Value> builtinSortKeyComponentVectorGetElement( + ArityType arity); FastTuple<bool, value::TypeTags, value::Value> builtinMakeBsonObj(ArityType arity); FastTuple<bool, value::TypeTags, value::Value> builtinTsSecond(ArityType arity); FastTuple<bool, value::TypeTags, value::Value> builtinTsIncrement(ArityType arity); diff --git a/src/mongo/db/index/sort_key_generator.cpp b/src/mongo/db/index/sort_key_generator.cpp index 34bfbb7f61a..31c8fe4758c 100644 --- a/src/mongo/db/index/sort_key_generator.cpp +++ b/src/mongo/db/index/sort_key_generator.cpp @@ -36,27 +36,29 @@ #include "mongo/util/overloaded_visitor.h" namespace mongo { - -SortKeyGenerator::SortKeyGenerator(SortPattern sortPattern, const CollatorInterface* collator) - : _collator(collator), _sortPattern(std::move(sortPattern)) { +namespace { +BSONObj createSortSpecWithoutMeta(const SortPattern& sortPattern) { BSONObjBuilder btreeBob; - size_t nFields = 0; - - for (auto&& part : _sortPattern) { + for (auto&& part : sortPattern) { if (part.fieldPath) { btreeBob.append(part.fieldPath->fullPath(), part.isAscending ? 1 : -1); - ++nFields; } } + return btreeBob.obj(); +} +} // namespace - // The fake index key pattern used to generate Btree keys. - _sortSpecWithoutMeta = btreeBob.obj(); - _sortHasMeta = nFields < _sortPattern.size(); - +SortKeyGenerator::SortKeyGenerator(SortPattern sortPattern, const CollatorInterface* collator) + : _collator(collator), + _sortPattern(std::move(sortPattern)), + _sortSpecWithoutMeta(createSortSpecWithoutMeta(_sortPattern)), + _ordering(Ordering::make(_sortSpecWithoutMeta)), + _sortHasMeta(static_cast<size_t>(_sortSpecWithoutMeta.nFields()) < _sortPattern.size()) { // If we're just sorting by meta, don't bother creating an index key generator. if (_sortSpecWithoutMeta.isEmpty()) { return; } + const auto nFields = _sortSpecWithoutMeta.nFields(); // We'll need to treat arrays as if we were to create an index over them. that is, we may need // to unnest the first level and consider each array element to decide the sort order. In order @@ -69,11 +71,24 @@ SortKeyGenerator::SortKeyGenerator(SortPattern sortPattern, const CollatorInterf } constexpr bool isSparse = false; - _indexKeyGen = std::make_unique<BtreeKeyGenerator>(fieldNames, - fixed, - isSparse, - KeyString::Version::kLatestVersion, - Ordering::make(_sortSpecWithoutMeta)); + _indexKeyGen = std::make_unique<BtreeKeyGenerator>( + fieldNames, fixed, isSparse, KeyString::Version::kLatestVersion, _ordering); + + { + // TODO SERVER-74725: Remove this. + std::set<std::string> fieldNameSet; + for (auto& fn : fieldNames) { + fieldNameSet.insert(fn); + } + _sortHasRepeatKey = (fieldNameSet.size() != fieldNames.size()); + } + + if (!_sortHasMeta && !_sortHasRepeatKey) { + size_t i = 0; + for (auto&& keyPart : _sortPattern) { + _sortKeyTreeRoot.addSortPatternPart(&keyPart, 0, i++); + } + } } Value SortKeyGenerator::computeSortKey(const WorkingSetMember& wsm) const { @@ -84,6 +99,21 @@ Value SortKeyGenerator::computeSortKey(const WorkingSetMember& wsm) const { return computeSortKeyFromIndexKey(wsm); } +KeyString::Value SortKeyGenerator::computeSortKeyString(const BSONObj& obj) const { + // This function could be optimized to have a fast path for the non-array case. + + KeyStringSet keySet; + SharedBufferFragmentBuilder allocator(KeyString::HeapBuilder::kHeapAllocatorDefaultBytes); + const bool skipMultikey = false; + MultikeyPaths* multikeyPaths = nullptr; + _indexKeyGen->getKeys(allocator, obj, skipMultikey, &keySet, multikeyPaths, _collator); + + // When 'isSparse' is false, BtreeKeyGenerator::getKeys() is guaranteed to insert at least + // one key into 'keySet', so this assertion should always be true. + tassert(5037000, "BtreeKeyGenerator failed to generate key", !keySet.empty()); + return std::move(*keySet.extract_sequence().begin()); +} + Value SortKeyGenerator::computeSortKeyFromIndexKey(const WorkingSetMember& member) const { invariant(member.getState() == WorkingSetMember::RID_AND_IDX); invariant(!_sortHasMeta); @@ -314,4 +344,110 @@ Value SortKeyGenerator::computeSortKeyFromDocument(const Document& doc, extractKeyWithArray(doc, metadata)); } + +bool SortKeyGenerator::fastFillOutSortKeyParts(const BSONObj& bson, + const SortKeyGenerator::SortKeyTreeNode& tree, + std::vector<BSONElement>* out) { + // Walk the BSON once and fill out the sort key parts. + // Returns false if array is encountered and fallback is needed. + BSONObjIterator it(bson); + + size_t childrenVisited = 0; + while (it.more()) { + auto elt = it.next(); + + const SortKeyGenerator::SortKeyTreeNode* childNode = nullptr; + for (auto& child : tree.children) { + auto fieldNameSd = elt.fieldNameStringData(); + if (tree.bloomFilter.maybeContains(fieldNameSd.rawData(), fieldNameSd.size())) { + // Could use a hash table, but sort patterns are small so brute force search is good + // enough. + if (child->name == fieldNameSd) { + childNode = child.get(); + break; + } + } + } + + if (childNode) { + if (elt.type() == BSONType::Array) { + // Slow path needed. + return false; + } + + if (childNode->part) { + (*out)[childNode->partIdx] = elt; + } + + if (elt.type() == BSONType::Object && !childNode->children.empty()) { + if (!fastFillOutSortKeyParts(elt.embeddedObject(), *childNode, out)) { + return false; + } + } + + ++childrenVisited; + if (childrenVisited == tree.children.size()) { + return true; + } + } + } + + return true; +} + +void SortKeyGenerator::generateSortKeyComponentVector(const BSONObj& bson, + std::vector<BSONElement>* eltsOut) { + tassert(7103704, "Sort cannot have meta", !_sortHasMeta); + tassert(7103701, "Sort cannot have repeat keys", !_sortHasRepeatKey); + tassert(7103702, "Cannot pass null as eltsOut", eltsOut); + const static BSONObj kBsonWithNull = BSON("" << NullLabeler{}); + const static BSONElement kNullElement = kBsonWithNull.firstElement(); + + for (size_t i = 0; i < eltsOut->size(); ++i) { + // In MQL missing sort keys are substituted with JS null. + (*eltsOut)[i] = kNullElement; + } + + const bool fastPathSucceeded = fastFillOutSortKeyParts(bson, _sortKeyTreeRoot, eltsOut); + + if (fastPathSucceeded) { + if (_collator) { + // Eventually we could reuse the buffer from _localObjStorage here. + BSONObjBuilder bob; + for (auto elt : *eltsOut) { + CollationIndexKey::collationAwareIndexKeyAppend(elt, _collator, &bob); + } + _localObjStorage = bob.obj(); + + { + size_t i = 0; + for (auto elt : _localObjStorage) { + (*eltsOut)[i++] = elt; + } + } + } + + // Fast path succeeded, we're done. + return; + } + + // Slow, generic path, used when an array was encountered. + Document doc(bson); + DocumentMetadataFields meta; + + Value sortKeyVal = computeSortKeyFromDocument(doc, meta); + + Document outDoc(std::vector<std::pair<StringData, Value>>{{""_sd, sortKeyVal}}); + _localObjStorage = outDoc.toBson(); + if (_localObjStorage.firstElement().type() == BSONType::Array) { + size_t i = 0; + for (auto& elt : _localObjStorage.firstElement().embeddedObject()) { + (*eltsOut)[i++] = elt; + } + } else { + (*eltsOut)[0] = _localObjStorage.firstElement(); + } +} + + } // namespace mongo diff --git a/src/mongo/db/index/sort_key_generator.h b/src/mongo/db/index/sort_key_generator.h index 40e270dac21..e2fe5ed0dcf 100644 --- a/src/mongo/db/index/sort_key_generator.h +++ b/src/mongo/db/index/sort_key_generator.h @@ -30,11 +30,14 @@ #pragma once #include "mongo/bson/bsonobj.h" +#include "mongo/db/exec/field_name_bloom_filter.h" #include "mongo/db/exec/working_set.h" #include "mongo/db/index/btree_key_generator.h" #include "mongo/db/operation_context.h" #include "mongo/db/query/collation/collator_interface.h" #include "mongo/db/query/sort_pattern.h" +#include "mongo/db/storage/key_string.h" +#include "mongo/util/assert_util.h" namespace mongo { @@ -58,6 +61,18 @@ public: Value computeSortKey(const WorkingSetMember&) const; /** + * Computes a KeyString that can be used as the sort key for this object. + */ + KeyString::Value computeSortKeyString(const BSONObj& bson) const; + + /** + * Determines all of the portions of the sort key for the given document and populates the + * output vector with their positions. The results in the output vector are valid until the + * next call. + */ + void generateSortKeyComponentVector(const BSONObj& obj, std::vector<BSONElement>* out); + + /** * Returns the sort key for the input 'doc' as a Value or throws if no key could be generated. * When the sort pattern has multiple components, the resulting sort key is an Array-typed Value * with one element for each component. For sort patterns with just one component, the sort key @@ -79,7 +94,68 @@ public: return _sortPattern; } + void setCollator(const CollatorInterface* c) { + _collator = c; + } + + size_t getApproximateSize() const { + return sizeof(this) + (_indexKeyGen ? _indexKeyGen->getApproximateSize() : 0) + + _sortKeyTreeRoot.getApproximateSize() + _sortSpecWithoutMeta.objsize() + + _localObjStorage.objsize(); + } + private: + /* Tree representation of the sort key pattern. E.g. {a.b:1, a.x: 1} + * a + * / \ + * b x + * This is used for the fast path where the sort key is generated in one pass over the bson. + * This is only used when the sort pattern does not include a $meta. + */ + struct SortKeyTreeNode { + std::string name; + const SortPattern::SortPatternPart* part = nullptr; // Pointer into the SortPattern. + std::vector<std::unique_ptr<SortKeyTreeNode>> children; + size_t partIdx = 0; + + // Tracks field names of the children. We use this when scanning the bson to quickly skip + // over fields irrelevant to the sort pattern. + FieldNameBloomFilter<> bloomFilter; + + // Adds a new component of the sort pattern to the tree. + void addSortPatternPart(const SortPattern::SortPatternPart* part, + const size_t pathIdx, + const size_t partIdx) { + if (pathIdx == part->fieldPath->getPathLength()) { + tassert(7103700, "Invalid sort tree", !this->part); + this->part = part; + this->partIdx = partIdx; + return; + } + + for (auto& c : children) { + // Check if we already have a child with the same prefix. + if (c->name == part->fieldPath->getFieldName(pathIdx)) { + c->addSortPatternPart(part, pathIdx + 1, partIdx); + return; + } + } + + children.push_back(std::make_unique<SortKeyTreeNode>()); + children.back()->name = part->fieldPath->getFieldName(pathIdx).toString(); + children.back()->addSortPatternPart(part, pathIdx + 1, partIdx); + bloomFilter.insert(children.back()->name.c_str(), children.back()->name.size()); + } + + size_t getApproximateSize() const { + size_t size = sizeof(this) + name.size(); + for (auto& c : children) { + size += c->getApproximateSize(); + } + return size; + } + }; + // Returns the sort key for the input 'doc' as a Value. // // Note that this function will ignore any metadata (e.g., textScore, randVal), in 'doc' but @@ -135,6 +211,12 @@ private: // should always be sorted with the simple (i.e. binary) collation. Value getCollationComparisonKey(const Value& val) const; + // Fast path for reading a BSON obj and computing the sort key. Returns true on success, false + // when an array is encountered and the slow path needs to be used instead. + bool fastFillOutSortKeyParts(const BSONObj& bson, + const SortKeyGenerator::SortKeyTreeNode& tree, + std::vector<BSONElement>* out); + const CollatorInterface* _collator = nullptr; SortPattern _sortPattern; @@ -142,11 +224,18 @@ private: // The sort pattern with any $meta sort components stripped out, since the underlying index key // generator does not understand $meta sort. BSONObj _sortSpecWithoutMeta; + Ordering _ordering; // If we're not sorting with a $meta value we can short-cut some work. bool _sortHasMeta = false; + bool _sortHasRepeatKey = false; + std::unique_ptr<BtreeKeyGenerator> _indexKeyGen; + + // Used for fastFillOutSortKeyParts()/extractSortKeyParts(). + SortKeyTreeNode _sortKeyTreeRoot; + BSONObj _localObjStorage; }; } // namespace mongo diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp index a63b0e7b516..9178d70e863 100644 --- a/src/mongo/db/query/sbe_stage_builder.cpp +++ b/src/mongo/db/query/sbe_stage_builder.cpp @@ -1364,27 +1364,66 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder std::move(stage), std::move(sortExpressions), root->nodeId()); } else { - // Handle the case where two or more parts of the sort pattern have a common prefix. - orderBy = _slotIdGenerator.generateMultiple(1); - direction = {sbe::value::SortDirection::Ascending}; + // When there's no limit on the sort, the dominating factor is number of comparisons + // (nlogn). A sort with a limit of k requires only nlogk comparisons. When k is small, the + // number of key generations (n) can actually dominate the runtime. So for all top-k sorts + // we use a "cheap" sort key: it's cheaper to construct but more expensive to compare. The + // assumption here is that k << n. + + StringData sortKeyGenerator = sn->limit ? "generateCheapSortKey" : "generateSortKey"; auto sortSpec = std::make_unique<sbe::value::SortSpec>(sn->pattern); auto sortSpecExpr = makeConstant(sbe::value::TypeTags::sortSpec, sbe::value::bitcastFrom<sbe::value::SortSpec*>(sortSpec.release())); + const auto fullSortKeySlot = _slotIdGenerator.generate(); + // generateSortKey() will handle the parallel arrays check and sort key traversal for us, // so we don't need to generate our own sort key traversal logic in the SBE plan. stage = sbe::makeProjectStage(std::move(stage), root->nodeId(), - orderBy[0], - collatorSlot ? makeFunction("generateSortKey", + fullSortKeySlot, + collatorSlot ? makeFunction(sortKeyGenerator, std::move(sortSpecExpr), makeVariable(outputSlotId), makeVariable(*collatorSlot)) - : makeFunction("generateSortKey", + : makeFunction(sortKeyGenerator, std::move(sortSpecExpr), makeVariable(outputSlotId))); + + if (sortKeyGenerator == "generateSortKey") { + // In this case generateSortKey() produces a mem-comparable KeyString so we use for + // the comparison. We always sort in ascending order because the KeyString takes the + // ordering into account. + orderBy = {fullSortKeySlot}; + direction = {sbe::value::SortDirection::Ascending}; + } else { + // Generate the cheap sort key represented as an array then extract each component into + // a slot: + // + // sort [s1, s2] [asc, dsc] ... + // project s1=getElement(fullSortKey,0), s2=getElement(fullSortKey,1) + // project fullSortKey=generateSortKeyCheap(bson) + sbe::value::SlotMap<std::unique_ptr<sbe::EExpression>> prjSlotToExprMap; + + int i = 0; + for (const auto& part : sortPattern) { + auto sortKeySlot = _slotIdGenerator.generate(); + + orderBy.push_back(sortKeySlot); + direction.push_back(part.isAscending ? sbe::value::SortDirection::Ascending + : sbe::value::SortDirection::Descending); + + prjSlotToExprMap[sortKeySlot] = + makeFunction("sortKeyComponentVectorGetElement", + makeVariable(fullSortKeySlot), + makeConstant(sbe::value::TypeTags::NumberInt32, i)); + ++i; + } + stage = sbe::makeS<sbe::ProjectStage>( + std::move(stage), std::move(prjSlotToExprMap), root->nodeId()); + } } // Slots for sort stage to forward to parent stage. Values in these slots are not used during |