summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ruoxin Xu <ruoxin.xu@mongodb.com> 2022-03-15 15:40:06 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-03-15 16:40:02 +0000
commit 63a4f1e011860e34b8a29b6171e3cbdd58f15c8d (patch)
tree 9040f6bb43278ae541d1c647d5361b3234abb7a5
parent e43a65ada2aea66c9ab21134bbd72a71fa581758 (diff)
download mongo-63a4f1e011860e34b8a29b6171e3cbdd58f15c8d.tar.gz
SERVER-61421 Change SBE plan cache key encoding to reflect auto-parameterization
-rw-r--r-- buildscripts/resmokeconfig/fully_disabled_feature_flags.yml 1
-rw-r--r-- jstests/noPassthrough/sbe_plan_cache_autoparameterize_collscan.js 421
-rw-r--r-- src/mongo/db/matcher/expression.h 2
-rw-r--r-- src/mongo/db/pipeline/expression_walker.h 2
-rw-r--r-- src/mongo/db/query/canonical_query.cpp 9
-rw-r--r-- src/mongo/db/query/canonical_query_encoder.cpp 436
-rw-r--r-- src/mongo/db/query/get_executor.cpp 16
-rw-r--r-- src/mongo/db/query/query_feature_flags.idl 7
-rw-r--r-- src/mongo/db/query/sbe_stage_builder.h 1
9 files changed, 881 insertions, 14 deletions
diff --git a/buildscripts/resmokeconfig/fully_disabled_feature_flags.yml b/buildscripts/resmokeconfig/fully_disabled_feature_flags.yml
index c86f4bae027..036bf0029fd 100644
--- a/buildscripts/resmokeconfig/fully_disabled_feature_flags.yml
+++ b/buildscripts/resmokeconfig/fully_disabled_feature_flags.yml
@@ -11,3 +11,4 @@
# Disable featureFlagSBELookupPushdown until integration tests can pass with basic SBE
# lookup translation.
- featureFlagSBELookupPushdown
+- featureFlagAutoParameterization
diff --git a/jstests/noPassthrough/sbe_plan_cache_autoparameterize_collscan.js b/jstests/noPassthrough/sbe_plan_cache_autoparameterize_collscan.js
new file mode 100644
index 00000000000..13074276090
--- /dev/null
+++ b/jstests/noPassthrough/sbe_plan_cache_autoparameterize_collscan.js
@@ -0,0 +1,421 @@
+/**
+ * Tests that auto-parameterized collection scan plans are correctly stored in the SBE plan
+ * cache, and that they can be correctly recovered from the cache with new parameter values.
+ *
+ * TODO SERVER-64137: Move this test to jstests/core/ once we no longer need to specially configure
+ * 'featureFlagAutoParameterization'.
+ */
+(function() {
+"use strict";
+
+load("jstests/libs/analyze_plan.js");
+load("jstests/libs/sbe_util.js");
+
+// TODO SERVER-64315: re-enable this test. This test depends on caching single solution plans,
+// which is disabled temporarily due to a bug.
+if (true) {
+ jsTest.log("This test is temporarily disabled");
+ return;
+}
+
+const conn = MongoRunner.runMongod({setParameter: "featureFlagAutoParameterization=true"});
+assert.neq(conn, null, "mongod failed to start up");
+
+const dbName = jsTestName();
+const db = conn.getDB(dbName);
+
+// This test is specifically verifying the behavior of the SBE plan cache. So if either the SBE plan
+// cache or SBE itself is disabled, bail out.
+if (!checkSBEEnabled(db, ["featureFlagSbePlanCache"])) {
+ jsTestLog("Skipping test because either SBE engine or SBE plan cache is disabled");
+ MongoRunner.stopMongod(conn);
+ return;
+}
+
+assert.commandWorked(db.dropDatabase());
+const coll = db.coll;
+
+let data = [
+ {_id: 0, a: 1, c: "foo"},
+ {_id: 1, a: 2, c: "foo"},
+ {_id: 2, a: 3, c: "foo"},
+ {_id: 3, a: 4, c: "foo"},
+ {_id: 4, a: 4, c: "foo"},
+ {_id: 5, a: [3, 4, 5, 6], c: "foo"},
+ {_id: 6, a: [3, 5, 8], c: "foo"},
+ {_id: 7, c: "foo"},
+ {_id: 8, a: [], c: "foo"},
+ {_id: 9, a: undefined, c: "foo"},
+ {_id: 10, a: null, c: "foo"},
+ {_id: 11, a: [{b: 3}, {b: 4}], c: "foo"},
+ {_id: 12, a: [{b: 5}, {b: 6}], c: "foo"},
+ {_id: 13, a: "foo", c: "foo"},
+ {_id: 14, a: /foo/, c: "foo"},
+ {_id: 15, a: "zbarz", c: "foo"},
+ // A 12-byte BinData where the last 6 bits are 1 and all preceding bits are 0.
+ {_id: 16, a: BinData(0, "AAAAAAAAAAAAAAA/"), c: "foo"},
+];
+assert.commandWorked(coll.insert(data));
+
+function assertSbePlanCacheEntryExists(cacheKey) {
+ const entries =
+ coll.aggregate([{$planCacheStats: {}}, {$match: {planCacheKey: cacheKey}}]).toArray();
+ assert.eq(entries.length, 1, entries);
+ const entry = entries[0];
+ // The version:"2" field indicates that this is an SBE plan cache entry.
+ assert.eq(entry.version, "2", entry);
+ assert.eq(entry.planCacheKey, cacheKey, entry);
+ // Since there is only ever one possible candidate plan (collection scan), we expect the cache
+ // entry to be both active and pinned.
+ assert.eq(entry.isActive, true, entry);
+ assert.eq(entry.isPinned, true, entry);
+}
+
+// Given a document with the format {query: <filter>, projection: <projection>, sort: <sort>}, where
+// each field is optional, runs the corresponding find command and returns the results as an array.
+function runFindCommandFromShapeDoc(shape) {
+ let cursor = coll.find(shape.query, shape.projection);
+ if (shape.sort) {
+ cursor = cursor.sort(shape.sort);
+ }
+ return cursor.toArray();
+}
+
+/**
+ * Runs a single end-to-end test case for auto-parameterization of collection scan plans.
+ * - 'shape1' is a description of a find command as a document {query: <filter>, projection:
+ * <projection>, sort: <sort>}.
+ * - 'expectedResults1' is an array containing the results expected from running 'shape1' against
+ * the test collection. This function verifies that the actual results match the expected ones.
+ * The order of the result set is not considered significant (since not all test queries specify a
+ * sort).
+ * - 'shape2' is a second find command, expressed with the same format as 'shape1' and whose
+ * results are compared to 'expectedResults2'. Again, the order of the result set is not
+ * significant.
+ * - If 'sameCacheKey' is true, then verifies that 'shape1' and 'shape2' have the same plan cache
+ * key using $planCacheStats. Otherwise, verifies that the two test queries have different plan
+ * cache keys.
+ *
+ * Also uses $planCacheStats to verify that the expected cache entries are created.
+ */
+function runTest(shape1, expectedResults1, shape2, expectedResults2, sameCacheKey) {
+ // Flush the cache before starting the test to make sure we are starting from a clean slate.
+ coll.getPlanCache().clear();
+
+ for (let shape of [shape1, shape2]) {
+ shape.collection = coll;
+ shape.db = db;
+ }
+
+ const cacheKey1 = getPlanCacheKeyFromShape(shape1);
+ const cacheKey2 = getPlanCacheKeyFromShape(shape2);
+ if (sameCacheKey) {
+ assert.eq(cacheKey1, cacheKey2, "expected SBE plan cache keys to be the same");
+ } else {
+ assert.neq(cacheKey1, cacheKey2, "expected SBE plan cache keys to be different");
+ }
+
+ // Run each query twice in order to make sure that each query still returns the same results
+ // after the state of the cache has been altered.
+ [...Array(2)].forEach(() => {
+ const actualResults1 = runFindCommandFromShapeDoc(shape1);
+ assert.sameMembers(actualResults1, expectedResults1, shape1);
+ assertSbePlanCacheEntryExists(cacheKey1);
+
+ const actualResults2 = runFindCommandFromShapeDoc(shape2);
+ assert.sameMembers(actualResults2, expectedResults2, shape2);
+ assertSbePlanCacheEntryExists(cacheKey2);
+ });
+}
+
+// Test basic auto-parameterization of $eq.
+runTest({query: {a: 1}},
+ [{_id: 0, a: 1, c: "foo"}],
+ {query: {a: 4}},
+ [{_id: 3, a: 4, c: "foo"}, {_id: 4, a: 4, c: "foo"}, {_id: 5, a: [3, 4, 5, 6], c: "foo"}],
+ true);
+
+// Test that different projections result in different cache keys.
+runTest({query: {a: 1}, projection: {_id: 0}},
+ [{a: 1, c: "foo"}],
+ {query: {a: 4}, projection: {_id: 0, c: 0}},
+ [{a: 4}, {a: 4}, {a: [3, 4, 5, 6]}],
+ false);
+
+// Test that different sorts result in different cache keys.
+runTest({query: {a: 1}, sort: {_id: -1}, projection: {c: 0}},
+ [{_id: 0, a: 1}],
+ {query: {a: 4}, sort: {_id: 1}, projection: {c: 0}},
+ [{_id: 3, a: 4}, {_id: 4, a: 4}, {_id: 5, a: [3, 4, 5, 6]}],
+ false);
+
+// Queries on different paths should result in different cache keys.
+runTest({query: {a: 1}},
+ [{_id: 0, a: 1, c: "foo"}],
+ {query: {"a.b": 6}},
+ [{_id: 12, a: [{b: 5}, {b: 6}], c: "foo"}],
+ false);
+
+// Test $eq:null queries do not get auto-parameterized.
+runTest({query: {a: 1}, projection: {c: 0}},
+ [{_id: 0, a: 1}],
+ {query: {a: null}, projection: {c: 0}},
+ [{_id: 7}, {_id: 9, a: undefined}, {_id: 10, a: null}],
+ false);
+
+// Test basic auto-parameterization of $lt.
+runTest({query: {a: {$lt: 3}}, projection: {c: 0}},
+ [{_id: 0, a: 1}, {_id: 1, a: 2}],
+ {query: {a: {$lt: 4}}, projection: {c: 0}},
+ [
+ {_id: 0, a: 1},
+ {_id: 1, a: 2},
+ {_id: 2, a: 3},
+ {_id: 5, a: [3, 4, 5, 6]},
+ {_id: 6, a: [3, 5, 8]}
+ ],
+ true);
+
+// Test basic auto-parameterization of $lte.
+runTest({query: {a: {$lte: 2}}, projection: {c: 0}},
+ [{_id: 0, a: 1}, {_id: 1, a: 2}],
+ {query: {a: {$lte: 3}}, projection: {c: 0}},
+ [
+ {_id: 0, a: 1},
+ {_id: 1, a: 2},
+ {_id: 2, a: 3},
+ {_id: 5, a: [3, 4, 5, 6]},
+ {_id: 6, a: [3, 5, 8]}
+ ],
+ true);
+
+// Test basic auto-parameterization of $gt.
+runTest({query: {a: {$gt: 5}}, projection: {c: 0}},
+ [{_id: 5, a: [3, 4, 5, 6]}, {_id: 6, a: [3, 5, 8]}],
+ {query: {a: {$gt: 6}}, projection: {c: 0}},
+ [{_id: 6, a: [3, 5, 8]}],
+ true);
+
+// Test basic auto-parameterization of $gte.
+runTest({query: {a: {$gte: 6}}, projection: {c: 0}},
+ [{_id: 5, a: [3, 4, 5, 6]}, {_id: 6, a: [3, 5, 8]}],
+ {query: {a: {$gte: 7}}, projection: {c: 0}},
+ [{_id: 6, a: [3, 5, 8]}],
+ true);
+
+// Test basic auto-parameterization of $bitsAllClear.
+runTest({query: {a: {$bitsAllClear: [0, 3]}}, projection: {_id: 1}},
+ [{_id: 1}, {_id: 3}, {_id: 4}, {_id: 5}, {_id: 16}],
+ {query: {a: {$bitsAllClear: [0, 2, 65]}}, projection: {_id: 1}},
+ [{_id: 1}, {_id: 6}, {_id: 16}],
+ true);
+
+// Test basic auto-parameterization of $bitsAllSet.
+runTest({query: {a: {$bitsAllSet: [0, 2]}}, projection: {_id: 1}},
+ [{_id: 5}, {_id: 6}],
+ {query: {a: {$bitsAllSet: [0, 1]}}, projection: {_id: 1}},
+ [{_id: 2}, {_id: 5}, {_id: 6}],
+ true);
+
+// Test basic auto-parameterization of $bitsAnyClear.
+runTest({query: {a: {$bitsAnyClear: 1}}, projection: {_id: 1}},
+ [{_id: 1}, {_id: 3}, {_id: 4}, {_id: 5}, {_id: 6}, {_id: 16}],
+ {query: {a: {$bitsAnyClear: 3}}, projection: {_id: 1}},
+ [{_id: 0}, {_id: 1}, {_id: 3}, {_id: 4}, {_id: 5}, {_id: 6}, {_id: 16}],
+ true);
+
+// Test basic auto-parameterization of $bitsAnySet.
+runTest({query: {a: {$bitsAnySet: 1}}, projection: {_id: 1}},
+ [{_id: 0}, {_id: 2}, {_id: 5}, {_id: 6}],
+ {query: {a: {$bitsAnySet: 3}}, projection: {_id: 1}},
+ [{_id: 0}, {_id: 1}, {_id: 2}, {_id: 5}, {_id: 6}],
+ true);
+
+// Auto-parameterization of bit-test operators should work even if looking past 64 bits is required
+// in order to match against binary data.
+runTest({query: {a: {$bitsAllSet: [0, 94]}}, projection: {_id: 1}},
+ [],
+ {query: {a: {$bitsAllSet: [88, 89, 90, 91, 92, 93]}}, projection: {_id: 1}},
+ [{_id: 16}],
+ true);
+
+// Test auto-parameterization of $elemMatch object.
+runTest({query: {a: {$elemMatch: {b: {$gt: 3, $lt: 5}}}}, projection: {_id: 1}},
+ [{_id: 11}],
+ {query: {a: {$elemMatch: {b: {$gt: 4, $lt: 6}}}}, projection: {_id: 1}},
+ [{_id: 12}],
+ true);
+
+// Test a conjunction with two auto-parameterized predicates.
+runTest({query: {$and: [{a: 3}, {a: 6}]}, projection: {_id: 1}},
+ [{_id: 5}],
+ {query: {$and: [{a: 5}, {a: 8}]}, projection: {_id: 1}},
+ [{_id: 6}],
+ true);
+
+// Test a disjunction with two auto-parameterized predicates.
+runTest({query: {$or: [{a: 3}, {a: 6}]}, projection: {_id: 1}},
+ [{_id: 2}, {_id: 5}, {_id: 6}],
+ {query: {$or: [{a: 1}, {a: 4}]}, projection: {_id: 1}},
+ [{_id: 0}, {_id: 3}, {_id: 4}, {_id: 5}],
+ true);
+
+// Test a $nor with three auto-parameterized child predicates.
+runTest({query: {$nor: [{a: 3}, {a: 6}], a: {$type: "number"}}, projection: {_id: 1}},
+ [{_id: 0}, {_id: 1}, {_id: 3}, {_id: 4}],
+ {query: {$nor: [{a: 1}, {a: 4}], a: {$type: "number"}}, projection: {_id: 1}},
+ [{_id: 1}, {_id: 2}, {_id: 6}],
+ true);
+
+// Test an auto-parameterized $ne.
+runTest({query: {$and: [{a: {$ne: 4}}, {a: {$type: "number"}}]}, projection: {_id: 1}},
+ [{_id: 0}, {_id: 1}, {_id: 2}, {_id: 6}],
+ {query: {$and: [{a: {$ne: 6}}, {a: {$type: "number"}}]}, projection: {_id: 1}},
+ [{_id: 0}, {_id: 1}, {_id: 2}, {_id: 3}, {_id: 4}, {_id: 6}],
+ true);
+
+// Test an auto-parameterized $not-$lt.
+runTest({query: {$and: [{a: {$not: {$lt: 4}}}, {a: {$type: "number"}}]}, projection: {_id: 1}},
+ [{_id: 3}, {_id: 4}],
+ {query: {$and: [{a: {$not: {$lt: 3}}}, {a: {$type: "number"}}]}, projection: {_id: 1}},
+ [{_id: 2}, {_id: 3}, {_id: 4}, {_id: 5}, {_id: 6}],
+ true);
+
+// Verify that $exists queries are not auto-parameterized, meaning that $exists:true and
+// $exists:false queries get different cache keys.
+runTest({query: {a: {$exists: true}}, projection: {_id: 1}},
+ [
+ {_id: 0},
+ {_id: 1},
+ {_id: 2},
+ {_id: 3},
+ {_id: 4},
+ {_id: 5},
+ {_id: 6},
+ {_id: 8},
+ {_id: 9},
+ {_id: 10},
+ {_id: 11},
+ {_id: 12},
+ {_id: 13},
+ {_id: 14},
+ {_id: 15},
+ {_id: 16},
+ ],
+ {query: {a: {$exists: false}}, projection: {_id: 1}},
+ [{_id: 7}],
+ false);
+
+// Test that comparisons expressed as $expr are not auto-parameterized.
+runTest({query: {$expr: {$eq: ["$a", 3]}}, projection: {_id: 1}},
+ [{_id: 2}],
+ {query: {$expr: {$eq: ["$a", 4]}}, projection: {_id: 1}},
+ [{_id: 3}, {_id: 4}],
+ false);
+runTest({query: {$expr: {$lt: ["$a", 3]}, a: {$type: "number"}}, projection: {_id: 1}},
+ [{_id: 0}, {_id: 1}],
+ {query: {$expr: {$lt: ["$a", 4]}, a: {$type: "number"}}, projection: {_id: 1}},
+ [{_id: 0}, {_id: 1}, {_id: 2}],
+ false);
+runTest({query: {$expr: {$lte: ["$a", 3]}, a: {$type: "number"}}, projection: {_id: 1}},
+ [{_id: 0}, {_id: 1}, {_id: 2}],
+ {query: {$expr: {$lte: ["$a", 4]}, a: {$type: "number"}}, projection: {_id: 1}},
+ [{_id: 0}, {_id: 1}, {_id: 2}, {_id: 3}, {_id: 4}],
+ false);
+runTest({query: {$expr: {$gt: ["$a", 2]}, a: {$type: "number"}}, projection: {_id: 1}},
+ [{_id: 2}, {_id: 3}, {_id: 4}, {_id: 5}, {_id: 6}],
+ {query: {$expr: {$gt: ["$a", 3]}, a: {$type: "number"}}, projection: {_id: 1}},
+ [{_id: 3}, {_id: 4}, {_id: 5}, {_id: 6}],
+ false);
+runTest({query: {$expr: {$gte: ["$a", 2]}, a: {$type: "number"}}, projection: {_id: 1}},
+ [{_id: 1}, {_id: 2}, {_id: 3}, {_id: 4}, {_id: 5}, {_id: 6}],
+ {query: {$expr: {$gte: ["$a", 3]}, a: {$type: "number"}}, projection: {_id: 1}},
+ [{_id: 2}, {_id: 3}, {_id: 4}, {_id: 5}, {_id: 6}],
+ false);
+
+// Test that the entire list of $in values is treated as a parameter.
+runTest({query: {a: {$in: [1, 2]}}, projection: {_id: 1}},
+ [{_id: 0}, {_id: 1}],
+ {query: {a: {$in: [1, 2, 3, 4]}}, projection: {_id: 1}},
+ [{_id: 0}, {_id: 1}, {_id: 2}, {_id: 3}, {_id: 4}, {_id: 5}, {_id: 6}],
+ true);
+
+// Adding a null value to an $in inhibits auto-parameterization.
+runTest({query: {a: {$in: [1, 2]}}, projection: {_id: 1}},
+ [{_id: 0}, {_id: 1}],
+ {query: {a: {$in: [1, 2, null]}}, projection: {_id: 1}},
+ [{_id: 0}, {_id: 1}, {_id: 7}, {_id: 9}, {_id: 10}],
+ false);
+
+// Adding a regex to an $in inhibits auto-parameterization.
+runTest({query: {a: {$in: [1, 2]}}, projection: {_id: 1}},
+ [{_id: 0}, {_id: 1}],
+ {query: {a: {$in: [1, 2, /foo/]}}, projection: {_id: 1}},
+ [{_id: 0}, {_id: 1}, {_id: 13}],
+ false);
+
+// Adding a nested array to an $in inhibits auto-parameterization.
+runTest({query: {a: {$in: [1, 2]}}, projection: {_id: 1}},
+ [{_id: 0}, {_id: 1}],
+ {query: {a: {$in: [1, 2, []]}}, projection: {_id: 1}},
+ [{_id: 0}, {_id: 1}, {_id: 8}],
+ false);
+
+// Test auto-parameterization of $mod.
+runTest({query: {a: {$mod: [2, 0]}}, projection: {_id: 1}},
+ [{_id: 1}, {_id: 3}, {_id: 4}, {_id: 5}, {_id: 6}],
+ {query: {a: {$mod: [3, 1]}}, projection: {_id: 1}},
+ [{_id: 0}, {_id: 3}, {_id: 4}, {_id: 5}],
+ true);
+
+// Test auto-parameterization of $size.
+runTest({query: {a: {$size: 4}}, projection: {_id: 1}},
+ [{_id: 5}],
+ {query: {a: {$size: 2}}, projection: {_id: 1}},
+ [{_id: 11}, {_id: 12}],
+ true);
+
+// Test auto-parameterization of $where.
+runTest({query: {$where: "this.a == 1;"}, projection: {_id: 1}},
+ [{_id: 0}],
+ {query: {$where: "this.a == 2;"}, projection: {_id: 1}},
+ [{_id: 1}],
+ true);
+// $where queries use the same plan regardless of the exact JS code.
+runTest({
+ query: {
+ $where: function() {
+ const date = new Date();
+ return this.c == 1;
+ }
+ },
+ projection: {_id: 1}
+},
+ [],
+ {query: {$where: "this.a == 2;"}, projection: {_id: 1}},
+ [{_id: 1}],
+ true);
+
+// Test auto-parameterization of $regex.
+runTest({query: {a: /foo/}, projection: {_id: 1}},
+ [{_id: 13}, {_id: 14}],
+ {query: {a: {$regex: "bar"}}, projection: {_id: 1}},
+ [{_id: 15}],
+ true);
+
+// Test auto-parameterization of $type.
+runTest({query: {a: {$type: "double"}}, projection: {_id: 1}},
+ [{_id: 0}, {_id: 1}, {_id: 2}, {_id: 3}, {_id: 4}, {_id: 5}, {_id: 6}],
+ {query: {a: {$type: ["string", "regex"]}}, projection: {_id: 1}},
+ [{_id: 13}, {_id: 14}, {_id: 15}],
+ true);
+
+// Test that $type is not auto-parameterized when the type set includes "array".
+runTest({query: {a: {$type: ["string", "regex"]}}, projection: {_id: 1}},
+ [{_id: 13}, {_id: 14}, {_id: 15}],
+ {query: {a: {$type: ["string", "array"]}}, projection: {_id: 1}},
+ [{_id: 5}, {_id: 6}, {_id: 8}, {_id: 11}, {_id: 12}, {_id: 13}, {_id: 15}],
+ false);
+
+MongoRunner.stopMongod(conn);
+}());
diff --git a/src/mongo/db/matcher/expression.h b/src/mongo/db/matcher/expression.h
index 812781a3ad9..b844f33dd75 100644
--- a/src/mongo/db/matcher/expression.h
+++ b/src/mongo/db/matcher/expression.h
@@ -208,7 +208,7 @@ public:
using Iterator = MatchExpressionIterator<false>;
using ConstIterator = MatchExpressionIterator<true>;
- using InputParamId = int64_t;
+ using InputParamId = int32_t;
/**
* Tracks the information needed to generate a document validation error for a
diff --git a/src/mongo/db/pipeline/expression_walker.h b/src/mongo/db/pipeline/expression_walker.h
index 6aec35ed7af..b492bd0ad2b 100644
--- a/src/mongo/db/pipeline/expression_walker.h
+++ b/src/mongo/db/pipeline/expression_walker.h
@@ -60,7 +60,7 @@ template <typename Node, typename Walker>
inline constexpr auto hasVoidPreVisit =
stdx::is_detected_exact_v<void, PreVisit, Walker, MaybeConstPtr<std::is_const_v<Node>, Node>>;
/**
- * hasVoidPreVisit is a template variable indicating whether such a pointer-returning member
+ * hasPtrPreVisit is a template variable indicating whether such a pointer-returning member
* function exists for a given Walker type when called on a pointer to our Node type.
*/
template <typename Node, typename Walker>
diff --git a/src/mongo/db/query/canonical_query.cpp b/src/mongo/db/query/canonical_query.cpp
index ec8c798ca5e..82d496b1ba9 100644
--- a/src/mongo/db/query/canonical_query.cpp
+++ b/src/mongo/db/query/canonical_query.cpp
@@ -203,10 +203,11 @@ Status CanonicalQuery::init(OperationContext* opCtx,
}
auto unavailableMetadata = validStatus.getValue();
_root = MatchExpression::normalize(std::move(root));
- if (feature_flags::gFeatureFlagSbePlanCache.isEnabledAndIgnoreFCV()) {
- // TODO SERVER-61421: Call 'MatchExpression::parameterize()' on '_root' in order to enable
- // auto-parameterization. This cannot be done until the SBE plan cache code is prepared to
- // deal with auto-parameterized queries.
+ if (feature_flags::gFeatureFlagSbePlanCache.isEnabledAndIgnoreFCV() &&
+ feature_flags::gFeatureFlagAutoParameterization.isEnabledAndIgnoreFCV()) {
+ // Both the SBE plan cache and auto-parameterization are enabled. Add parameter markers to
+ // the appropriate match expression leaf nodes.
+ MatchExpression::parameterize(_root.get());
}
// The tree must always be valid after normalization.
dassert(isValid(_root.get(), *_findCommand).isOK());
diff --git a/src/mongo/db/query/canonical_query_encoder.cpp b/src/mongo/db/query/canonical_query_encoder.cpp
index 4892e62fdc2..9bd348b20fe 100644
--- a/src/mongo/db/query/canonical_query_encoder.cpp
+++ b/src/mongo/db/query/canonical_query_encoder.cpp
@@ -29,16 +29,22 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery
-#include "mongo/platform/basic.h"
-
#include "mongo/db/query/canonical_query_encoder.h"
#include <boost/iterator/transform_iterator.hpp>
#include "mongo/base/simple_string_data_comparator.h"
#include "mongo/db/matcher/expression_array.h"
+#include "mongo/db/matcher/expression_expr.h"
#include "mongo/db/matcher/expression_geo.h"
+#include "mongo/db/matcher/expression_text.h"
+#include "mongo/db/matcher/expression_text_noop.h"
+#include "mongo/db/matcher/expression_where.h"
+#include "mongo/db/matcher/expression_where_noop.h"
#include "mongo/db/query/projection.h"
+#include "mongo/db/query/query_feature_flags_gen.h"
+#include "mongo/db/query/query_knobs_gen.h"
+#include "mongo/db/query/tree_walker.h"
#include "mongo/logv2/log.h"
#include "mongo/util/base64.h"
@@ -78,11 +84,35 @@ const char kEncodeRegexFlagsSeparator = '/';
const char kEncodeSortSection = '~';
const char kEncodeEngineSection = '@';
+// These special bytes are used in the encoding of auto-parameterized match expressions in the SBE
+// plan cache key.
+
+// Precedes the id number of a parameter marker.
+const char kEncodeParamMarker = '?';
+// Precedes the encoding of a constant when that constant has not been auto-parameterized. The
+// constant is typically encoded as a BSON type byte followed by a BSON value (without the
+// BSONElement's field name).
+const char kEncodeConstantLiteralMarker = ':';
+
+/**
+ * AppendChar provides the compiler with a type for an "appendChar(...)" member function.
+ */
+template <class BuilderType>
+using AppendChar = decltype(std::declval<BuilderType>().appendChar(std::declval<char>()));
+
+/**
+ * hasAppendChar is a template variable indicating whether such a void-returning member function
+ * exists for a 'BuilderType'.
+ */
+template <typename BuilderType>
+inline constexpr auto hasAppendChar = stdx::is_detected_exact_v<void, AppendChar, BuilderType>;
+
/**
- * Encode user-provided string. Cache key delimiters seen in the
- * user string are escaped with a backslash.
+ * Encode user-provided string. Cache key delimiters seen in the user string are escaped with a
+ * backslash.
*/
-void encodeUserString(StringData s, StringBuilder* keyBuilder) {
+template <class BuilderType>
+void encodeUserString(StringData s, BuilderType* builder) {
for (size_t i = 0; i < s.size(); ++i) {
char c = s[i];
switch (c) {
@@ -95,11 +125,21 @@ void encodeUserString(StringData s, StringBuilder* keyBuilder) {
case kEncodeRegexFlagsSeparator:
case kEncodeSortSection:
case kEncodeEngineSection:
+ case kEncodeParamMarker:
+ case kEncodeConstantLiteralMarker:
case '\\':
- *keyBuilder << '\\';
+ if constexpr (hasAppendChar<BuilderType>) {
+ builder->appendChar('\\');
+ } else {
+ *builder << '\\';
+ }
// Fall through to default case.
default:
- *keyBuilder << c;
+ if constexpr (hasAppendChar<BuilderType>) {
+ builder->appendChar(c);
+ } else {
+ *builder << c;
+ }
}
}
}
@@ -621,7 +661,380 @@ CanonicalQuery::QueryShapeString encode(const CanonicalQuery& cq) {
return keyBuilder.str();
}
+namespace {
+/**
+ * A visitor intended for use in combination with the corresponding walker class below to encode a
+ * 'MatchExpression' into the SBE plan cache key.
+ *
+ * Handles potentially parameterized queries, in which case parameter markers are encoded into the
+ * cache key in place of the actual constant values.
+ */
+class MatchExpressionSbePlanCacheKeySerializationVisitor final
+ : public MatchExpressionConstVisitor {
+public:
+ explicit MatchExpressionSbePlanCacheKeySerializationVisitor(BufBuilder* builder)
+ : _builder(builder) {
+ invariant(_builder);
+ }
+
+ void visit(const BitsAllClearMatchExpression* expr) final {
+ encodeBitTestExpression(expr);
+ }
+ void visit(const BitsAllSetMatchExpression* expr) final {
+ encodeBitTestExpression(expr);
+ }
+ void visit(const BitsAnyClearMatchExpression* expr) final {
+ encodeBitTestExpression(expr);
+ }
+ void visit(const BitsAnySetMatchExpression* expr) final {
+ encodeBitTestExpression(expr);
+ }
+
+ void visit(const ExistsMatchExpression* expr) final {
+ encodeRhs(expr);
+ }
+
+ void visit(const ExprMatchExpression* expr) final {
+ encodeFull(expr);
+ }
+
+ void visit(const EqualityMatchExpression* expr) final {
+ encodeSingleParamPathNode(expr);
+ }
+ void visit(const GTEMatchExpression* expr) final {
+ encodeSingleParamPathNode(expr);
+ }
+ void visit(const GTMatchExpression* expr) final {
+ encodeSingleParamPathNode(expr);
+ }
+ void visit(const LTEMatchExpression* expr) final {
+ encodeSingleParamPathNode(expr);
+ }
+ void visit(const LTMatchExpression* expr) final {
+ encodeSingleParamPathNode(expr);
+ }
+
+ void visit(const InMatchExpression* expr) final {
+ encodeSingleParamPathNode(expr);
+ }
+
+ void visit(const ModMatchExpression* expr) final {
+ auto divisorParam = expr->getDivisorInputParamId();
+ auto remainderParam = expr->getRemainderInputParamId();
+ if (divisorParam) {
+ tassert(6142105,
+ "$mod expression had divisor param but not remainder param",
+ remainderParam);
+ encodeParamMarker(*divisorParam);
+ encodeParamMarker(*remainderParam);
+ } else {
+ // TODO SERVER-64137: remove this branch and assert the existence of both params once
+ // auto-parameterization flag is removed.
+ tassert(6142106,
+ "$mod expression had remainder param but not divisor param",
+ !remainderParam);
+ encodeRhs(expr);
+ }
+ }
+
+ void visit(const RegexMatchExpression* expr) final {
+ auto sourceRegexParam = expr->getSourceRegexInputParamId();
+ auto compiledRegexParam = expr->getCompiledRegexInputParamId();
+ if (sourceRegexParam) {
+ tassert(6142107,
+ "regex expression had source param but not compiled param",
+ compiledRegexParam);
+ encodeParamMarker(*sourceRegexParam);
+ encodeParamMarker(*compiledRegexParam);
+ } else {
+ // TODO SERVER-64137: remove this branch and assert the existence of both params once
+ // auto-parameterization flag is removed.
+ tassert(6142108,
+ "regex expression had compiled param but not source param",
+ !compiledRegexParam);
+ encodeRhs(expr);
+ }
+ }
+
+ void visit(const SizeMatchExpression* expr) final {
+ encodeSingleParamPathNode(expr);
+ }
+
+ void visit(const TextMatchExpression* expr) final {
+ encodeFull(expr);
+ }
+ void visit(const TextNoOpMatchExpression* expr) final {
+ encodeFull(expr);
+ }
+
+ void visit(const TypeMatchExpression* expr) final {
+ encodeSingleParamPathNode(expr);
+ }
+
+ void visit(const WhereMatchExpression* expr) final {
+ encodeSingleParamNode(expr);
+ }
+ void visit(const WhereNoOpMatchExpression* expr) final {
+ MONGO_UNREACHABLE_TASSERT(6142109);
+ }
+
+ /**
+ * Nothing needs to be encoded for these nodes beyond their type, their path (if they have one),
+ * and their children.
+ */
+ void visit(const AlwaysFalseMatchExpression* expr) final {}
+ void visit(const AlwaysTrueMatchExpression* expr) final {}
+ void visit(const AndMatchExpression* expr) final {}
+ void visit(const ElemMatchObjectMatchExpression* matchExpr) final {}
+ void visit(const NorMatchExpression* expr) final {}
+ void visit(const NotMatchExpression* expr) final {}
+ void visit(const OrMatchExpression* expr) final {}
+ // The 'InternalExpr*' match expressions are generated internally from a $expr, so they do not
+ // need to contribute anything else to the cache key.
+ void visit(const InternalExprEqMatchExpression* expr) final {}
+ void visit(const InternalExprGTEMatchExpression* expr) final {}
+ void visit(const InternalExprGTMatchExpression* expr) final {}
+ void visit(const InternalExprLTEMatchExpression* expr) final {}
+ void visit(const InternalExprLTMatchExpression* expr) final {}
+
+ /**
+ * These node types are not yet supported in SBE.
+ */
+ void visit(const ElemMatchValueMatchExpression* matchExpr) final {
+ MONGO_UNREACHABLE_TASSERT(6142110);
+ }
+ void visit(const GeoMatchExpression* expr) final {
+ MONGO_UNREACHABLE_TASSERT(6142111);
+ }
+ void visit(const GeoNearMatchExpression* expr) final {
+ MONGO_UNREACHABLE_TASSERT(6142112);
+ }
+ void visit(const InternalBucketGeoWithinMatchExpression* expr) final {
+ // This is only used for time-series collections, but SBE isn't yet used for querying
+ // time-series collections.
+ MONGO_UNREACHABLE_TASSERT(6142113);
+ }
+ void visit(const InternalSchemaAllElemMatchFromIndexMatchExpression* expr) final {
+ MONGO_UNREACHABLE_TASSERT(6142114);
+ }
+ void visit(const InternalSchemaAllowedPropertiesMatchExpression* expr) final {
+ MONGO_UNREACHABLE_TASSERT(6142115);
+ }
+ void visit(const InternalSchemaBinDataEncryptedTypeExpression* expr) final {
+ MONGO_UNREACHABLE_TASSERT(6142116);
+ }
+ void visit(const InternalSchemaBinDataSubTypeExpression* expr) final {
+ MONGO_UNREACHABLE_TASSERT(6142117);
+ }
+ void visit(const InternalSchemaCondMatchExpression* expr) final {
+ MONGO_UNREACHABLE_TASSERT(6142118);
+ }
+ void visit(const InternalSchemaEqMatchExpression* expr) final {
+ MONGO_UNREACHABLE_TASSERT(6142119);
+ }
+ void visit(const InternalSchemaFmodMatchExpression* expr) final {
+ MONGO_UNREACHABLE_TASSERT(6142120);
+ }
+ void visit(const InternalSchemaMatchArrayIndexMatchExpression* expr) final {
+ MONGO_UNREACHABLE_TASSERT(6142121);
+ }
+ void visit(const InternalSchemaMaxItemsMatchExpression* expr) final {
+ MONGO_UNREACHABLE_TASSERT(6142122);
+ }
+ void visit(const InternalSchemaMaxLengthMatchExpression* expr) final {
+ MONGO_UNREACHABLE_TASSERT(6142123);
+ }
+ void visit(const InternalSchemaMaxPropertiesMatchExpression* expr) final {
+ MONGO_UNREACHABLE_TASSERT(6142124);
+ }
+ void visit(const InternalSchemaMinItemsMatchExpression* expr) final {
+ MONGO_UNREACHABLE_TASSERT(6142125);
+ }
+ void visit(const InternalSchemaMinLengthMatchExpression* expr) final {
+ MONGO_UNREACHABLE_TASSERT(6142126);
+ }
+ void visit(const InternalSchemaMinPropertiesMatchExpression* expr) final {
+ MONGO_UNREACHABLE_TASSERT(6142127);
+ }
+ void visit(const InternalSchemaObjectMatchExpression* expr) final {
+ MONGO_UNREACHABLE_TASSERT(6142128);
+ }
+ void visit(const InternalSchemaRootDocEqMatchExpression* expr) final {
+ MONGO_UNREACHABLE_TASSERT(6142129);
+ }
+ void visit(const InternalSchemaTypeExpression* expr) final {
+ MONGO_UNREACHABLE_TASSERT(6142130);
+ }
+ void visit(const InternalSchemaUniqueItemsMatchExpression* expr) final {
+ MONGO_UNREACHABLE_TASSERT(6142131);
+ }
+ void visit(const InternalSchemaXorMatchExpression* expr) final {
+ MONGO_UNREACHABLE_TASSERT(6142132);
+ }
+ // Used in the implementation of geoNear, which is not yet supported in SBE.
+ void visit(const TwoDPtInAnnulusExpression* expr) final {
+ MONGO_UNREACHABLE_TASSERT(6142133);
+ }
+
+private:
+ /**
+ * Encodes a 'PathMatchExpression' node of type T whose constant can be replaced with a single
+ * parameter marker. If the parameter marker is not present, encodes the node's BSON constant
+ * into the cache key.
+ */
+ template <typename T,
+ typename = std::enable_if_t<std::is_convertible_v<T*, PathMatchExpression*>>>
+ void encodeSingleParamPathNode(const T* expr) {
+ if (expr->getInputParamId()) {
+ encodeParamMarker(*expr->getInputParamId());
+ } else {
+ encodeRhs(expr);
+ }
+ }
+
+ /**
+ * Encodes a non-path 'MatchExpression' node of type T whose constant can be replaced with a
+ * single parameter marker. If the parameter marker is not present, encodes the entire node into
+ * the cache key.
+ */
+ template <typename T>
+ void encodeSingleParamNode(const T* expr) {
+ static_assert(!std::is_convertible_v<T*, PathMatchExpression*>);
+ if (expr->getInputParamId()) {
+ encodeParamMarker(*expr->getInputParamId());
+ } else {
+ encodeFull(expr);
+ }
+ }
+
+    /**
+     * Writes the cache-key contribution of a bit-test node. Such a node has two parameter ids (bit
+     * positions and bitmask) which must be either both present or both absent; a mismatch trips a
+     * tassert. With both present, the two parameter markers are written (positions first, then
+     * mask). With both absent, the node's serialized right-hand side is written.
+     */
+    void encodeBitTestExpression(const BitTestMatchExpression* expr) {
+        const auto positionsId = expr->getBitPositionsParamId();
+        const auto maskId = expr->getBitMaskParamId();
+
+        if (!positionsId) {
+            // TODO SERVER-64137: remove this branch and assert the existence of both params once
+            // auto-parameterization flag is removed.
+            tassert(6142101,
+                    "bit-test expression had bitmask param but not bit positions param",
+                    !maskId);
+            encodeRhs(expr);
+            return;
+        }
+
+        tassert(6142100,
+                "bit-test expression had bit positions param but not bitmask param",
+                maskId);
+        encodeParamMarker(*positionsId);
+        encodeParamMarker(*maskId);
+    }
+
+    /**
+     * Appends the dedicated parameter marker byte ('kEncodeParamMarker') to the cache key and then
+     * the numeric parameter id (four bytes, assuming 'InputParamId' is a 32-bit integer).
+     */
+    void encodeParamMarker(MatchExpression::InputParamId paramId) {
+        BufBuilder& out = *_builder;
+        out.appendChar(kEncodeParamMarker);
+        out.appendNum(paramId);
+    }
+
+    /**
+     * Handles path match expressions of the form {"some.path": {$operator: <RHS>}} by encoding the
+     * serialized right-hand side of the expression verbatim into the cache key.
+     *
+     * Must not be called for an 'expr' that has a parameter marker.
+     */
+    void encodeRhs(const PathMatchExpression* expr) {
+        const auto serializedRhs = expr->getSerializedRightHandSide();
+        encodeHelper(serializedRhs);
+    }
+
+    /**
+     * Non-path analogue of 'encodeRhs()': there is no separate right-hand side to extract, so the
+     * result of 'serialize()' on the whole expression is encoded into the plan cache key.
+     *
+     * Must not be called for an 'expr' that has a parameter marker.
+     */
+    void encodeFull(const MatchExpression* expr) {
+        const auto serializedExpr = expr->serialize();
+        encodeHelper(serializedExpr);
+    }
+
+    /**
+     * Encodes the value of the sole element of 'toEncode' into the cache key. Trips a tassert if
+     * 'toEncode' is empty or holds more than one element.
+     */
+    void encodeHelper(BSONObj toEncode) {
+        tassert(6142102, "expected object to encode to be non-empty", !toEncode.isEmpty());
+
+        BSONObjIterator it{toEncode};
+        const BSONElement onlyElem = it.next();
+        tassert(6142103, "expected object to encode to have exactly one element", !it.more());
+
+        encodeBsonValue(onlyElem);
+    }
+
+    /**
+     * Appends three things to the cache key, in order:
+     *   1. the constant-literal marker byte ('kEncodeConstantLiteralMarker'),
+     *   2. one byte holding the BSON type of 'elem',
+     *   3. the raw bytes of the value portion of 'elem' (for types that have such a value).
+     *
+     * The field name of 'elem' is deliberately left out; only its type and value are encoded.
+     */
+    void encodeBsonValue(BSONElement elem) {
+        BufBuilder& out = *_builder;
+        out.appendChar(kEncodeConstantLiteralMarker);
+        out.appendChar(elem.type());
+        out.appendBuf(elem.value(), elem.valuesize());
+    }
+
+ BufBuilder* const _builder;
+};
+
+/**
+ * Tree walker that traverses a 'MatchExpression' tree and appends the matching portion of the SBE
+ * plan cache key to a caller-supplied 'BufBuilder'.
+ *
+ * Queries may have been auto-parameterized; for those, the key contains parameter markers where
+ * the concrete constant values would otherwise appear.
+ */
+class MatchExpressionSbePlanCacheKeySerializationWalker {
+public:
+    /**
+     * 'builder' receives the encoded key bytes and must be non-null.
+     */
+    explicit MatchExpressionSbePlanCacheKeySerializationWalker(BufBuilder* builder)
+        : _builder{builder}, _visitor{_builder} {
+        invariant(_builder);
+    }
+
+    /**
+     * Runs before the children of 'expr' are walked: writes the node's match type and path, lets
+     * the node encode itself through the visitor, and opens the children list if there is one.
+     */
+    void preVisit(const MatchExpression* expr) {
+        // Node type first, then the path (if there is a non-empty path).
+        const auto typeTag = encodeMatchType(expr->matchType());
+        _builder->appendStr(typeTag);
+        encodeUserString(expr->path(), _builder);
+
+        // The node's own payload precedes anything written for its children.
+        expr->acceptVisitor(&_visitor);
+
+        const bool hasChildren = expr->numChildren() > 0;
+        if (hasChildren) {
+            _builder->appendChar(kEncodeChildrenBegin);
+        }
+    }
+
+    /**
+     * Runs between two consecutive children: writes the child separator.
+     */
+    void inVisit(long /*count*/, const MatchExpression* /*expr*/) {
+        _builder->appendChar(kEncodeChildrenSeparator);
+    }
+
+    /**
+     * Runs after the children of 'expr' are walked: closes the children list if one was opened.
+     */
+    void postVisit(const MatchExpression* expr) {
+        if (expr->numChildren() == 0) {
+            return;
+        }
+        _builder->appendChar(kEncodeChildrenEnd);
+    }
+
+private:
+    BufBuilder* const _builder;
+    MatchExpressionSbePlanCacheKeySerializationVisitor _visitor;
+};
+
+/**
+ * Encodes a plan cache key for 'matchExpr' (which may contain parameter markers) into 'builder'.
+ *
+ * The key has the following property: two match expression trees that are identical after
+ * auto-parameterization produce the same key; otherwise the keys must differ.
+ */
+void encodeKeyForAutoParameterizedMatchSBE(MatchExpression* matchExpr, BufBuilder* builder) {
+    MatchExpressionSbePlanCacheKeySerializationWalker keyWalker{builder};
+    tree_walker::walk<true, MatchExpression>(matchExpr, &keyWalker);
+}
+} // namespace
+
std::string encodeSBE(const CanonicalQuery& cq) {
+ tassert(6142104,
+ "attempting to encode SBE plan cache key for SBE-incompatible query",
+ cq.isSbeCompatible());
+
const auto& filter = cq.getQueryObj();
const auto& proj = cq.getFindCommandRequest().getProjection();
const auto& sort = cq.getFindCommandRequest().getSort();
@@ -639,7 +1052,14 @@ std::string encodeSBE(const CanonicalQuery& cq) {
kBufferSizeConstant + (let ? let->objsize() : 0);
BufBuilder bufBuilder(bufSize);
- bufBuilder.appendBuf(filter.objdata(), filter.objsize());
+ if (feature_flags::gFeatureFlagAutoParameterization.isEnabledAndIgnoreFCV()) {
+ encodeKeyForAutoParameterizedMatchSBE(cq.root(), &bufBuilder);
+ } else {
+ // When auto-parameterization is off, just add the entire filter BSON to the cache key,
+ // including any constants.
+ bufBuilder.appendBuf(filter.objdata(), filter.objsize());
+ }
+
bufBuilder.appendBuf(proj.objdata(), proj.objsize());
// TODO SERVER-62100: No need to encode the entire "let" object.
if (let) {
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
index 797a61f064e..106889832e8 100644
--- a/src/mongo/db/query/get_executor.cpp
+++ b/src/mongo/db/query/get_executor.cpp
@@ -281,6 +281,13 @@ void fillOutIndexEntries(OperationContext* opCtx,
CanonicalQuery* canonicalQuery,
const CollectionPtr& collection,
std::vector<IndexEntry>& entries) {
+ // TODO SERVER-63352: Eliminate this check once we support auto-parameterized index scan plans.
+ if (feature_flags::gFeatureFlagAutoParameterization.isEnabledAndIgnoreFCV()) {
+ // Indexed plans are not yet supported when auto-parameterization is enabled, so make it
+ // look to the planner like there are no indexes.
+ return;
+ }
+
auto ii = collection->getIndexCatalog()->getIndexIterator(opCtx, false);
while (ii->more()) {
const IndexCatalogEntry* ice = ii->next();
@@ -984,6 +991,15 @@ protected:
invariant(descriptor);
invariant(plannerParams);
+ // Auto-parameterization currently only works for collection scan plans, but idhack plans
+ // use the _id index. Therefore, we inhibit idhack when auto-parameterization is enabled.
+ //
+ // TODO SERVER-64237: Eliminate this check once we support auto-parameterized ID hack
+ // plans.
+ if (feature_flags::gFeatureFlagAutoParameterization.isEnabledAndIgnoreFCV()) {
+ return nullptr;
+ }
+
tassert(5536100,
"SBE cannot handle query with metadata",
!_cq->metadataDeps()[DocumentMetadataFields::kSortKey]);
diff --git a/src/mongo/db/query/query_feature_flags.idl b/src/mongo/db/query/query_feature_flags.idl
index b3565da4f5f..b60ea4d04cc 100644
--- a/src/mongo/db/query/query_feature_flags.idl
+++ b/src/mongo/db/query/query_feature_flags.idl
@@ -82,6 +82,13 @@ feature_flags:
cpp_varname: gFeatureFlagSbePlanCache
default: false
+ featureFlagAutoParameterization:
+ description: "Feature flag for enabling auto-parameterization of match expressions. This
+ feature is used to store auto-parameterized plans in the SBE plan cache, so this flag is only
+ meaningful to turn on in combination with 'featureFlagSbePlanCache'."
+ cpp_varname: gFeatureFlagAutoParameterization
+ default: false
+
featureFlagSortArray:
description: "Feature flag for allowing use of the $sortArray aggregation expression"
cpp_varname: gFeatureFlagSortArray
diff --git a/src/mongo/db/query/sbe_stage_builder.h b/src/mongo/db/query/sbe_stage_builder.h
index f5a0904a773..852f187c6f0 100644
--- a/src/mongo/db/query/sbe_stage_builder.h
+++ b/src/mongo/db/query/sbe_stage_builder.h
@@ -334,6 +334,7 @@ private:
} else {
debugInfo.reset();
}
+ inputParamToSlotMap = other.inputParamToSlotMap;
}
};