diff options
-rw-r--r-- | buildscripts/resmokeconfig/fully_disabled_feature_flags.yml | 1 | ||||
-rw-r--r-- | jstests/noPassthrough/sbe_plan_cache_autoparameterize_collscan.js | 421 | ||||
-rw-r--r-- | src/mongo/db/matcher/expression.h | 2 | ||||
-rw-r--r-- | src/mongo/db/pipeline/expression_walker.h | 2 | ||||
-rw-r--r-- | src/mongo/db/query/canonical_query.cpp | 9 | ||||
-rw-r--r-- | src/mongo/db/query/canonical_query_encoder.cpp | 436 | ||||
-rw-r--r-- | src/mongo/db/query/get_executor.cpp | 16 | ||||
-rw-r--r-- | src/mongo/db/query/query_feature_flags.idl | 7 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder.h | 1 |
9 files changed, 881 insertions, 14 deletions
diff --git a/buildscripts/resmokeconfig/fully_disabled_feature_flags.yml b/buildscripts/resmokeconfig/fully_disabled_feature_flags.yml index c86f4bae027..036bf0029fd 100644 --- a/buildscripts/resmokeconfig/fully_disabled_feature_flags.yml +++ b/buildscripts/resmokeconfig/fully_disabled_feature_flags.yml @@ -11,3 +11,4 @@ # Disable featureFlagSBELookupPushdown until integration tests can pass with basic SBE # lookup translation. - featureFlagSBELookupPushdown +- featureFlagAutoParameterization diff --git a/jstests/noPassthrough/sbe_plan_cache_autoparameterize_collscan.js b/jstests/noPassthrough/sbe_plan_cache_autoparameterize_collscan.js new file mode 100644 index 00000000000..13074276090 --- /dev/null +++ b/jstests/noPassthrough/sbe_plan_cache_autoparameterize_collscan.js @@ -0,0 +1,421 @@ +/** + * Tests that auto-parameterized collection scan plans are correctly stored in the SBE plan + * cache, and that they can be correctly recovered from the cache with new parameter values. + * + * TODO SERVER-64137: Move this test to jstests/core/ once we no longer need to specially configure + * 'featureFlagAutoParameterization'. + */ +(function() { +"use strict"; + +load("jstests/libs/analyze_plan.js"); +load("jstests/libs/sbe_util.js"); + +// TODO SERVER-64315: re-enable this test. This test depends on caching single solution plans, +// which is disabled temporarily due to a bug. +if (true) { + jsTest.log("This test is temporarily disabled"); + return; +} + +const conn = MongoRunner.runMongod({setParameter: "featureFlagAutoParameterization=true"}); +assert.neq(conn, null, "mongod failed to start up"); + +const dbName = jsTestName(); +const db = conn.getDB(dbName); + +// This test is specifically verifying the behavior of the SBE plan cache. So if either the SBE plan +// cache or SBE itself are disabled, bail out. 
+if (!checkSBEEnabled(db, ["featureFlagSbePlanCache"])) { + jsTestLog("Skipping test because either SBE engine or SBE plan cache is disabled"); + MongoRunner.stopMongod(conn); + return; +} + +assert.commandWorked(db.dropDatabase()); +const coll = db.coll; + +let data = [ + {_id: 0, a: 1, c: "foo"}, + {_id: 1, a: 2, c: "foo"}, + {_id: 2, a: 3, c: "foo"}, + {_id: 3, a: 4, c: "foo"}, + {_id: 4, a: 4, c: "foo"}, + {_id: 5, a: [3, 4, 5, 6], c: "foo"}, + {_id: 6, a: [3, 5, 8], c: "foo"}, + {_id: 7, c: "foo"}, + {_id: 8, a: [], c: "foo"}, + {_id: 9, a: undefined, c: "foo"}, + {_id: 10, a: null, c: "foo"}, + {_id: 11, a: [{b: 3}, {b: 4}], c: "foo"}, + {_id: 12, a: [{b: 5}, {b: 6}], c: "foo"}, + {_id: 13, a: "foo", c: "foo"}, + {_id: 14, a: /foo/, c: "foo"}, + {_id: 15, a: "zbarz", c: "foo"}, + // A 12-byte BinData where the last 6 bits are 1 and all preceding bits are 0. + {_id: 16, a: BinData(0, "AAAAAAAAAAAAAAA/"), c: "foo"}, +]; +assert.commandWorked(coll.insert(data)); + +function assertSbePlanCacheEntryExists(cacheKey) { + const entries = + coll.aggregate([{$planCacheStats: {}}, {$match: {planCacheKey: cacheKey}}]).toArray(); + assert.eq(entries.length, 1, entries); + const entry = entries[0]; + // The version:"2" field indicates that this is an SBE plan cache entry. + assert.eq(entry.version, "2", entry); + assert.eq(entry.planCacheKey, cacheKey, entry); + // Since there is only ever one possible candidate plan (collection scan), we expect the cache + // entry to be both active and pinned. + assert.eq(entry.isActive, true, entry); + assert.eq(entry.isPinned, true, entry); +} + +// Given a document with the format {query: <filter>, projection: <projection>, sort: <sort>}, where +// each field is optional, runs the corresponding find command and returns the results as an array. 
+function runFindCommandFromShapeDoc(shape) { + let cursor = coll.find(shape.query, shape.projection); + if (shape.sort) { + cursor = cursor.sort(shape.sort); + } + return cursor.toArray(); +} + +/** + * Runs a single end-to-end test case for auto-parameterization of collection scan plans. + * - 'shape1' is a description of a find command as a document {query: <filter>, projection: + * <projection>, sort: <sort>}. + * - 'expectedResults1' is an array containing the results expected from running 'shape1' against + * the test collection. This function verifies that the actual results match the expected ones. + * The order of the result set is not considered significant (since not all test queries specify a + * sort). + * - 'shape2' is a second find command, expressed with the same format as 'shape1' and whose + * results are compared to 'expectedResults2'. Again, the order of the result set is not + * significant. + * - If 'sameCacheKey' is true, then verifies that 'shape1' and 'shape2' have the same plan cache + * key using $planCacheStats. Otherwise, verifies that the two test queries have different plan + * cache keys. + * + * Also uses $planCacheStats to verify that the expected cache entries are created. + */ +function runTest(shape1, expectedResults1, shape2, expectedResults2, sameCacheKey) { + // Flush the cache before starting the test to make sure we are starting from a clean slate. + coll.getPlanCache().clear(); + + for (let shape of [shape1, shape2]) { + shape.collection = coll; + shape.db = db; + } + + const cacheKey1 = getPlanCacheKeyFromShape(shape1); + const cacheKey2 = getPlanCacheKeyFromShape(shape2); + if (sameCacheKey) { + assert.eq(cacheKey1, cacheKey2, "expected SBE plan cache keys to be the same"); + } else { + assert.neq(cacheKey1, cacheKey2, "expected SBE plan cache keys to be different"); + } + + // Run each query twice in order to make sure that each query still returns the same results + // after the state of the cache has been altered. 
+ [...Array(2)].forEach(() => { + const actualResults1 = runFindCommandFromShapeDoc(shape1); + assert.sameMembers(actualResults1, expectedResults1, shape1); + assertSbePlanCacheEntryExists(cacheKey1); + + const actualResults2 = runFindCommandFromShapeDoc(shape2); + assert.sameMembers(actualResults2, expectedResults2, shape2); + assertSbePlanCacheEntryExists(cacheKey2); + }); +} + +// Test basic auto-parameterization of $eq. +runTest({query: {a: 1}}, + [{_id: 0, a: 1, c: "foo"}], + {query: {a: 4}}, + [{_id: 3, a: 4, c: "foo"}, {_id: 4, a: 4, c: "foo"}, {_id: 5, a: [3, 4, 5, 6], c: "foo"}], + true); + +// Test that different projections result in different cache keys. +runTest({query: {a: 1}, projection: {_id: 0}}, + [{a: 1, c: "foo"}], + {query: {a: 4}, projection: {_id: 0, c: 0}}, + [{a: 4}, {a: 4}, {a: [3, 4, 5, 6]}], + false); + +// Test that different sorts result in different cache keys. +runTest({query: {a: 1}, sort: {_id: -1}, projection: {c: 0}}, + [{_id: 0, a: 1}], + {query: {a: 4}, sort: {_id: 1}, projection: {c: 0}}, + [{_id: 3, a: 4}, {_id: 4, a: 4}, {_id: 5, a: [3, 4, 5, 6]}], + false); + +// Queries on different paths should result in different cache keys. +runTest({query: {a: 1}}, + [{_id: 0, a: 1, c: "foo"}], + {query: {"a.b": 6}}, + [{_id: 12, a: [{b: 5}, {b: 6}], c: "foo"}], + false); + +// Test $eq:null queries do not get auto-parameterized. +runTest({query: {a: 1}, projection: {c: 0}}, + [{_id: 0, a: 1}], + {query: {a: null}, projection: {c: 0}}, + [{_id: 7}, {_id: 9, a: undefined}, {_id: 10, a: null}], + false); + +// Test basic auto-parameterization of $lt. +runTest({query: {a: {$lt: 3}}, projection: {c: 0}}, + [{_id: 0, a: 1}, {_id: 1, a: 2}], + {query: {a: {$lt: 4}}, projection: {c: 0}}, + [ + {_id: 0, a: 1}, + {_id: 1, a: 2}, + {_id: 2, a: 3}, + {_id: 5, a: [3, 4, 5, 6]}, + {_id: 6, a: [3, 5, 8]} + ], + true); + +// Test basic auto-parameterization of $lte. 
+runTest({query: {a: {$lte: 2}}, projection: {c: 0}}, + [{_id: 0, a: 1}, {_id: 1, a: 2}], + {query: {a: {$lte: 3}}, projection: {c: 0}}, + [ + {_id: 0, a: 1}, + {_id: 1, a: 2}, + {_id: 2, a: 3}, + {_id: 5, a: [3, 4, 5, 6]}, + {_id: 6, a: [3, 5, 8]} + ], + true); + +// Test basic auto-parameterization of $gt. +runTest({query: {a: {$gt: 5}}, projection: {c: 0}}, + [{_id: 5, a: [3, 4, 5, 6]}, {_id: 6, a: [3, 5, 8]}], + {query: {a: {$gt: 6}}, projection: {c: 0}}, + [{_id: 6, a: [3, 5, 8]}], + true); + +// Test basic auto-parameterization of $gte. +runTest({query: {a: {$gte: 6}}, projection: {c: 0}}, + [{_id: 5, a: [3, 4, 5, 6]}, {_id: 6, a: [3, 5, 8]}], + {query: {a: {$gte: 7}}, projection: {c: 0}}, + [{_id: 6, a: [3, 5, 8]}], + true); + +// Test basic auto-parameterization of $bitsAllClear. +runTest({query: {a: {$bitsAllClear: [0, 3]}}, projection: {_id: 1}}, + [{_id: 1}, {_id: 3}, {_id: 4}, {_id: 5}, {_id: 16}], + {query: {a: {$bitsAllClear: [0, 2, 65]}}, projection: {_id: 1}}, + [{_id: 1}, {_id: 6}, {_id: 16}], + true); + +// Test basic auto-parameterization of $bitsAllSet. +runTest({query: {a: {$bitsAllSet: [0, 2]}}, projection: {_id: 1}}, + [{_id: 5}, {_id: 6}], + {query: {a: {$bitsAllSet: [0, 1]}}, projection: {_id: 1}}, + [{_id: 2}, {_id: 5}, {_id: 6}], + true); + +// Test basic auto-parameterization of $bitsAnyClear. +runTest({query: {a: {$bitsAnyClear: 1}}, projection: {_id: 1}}, + [{_id: 1}, {_id: 3}, {_id: 4}, {_id: 5}, {_id: 6}, {_id: 16}], + {query: {a: {$bitsAnyClear: 3}}, projection: {_id: 1}}, + [{_id: 0}, {_id: 1}, {_id: 3}, {_id: 4}, {_id: 5}, {_id: 6}, {_id: 16}], + true); + +// Test basic auto-parameterization of $bitsAnySet. 
+runTest({query: {a: {$bitsAnySet: 1}}, projection: {_id: 1}}, + [{_id: 0}, {_id: 2}, {_id: 5}, {_id: 6}], + {query: {a: {$bitsAnySet: 3}}, projection: {_id: 1}}, + [{_id: 0}, {_id: 1}, {_id: 2}, {_id: 5}, {_id: 6}], + true); + +// Auto-parameterization of bit-test operators should work even if looking past 64 bits is required +// in order to match against binary data. +runTest({query: {a: {$bitsAllSet: [0, 94]}}, projection: {_id: 1}}, + [], + {query: {a: {$bitsAllSet: [88, 89, 90, 91, 92, 93]}}, projection: {_id: 1}}, + [{_id: 16}], + true); + +// Test auto-parameterization of $elemMatch object. +runTest({query: {a: {$elemMatch: {b: {$gt: 3, $lt: 5}}}}, projection: {_id: 1}}, + [{_id: 11}], + {query: {a: {$elemMatch: {b: {$gt: 4, $lt: 6}}}}, projection: {_id: 1}}, + [{_id: 12}], + true); + +// Test a conjunction with two auto-parameterized predicates. +runTest({query: {$and: [{a: 3}, {a: 6}]}, projection: {_id: 1}}, + [{_id: 5}], + {query: {$and: [{a: 5}, {a: 8}]}, projection: {_id: 1}}, + [{_id: 6}], + true); + +// Test a disjunction with two auto-parameterized predicates. +runTest({query: {$or: [{a: 3}, {a: 6}]}, projection: {_id: 1}}, + [{_id: 2}, {_id: 5}, {_id: 6}], + {query: {$or: [{a: 1}, {a: 4}]}, projection: {_id: 1}}, + [{_id: 0}, {_id: 3}, {_id: 4}, {_id: 5}], + true); + +// Test a $nor with three auto-parameterized child predicates. +runTest({query: {$nor: [{a: 3}, {a: 6}], a: {$type: "number"}}, projection: {_id: 1}}, + [{_id: 0}, {_id: 1}, {_id: 3}, {_id: 4}], + {query: {$nor: [{a: 1}, {a: 4}], a: {$type: "number"}}, projection: {_id: 1}}, + [{_id: 1}, {_id: 2}, {_id: 6}], + true); + +// Test an auto-parameterized $ne. +runTest({query: {$and: [{a: {$ne: 4}}, {a: {$type: "number"}}]}, projection: {_id: 1}}, + [{_id: 0}, {_id: 1}, {_id: 2}, {_id: 6}], + {query: {$and: [{a: {$ne: 6}}, {a: {$type: "number"}}]}, projection: {_id: 1}}, + [{_id: 0}, {_id: 1}, {_id: 2}, {_id: 3}, {_id: 4}, {_id: 6}], + true); + +// Test an auto-parameterized $not-$lt. 
+runTest({query: {$and: [{a: {$not: {$lt: 4}}}, {a: {$type: "number"}}]}, projection: {_id: 1}}, + [{_id: 3}, {_id: 4}], + {query: {$and: [{a: {$not: {$lt: 3}}}, {a: {$type: "number"}}]}, projection: {_id: 1}}, + [{_id: 2}, {_id: 3}, {_id: 4}, {_id: 5}, {_id: 6}], + true); + +// Verify that $exists queries are not auto-parameterized, meaning that $exists:true and +// $exists:false queries get different cache keys. +runTest({query: {a: {$exists: true}}, projection: {_id: 1}}, + [ + {_id: 0}, + {_id: 1}, + {_id: 2}, + {_id: 3}, + {_id: 4}, + {_id: 5}, + {_id: 6}, + {_id: 8}, + {_id: 9}, + {_id: 10}, + {_id: 11}, + {_id: 12}, + {_id: 13}, + {_id: 14}, + {_id: 15}, + {_id: 16}, + ], + {query: {a: {$exists: false}}, projection: {_id: 1}}, + [{_id: 7}], + false); + +// Test that comparisons expressed as $expr are not auto-parameterized. +runTest({query: {$expr: {$eq: ["$a", 3]}}, projection: {_id: 1}}, + [{_id: 2}], + {query: {$expr: {$eq: ["$a", 4]}}, projection: {_id: 1}}, + [{_id: 3}, {_id: 4}], + false); +runTest({query: {$expr: {$lt: ["$a", 3]}, a: {$type: "number"}}, projection: {_id: 1}}, + [{_id: 0}, {_id: 1}], + {query: {$expr: {$lt: ["$a", 4]}, a: {$type: "number"}}, projection: {_id: 1}}, + [{_id: 0}, {_id: 1}, {_id: 2}], + false); +runTest({query: {$expr: {$lte: ["$a", 3]}, a: {$type: "number"}}, projection: {_id: 1}}, + [{_id: 0}, {_id: 1}, {_id: 2}], + {query: {$expr: {$lte: ["$a", 4]}, a: {$type: "number"}}, projection: {_id: 1}}, + [{_id: 0}, {_id: 1}, {_id: 2}, {_id: 3}, {_id: 4}], + false); +runTest({query: {$expr: {$gt: ["$a", 2]}, a: {$type: "number"}}, projection: {_id: 1}}, + [{_id: 2}, {_id: 3}, {_id: 4}, {_id: 5}, {_id: 6}], + {query: {$expr: {$gt: ["$a", 3]}, a: {$type: "number"}}, projection: {_id: 1}}, + [{_id: 3}, {_id: 4}, {_id: 5}, {_id: 6}], + false); +runTest({query: {$expr: {$gte: ["$a", 2]}, a: {$type: "number"}}, projection: {_id: 1}}, + [{_id: 1}, {_id: 2}, {_id: 3}, {_id: 4}, {_id: 5}, {_id: 6}], + {query: {$expr: {$gte: ["$a", 3]}, 
a: {$type: "number"}}, projection: {_id: 1}}, + [{_id: 2}, {_id: 3}, {_id: 4}, {_id: 5}, {_id: 6}], + false); + +// Test that the entire list of $in values is treated as a parameter. +runTest({query: {a: {$in: [1, 2]}}, projection: {_id: 1}}, + [{_id: 0}, {_id: 1}], + {query: {a: {$in: [1, 2, 3, 4]}}, projection: {_id: 1}}, + [{_id: 0}, {_id: 1}, {_id: 2}, {_id: 3}, {_id: 4}, {_id: 5}, {_id: 6}], + true); + +// Adding a null value to an $in inhibits auto-parameterization. +runTest({query: {a: {$in: [1, 2]}}, projection: {_id: 1}}, + [{_id: 0}, {_id: 1}], + {query: {a: {$in: [1, 2, null]}}, projection: {_id: 1}}, + [{_id: 0}, {_id: 1}, {_id: 7}, {_id: 9}, {_id: 10}], + false); + +// Adding a regex to an $in inhibits auto-parameterization. +runTest({query: {a: {$in: [1, 2]}}, projection: {_id: 1}}, + [{_id: 0}, {_id: 1}], + {query: {a: {$in: [1, 2, /foo/]}}, projection: {_id: 1}}, + [{_id: 0}, {_id: 1}, {_id: 13}], + false); + +// Adding a nested array to an $in inhibits auto-parameterization. +runTest({query: {a: {$in: [1, 2]}}, projection: {_id: 1}}, + [{_id: 0}, {_id: 1}], + {query: {a: {$in: [1, 2, []]}}, projection: {_id: 1}}, + [{_id: 0}, {_id: 1}, {_id: 8}], + false); + +// Test auto-parameterization of $mod. +runTest({query: {a: {$mod: [2, 0]}}, projection: {_id: 1}}, + [{_id: 1}, {_id: 3}, {_id: 4}, {_id: 5}, {_id: 6}], + {query: {a: {$mod: [3, 1]}}, projection: {_id: 1}}, + [{_id: 0}, {_id: 3}, {_id: 4}, {_id: 5}], + true); + +// Test auto-parameterization of $size. +runTest({query: {a: {$size: 4}}, projection: {_id: 1}}, + [{_id: 5}], + {query: {a: {$size: 2}}, projection: {_id: 1}}, + [{_id: 11}, {_id: 12}], + true); + +// Test auto-parameterization of $where. +runTest({query: {$where: "this.a == 1;"}, projection: {_id: 1}}, + [{_id: 0}], + {query: {$where: "this.a == 2;"}, projection: {_id: 1}}, + [{_id: 1}], + true); +// $where queries use the same plan regardless of the exact JS code. 
+runTest({ + query: { + $where: function() { + const date = new Date(); + return this.c == 1; + } + }, + projection: {_id: 1} +}, + [], + {query: {$where: "this.a == 2;"}, projection: {_id: 1}}, + [{_id: 1}], + true); + +// Test auto-parameterization of $regex. +runTest({query: {a: /foo/}, projection: {_id: 1}}, + [{_id: 13}, {_id: 14}], + {query: {a: {$regex: "bar"}}, projection: {_id: 1}}, + [{_id: 15}], + true); + +// Test auto-parameterization of $type. +runTest({query: {a: {$type: "double"}}, projection: {_id: 1}}, + [{_id: 0}, {_id: 1}, {_id: 2}, {_id: 3}, {_id: 4}, {_id: 5}, {_id: 6}], + {query: {a: {$type: ["string", "regex"]}}, projection: {_id: 1}}, + [{_id: 13}, {_id: 14}, {_id: 15}], + true); + +// Test that $type is not auto-parameterized when the type set includes "array". +runTest({query: {a: {$type: ["string", "regex"]}}, projection: {_id: 1}}, + [{_id: 13}, {_id: 14}, {_id: 15}], + {query: {a: {$type: ["string", "array"]}}, projection: {_id: 1}}, + [{_id: 5}, {_id: 6}, {_id: 8}, {_id: 11}, {_id: 12}, {_id: 13}, {_id: 15}], + false); + +MongoRunner.stopMongod(conn); +}()); diff --git a/src/mongo/db/matcher/expression.h b/src/mongo/db/matcher/expression.h index 812781a3ad9..b844f33dd75 100644 --- a/src/mongo/db/matcher/expression.h +++ b/src/mongo/db/matcher/expression.h @@ -208,7 +208,7 @@ public: using Iterator = MatchExpressionIterator<false>; using ConstIterator = MatchExpressionIterator<true>; - using InputParamId = int64_t; + using InputParamId = int32_t; /** * Tracks the information needed to generate a document validation error for a diff --git a/src/mongo/db/pipeline/expression_walker.h b/src/mongo/db/pipeline/expression_walker.h index 6aec35ed7af..b492bd0ad2b 100644 --- a/src/mongo/db/pipeline/expression_walker.h +++ b/src/mongo/db/pipeline/expression_walker.h @@ -60,7 +60,7 @@ template <typename Node, typename Walker> inline constexpr auto hasVoidPreVisit = stdx::is_detected_exact_v<void, PreVisit, Walker, 
MaybeConstPtr<std::is_const_v<Node>, Node>>; /** - * hasVoidPreVisit is a template variable indicating whether such a pointer-returning member + * hasPtrPreVisit is a template variable indicating whether such a pointer-returning member * function exists for a given Walker type when called on a pointer to our Node type. */ template <typename Node, typename Walker> diff --git a/src/mongo/db/query/canonical_query.cpp b/src/mongo/db/query/canonical_query.cpp index ec8c798ca5e..82d496b1ba9 100644 --- a/src/mongo/db/query/canonical_query.cpp +++ b/src/mongo/db/query/canonical_query.cpp @@ -203,10 +203,11 @@ Status CanonicalQuery::init(OperationContext* opCtx, } auto unavailableMetadata = validStatus.getValue(); _root = MatchExpression::normalize(std::move(root)); - if (feature_flags::gFeatureFlagSbePlanCache.isEnabledAndIgnoreFCV()) { - // TODO SERVER-61421: Call 'MatchExpression::parameterize()' on '_root' in order to enable - // auto-parameterization. This cannot be done until the SBE plan cache code is prepared to - // deal with auto-parameterized queries. + if (feature_flags::gFeatureFlagSbePlanCache.isEnabledAndIgnoreFCV() && + feature_flags::gFeatureFlagAutoParameterization.isEnabledAndIgnoreFCV()) { + // Both the SBE plan cache and auto-parameterization are enabled. Add parameter markers to + // the appropriate match expression leaf nodes. + MatchExpression::parameterize(_root.get()); } // The tree must always be valid after normalization. 
dassert(isValid(_root.get(), *_findCommand).isOK()); diff --git a/src/mongo/db/query/canonical_query_encoder.cpp b/src/mongo/db/query/canonical_query_encoder.cpp index 4892e62fdc2..9bd348b20fe 100644 --- a/src/mongo/db/query/canonical_query_encoder.cpp +++ b/src/mongo/db/query/canonical_query_encoder.cpp @@ -29,16 +29,22 @@ #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery -#include "mongo/platform/basic.h" - #include "mongo/db/query/canonical_query_encoder.h" #include <boost/iterator/transform_iterator.hpp> #include "mongo/base/simple_string_data_comparator.h" #include "mongo/db/matcher/expression_array.h" +#include "mongo/db/matcher/expression_expr.h" #include "mongo/db/matcher/expression_geo.h" +#include "mongo/db/matcher/expression_text.h" +#include "mongo/db/matcher/expression_text_noop.h" +#include "mongo/db/matcher/expression_where.h" +#include "mongo/db/matcher/expression_where_noop.h" #include "mongo/db/query/projection.h" +#include "mongo/db/query/query_feature_flags_gen.h" +#include "mongo/db/query/query_knobs_gen.h" +#include "mongo/db/query/tree_walker.h" #include "mongo/logv2/log.h" #include "mongo/util/base64.h" @@ -78,11 +84,35 @@ const char kEncodeRegexFlagsSeparator = '/'; const char kEncodeSortSection = '~'; const char kEncodeEngineSection = '@'; +// These special bytes are used in the encoding of auto-parameterized match expressions in the SBE +// plan cache key. + +// Precedes the id number of a parameter marker. +const char kEncodeParamMarker = '?'; +// Precedes the encoding of a constant when that constant has not been auto-parameterized. The +// constant is typically encoded as a BSON type byte followed by a BSON value (without the +// BSONElement's field name). +const char kEncodeConstantLiteralMarker = ':'; + +/** + * AppendChar provides the compiler with a type for an "appendChar(...)" member function. 
+ */ +template <class BuilderType> +using AppendChar = decltype(std::declval<BuilderType>().appendChar(std::declval<char>())); + +/** + * hasAppendChar is a template variable indicating whether such a void-returning member function + * exists for a 'BuilderType'. + */ +template <typename BuilderType> +inline constexpr auto hasAppendChar = stdx::is_detected_exact_v<void, AppendChar, BuilderType>; + /** - * Encode user-provided string. Cache key delimiters seen in the - * user string are escaped with a backslash. + * Encode user-provided string. Cache key delimiters seen in the user string are escaped with a + * backslash. */ -void encodeUserString(StringData s, StringBuilder* keyBuilder) { +template <class BuilderType> +void encodeUserString(StringData s, BuilderType* builder) { for (size_t i = 0; i < s.size(); ++i) { char c = s[i]; switch (c) { @@ -95,11 +125,21 @@ void encodeUserString(StringData s, StringBuilder* keyBuilder) { case kEncodeRegexFlagsSeparator: case kEncodeSortSection: case kEncodeEngineSection: + case kEncodeParamMarker: + case kEncodeConstantLiteralMarker: case '\\': - *keyBuilder << '\\'; + if constexpr (hasAppendChar<BuilderType>) { + builder->appendChar('\\'); + } else { + *builder << '\\'; + } // Fall through to default case. default: - *keyBuilder << c; + if constexpr (hasAppendChar<BuilderType>) { + builder->appendChar(c); + } else { + *builder << c; + } } } } @@ -621,7 +661,380 @@ CanonicalQuery::QueryShapeString encode(const CanonicalQuery& cq) { return keyBuilder.str(); } +namespace { +/** + * A visitor intended for use in combination with the corresponding walker class below to encode a + * 'MatchExpression' into the SBE plan cache key. + * + * Handles potentially parameterized queries, in which case parameter markers are encoded into the + * cache key in place of the actual constant values. 
+ */ +class MatchExpressionSbePlanCacheKeySerializationVisitor final + : public MatchExpressionConstVisitor { +public: + explicit MatchExpressionSbePlanCacheKeySerializationVisitor(BufBuilder* builder) + : _builder(builder) { + invariant(_builder); + } + + void visit(const BitsAllClearMatchExpression* expr) final { + encodeBitTestExpression(expr); + } + void visit(const BitsAllSetMatchExpression* expr) final { + encodeBitTestExpression(expr); + } + void visit(const BitsAnyClearMatchExpression* expr) final { + encodeBitTestExpression(expr); + } + void visit(const BitsAnySetMatchExpression* expr) final { + encodeBitTestExpression(expr); + } + + void visit(const ExistsMatchExpression* expr) final { + encodeRhs(expr); + } + + void visit(const ExprMatchExpression* expr) final { + encodeFull(expr); + } + + void visit(const EqualityMatchExpression* expr) final { + encodeSingleParamPathNode(expr); + } + void visit(const GTEMatchExpression* expr) final { + encodeSingleParamPathNode(expr); + } + void visit(const GTMatchExpression* expr) final { + encodeSingleParamPathNode(expr); + } + void visit(const LTEMatchExpression* expr) final { + encodeSingleParamPathNode(expr); + } + void visit(const LTMatchExpression* expr) final { + encodeSingleParamPathNode(expr); + } + + void visit(const InMatchExpression* expr) final { + encodeSingleParamPathNode(expr); + } + + void visit(const ModMatchExpression* expr) final { + auto divisorParam = expr->getDivisorInputParamId(); + auto remainderParam = expr->getRemainderInputParamId(); + if (divisorParam) { + tassert(6142105, + "$mod expression had divisor param but not remainder param", + remainderParam); + encodeParamMarker(*divisorParam); + encodeParamMarker(*remainderParam); + } else { + // TODO SERVER-64137: remove this branch and assert the existence of both params once + // auto-parameterization flag is removed. 
+ tassert(6142106, + "$mod expression had remainder param but not divisor param", + !remainderParam); + encodeRhs(expr); + } + } + + void visit(const RegexMatchExpression* expr) final { + auto sourceRegexParam = expr->getSourceRegexInputParamId(); + auto compiledRegexParam = expr->getCompiledRegexInputParamId(); + if (sourceRegexParam) { + tassert(6142107, + "regex expression had source param but not compiled param", + compiledRegexParam); + encodeParamMarker(*sourceRegexParam); + encodeParamMarker(*compiledRegexParam); + } else { + // TODO SERVER-64137: remove this branch and assert the existence of both params once + // auto-parameterization flag is removed. + tassert(6142108, + "regex expression had compiled param but not source param", + !compiledRegexParam); + encodeRhs(expr); + } + } + + void visit(const SizeMatchExpression* expr) final { + encodeSingleParamPathNode(expr); + } + + void visit(const TextMatchExpression* expr) final { + encodeFull(expr); + } + void visit(const TextNoOpMatchExpression* expr) final { + encodeFull(expr); + } + + void visit(const TypeMatchExpression* expr) final { + encodeSingleParamPathNode(expr); + } + + void visit(const WhereMatchExpression* expr) final { + encodeSingleParamNode(expr); + } + void visit(const WhereNoOpMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142109); + } + + /** + * Nothing needs to be encoded for these nodes beyond their type, their path (if they have one), + * and their children. 
+ */ + void visit(const AlwaysFalseMatchExpression* expr) final {} + void visit(const AlwaysTrueMatchExpression* expr) final {} + void visit(const AndMatchExpression* expr) final {} + void visit(const ElemMatchObjectMatchExpression* matchExpr) final {} + void visit(const NorMatchExpression* expr) final {} + void visit(const NotMatchExpression* expr) final {} + void visit(const OrMatchExpression* expr) final {} + // The 'InternalExpr*' match expressions are generated internally from a $expr, so they do not + // need to contribute anything else to the cache key. + void visit(const InternalExprEqMatchExpression* expr) final {} + void visit(const InternalExprGTEMatchExpression* expr) final {} + void visit(const InternalExprGTMatchExpression* expr) final {} + void visit(const InternalExprLTEMatchExpression* expr) final {} + void visit(const InternalExprLTMatchExpression* expr) final {} + + /** + * These node types are not yet supported in SBE. + */ + void visit(const ElemMatchValueMatchExpression* matchExpr) final { + MONGO_UNREACHABLE_TASSERT(6142110); + } + void visit(const GeoMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142111); + } + void visit(const GeoNearMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142112); + } + void visit(const InternalBucketGeoWithinMatchExpression* expr) final { + // This is only used for time-series collections, but SBE isn't yet used for querying + // time-series collections. 
+ MONGO_UNREACHABLE_TASSERT(6142113); + } + void visit(const InternalSchemaAllElemMatchFromIndexMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142114); + } + void visit(const InternalSchemaAllowedPropertiesMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142115); + } + void visit(const InternalSchemaBinDataEncryptedTypeExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142116); + } + void visit(const InternalSchemaBinDataSubTypeExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142117); + } + void visit(const InternalSchemaCondMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142118); + } + void visit(const InternalSchemaEqMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142119); + } + void visit(const InternalSchemaFmodMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142120); + } + void visit(const InternalSchemaMatchArrayIndexMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142121); + } + void visit(const InternalSchemaMaxItemsMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142122); + } + void visit(const InternalSchemaMaxLengthMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142123); + } + void visit(const InternalSchemaMaxPropertiesMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142124); + } + void visit(const InternalSchemaMinItemsMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142125); + } + void visit(const InternalSchemaMinLengthMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142126); + } + void visit(const InternalSchemaMinPropertiesMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142127); + } + void visit(const InternalSchemaObjectMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142128); + } + void visit(const InternalSchemaRootDocEqMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142129); + } + void visit(const InternalSchemaTypeExpression* expr) final { + 
MONGO_UNREACHABLE_TASSERT(6142130); + } + void visit(const InternalSchemaUniqueItemsMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142131); + } + void visit(const InternalSchemaXorMatchExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142132); + } + // Used in the implementation of geoNear, which is not yet supported in SBE. + void visit(const TwoDPtInAnnulusExpression* expr) final { + MONGO_UNREACHABLE_TASSERT(6142133); + } + +private: + /** + * Encodes a 'PathMatchExpression' node of type T whose constant can be replaced with a single + * parameter marker. If the parameter marker is not present, encodes the node's BSON constant + * into the cache key. + */ + template <typename T, + typename = std::enable_if_t<std::is_convertible_v<T*, PathMatchExpression*>>> + void encodeSingleParamPathNode(const T* expr) { + if (expr->getInputParamId()) { + encodeParamMarker(*expr->getInputParamId()); + } else { + encodeRhs(expr); + } + } + + /** + * Encodes a non-path 'MatchExpression' node of type T whose constant can be replaced with a + * single parameter marker. If the parameter marker is not present, encodes the entire node into + * the cache key. + */ + template <typename T> + void encodeSingleParamNode(const T* expr) { + static_assert(!std::is_convertible_v<T*, PathMatchExpression*>); + if (expr->getInputParamId()) { + encodeParamMarker(*expr->getInputParamId()); + } else { + encodeFull(expr); + } + } + + void encodeBitTestExpression(const BitTestMatchExpression* expr) { + auto bitPositionsParam = expr->getBitPositionsParamId(); + auto bitMaskParam = expr->getBitMaskParamId(); + if (bitPositionsParam) { + tassert(6142100, + "bit-test expression had bit positions param but not bitmask param", + bitMaskParam); + encodeParamMarker(*bitPositionsParam); + encodeParamMarker(*bitMaskParam); + } else { + // TODO SERVER-64137: remove this branch and assert the existence of both params once + // auto-parameterization flag is removed. 
+ tassert(6142101, + "bit-test expression had bitmask param but not bit positions param", + !bitMaskParam); + encodeRhs(expr); + } + } + + /** + * Adds a special parameter marker byte to the cache key, followed by a four byte integer for + * the parameter id. + */ + void encodeParamMarker(MatchExpression::InputParamId paramId) { + _builder->appendChar(kEncodeParamMarker); + _builder->appendNum(paramId); + } + + /** + * For path match expressions which can be written as {"some.path": {$operator: <RHS>}}, encodes + * the right-hand side portion of the expression verbatim. Illegal to call if 'expr' has a + * parameter marker. + */ + void encodeRhs(const PathMatchExpression* expr) { + encodeHelper(expr->getSerializedRightHandSide()); + } + + /** + * Similar to 'encodeRhs()' above, but for non-path match expressions. In this case, rather than + * encode just the right-hand side, we call 'serialize()' to get a serialized version of the + * full expression, and encode the result into the plan cache key. Illegal to call if 'expr' has + * a parameter marker. + */ + void encodeFull(const MatchExpression* expr) { + encodeHelper(expr->serialize()); + } + + void encodeHelper(BSONObj toEncode) { + tassert(6142102, "expected object to encode to be non-empty", !toEncode.isEmpty()); + BSONObjIterator objIter{toEncode}; + BSONElement firstElem = objIter.next(); + tassert(6142103, "expected object to encode to have exactly one element", !objIter.more()); + encodeBsonValue(firstElem); + } + + /** + * Encodes a special byte to mark a constant, followed by a byte for the BSON type of 'elem', + * followed by the bytes of the value part of 'elem' (for types that have such a value). + * + * Note that the element's field name is not encoded, just the type and value. 
+ */ + void encodeBsonValue(BSONElement elem) { + _builder->appendChar(kEncodeConstantLiteralMarker); + _builder->appendChar(elem.type()); + _builder->appendBuf(elem.value(), elem.valuesize()); + } + + BufBuilder* const _builder; +}; + +/** + * A tree walker which walks a 'MatchExpression' tree and encodes the corresponding portion of the + * SBE plan cache key into 'builder'. + * + * Handles potentially parameterized queries, in which case parameter markers are encoded into the + * cache key in place of the actual constant values. + */ +class MatchExpressionSbePlanCacheKeySerializationWalker { +public: + explicit MatchExpressionSbePlanCacheKeySerializationWalker(BufBuilder* builder) + : _builder{builder}, _visitor{_builder} { + invariant(_builder); + } + + void preVisit(const MatchExpression* expr) { + // Encode the type of the node as well as the path (if there is a non-empty path). + _builder->appendStr(encodeMatchType(expr->matchType())); + encodeUserString(expr->path(), _builder); + + // The node encodes itself, and then its children. + expr->acceptVisitor(&_visitor); + + if (expr->numChildren() > 0) { + _builder->appendChar(kEncodeChildrenBegin); + } + } + + void inVisit(long count, const MatchExpression* expr) { + _builder->appendChar(kEncodeChildrenSeparator); + } + + void postVisit(const MatchExpression* expr) { + if (expr->numChildren() > 0) { + _builder->appendChar(kEncodeChildrenEnd); + } + } + +private: + BufBuilder* const _builder; + MatchExpressionSbePlanCacheKeySerializationVisitor _visitor; +}; + +/** + * Given a 'matchExpr' which may have parameter markers, encodes a key into 'builder' with the + * following property: Two match expression trees which are identical after auto-parameterization + * have the same key, otherwise the keys must differ. 
+ */ +void encodeKeyForAutoParameterizedMatchSBE(MatchExpression* matchExpr, BufBuilder* builder) { + MatchExpressionSbePlanCacheKeySerializationWalker walker{builder}; + tree_walker::walk<true, MatchExpression>(matchExpr, &walker); +} +} // namespace + std::string encodeSBE(const CanonicalQuery& cq) { + tassert(6142104, + "attempting to encode SBE plan cache key for SBE-incompatible query", + cq.isSbeCompatible()); + const auto& filter = cq.getQueryObj(); const auto& proj = cq.getFindCommandRequest().getProjection(); const auto& sort = cq.getFindCommandRequest().getSort(); @@ -639,7 +1052,14 @@ std::string encodeSBE(const CanonicalQuery& cq) { kBufferSizeConstant + (let ? let->objsize() : 0); BufBuilder bufBuilder(bufSize); - bufBuilder.appendBuf(filter.objdata(), filter.objsize()); + if (feature_flags::gFeatureFlagAutoParameterization.isEnabledAndIgnoreFCV()) { + encodeKeyForAutoParameterizedMatchSBE(cq.root(), &bufBuilder); + } else { + // When auto-parameterization is off, just add the entire filter BSON to the cache key, + // including any constants. + bufBuilder.appendBuf(filter.objdata(), filter.objsize()); + } + bufBuilder.appendBuf(proj.objdata(), proj.objsize()); // TODO SERVER-62100: No need to encode the entire "let" object. if (let) { diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index 797a61f064e..106889832e8 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -281,6 +281,13 @@ void fillOutIndexEntries(OperationContext* opCtx, CanonicalQuery* canonicalQuery, const CollectionPtr& collection, std::vector<IndexEntry>& entries) { + // TODO SERVER-63352: Eliminate this check once we support auto-parameterized index scan plans. + if (feature_flags::gFeatureFlagAutoParameterization.isEnabledAndIgnoreFCV()) { + // Indexed plans are not yet supported when auto-parameterization is enabled, so make it + // look to the planner like there are no indexes. 
+ return; + } + auto ii = collection->getIndexCatalog()->getIndexIterator(opCtx, false); while (ii->more()) { const IndexCatalogEntry* ice = ii->next(); @@ -984,6 +991,15 @@ protected: invariant(descriptor); invariant(plannerParams); + // Auto-parameterization currently only works for collection scan plans, but idhack plans + // use the _id index. Therefore, we inhibit idhack when auto-parameterization is enabled. + // + // TODO SERVER-64237: Eliminate this check once we support auto-parameterized ID hack + // plans. + if (feature_flags::gFeatureFlagAutoParameterization.isEnabledAndIgnoreFCV()) { + return nullptr; + } + tassert(5536100, "SBE cannot handle query with metadata", !_cq->metadataDeps()[DocumentMetadataFields::kSortKey]); diff --git a/src/mongo/db/query/query_feature_flags.idl b/src/mongo/db/query/query_feature_flags.idl index b3565da4f5f..b60ea4d04cc 100644 --- a/src/mongo/db/query/query_feature_flags.idl +++ b/src/mongo/db/query/query_feature_flags.idl @@ -82,6 +82,13 @@ feature_flags: cpp_varname: gFeatureFlagSbePlanCache default: false + featureFlagAutoParameterization: + description: "Feature flag for enabling auto-parameterization of match expressions. This + feature is used to store auto-parameterized plans in the SBE plan cache, so this flag is only + meaningful to turn on in combination with 'featureFlagSbePlanCache'." + cpp_varname: gFeatureFlagAutoParameterization + default: false + featureFlagSortArray: description: "Feature flag for allowing use of the $sortArray aggregation expression" cpp_varname: gFeatureFlagSortArray diff --git a/src/mongo/db/query/sbe_stage_builder.h b/src/mongo/db/query/sbe_stage_builder.h index f5a0904a773..852f187c6f0 100644 --- a/src/mongo/db/query/sbe_stage_builder.h +++ b/src/mongo/db/query/sbe_stage_builder.h @@ -334,6 +334,7 @@ private: } else { debugInfo.reset(); } + inputParamToSlotMap = other.inputParamToSlotMap; } }; |