diff options
author | Ben Shteinfeld <ben.shteinfeld@mongodb.com> | 2023-04-10 21:30:02 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-04-10 23:29:42 +0000 |
commit | 9fd56c17736f2e6557e5a91c6d607040ec191289 (patch) | |
tree | 103a2eeb801619e09f6f8e9dce362f07f4b155c4 | |
parent | ae16cf75e35a7a6e0dc0da817044a55eaee3e9c1 (diff) | |
download | mongo-9fd56c17736f2e6557e5a91c6d607040ec191289.tar.gz |
SERVER-72814 Add $_internalEqHash MatchExpression
25 files changed, 620 insertions, 3 deletions
diff --git a/jstests/core/query/internal_hash_eq/expr_rewrites.js b/jstests/core/query/internal_hash_eq/expr_rewrites.js new file mode 100644 index 00000000000..b95284dc9c6 --- /dev/null +++ b/jstests/core/query/internal_hash_eq/expr_rewrites.js @@ -0,0 +1,169 @@ +/** + * Tests that $expr with equality of $toHashedIndexKey to a NumberLong results in an IXSCAN plan + * with a point bound. This is because we rewrite this structure to a $_internalEqHash expression + * and generate a tight index bound. + * @tags: [requires_fcv_70] + */ +(function() { +"use strict"; + +load("jstests/libs/analyze_plan.js"); + +const collName = jsTestName(); +const coll = db.getCollection(collName); + +/** + * Helper function to get the index hash of given field in the row matching the given filterSpec. + * @param {Collection} coll + * @param {Object} filterSpec - Object representing an equality predicate. Expected to uniquely + * identify a single document. + * @param {String} field - Name of the field which has a hashed index defined. + * @param {Object} indexSpec - Object representing the index specification. Must have field as a + * hashed index. + */ +function getHash(coll, filterSpec, field, indexSpec) { + const res = coll.aggregate( + [ + {$match: filterSpec}, + {$set: {hashVal: {$let: {vars: {key: {$meta: "indexKey"}}, in : `$$key.${field}`}}}}, + ], + {hint: indexSpec}); + return res.toArray()[0].hashVal; +} + +/** + * Given the explain output of a plan, assert that the plan used an index scan, the given index and + * the number of keys examined. + * @param {Object} explainPlan - Output of explain run with 'executionStats'. + * @param {Object} expectedIndexSpec - The expected key pattern of the index scan. + * @param {int} expectedKeysExamined - The expected number of keys in the index that were examined. + */ +function assertExplainIxscan(explainPlan, expectedIndexSpec, expectedKeysExamined = 1) { + assert(isIxscan(db, explainPlan), explainPlan); + let execStages = getExecutionStages(explainPlan); + execStages.forEach(execStage => { + if (execStage.stage == "SHARDING_FILTER" && execStage.nReturned == 0) { + return; + } + let ixscan = getPlanStages(execStage, "IXSCAN")[0]; + jsTestLog(ixscan); + assert.eq(expectedIndexSpec, ixscan.keyPattern); + assert.eq(expectedKeysExamined, ixscan.keysExamined); + }); +} + +(function testSingleFieldHashedIndex() { + coll.drop(); + assert.commandWorked(coll.insert([ + {_id: 1, a: "foo"}, + {_id: 2, a: 123}, + {_id: 3, a: new Date()}, + ])); + const indexSpec = {a: "hashed"}; + assert.commandWorked(coll.createIndex(indexSpec)); + + const hash = getHash(coll, {a: "foo"}, "a", indexSpec); + const testQuery = {$expr: {$eq: [{$toHashedIndexKey: '$a'}, hash]}}; + const explainPlan = coll.find(testQuery).hint(indexSpec).explain("executionStats"); + + assertExplainIxscan(explainPlan, indexSpec); +})(); + +(function testCompoundIndexFirstFieldHashed() { + coll.drop(); + assert.commandWorked(coll.insert([ + {_id: 1, a: 10, b: "foo"}, + {_id: 2, a: 20, b: "bar"}, + {_id: 3, a: 30, b: "baz"}, + ])); + const indexSpec = {a: "hashed", b: 1}; + assert.commandWorked(coll.createIndex(indexSpec)); + + const hash = getHash(coll, {a: 10}, "a", indexSpec); + const testQuery = { + $and: [ + {$expr: {$eq: [{$toHashedIndexKey: '$a'}, hash]}}, + {$expr: {$eq: ['$b', "foo"]}}, + ] + }; + const explainPlan = + coll.explain("executionStats").aggregate([{$match: testQuery}], {hint: indexSpec}); + + assertExplainIxscan(explainPlan, indexSpec); +})(); + +(function testCompoundIndexLastFieldHashed() { + coll.drop(); + assert.commandWorked(coll.insert([ + {_id: 1, a: 10, b: "foo"}, + {_id: 2, a: 20, b: "bar"}, + {_id: 3, a: 30, b: "baz"}, + ])); + const indexSpec = {b: 1, a: "hashed"}; + assert.commandWorked(coll.createIndex(indexSpec)); + + const hash = getHash(coll, {a: 10}, "a", indexSpec); + const testQuery = { + $and: [ + {$expr: {$eq: [{$toHashedIndexKey: '$a'}, hash]}}, + {$expr: {$eq: ['$b', "foo"]}}, + ] + }; + const explainPlan = + coll.explain("executionStats").aggregate([{$match: testQuery}], {hint: indexSpec}); + + assertExplainIxscan(explainPlan, indexSpec); +})(); + +(function testCompoundIndexMiddleFieldHashed() { + coll.drop(); + assert.commandWorked(coll.insert([ + {_id: 1, a: 10, b: "foo", c: "fred"}, + {_id: 2, a: 20, b: "bar", c: "waldo"}, + {_id: 3, a: 30, b: "baz", c: "thud"}, + ])); + const indexSpec = {b: 1, a: "hashed", c: 1}; + assert.commandWorked(coll.createIndex(indexSpec)); + + const hash = getHash(coll, {a: 10}, "a", indexSpec); + const testQuery = { + $and: [ + {$expr: {$eq: [{$toHashedIndexKey: '$a'}, hash]}}, + {$expr: {$eq: ['$b', "foo"]}}, + {$expr: {$eq: ['$c', "fred"]}}, + ] + }; + const explainPlan = + coll.explain("executionStats").aggregate([{$match: testQuery}], {hint: indexSpec}); + + assertExplainIxscan(explainPlan, indexSpec); +})(); + +(function testNoHashedIndex() { + coll.drop(); + coll.dropIndexes(); + assert.commandWorked(coll.insert([ + {_id: 1, a: 10}, + {_id: 2, a: 20}, + {_id: 3, a: 30}, + ])); + const hashIndexSpec = {a: 1}; + assert.commandWorked(coll.createIndex(hashIndexSpec)); + const hash = getHash(coll, {a: 10}, "a", hashIndexSpec); + + coll.dropIndexes(); + + const indexSpec = {a: 1}; + assert.commandWorked(coll.createIndex(indexSpec)); + + const testQuery = {$expr: {$eq: [{$toHashedIndexKey: '$a'}, hash]}}; + // Note that this query is hinting a bad index. Since it is not hashed, the plan degenerates + // into a IXSCAN + FETCH, whereas if we didn't hint the index, the plan enumerator wouldn't have + // even considered the '{a: 1}' index as eligible. + const explainPlan = + coll.explain("executionStats").aggregate([{$match: testQuery}], {hint: indexSpec}); + + // We couldn't create a tight bound for the index scan as the index is not hashed. + assertExplainIxscan(explainPlan, indexSpec, 3 /* keyExamined */); +})(); +})();
\ No newline at end of file diff --git a/jstests/core/query/internal_hash_eq/lookup_using_hash_key.js b/jstests/core/query/internal_hash_eq/lookup_using_hash_key.js new file mode 100644 index 00000000000..8e462651337 --- /dev/null +++ b/jstests/core/query/internal_hash_eq/lookup_using_hash_key.js @@ -0,0 +1,49 @@ +/** + * Tests that the combination of $meta: "indexKey", $lookup, and the $toHashedIndexKey expressions + * can be used to look up values by their hash key. + * + * This is expected to work by optimizing into an $_internalEqHash match expression which can be + * used to seek the hashed index. + * @tags: [ + * does_not_support_transactions, + * requires_fcv_70, + * ] + */ +(function() { +"use strict"; + +load("jstests/libs/analyze_plan.js"); // For 'isCollscan()' and similar. +load("jstests/aggregation/extras/utils.js"); // For 'resultsEq().' + +const coll = db.lookup_using_hash_key; + +coll.drop(); + +const allDocs = [{_id: 0}, {_id: 1, a: 3}, {_id: 2, a: "3"}]; +assert.commandWorked(coll.insert(allDocs)); +assert.commandWorked(coll.createIndex({a: "hashed"})); + +let results = coll.aggregate( + [ + {$set: { + hashVal: { + $let: { + vars: {key: {$meta: "indexKey"}}, + in: "$$key.a" + } + } + }}, + {$lookup: { + from: coll.getName(), + let: {correlated_hash: "$hashVal"}, + as: "relookup", + pipeline: [{$match: {$expr: {$eq: [{$toHashedIndexKey: "$a"}, "$$correlated_hash"]}}}] + }}, + {$unset: "hashVal"}, + ], + {hint: {a: "hashed"}}).toArray(); + +// We essentially just looked up ourselves for each document. +let expected = allDocs.map(doc => Object.merge(doc, {relookup: [doc]})); +assert(resultsEq(results, expected, true), [results, expected]); +}()); diff --git a/jstests/core/query/internal_hash_eq/match_internal_eq_hash.js b/jstests/core/query/internal_hash_eq/match_internal_eq_hash.js new file mode 100644 index 00000000000..d5135f57587 --- /dev/null +++ b/jstests/core/query/internal_hash_eq/match_internal_eq_hash.js @@ -0,0 +1,148 @@ +/** + * Basic tests for the $_internalEqHash match expression. + * @tags: [requires_fcv_70] + */ +(function() { +"use strict"; + +load("jstests/libs/analyze_plan.js"); // For 'isCollscan()' and similar. +load("jstests/aggregation/extras/utils.js"); // For 'resultsEq().' + +const coll = db.match_internal_eq_hash; + +(function testTopLevel() { + coll.drop(); + + assert.commandWorked(coll.insert([ + {_id: 0}, + {_id: 1, a: 1}, + {_id: 2, a: NumberLong(1)}, + {_id: 3, a: "1"}, + {_id: 4, a: null} + ])); + + // Test that the expression works without an index - just doesn't crash or match anything in + // this case. + assert.eq(coll.find({a: {$_internalEqHash: NumberLong(0)}}).toArray(), + [], + "Expected nothing to hash to 0"); + let explainPlan = coll.find({a: {$_internalEqHash: NumberLong(0)}}).explain(); + assert(isCollscan(db, explainPlan)); + + // Test that the expression works with a hashed index. + assert.commandWorked(coll.createIndex({a: "hashed"})); + const doc = coll.findOne({a: 1}, {key: {$meta: "indexKey"}}, {hint: {a: "hashed"}}); + jsTestLog(coll.find({}, {key: {$meta: "indexKey"}}, 0, 0, {hint: {a: "hashed"}}).explain()); + jsTestLog(doc); + const hashOfInterest = doc.key.a; + + const testQuery = {a: {$_internalEqHash: hashOfInterest}}; + assert(resultsEq(coll.find(testQuery).toArray(), [{_id: 1, a: 1}, {_id: 2, a: NumberLong(1)}])); + explainPlan = coll.find(testQuery).explain(); + assert(isIxscan(db, explainPlan), explainPlan); + + // Make sure that multikey hashed indexes are not supported. If they are someday, then test + // should be modified to ensure this expression still works. + assert.commandFailedWithCode(coll.insert({a: [1]}), 16766); + assert.commandFailedWithCode(coll.insert({a: [0, 1, 2, 3]}), 16766); + + // Now drop the index and use a collscan again - should get the same results. + assert.commandWorked(coll.dropIndex({a: "hashed"})); + explainPlan = coll.find(testQuery, {_id: 1}).explain(); + assert(isCollscan(db, explainPlan)); + assert(resultsEq(coll.find(testQuery).toArray(), [{_id: 1, a: 1}, {_id: 2, a: NumberLong(1)}])); + + // Now add a compound hashed index and test it can still work on a leading hash component. + assert.commandWorked(coll.createIndex({a: "hashed", b: 1})); + + assert(resultsEq(coll.find(testQuery).toArray(), [{_id: 1, a: 1}, {_id: 2, a: NumberLong(1)}])); + explainPlan = coll.find(testQuery).explain(); + assert(isIxscan(db, explainPlan), explainPlan); + + // Now add a compound hashed index and test it cannot work on a trailing hash component (could + // not effectively seek). + assert.commandWorked(coll.dropIndex({a: "hashed", b: 1})); + assert.commandWorked(coll.createIndex({b: 1, a: "hashed"})); + + assert(resultsEq(coll.find(testQuery).toArray(), [{_id: 1, a: 1}, {_id: 2, a: NumberLong(1)}])); + explainPlan = coll.find(testQuery).explain(); + assert(isCollscan(db, explainPlan), explainPlan); + + // Now add a non-hashed index and test it still works correctly but does not mistakenly try to + // use the non-hashed index. It should still do a collection scan. + assert.commandWorked(coll.dropIndex({b: 1, a: "hashed"})); + assert.commandWorked(coll.createIndex({a: 1})); + + assert(resultsEq(coll.find(testQuery).toArray(), [{_id: 1, a: 1}, {_id: 2, a: NumberLong(1)}])); + explainPlan = coll.find(testQuery).explain(); + assert(isCollscan(db, explainPlan), explainPlan); +}()); + +(function testDotted() { + coll.drop(); + + assert.commandWorked(coll.insert([ + {}, + {a: 1}, + {a: {}}, + {a: {b: 1}}, + {a: {b: {c: NumberDecimal("1.0")}}}, + {a: {b: {c: 2}}}, + {"a.b.c": 1}, + ])); + + assert.commandWorked(coll.createIndex({"a.b.c": "hashed"})); + const hashOfInterest = + coll.findOne({"a.b.c": 1}, {key: {$meta: "indexKey"}}, {hint: {"a.b.c": "hashed"}}) + .key["a.b.c"]; + const testQuery = {"a.b.c": {$_internalEqHash: hashOfInterest}}; + assert( + resultsEq(coll.find(testQuery, {_id: 0}).toArray(), [{a: {b: {c: NumberDecimal("1.0")}}}])); + + let explainPlan = coll.find(testQuery).explain(); + assert(isIxscan(db, explainPlan), explainPlan); + + // Now drop the index and use a collscan again - should get the same results. + assert.commandWorked(coll.dropIndex({"a.b.c": "hashed"})); + explainPlan = coll.find(testQuery, {_id: 1}).explain(); + assert(isCollscan(db, explainPlan)); + assert( + resultsEq(coll.find(testQuery, {_id: 0}).toArray(), [{a: {b: {c: NumberDecimal("1.0")}}}])); +}()); + +(function testInvalidTypes() { + coll.drop(); + + assert.commandWorked(coll.createIndex({a: "hashed"})); + const invalidBsonTypes = [ + MinKey, + NaN, + -Infinity, + 0.0, + Infinity, + "not a number", + {}, + {"embedded": "object"}, + [], + [1], + [1, 2, 3], + null, + BinData(0, 'asdf'), + UUID("326d92af-2d76-452b-a03f-69f05ab98416"), + undefined, + ObjectId("62d05ec744ca83616c92772c"), + false, + true, + ISODate("2023-03-28T18:34:28.937Z"), + /a/, + function inc(x) { + return x + 1; + }, + MaxKey, + ]; + invalidBsonTypes.forEach(function(v) { + assert.commandFailedWithCode( + db.runCommand({find: "match_internal_eq_hash", filter: {a: {$_internalEqHash: v}}}), 2); + }); +})(); +}()); diff --git a/jstests/libs/analyze_plan.js b/jstests/libs/analyze_plan.js index eea90ac4994..308119d5262 100644 --- a/jstests/libs/analyze_plan.js +++ b/jstests/libs/analyze_plan.js @@ -185,7 +185,8 @@ function hasRejectedPlans(root) { * Returns an array of execution stages from the given replset or sharded explain output. */ function getExecutionStages(root) { - if (root.executionStats.executionStages.hasOwnProperty("shards")) { + if (root.hasOwnProperty("executionStats") && + root.executionStats.executionStages.hasOwnProperty("shards")) { const executionStages = []; for (let shard of root.executionStats.executionStages.shards) { executionStages.push(Object.assign( @@ -194,6 +195,13 @@ function getExecutionStages(root) { } return executionStages; } + if (root.hasOwnProperty("shards")) { + const executionStages = []; + for (const shard in root.shards) { + executionStages.push(root.shards[shard].executionStats.executionStages); + } + return executionStages; + } return [root.executionStats.executionStages]; } diff --git a/src/mongo/db/matcher/doc_validation_error.cpp b/src/mongo/db/matcher/doc_validation_error.cpp index 971c8190592..fe301c9aa43 100644 --- a/src/mongo/db/matcher/doc_validation_error.cpp +++ b/src/mongo/db/matcher/doc_validation_error.cpp @@ -824,6 +824,7 @@ public: void visit(const InternalExprGTEMatchExpression* expr) final {} void visit(const InternalExprLTMatchExpression* expr) final {} void visit(const InternalExprLTEMatchExpression* expr) final {} + void visit(const InternalEqHashedKey* expr) final {} void visit(const InternalSchemaAllElemMatchFromIndexMatchExpression* expr) final { switch (toItemsKeywordType(*expr)) { case ItemsKeywordType::kItems: { @@ -1843,6 +1844,7 @@ public: void visit(const InternalExprGTEMatchExpression* expr) final {} void visit(const InternalExprLTMatchExpression* expr) final {} void visit(const InternalExprLTEMatchExpression* expr) final {} + void visit(const InternalEqHashedKey* expr) final {} void visit(const InternalSchemaAllElemMatchFromIndexMatchExpression* expr) final {} void visit(const InternalSchemaAllowedPropertiesMatchExpression* expr) final { if (_context->shouldGenerateError(*expr)) { @@ -2058,6 +2060,7 @@ public: void visit(const InternalExprGTEMatchExpression* expr) final {} void visit(const InternalExprLTMatchExpression* expr) final {} void visit(const InternalExprLTEMatchExpression* expr) final {} + void visit(const InternalEqHashedKey* expr) final {} void visit(const InternalSchemaAllElemMatchFromIndexMatchExpression* expr) final { switch (toItemsKeywordType(*expr)) { case ItemsKeywordType::kItems: diff --git a/src/mongo/db/matcher/expression.h b/src/mongo/db/matcher/expression.h index 0b040a657b2..0976404646d 100644 --- a/src/mongo/db/matcher/expression.h +++ b/src/mongo/db/matcher/expression.h @@ -119,6 +119,9 @@ public: INTERNAL_EXPR_LT, INTERNAL_EXPR_LTE, + // Used to represent the comparison to a hashed index key value. + INTERNAL_EQ_HASHED_KEY, + // JSON Schema expressions. INTERNAL_SCHEMA_ALLOWED_PROPERTIES, INTERNAL_SCHEMA_ALL_ELEM_MATCH_FROM_INDEX, diff --git a/src/mongo/db/matcher/expression_algo.cpp b/src/mongo/db/matcher/expression_algo.cpp index 0ba3448ade5..d44c8b88b58 100644 --- a/src/mongo/db/matcher/expression_algo.cpp +++ b/src/mongo/db/matcher/expression_algo.cpp @@ -608,6 +608,7 @@ std::unique_ptr<MatchExpression> splitMatchExpressionForColumns( case MatchExpression::INTERNAL_EXPR_GTE: case MatchExpression::INTERNAL_EXPR_LT: case MatchExpression::INTERNAL_EXPR_LTE: + case MatchExpression::INTERNAL_EQ_HASHED_KEY: case MatchExpression::INTERNAL_SCHEMA_ALLOWED_PROPERTIES: case MatchExpression::INTERNAL_SCHEMA_ALL_ELEM_MATCH_FROM_INDEX: case MatchExpression::INTERNAL_SCHEMA_BIN_DATA_ENCRYPTED_TYPE: diff --git a/src/mongo/db/matcher/expression_expr.cpp b/src/mongo/db/matcher/expression_expr.cpp index 7eae4170ee5..9a8ad5b18e9 100644 --- a/src/mongo/db/matcher/expression_expr.cpp +++ b/src/mongo/db/matcher/expression_expr.cpp @@ -27,9 +27,12 @@ * it in the license file. */ +#include "mongo/db/matcher/expression_visitor.h" +#include "mongo/db/pipeline/expression.h" #include "mongo/platform/basic.h" #include "mongo/db/matcher/expression_expr.h" +#include "mongo/db/matcher/expression_internal_eq_hashed_key.h" #include "mongo/util/fail_point.h" @@ -131,6 +134,49 @@ bool ExprMatchExpression::isTriviallyTrue() const { return exprConst && exprConst->getValue().coerceToBool(); } +namespace { + +// Return nullptr on failure. +std::unique_ptr<MatchExpression> attemptToRewriteEqHash(ExprMatchExpression& expr) { + auto childExpr = expr.getExpression(); + + // Looking for: + // $eq + // $toHashedIndexKey {$const: NumberLong(?)} + // "$a" + // + // Where "a" can be any field path and ? can be any number. + if (auto eq = dynamic_cast<ExpressionCompare*>(childExpr.get()); + eq && eq->getOp() == ExpressionCompare::CmpOp::EQ) { + auto children = eq->getChildren(); + tassert(7281406, "should have 2 $eq children", children.size() == 2ul); + + auto eqFirst = children[0].get(); + auto eqSecond = children[1].get(); + if (auto hashingExpr = dynamic_cast<ExpressionToHashedIndexKey*>(eqFirst)) { + // Matched $toHashedIndexKey - keep going. + tassert(7281407, + "should have 1 $toHashedIndexKey child", + hashingExpr->getChildren().size() == 1ul); + auto hashChild = hashingExpr->getChildren()[0].get(); + + if (auto fieldPath = dynamic_cast<ExpressionFieldPath*>(hashChild); + fieldPath && !fieldPath->isVariableReference() && !fieldPath->isROOT()) { + auto path = fieldPath->getFieldPathWithoutCurrentPrefix(); + + // Matched "$a" in the example above! Now look for the constant long: + if (auto constant = dynamic_cast<ExpressionConstant*>(eqSecond); + constant && constant->getValue().getType() == BSONType::NumberLong) { + long long hashTarget = constant->getValue().getLong(); + return std::make_unique<InternalEqHashedKey>(path.fullPath(), hashTarget); + } + } + } + } + return nullptr; +} +} // namespace + MatchExpression::ExpressionOptimizerFunc ExprMatchExpression::getOptimizer() const { return [](std::unique_ptr<MatchExpression> expression) { auto& exprMatchExpr = static_cast<ExprMatchExpression&>(*expression); @@ -144,6 +190,10 @@ MatchExpression::ExpressionOptimizerFunc ExprMatchExpression::getOptimizer() con } exprMatchExpr._expression = exprMatchExpr._expression->optimize(); + if (auto successfulEqHashRewrite = attemptToRewriteEqHash(exprMatchExpr)) { + return successfulEqHashRewrite; + } + exprMatchExpr._rewriteResult = RewriteExpr::rewrite(exprMatchExpr._expression, exprMatchExpr._expCtx->getCollator()); diff --git a/src/mongo/db/matcher/expression_internal_eq_hashed_key.h b/src/mongo/db/matcher/expression_internal_eq_hashed_key.h new file mode 100644 index 00000000000..58ab69eea1e --- /dev/null +++ b/src/mongo/db/matcher/expression_internal_eq_hashed_key.h @@ -0,0 +1,104 @@ +/** + * Copyright (C) 2023-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/bson/dotted_path_support.h" +#include "mongo/db/hasher.h" +#include "mongo/db/matcher/expression_leaf.h" + +namespace mongo { + +/** + * An internal match expression node which represents a field path's hash being equal to the given + * NumberLong constant. This expression is not intended to be written directly by users, but rather + * generated by a rewrite of {$expr: {$eq: [{$toHashedIndexKey: '$fieldPath'}, NumberLong(123)]}}. + * The purpose of this is to allow us to generate index bounds on a hashed index to satisfy such a + * query. + */ +class InternalEqHashedKey : public ComparisonMatchExpressionBase { +public: + static constexpr StringData kName = "$_internalEqHash"_sd; + + InternalEqHashedKey(boost::optional<StringData> path, long long hashVal) + : ComparisonMatchExpressionBase(MatchType::INTERNAL_EQ_HASHED_KEY, + path, + Value(hashVal), + // Match the agg expression semantics for traversals. + ElementPath::LeafArrayBehavior::kNoTraversal, + ElementPath::NonLeafArrayBehavior::kMatchSubpath) {} + + InternalEqHashedKey(std::string path, long long hashVal) + : InternalEqHashedKey(boost::optional<StringData>(path), hashVal) {} + + InternalEqHashedKey(boost::optional<StringData> path, BSONElement value) + // Checking the type should happen earlier during parsing. + : InternalEqHashedKey(path, value.numberLong()) {} + + bool matchesSingleElement(const BSONElement& elem, MatchDetails* details) const final { + tassert(7281401, "hashed value must be a long", _rhs.type() == BSONType::NumberLong); + const auto hashVal = BSONElementHasher::hash64(elem, BSONElementHasher::DEFAULT_HASH_SEED); + return hashVal == _rhs.numberLong(); + }; + + std::unique_ptr<MatchExpression> clone() const final { + auto clone = std::make_unique<InternalEqHashedKey>(path(), _rhs); + clone->setCollator(_collator); + if (getTag()) { + clone->setTag(getTag()->clone()); + } + return clone; + } + + bool matches(const MatchableDocument* doc, MatchDetails* details = nullptr) const override { + // Sadly, we need to match EOO elements to null index keys, as a special case. + if (!dotted_path_support::extractElementAtPath(doc->toBSON(), path())) { + return BSONElementHasher::hash64(BSON("" << BSONNULL).firstElement(), + BSONElementHasher::DEFAULT_HASH_SEED) == + _rhs.numberLong(); + } + + // Otherwise, we let this traversal work for us. + return PathMatchExpression::matches(doc, details); + } + + StringData name() const final { + return kName; + } + + void acceptVisitor(MatchExpressionMutableVisitor* visitor) final { + visitor->visit(this); + } + + void acceptVisitor(MatchExpressionConstVisitor* visitor) const final { + visitor->visit(this); + } +}; + +} // namespace mongo diff --git a/src/mongo/db/matcher/expression_parameterization.h b/src/mongo/db/matcher/expression_parameterization.h index 194b6dfd4c4..79a12362c9c 100644 --- a/src/mongo/db/matcher/expression_parameterization.h +++ b/src/mongo/db/matcher/expression_parameterization.h @@ -114,6 +114,9 @@ public: void visit(InternalExprGTEMatchExpression* expr) final {} void visit(InternalExprLTMatchExpression* expr) final {} void visit(InternalExprLTEMatchExpression* expr) final {} + void visit(InternalEqHashedKey* expr) final { + // Don't support parameterization of InternEqHashedKey because it is not implemented in SBE. + } void visit(InternalSchemaAllElemMatchFromIndexMatchExpression* expr) final {} void visit(InternalSchemaAllowedPropertiesMatchExpression* expr) final {} void visit(InternalSchemaBinDataEncryptedTypeExpression* expr) final {} diff --git a/src/mongo/db/matcher/expression_parameterization_test.cpp b/src/mongo/db/matcher/expression_parameterization_test.cpp index c8bd2acbc77..8f5a64ccddc 100644 --- a/src/mongo/db/matcher/expression_parameterization_test.cpp +++ b/src/mongo/db/matcher/expression_parameterization_test.cpp @@ -83,6 +83,7 @@ public: countParam(expr->getInputParamId()); } void visit(const InternalBucketGeoWithinMatchExpression* expr) final {} + void visit(const InternalEqHashedKey*) final {} void visit(const InternalExprEqMatchExpression* expr) final {} void visit(const InternalExprGTMatchExpression* expr) final {} void visit(const InternalExprGTEMatchExpression* expr) final {} diff --git a/src/mongo/db/matcher/expression_parser.cpp b/src/mongo/db/matcher/expression_parser.cpp index d5cccd9c3ae..cca4c984f79 100644 --- a/src/mongo/db/matcher/expression_parser.cpp +++ b/src/mongo/db/matcher/expression_parser.cpp @@ -44,6 +44,7 @@ #include "mongo/db/matcher/expression_expr.h" #include "mongo/db/matcher/expression_geo.h" #include "mongo/db/matcher/expression_internal_bucket_geo_within.h" +#include "mongo/db/matcher/expression_internal_eq_hashed_key.h" #include "mongo/db/matcher/expression_internal_expr_comparison.h" #include "mongo/db/matcher/expression_leaf.h" #include "mongo/db/matcher/expression_tree.h" @@ -1871,6 +1872,21 @@ StatusWithMatchExpression parseSubField(const BSONObj& context, return {std::move(exprLteExpr)}; } + case PathAcceptingKeyword::INTERNAL_EQ_HASHED_KEY: { + if (e.type() != BSONType::NumberLong) { + return {Status(ErrorCodes::BadValue, + str::stream() + << InternalEqHashedKey::kName + << " can only be used to compare to NumberLong. Was given: " + << typeName(e.type()))}; + } + + auto exprEqHash = std::make_unique<InternalEqHashedKey>(name, e); + exprEqHash->setCollator(expCtx->getCollator()); + expCtx->sbeCompatibility = SbeCompatibility::notCompatible; + return {std::move(exprEqHash)}; + } + // Handles bitwise query operators. case PathAcceptingKeyword::BITS_ALL_SET: { return parseBitTest<BitsAllSetMatchExpression>(name, e, expCtx); @@ -2181,6 +2197,7 @@ MONGO_INITIALIZER(MatchExpressionParser)(InitializerContext* context) { {"_internalExprGte", PathAcceptingKeyword::INTERNAL_EXPR_GTE}, {"_internalExprLt", PathAcceptingKeyword::INTERNAL_EXPR_LT}, {"_internalExprLte", PathAcceptingKeyword::INTERNAL_EXPR_LTE}, + {"_internalEqHash", PathAcceptingKeyword::INTERNAL_EQ_HASHED_KEY}, {"_internalSchemaAllElemMatchFromIndex", PathAcceptingKeyword::INTERNAL_SCHEMA_ALL_ELEM_MATCH_FROM_INDEX}, {"_internalSchemaBinDataEncryptedType", diff --git a/src/mongo/db/matcher/expression_parser.h b/src/mongo/db/matcher/expression_parser.h index 101a7e10332..038e9eb12ec 100644 --- a/src/mongo/db/matcher/expression_parser.h +++ b/src/mongo/db/matcher/expression_parser.h @@ -66,6 +66,7 @@ enum class PathAcceptingKeyword { INTERNAL_EXPR_GTE, INTERNAL_EXPR_LT, INTERNAL_EXPR_LTE, + INTERNAL_EQ_HASHED_KEY, INTERNAL_SCHEMA_ALL_ELEM_MATCH_FROM_INDEX, INTERNAL_SCHEMA_BIN_DATA_ENCRYPTED_TYPE, INTERNAL_SCHEMA_BIN_DATA_SUBTYPE, diff --git a/src/mongo/db/matcher/expression_path.h b/src/mongo/db/matcher/expression_path.h index 1ca435c21ac..dcab87590f6 100644 --- a/src/mongo/db/matcher/expression_path.h +++ b/src/mongo/db/matcher/expression_path.h @@ -53,7 +53,7 @@ public: ElementPath(*path, leafArrBehavior, nonLeafArrayBehavior)) : boost::none) {} - bool matches(const MatchableDocument* doc, MatchDetails* details = nullptr) const final { + bool matches(const MatchableDocument* doc, MatchDetails* details = nullptr) const override { invariant(_elementPath); MatchableDocument::IteratorHolder cursor(doc, &*_elementPath); while (cursor->more()) { diff --git a/src/mongo/db/matcher/expression_visitor.h b/src/mongo/db/matcher/expression_visitor.h index 76d1288eb82..94d32f7fc68 100644 --- a/src/mongo/db/matcher/expression_visitor.h +++ b/src/mongo/db/matcher/expression_visitor.h @@ -55,6 +55,7 @@ class InternalExprGTMatchExpression; class InternalExprGTEMatchExpression; class InternalExprLTMatchExpression; class InternalExprLTEMatchExpression; +class InternalEqHashedKey; class InternalSchemaAllElemMatchFromIndexMatchExpression; class InternalSchemaAllowedPropertiesMatchExpression; class InternalSchemaBinDataEncryptedTypeExpression; @@ -128,6 +129,7 @@ public: virtual void visit(MaybeConstPtr<InternalExprGTEMatchExpression> expr) = 0; virtual void visit(MaybeConstPtr<InternalExprLTMatchExpression> expr) = 0; virtual void visit(MaybeConstPtr<InternalExprLTEMatchExpression> expr) = 0; + virtual void visit(MaybeConstPtr<InternalEqHashedKey> expr) = 0; virtual void visit(MaybeConstPtr<InternalSchemaAllElemMatchFromIndexMatchExpression> expr) = 0; virtual void visit(MaybeConstPtr<InternalSchemaAllowedPropertiesMatchExpression> expr) = 0; virtual void visit(MaybeConstPtr<InternalSchemaBinDataEncryptedTypeExpression> expr) = 0; @@ -213,6 +215,7 @@ struct SelectiveMatchExpressionVisitorBase : public MatchExpressionVisitor<IsCon void visit(MaybeConstPtr<InternalExprGTEMatchExpression> expr) override {} void visit(MaybeConstPtr<InternalExprLTMatchExpression> expr) override {} void visit(MaybeConstPtr<InternalExprLTEMatchExpression> expr) override {} + void visit(MaybeConstPtr<InternalEqHashedKey> expr) override {} void visit(MaybeConstPtr<InternalSchemaAllElemMatchFromIndexMatchExpression> expr) override {} void visit(MaybeConstPtr<InternalSchemaAllowedPropertiesMatchExpression> expr) override {} void visit(MaybeConstPtr<InternalSchemaBinDataEncryptedTypeExpression> expr) override {} diff --git a/src/mongo/db/matcher/match_expression_dependencies.cpp b/src/mongo/db/matcher/match_expression_dependencies.cpp index 6ff4a702def..cd28008f3df 100644 --- a/src/mongo/db/matcher/match_expression_dependencies.cpp +++ b/src/mongo/db/matcher/match_expression_dependencies.cpp @@ -34,6 +34,7 @@ #include "mongo/db/matcher/expression_expr.h" #include "mongo/db/matcher/expression_geo.h" #include "mongo/db/matcher/expression_internal_bucket_geo_within.h" +#include "mongo/db/matcher/expression_internal_eq_hashed_key.h" #include "mongo/db/matcher/expression_internal_expr_comparison.h" #include "mongo/db/matcher/expression_leaf.h" #include "mongo/db/matcher/expression_text.h" @@ -190,6 +191,10 @@ public: visitPathExpression(expr); } + void visit(const InternalEqHashedKey* expr) override { + visitPathExpression(expr); + } + void visit(const InternalSchemaAllElemMatchFromIndexMatchExpression* expr) override { visitPathExpression(expr); } diff --git a/src/mongo/db/pipeline/abt/match_expression_visitor.cpp b/src/mongo/db/pipeline/abt/match_expression_visitor.cpp index 52d9a36dd6f..4449cdb5ad4 100644 --- a/src/mongo/db/pipeline/abt/match_expression_visitor.cpp +++ b/src/mongo/db/pipeline/abt/match_expression_visitor.cpp @@ -34,6 +34,7 @@ #include "mongo/db/matcher/expression_expr.h" #include "mongo/db/matcher/expression_geo.h" #include "mongo/db/matcher/expression_internal_bucket_geo_within.h" +#include "mongo/db/matcher/expression_internal_eq_hashed_key.h" #include "mongo/db/matcher/expression_internal_expr_comparison.h" #include "mongo/db/matcher/expression_leaf.h" #include "mongo/db/matcher/expression_text.h" @@ -284,6 +285,10 @@ public: _ctx.push(make<PathConstant>(Constant::boolean(true))); } + void visit(const InternalEqHashedKey* expr) override { + unsupportedExpression(expr); + } + void visit(const InternalSchemaAllElemMatchFromIndexMatchExpression* expr) override { unsupportedExpression(expr); } diff --git a/src/mongo/db/pipeline/document_source_match.cpp b/src/mongo/db/pipeline/document_source_match.cpp index 8856a1def3b..83079a6629d 100644 --- a/src/mongo/db/pipeline/document_source_match.cpp +++ b/src/mongo/db/pipeline/document_source_match.cpp @@ -274,6 +274,7 @@ Document redactSafePortionDollarOps(BSONObj expr) { case PathAcceptingKeyword::INTERNAL_EXPR_GTE: case PathAcceptingKeyword::INTERNAL_EXPR_LT: case PathAcceptingKeyword::INTERNAL_EXPR_LTE: + case PathAcceptingKeyword::INTERNAL_EQ_HASHED_KEY: case PathAcceptingKeyword::INTERNAL_SCHEMA_ALL_ELEM_MATCH_FROM_INDEX: case PathAcceptingKeyword::INTERNAL_SCHEMA_BIN_DATA_ENCRYPTED_TYPE: case PathAcceptingKeyword::INTERNAL_SCHEMA_BIN_DATA_SUBTYPE: diff --git a/src/mongo/db/query/bind_input_params.cpp b/src/mongo/db/query/bind_input_params.cpp index c655567c2eb..b93698b8585 100644 --- a/src/mongo/db/query/bind_input_params.cpp +++ b/src/mongo/db/query/bind_input_params.cpp @@ -217,6 +217,7 @@ public: void visit(const InternalExprGTEMatchExpression* expr) final {} void visit(const InternalExprLTMatchExpression* expr) final {} void visit(const InternalExprLTEMatchExpression* expr) final {} + void visit(const InternalEqHashedKey* expr) final {} void visit(const InternalSchemaAllElemMatchFromIndexMatchExpression* expr) final {} void visit(const InternalSchemaAllowedPropertiesMatchExpression* expr) final {} void visit(const InternalSchemaBinDataEncryptedTypeExpression* expr) final {} diff --git a/src/mongo/db/query/canonical_query_encoder.cpp b/src/mongo/db/query/canonical_query_encoder.cpp index c16bac6a781..949501604e2 100644 --- a/src/mongo/db/query/canonical_query_encoder.cpp +++ b/src/mongo/db/query/canonical_query_encoder.cpp @@ -231,6 +231,9 @@ const char* encodeMatchType(MatchExpression::MatchType mt) { case MatchExpression::INTERNAL_EXPR_LTE: return "ele"; + case MatchExpression::INTERNAL_EQ_HASHED_KEY: + return "ieh"; + case MatchExpression::INTERNAL_SCHEMA_ALL_ELEM_MATCH_FROM_INDEX: return "internalSchemaAllElemMatchFromIndex"; @@ -832,9 +835,13 @@ public: void visit(const InternalExprLTEMatchExpression* expr) final {} void visit(const InternalExprLTMatchExpression* expr) final {} + /** * These node types are not yet supported in SBE. */ + void visit(const InternalEqHashedKey* expr) final { + MONGO_UNREACHABLE_TASSERT(7281402); + } void visit(const GeoMatchExpression* expr) final { MONGO_UNREACHABLE_TASSERT(6142111); } diff --git a/src/mongo/db/query/cqf_command_utils.cpp b/src/mongo/db/query/cqf_command_utils.cpp index 4902b601fd2..a6279c43400 100644 --- a/src/mongo/db/query/cqf_command_utils.cpp +++ b/src/mongo/db/query/cqf_command_utils.cpp @@ -39,6 +39,7 @@ #include "mongo/db/matcher/expression_expr.h" #include "mongo/db/matcher/expression_geo.h" #include "mongo/db/matcher/expression_internal_bucket_geo_within.h" +#include "mongo/db/matcher/expression_internal_eq_hashed_key.h" #include "mongo/db/matcher/expression_internal_expr_comparison.h" #include "mongo/db/matcher/expression_leaf.h" #include "mongo/db/matcher/expression_text.h" @@ -219,6 +220,10 @@ public: unsupportedExpression(expr); } + void visit(const InternalEqHashedKey* expr) override { + unsupportedExpression(expr); + } + void visit(const InternalSchemaAllElemMatchFromIndexMatchExpression* expr) override { unsupportedExpression(expr); } diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp index d965677cc71..804bdef8466 100644 --- a/src/mongo/db/query/index_bounds_builder.cpp +++ b/src/mongo/db/query/index_bounds_builder.cpp @@ -43,6 +43,7 @@ #include "mongo/db/index/s2_common.h" #include "mongo/db/matcher/expression_geo.h" #include "mongo/db/matcher/expression_internal_bucket_geo_within.h" +#include "mongo/db/matcher/expression_internal_eq_hashed_key.h" #include "mongo/db/matcher/expression_internal_expr_comparison.h" #include "mongo/db/query/analyze_regex.h" #include "mongo/db/query/collation/collation_index_key.h" @@ -974,6 +975,24 @@ void IndexBoundsBuilder::_translatePredicate(const MatchExpression* expr, Interval interval = makeRangeInterval(dataObj, BoundInclusion::kIncludeBothStartAndEndKeys); oilOut->intervals.push_back(interval); *tightnessOut = getInequalityPredicateTightness(interval, dataElt, index); + } else if (MatchExpression::INTERNAL_EQ_HASHED_KEY == expr->matchType()) { + ON_BLOCK_EXIT([ietBuilder, oilOut] { + if (ietBuilder != nullptr) { + ietBuilder->addConst(*oilOut); + } + }); + + tassert(7281403, "Expected a hashed index", index.type == INDEX_HASHED); + + const auto* node = static_cast<const InternalEqHashedKey*>(expr); + BSONObj dataObj = BSON("" << node->getData()); + + Interval interval = makePointInterval(dataObj); + oilOut->intervals.push_back(interval); + + // Technically this could be EXACT_FETCH, if such a thing existed. But we don't need to + // optimize this that much. + *tightnessOut = IndexBoundsBuilder::INEXACT_FETCH; } else if (MatchExpression::REGEX == expr->matchType()) { const RegexMatchExpression* rme = static_cast<const RegexMatchExpression*>(expr); translateRegex(rme, index, oilOut, tightnessOut); diff --git a/src/mongo/db/query/indexability.h b/src/mongo/db/query/indexability.h index 48b9e0d91b4..72a4d4d4d4d 100644 --- a/src/mongo/db/query/indexability.h +++ b/src/mongo/db/query/indexability.h @@ -201,7 +201,8 @@ private: me->matchType() == MatchExpression::INTERNAL_EXPR_GT || me->matchType() == MatchExpression::INTERNAL_EXPR_GTE || me->matchType() == MatchExpression::INTERNAL_EXPR_LT || - me->matchType() == MatchExpression::INTERNAL_EXPR_LTE; + me->matchType() == MatchExpression::INTERNAL_EXPR_LTE || + me->matchType() == MatchExpression::INTERNAL_EQ_HASHED_KEY; } }; diff --git a/src/mongo/db/query/planner_ixselect.cpp b/src/mongo/db/query/planner_ixselect.cpp index 8db7435e48e..e119a51dc2e 100644 --- a/src/mongo/db/query/planner_ixselect.cpp +++ b/src/mongo/db/query/planner_ixselect.cpp @@ -431,6 +431,10 @@ bool QueryPlannerIXSelect::_compatible(const BSONElement& keyPatternElt, return false; } + if (exprtype == MatchExpression::INTERNAL_EQ_HASHED_KEY && index.type != INDEX_HASHED) { + return false; + } + if (indexedFieldType.empty()) { // We can't use a sparse index for certain match expressions. if (index.sparse && !nodeIsSupportedBySparseIndex(node, isChildOfElemMatchValue)) { @@ -742,6 +746,9 @@ bool QueryPlannerIXSelect::nodeIsSupportedByHashedIndex(const MatchExpression* q if (ComparisonMatchExpressionBase::isEquality(queryExpr->matchType())) { return true; } + if (queryExpr->matchType() == MatchExpression::INTERNAL_EQ_HASHED_KEY) { + return true; + } // An $in can be answered so long as its operand contains only simple equalities. if (queryExpr->matchType() == MatchExpression::MATCH_IN) { const InMatchExpression* expr = static_cast<const InMatchExpression*>(queryExpr); diff --git a/src/mongo/db/query/sbe_stage_builder_filter.cpp b/src/mongo/db/query/sbe_stage_builder_filter.cpp index 91ecad15189..20d0d693f8c 100644 --- a/src/mongo/db/query/sbe_stage_builder_filter.cpp +++ b/src/mongo/db/query/sbe_stage_builder_filter.cpp @@ -46,6 +46,7 @@ #include "mongo/db/matcher/expression_array.h" #include "mongo/db/matcher/expression_expr.h" #include "mongo/db/matcher/expression_geo.h" +#include "mongo/db/matcher/expression_internal_eq_hashed_key.h" #include "mongo/db/matcher/expression_internal_expr_comparison.h" #include "mongo/db/matcher/expression_text.h" #include "mongo/db/matcher/expression_text_noop.h" @@ -583,6 +584,9 @@ public: void visit(const InternalExprGTEMatchExpression* expr) final {} void visit(const InternalExprLTMatchExpression* expr) final {} void visit(const InternalExprLTEMatchExpression* expr) final {} + void visit(const InternalEqHashedKey* expr) final { + unsupportedExpression(expr); + } void visit(const InternalSchemaAllElemMatchFromIndexMatchExpression* expr) final { unsupportedExpression(expr); } @@ -991,6 +995,7 @@ public: generateAlwaysBoolean(_context, true); } + void visit(const InternalEqHashedKey* expr) final {} void visit(const InternalBucketGeoWithinMatchExpression* expr) final {} void visit(const InternalSchemaAllElemMatchFromIndexMatchExpression* expr) final {} void visit(const InternalSchemaAllowedPropertiesMatchExpression* expr) final {} @@ -1155,6 +1160,7 @@ public: void visit(const InternalExprGTEMatchExpression* expr) final {} void visit(const InternalExprLTMatchExpression* expr) final {} void visit(const InternalExprLTEMatchExpression* expr) final {} + void visit(const InternalEqHashedKey* expr) final {} void visit(const InternalSchemaAllElemMatchFromIndexMatchExpression* expr) final {} void visit(const InternalSchemaAllowedPropertiesMatchExpression* expr) final {} void visit(const InternalSchemaBinDataEncryptedTypeExpression* expr) final {} |