From 63fd04ad7dcfdd982a2c726d5f21a73786807a49 Mon Sep 17 00:00:00 2001 From: Arun Banala Date: Thu, 21 Nov 2019 12:46:55 +0000 Subject: SERVER-43912 Query planner support for compound hashed indexes --- jstests/core/compound_hashed_index.js | 50 -- jstests/core/hashed_index_collation.js | 114 +++ jstests/core/hashed_index_covered_queries.js | 103 +++ jstests/core/hashed_index_queries.js | 182 +++++ .../hashed_index_queries_with_logical_operators.js | 133 ++++ jstests/core/hashed_index_sort.js | 156 ++++ jstests/core/hashed_index_with_arrays.js | 78 ++ jstests/core/hashed_partial_and_sparse_index.js | 73 ++ jstests/libs/analyze_plan.js | 19 + src/mongo/db/query/SConscript | 1 + .../db/query/query_planner_hashed_index_test.cpp | 815 +++++++++++++++++++++ src/mongo/db/query/query_solution.cpp | 19 +- 12 files changed, 1688 insertions(+), 55 deletions(-) delete mode 100644 jstests/core/compound_hashed_index.js create mode 100644 jstests/core/hashed_index_collation.js create mode 100644 jstests/core/hashed_index_covered_queries.js create mode 100644 jstests/core/hashed_index_queries.js create mode 100644 jstests/core/hashed_index_queries_with_logical_operators.js create mode 100644 jstests/core/hashed_index_sort.js create mode 100644 jstests/core/hashed_index_with_arrays.js create mode 100644 jstests/core/hashed_partial_and_sparse_index.js create mode 100644 src/mongo/db/query/query_planner_hashed_index_test.cpp diff --git a/jstests/core/compound_hashed_index.js b/jstests/core/compound_hashed_index.js deleted file mode 100644 index ebb8c471f62..00000000000 --- a/jstests/core/compound_hashed_index.js +++ /dev/null @@ -1,50 +0,0 @@ -/** - * Test to verify the behaviour of compound hashed indexes. - * - * @tags: [requires_fcv_44] - */ -(function() { -"use strict"; - -const coll = db.compound_hashed_index; -coll.drop(); - -for (let i = 0; i < 20; i++) { - assert.commandWorked( - coll.insert({a: i, b: {subObj: "string_" + (i % 13)}, c: NumberInt(i % 10)})); -} - -// Creation of compound hashed indexes work. -assert.commandWorked(coll.createIndex({a: 1, b: "hashed", c: 1})); -assert.commandWorked(coll.createIndex({a: "hashed", c: -1})); -assert.commandWorked(coll.createIndex({b: "hashed", a: -1, c: 1})); - -// None of the index fields can be an array. -assert.commandFailedWithCode(coll.insert({a: []}), 16766); -assert.commandFailedWithCode(coll.insert({b: []}), 16766); -assert.commandFailedWithCode(coll.insert({c: []}), 16766); - -// Test that having arrays along the path of the index is not allowed. -assert.commandWorked(coll.createIndex({"field1.field2.0.field4": "hashed", "field2.0.field4": 1})); - -// Hashed field path cannot have arrays. -assert.commandFailedWithCode(coll.insert({field1: []}), 16766); -assert.commandFailedWithCode(coll.insert({field1: {field2: []}}), 16766); -assert.commandFailedWithCode(coll.insert({field1: {field2: {0: []}}}), 16766); -assert.commandFailedWithCode(coll.insert({field1: [{field2: {0: []}}]}), 16766); -assert.commandFailedWithCode(coll.insert({field1: {field2: {0: {field4: []}}}}), 16766); - -// Range field path cannot have arrays. -assert.commandFailedWithCode(coll.insert({field2: []}), 16766); -assert.commandFailedWithCode(coll.insert({field2: {0: [0]}}), 16766); - -// Verify that updates gets rejected when a document is modified to contain array along the path. -assert.commandFailedWithCode(coll.update({}, {field2: []}), 16766); -assert.commandFailedWithCode(coll.update({}, {field2: {0: {field4: []}}}), 16766); -assert.commandFailedWithCode(coll.update({}, {field1: []}), 16766); - -// Verify inserts and updates work when there are no arrays along path. -assert.commandWorked(coll.insert({field1: {field2: {0: {otherField: []}}}})); -assert.commandWorked(coll.insert({field1: {field2: {0: {field4: 1}}}})); -assert.commandWorked(coll.update({}, {field1: {field2: {0: {field4: 1}}}})); -})(); \ No newline at end of file diff --git a/jstests/core/hashed_index_collation.js b/jstests/core/hashed_index_collation.js new file mode 100644 index 00000000000..6b53002bdb7 --- /dev/null +++ b/jstests/core/hashed_index_collation.js @@ -0,0 +1,114 @@ +/** + * Tests to verify that hashed indexes obey collation rules. + * + * The tags below are necessary because collation requires that we use read/write commands rather + * than legacy operations. + * @tags: [requires_find_command, assumes_unsharded_collection, requires_fcv_44] + */ +(function() { +"use strict"; + +load("jstests/aggregation/extras/utils.js"); // For arrayEq(). +load("jstests/libs/analyze_plan.js"); // For assertStagesForExplainOfCommand(). + +const coll = db.hashed_index_collation; +coll.drop(); +const collation = { + locale: "en_US", + strength: 1 +}; + +/** + * Runs find command with the 'filter' and 'projection' provided in the input, then validates + * that the output returned matches 'expectedOutput'. Also runs explain() command on the same find + * command, validates that all the 'expectedStages' are present in the plan returned and all the + * 'stagesNotExpected' are not present in the plan. + */ +function validateFindCmdOutputAndPlan( + {filter, projection = {}, expectedOutput, expectedStages, stagesNotExpected}) { + const cmdObj = + {find: coll.getName(), filter: filter, projection: projection, collation: collation}; + const res = assert.commandWorked(coll.runCommand(cmdObj)); + const ouputArray = new DBCommandCursor(coll.getDB(), res).toArray(); + + // We ignore the order since hashed index order is not predictable. + assert(arrayEq(expectedOutput, ouputArray), ouputArray); + + assertStagesForExplainOfCommand({ + coll: coll, + cmdObj: cmdObj, + expectedStages: expectedStages, + stagesNotExpected: stagesNotExpected + }); +} + +// Verify that index creation works for compound hashed index with collation, when hashed field is a +// prefix. +assert.commandWorked( + coll.createIndex({"a.b": "hashed", "a.c": 1, "a.e": -1}, {collation: collation})); + +// Insert a series of documents whose fieldnames and values differ only by case. +assert.commandWorked(coll.insert({_id: 0, a: {b: "string", c: "STRING", e: 5}})); +assert.commandWorked(coll.insert({_id: 1, a: {b: "STRING", c: "string", e: 5}})); +assert.commandWorked(coll.insert({_id: 2, A: {B: "string", C: "STRING", E: 5}})); +assert.commandWorked(coll.insert({_id: 3, A: {B: "StrinG", C: "sTRINg", E: 5}})); + +// Verify that hashed field can use index in the presence of collation. Also verify that only +// the document's values, not the field names, adhere to the case-insensitive collation. +validateFindCmdOutputAndPlan({ + filter: {"a.b": "string", "a.c": "string"}, + expectedStages: ["IXSCAN", "FETCH"], + expectedOutput: [ + {_id: 0, a: {b: "string", c: "STRING", e: 5}}, + {_id: 1, a: {b: "STRING", c: "string", e: 5}} + ] +}); + +// Verify that the field names doesn't adhere to the case-insensitive collation and uses collection +// scan if the case doesn't match. +validateFindCmdOutputAndPlan({ + filter: {"A.B": "string"}, + expectedStages: ["COLLSCAN"], + expectedOutput: [ + {_id: 2, A: {B: "string", C: "STRING", E: 5}}, + {_id: 3, A: {B: "StrinG", C: "sTRINg", E: 5}} + ] +}); + +// Verify that $or query with collation returns correct results. +validateFindCmdOutputAndPlan({ + filter: {$or: [{"a.b": "string_1"}, {"a.b": "string", "a.c": "string"}]}, + expectedStages: ["OR"], + stagesNotExpected: ["COLLSCAN"], // Verify that both the OR stages use index scan. + expectedOutput: [ + {_id: 0, a: {b: "string", c: "STRING", e: 5}}, + {_id: 1, a: {b: "STRING", c: "string", e: 5}} + ] +}); + +/** + * When hashed field is not a prefix. + */ +assert.commandWorked(coll.dropIndexes()); +assert.commandWorked( + coll.createIndex({"a.b": 1, "a.c": "hashed", "a.e": -1}, {collation: collation})); + +// Hashed indexes with collation can be covered, if the query predicate restrict strings from being +// returned. +validateFindCmdOutputAndPlan({ + filter: {"a.b": "string", "a.e": {$type: "number"}}, + projection: {"a.e": 1, _id: 0}, + expectedStages: ["IXSCAN"], + stagesNotExpected: ["FETCH"], + expectedOutput: [{a: {e: 5}}, {a: {e: 5}}] +}); + +// Hashed indexes with collation cannot be covered, if the query predicate doesn't restrict strings +// from being returneds. +validateFindCmdOutputAndPlan({ + filter: {"a.b": "string"}, + projection: {"a.e": 1, _id: 0}, + expectedStages: ["IXSCAN", "FETCH"], + expectedOutput: [{a: {e: 5}}, {a: {e: 5}}] +}); +})(); diff --git a/jstests/core/hashed_index_covered_queries.js b/jstests/core/hashed_index_covered_queries.js new file mode 100644 index 00000000000..218359477e0 --- /dev/null +++ b/jstests/core/hashed_index_covered_queries.js @@ -0,0 +1,103 @@ +/** + * Test to verify that hashed indexes can cover projections when appropriate. The queries can be + * covered when neither the query predicate nor projection uses a hashed field. + * + * @tags: [assumes_unsharded_collection, requires_fcv_44] + */ +(function() { +"use strict"; + +load("jstests/aggregation/extras/utils.js"); // For arrayEq(). +load("jstests/libs/analyze_plan.js"); // For assertStagesForExplainOfCommand(). + +const coll = db.compound_hashed_index; +coll.drop(); + +for (let i = 0; i < 100; i++) { + assert.commandWorked(coll.insert({a: i, b: (i % 13), c: NumberInt(i % 10)})); +} + +/** + * Runs find command with the 'filter' and 'projection' provided in the input, then validates + * that the output returned matches 'expectedOutput'. Also runs explain() command on the same find + * command and validates that all the 'expectedStages' are present in the plan returned. + */ +function validateFindCmdOutputAndPlan({filter, projection, expectedOutput, expectedStages}) { + const cmdObj = {find: coll.getName(), filter: filter, projection: projection}; + if (expectedOutput) { + const res = assert.commandWorked(coll.runCommand(cmdObj)); + const ouputArray = new DBCommandCursor(coll.getDB(), res).toArray(); + + // We ignore the order since hashed index order is not predictable. + assert(arrayEq(expectedOutput, ouputArray), ouputArray); + } + + return assertStagesForExplainOfCommand( + {coll: coll, cmdObj: cmdObj, expectedStages: expectedStages}); +} + +/** + * Runs count command with the 'filter' and 'projection' provided in the input, then validates + * that the output returned matches 'expectedOutput'. Also runs explain() command on the same count + * command and validates that all the 'expectedStages' are present in the plan returned. + */ +function validateCountCmdOutputAndPlan({filter, expectedOutput, expectedStages}) { + const cmdObj = {count: coll.getName(), query: filter}; + const res = assert.commandWorked(coll.runCommand(cmdObj)); + assert.eq(res.n, expectedOutput); + assertStagesForExplainOfCommand({coll: coll, cmdObj: cmdObj, expectedStages: expectedStages}); +} + +/** + * Tests when hashed field is a prefix. + */ +assert.commandWorked(coll.createIndex({b: "hashed", c: -1, a: 1})); + +// Verify that queries cannot be covered with hashed field is a prefix. +validateFindCmdOutputAndPlan( + {filter: {c: 1}, projection: {a: 1, _id: 0}, expectedStages: ['COLLSCAN']}); + +/** + * Tests when hashed field is not a prefix. + */ +assert.commandWorked(coll.createIndex({a: 1, b: "hashed", c: -1})); + +// Verify that query doesn't get covered when projecting a hashed field. +validateFindCmdOutputAndPlan({ + filter: {a: 26}, + projection: {b: 1, _id: 0}, + expectedOutput: [{b: 0}], + expectedStages: ['FETCH', 'IXSCAN'] +}); + +// Verify that query doesn't get covered when query is on a hashed field. This is to avoid the +// possibility of hash collision. If two different fields produce the same hash value, there is no +// way to distinguish them without fetching the document. +validateFindCmdOutputAndPlan({ + filter: {a: 26, b: 0}, + projection: {c: 1, _id: 0}, + expectedOutput: [{c: 6}], + expectedStages: ['FETCH', 'IXSCAN'] +}); + +// Verify that query gets covered when neither query nor project use hashed field. +validateFindCmdOutputAndPlan({ + filter: {a: {$gt: 24, $lt: 27}}, + projection: {c: 1, _id: 0}, + expectedOutput: [{c: 5}, {c: 6}], + expectedStages: ['IXSCAN', 'PROJECTION_COVERED'] +}); + +// Verify that an empty query with a coverable projection always uses a COLLSCAN. +validateFindCmdOutputAndPlan( + {filter: {}, projection: {a: 1, _id: 0}, expectedStages: ['COLLSCAN']}); + +// Verify that COUNT_SCAN cannot be used when query is on a hashed field. +validateCountCmdOutputAndPlan( + {filter: {a: 26, b: 0}, expectedStages: ['FETCH', 'IXSCAN'], expectedOutput: 1}); + +// Verify that a count operation with range query on a non-hashed prefix field can use +// COUNT_SCAN. +validateCountCmdOutputAndPlan( + {filter: {a: {$gt: 25, $lt: 29}}, expectedStages: ["COUNT_SCAN"], expectedOutput: 3}); +})(); \ No newline at end of file diff --git a/jstests/core/hashed_index_queries.js b/jstests/core/hashed_index_queries.js new file mode 100644 index 00000000000..75f5b75bc42 --- /dev/null +++ b/jstests/core/hashed_index_queries.js @@ -0,0 +1,182 @@ +/** + * Test to verify the behaviour of find, count, distinct operations in the presence of compound + * hashed indexes. + * + * @tags: [requires_fcv_44] + */ +(function() { +"use strict"; +load("jstests/aggregation/extras/utils.js"); // For arrayEq(). +load("jstests/libs/analyze_plan.js"); // For assertStagesForExplainOfCommand(). + +const coll = db.hashed_index_queries; +coll.drop(); + +for (let i = 0; i < 100; i++) { + assert.commandWorked(assert.commandWorked( + coll.insert({a: i, b: {subObj: "str_" + (i % 13)}, c: NumberInt(i % 10)}))); + assert.commandWorked(coll.insert({a: i, b: (i % 13), c: NumberInt(i % 10)})); +} + +/** + * Runs find command with the 'filter' and validates that the output returned matches + * 'expectedOutput'. Also runs explain() command on the same find command and validates that all + * the 'expectedStages' are present in the plan returned. + */ +function validateFindCmdOutputAndPlan({filter, expectedStages, expectedOutput}) { + const cmdObj = {find: coll.getName(), filter: filter, projection: {_id: 0}}; + if (expectedOutput) { + const res = assert.commandWorked(coll.runCommand(cmdObj)); + const ouputArray = new DBCommandCursor(coll.getDB(), res).toArray(); + + // We ignore the order since hashed index order is not predictable. + assert(arrayEq(expectedOutput, ouputArray), ouputArray); + } + assertStagesForExplainOfCommand({coll: coll, cmdObj: cmdObj, expectedStages: expectedStages}); +} + +/** + * Runs count command with the 'filter' and validates that the output returned matches + * 'expectedOutput'. Also runs explain() command on the same count command and validates that all + * the 'expectedStages' are present in the plan returned. + */ +function validateCountCmdOutputAndPlan({filter, expectedStages, expectedOutput}) { + const cmdObj = {count: coll.getName(), query: filter}; + if (expectedOutput) { + const res = assert.commandWorked(coll.runCommand(cmdObj)); + assert.eq(res.n, expectedOutput); + } + assertStagesForExplainOfCommand({coll: coll, cmdObj: cmdObj, expectedStages: expectedStages}); +} +/** + * Tests for 'find' operation when hashed field is prefix. + */ +assert.commandWorked(coll.createIndex({b: "hashed", c: -1})); + +// Verify that index is not used for a range query on a hashed field. +validateFindCmdOutputAndPlan({filter: {b: {$gt: 10, $lt: 12}}, expectedStages: ["COLLSCAN"]}); + +// Verify that index is not used for a query on a hashed field's sub-object. +validateFindCmdOutputAndPlan({filter: {"b.subObj": "str_10"}, expectedStages: ["COLLSCAN"]}); + +// Verify that index is used for a query on a hashed field. +validateFindCmdOutputAndPlan({ + filter: {b: {subObj: "str_11"}}, + expectedOutput: [ + {a: 11, b: {subObj: "str_11"}, c: 1}, + {a: 24, b: {subObj: "str_11"}, c: 4}, + {a: 37, b: {subObj: "str_11"}, c: 7}, + {a: 50, b: {subObj: "str_11"}, c: 0}, + {a: 63, b: {subObj: "str_11"}, c: 3}, + {a: 76, b: {subObj: "str_11"}, c: 6}, + {a: 89, b: {subObj: "str_11"}, c: 9}, + ], + expectedStages: ["IXSCAN", "FETCH"], +}); + +/** + * Tests for 'find' operation when hashed field is not a prefix. + */ +assert.commandWorked(coll.dropIndexes()); +assert.commandWorked(coll.createIndex({a: 1, b: "hashed", c: -1})); + +// Verify $in query can use point interval bounds on hashed fields and non-hashed fields. +validateFindCmdOutputAndPlan({ + filter: + {a: {$in: [38, 37]}, b: {$in: [{subObj: "str_12"}, {subObj: "str_11"}]}, c: {$in: [7, 8]}}, + expectedOutput: [{a: 37, b: {subObj: "str_11"}, c: 7}, {a: 38, b: {subObj: "str_12"}, c: 8}], + expectedStages: ["IXSCAN", "FETCH"] +}); + +// Verify that a range query on a non-hashed prefix field can use index. +validateFindCmdOutputAndPlan({ + filter: {a: {$gt: 25, $lt: 29}, b: 0}, + expectedOutput: [{a: 26, b: 0, c: 6}], + expectedStages: ["IXSCAN", "FETCH"] +}); + +/** + * Tests for 'count' operation when hashed field is prefix. + */ +assert.commandWorked(coll.dropIndexes()); +assert.commandWorked(coll.createIndex({b: "hashed", a: 1})); + +// Verify that index is not used for a range query on a hashed field. +validateCountCmdOutputAndPlan( + {filter: {b: {$gt: 10, $lt: 12}}, expectedOutput: 7, expectedStages: ["COLLSCAN"]}); + +// Verify that index is used for a query on a hashed field. +validateCountCmdOutputAndPlan( + {filter: {b: {subObj: "str_10"}}, expectedOutput: 7, expectedStages: ["IXSCAN", "FETCH"]}); + +/** + * Tests for 'count' operation when hashed field is not a prefix. + */ +assert.commandWorked(coll.dropIndexes()); +assert.commandWorked(coll.createIndex({a: 1, b: "hashed", c: -1})); + +// Verify that range query on a non-hashed prefix field can use index. +validateCountCmdOutputAndPlan({ + filter: {a: {$gt: 25, $lt: 29}, b: 0}, + expectedOutput: 1, + expectedStages: ["IXSCAN", "FETCH"] +}); + +/** + * Tests for 'distinct' operation when hashed field is not a prefix. + */ +assert.commandWorked(coll.dropIndexes()); +assert.commandWorked(coll.createIndex({a: -1, b: "hashed", c: 1})); + +// 'distinct' on non-hashed prefix fields cannot use DISTINCT_SCAN. +assert.eq(100, coll.distinct("a").length); +let plan = coll.explain("executionStats").distinct("a"); +assert(isCollscan(db, plan.queryPlanner.winningPlan), plan.queryPlanner.winningPlan); + +// 'distinct' on non-prefix fields cannot use index. +assert.eq(26, coll.distinct("b").length); +plan = coll.explain("executionStats").distinct("b"); +assert(isCollscan(db, plan.queryPlanner.winningPlan), plan.queryPlanner.winningPlan); +assert.eq(10, coll.distinct("c").length); +plan = coll.explain("executionStats").distinct("c"); +assert(isCollscan(db, plan.queryPlanner.winningPlan), plan.queryPlanner.winningPlan); + +// 'distinct' with query predicate can use index for the query part. +assert.eq([2], coll.distinct("c", {a: 12, b: {subObj: "str_12"}})); +plan = coll.explain("executionStats").distinct("c", {a: 12, b: {subObj: "str_12"}}); +assert(isIxscan(db, plan.queryPlanner.winningPlan), plan.queryPlanner.winningPlan); +assert(planHasStage(db, plan.queryPlanner.winningPlan, "FETCH"), plan.queryPlanner.winningPlan); + +/** + * Tests for 'distinct' operation when hashed field is a prefix. + */ +assert.commandWorked(coll.dropIndexes()); +assert.commandWorked(coll.createIndex({b: "hashed", c: 1})); + +// 'distinct' on hashed prefix field cannot use index. +assert.eq(26, coll.distinct("b").length); +plan = coll.explain("executionStats").distinct("b"); +assert(isCollscan(db, plan.queryPlanner.winningPlan), plan.queryPlanner.winningPlan); + +// 'distinct' with query predicate can use index for the query part. +assert.eq([1], coll.distinct("b", {b: 1})); +plan = coll.explain("executionStats").distinct("b", {b: 1}); +assert(isIxscan(db, plan.queryPlanner.winningPlan), plan.queryPlanner.winningPlan); +assert(planHasStage(db, plan.queryPlanner.winningPlan, "FETCH"), plan.queryPlanner.winningPlan); + +// 'distinct' with query predicate cannot use index when query cannot use index. +assert.eq([5], coll.distinct("b", {b: {$lt: 6, $gt: 4}})); +plan = coll.explain("executionStats").distinct("b", {b: {$lt: 6, $gt: 4}}); +assert(isCollscan(db, plan.queryPlanner.winningPlan), plan.queryPlanner.winningPlan); + +// 'distinct' with query predicate can use index for the query part. +assert.eq([2], coll.distinct("c", {a: 12, b: 12})); +plan = coll.explain("executionStats").distinct("c", {a: 12, b: 12}); +assert(isIxscan(db, plan.queryPlanner.winningPlan), plan.queryPlanner.winningPlan); +assert(planHasStage(db, plan.queryPlanner.winningPlan, "FETCH"), plan.queryPlanner.winningPlan); + +// 'distinct' on non-prefix fields cannot use index. +assert.eq(10, coll.distinct("c").length); +plan = coll.explain("executionStats").distinct("c"); +assert(isCollscan(db, plan.queryPlanner.winningPlan), plan.queryPlanner.winningPlan); +})(); \ No newline at end of file diff --git a/jstests/core/hashed_index_queries_with_logical_operators.js b/jstests/core/hashed_index_queries_with_logical_operators.js new file mode 100644 index 00000000000..afb477ecb08 --- /dev/null +++ b/jstests/core/hashed_index_queries_with_logical_operators.js @@ -0,0 +1,133 @@ +/** + * Test to verify the behaviour of compound hashed indexes when the queries use logical operators + * like $or, $not etc. + * For $not, we test two case + * 1. When hashed field is a prefix, we cannot use index because the index could + * incorrectly filter out matching documents which collide with the same hash value as the one given + * in the query predicate. + * 2. When non-hashed field is prefix, we can always use index for $not, but we currently don't. + * SERVER-44011 is intended to address that. + * + * @tags: [requires_fcv_44] + */ +(function() { +"use strict"; + +load("jstests/aggregation/extras/utils.js"); // For arrayEq(). +load("jstests/libs/analyze_plan.js"); // For assertStagesForExplainOfCommand(). + +const coll = db.hashed_index_queries_with_logical_operators; +coll.drop(); + +assert.commandWorked(coll.insert({})); +assert.commandWorked(coll.insert({a: null})); +assert.commandWorked(coll.insert({a: 12, b: 12})); +assert.commandWorked(coll.insert({b: 12})); +assert.commandWorked(coll.insert({a: null, b: 12})); + +/** + * Run find command with the 'filter' and projection' provided in the input, then validates + * that the output returned matches 'expectedOutput'. Also runs explain() command on the same find + * command, validates that all the 'expectedStages' are present in the plan returned and all the + * 'stagesNotExpected' are not present in the plan. + */ +function validateFindCmdOutputAndPlan({ + filter, + projection = { + _id: 0 + }, + expectedOutput, + expectedStages, + stagesNotExpected +}) { + const cmdObj = {find: coll.getName(), filter: filter, projection: projection}; + if (expectedOutput) { + const res = assert.commandWorked(coll.runCommand(cmdObj)); + const ouputArray = new DBCommandCursor(coll.getDB(), res).toArray(); + + // We ignore the order since hashed index order is not predictable. + assert(arrayEq(expectedOutput, ouputArray), ouputArray); + } + + assertStagesForExplainOfCommand({ + coll: coll, + cmdObj: cmdObj, + expectedStages: expectedStages, + stagesNotExpected: stagesNotExpected + }); +} + +/** + * Tests when hashed field is a prefix. + */ +assert.commandWorked(coll.createIndex({a: "hashed", b: 1})); + +// Verify that sub-queries of $or opertor can use index. +validateFindCmdOutputAndPlan({ + filter: {$or: [{a: null}, {a: 12, b: 12}]}, + expectedOutput: [{a: null, b: 12}, {a: null}, {a: 12, b: 12}, {b: 12}, {}], + expectedStages: ["OR"], + stagesNotExpected: ["COLLSCAN"] +}); + +// Verify that query cannot use index for $exists=true (which internally generate a $not query) +// query. +validateFindCmdOutputAndPlan({ + filter: {a: {$exists: true}, b: 12}, + expectedOutput: [{a: 12, b: 12}, {a: null, b: 12}], + expectedStages: ["COLLSCAN"] +}); + +// Verify that query can use index for matching 'null'. +validateFindCmdOutputAndPlan({ + filter: {a: null, b: 12}, + expectedOutput: [{b: 12}, {a: null, b: 12}], + expectedStages: ["FETCH", "IXSCAN"] +}); + +// Verify that query cannot use index for $not queries on hashed field. +validateFindCmdOutputAndPlan({filter: {a: {$not: {$eq: 12}}, b: 12}, expectedStages: ["COLLSCAN"]}); + +// Currently $exists:false predicates cannot use a hashed index. +// TODO SERVER-44011: Allow $exists:false predicates to use a hashed index. +validateFindCmdOutputAndPlan({filter: {a: {$exists: false}, b: 12}, expectedStages: ["COLLSCAN"]}); + +/** + * Tests when hashed field is not a prefix. + */ +assert.commandWorked(coll.dropIndexes()); +assert.commandWorked(coll.createIndex({a: 1, b: "hashed", c: -1})); + +// Verify that sub-queries of $or opertor can use index. The first element of $or should not require +// a FETCH. +validateFindCmdOutputAndPlan({ + filter: {$or: [{a: 1}, {a: 12, b: 12}]}, + projection: {a: 1, c: 1, _id: 0}, + expectedOutput: [{a: 12}], + expectedStages: ["OR", "FETCH"], + stagesNotExpected: ["COLLSCAN"], +}); + +// Verify that can use index for $exists:true query and differentiate null from missing. +validateFindCmdOutputAndPlan({ + filter: {a: {$exists: true}, b: 12}, + expectedOutput: [{a: 12, b: 12}, {a: null, b: 12}], + expectedStages: ["FETCH", "IXSCAN"] +}); + +// Verify that query can use index for matching 'null' on non-hashed prefixes. +validateFindCmdOutputAndPlan({ + filter: {a: null, b: 12}, + expectedOutput: [{b: 12}, {a: null, b: 12}], + expectedStages: ["FETCH", "IXSCAN"] +}); + +// Verify that query can use index for matching 'null' on hashed field. +validateFindCmdOutputAndPlan( + {filter: {a: 12, b: null}, expectedOutput: [], expectedStages: ["FETCH", "IXSCAN"]}); + +// Currently $not queries on non-hashed prefixes cannot use a hashed index. +// TODO SERVER-44011: Allow $not queries on non-hashed prefixes to use index. +validateFindCmdOutputAndPlan({filter: {a: {$not: {$gt: 12}}, b: 12}, expectedStages: ["COLLSCAN"]}); +validateFindCmdOutputAndPlan({filter: {a: {$exists: false}, b: 12}, expectedStages: ["COLLSCAN"]}); +})(); diff --git a/jstests/core/hashed_index_sort.js b/jstests/core/hashed_index_sort.js new file mode 100644 index 00000000000..d895134eda7 --- /dev/null +++ b/jstests/core/hashed_index_sort.js @@ -0,0 +1,156 @@ +/** + * Test to verify the behaviour of compound hashed indexes when 'sort' operation is used along with + * the 'find' command. The test verifies compound hashed index with hashed prefix and non-hashed + * prefix. + * @tags: [assumes_unsharded_collection, requires_fcv_44] + */ +(function() { +"use strict"; + +load("jstests/libs/analyze_plan.js"); // For assertStagesForExplainOfCommand(). + +const coll = db.hashed_index_sort; +coll.drop(); + +for (let i = 0; i < 5; i++) { + for (let j = 0; j < 5; j++) { + assert.commandWorked(coll.insert({_id: (10 * i) + j, a: i, b: j, c: i + j, d: i - j})); + } +} + +/** + * Runs find command with the 'filter', 'sort' and 'project' provided in the input, then validates + * that the output returned matches 'expectedOutput'. Also runs explain() command on the same find + * command, validates that all the 'expectedStages' are present in the plan returned and all the + * 'stagesNotExpected' are not present in the plan. + */ +function validateFindCmdOutputAndPlan( + {filter, project: project, sort: sort, expectedStages, stagesNotExpected, expectedOutput}) { + const cmdObj = { + find: coll.getName(), + filter: filter, + sort: sort, + projection: project || + { + _id: 0 + } + }; + if (expectedOutput) { + const res = assert.commandWorked(coll.runCommand(cmdObj)); + const ouputArray = new DBCommandCursor(coll.getDB(), res).toArray(); + + // Make sure that the documents returned are in the same order as 'expectedOutput'. + assert.eq(expectedOutput, ouputArray, ouputArray); + } + assertStagesForExplainOfCommand({ + coll: coll, + cmdObj: cmdObj, + expectedStages: expectedStages, + stagesNotExpected: stagesNotExpected + }); +} + +/** + * Tests when hashed field is prefix. + */ +assert.commandWorked(coll.createIndex({a: "hashed", b: -1, c: 1})); + +// Verify that an exact match predicate on hashed field (prefix) and sort with an immediate range +// field can use 'SORT_MERGE'. +validateFindCmdOutputAndPlan({ + filter: {a: 1}, + project: {b: 1, c: 1, _id: 0}, + sort: {b: 1}, + expectedOutput: [ + {b: 0, c: 1}, + {b: 1, c: 2}, + {b: 2, c: 3}, + {b: 3, c: 4}, + {b: 4, c: 5}, + ], + expectedStages: ["IXSCAN", "FETCH", "SORT_MERGE"] +}); + +// Verify that a list of exact match predicates on hashed field (prefix) and sort with an immediate +// range field can use 'SORT_MERGE'. +validateFindCmdOutputAndPlan({ + filter: {a: {$in: [1, 2]}}, + project: {b: 1, _id: 0}, + sort: {b: 1}, + expectedOutput: + [{b: 0}, {b: 0}, {b: 1}, {b: 1}, {b: 2}, {b: 2}, {b: 3}, {b: 3}, {b: 4}, {b: 4}], + expectedStages: ["FETCH", "SORT_MERGE"], + stagesNotExpected: ["COLLSCAN"] +}); + +// Sort on index fields which do not immediately follow the hashed field cannot use SORT_MERGE. +validateFindCmdOutputAndPlan({ + coll: coll, + filter: {a: 1}, + sort: {c: 1}, + expectedOutput: [ + {a: 1, b: 0, c: 1, d: 1}, + {a: 1, b: 1, c: 2, d: 0}, + {a: 1, b: 2, c: 3, d: -1}, + {a: 1, b: 3, c: 4, d: -2}, + {a: 1, b: 4, c: 5, d: -3}, + ], + expectedStages: ["IXSCAN", "SORT"] +}); + +/** + * Tests when hashed field is not a prefix. + */ +assert.commandWorked(coll.dropIndexes()); +assert.commandWorked(coll.createIndex({a: 1, b: -1, c: "hashed", d: 1})); + +// Verify that an exact match predicate on range field (prefix) and sort with an immediate range +// field doesn't require any additional sort stages. The entire operation can be answered by the +// index. +validateFindCmdOutputAndPlan({ + filter: {a: 1}, + project: {_id: 0, a: 1, b: 1}, + sort: {b: 1}, + expectedOutput: [ + {a: 1, b: 0}, + {a: 1, b: 1}, + {a: 1, b: 2}, + {a: 1, b: 3}, + {a: 1, b: 4}, + ], + expectedStages: ["IXSCAN"], + stagesNotExpected: ["SORT_MERGE", "SORT", "FETCH"] +}); + +// Verify that a list of exact match predicates on range field (prefix) and sort with an immediate +// range field can use 'SORT_MERGE'. The entire operation will not require a FETCH. +validateFindCmdOutputAndPlan({ + filter: {a: {$in: [1, 2]}}, + project: {_id: 0, a: 1, b: 1}, + sort: {b: 1}, + nReturned: 20, + docsExamined: 0, + expectedStages: ["SORT_MERGE"], + stagesNotExpected: ["COLLSCAN", "FETCH"] +}); + +// Verify that query predicate and sort on non-hashed fields can be answered by the index but +// require a sort stage, if the 'sort' field is not immediately after 'query' field in the index. +validateFindCmdOutputAndPlan({ + filter: {a: 1}, + project: {_id: 0, d: 1, b: 1}, + sort: {d: 1}, + expectedOutput: [{b: 4, d: -3}, {b: 3, d: -2}, {b: 2, d: -1}, {b: 1, d: 0}, {b: 0, d: 1}], + expectedStages: ["IXSCAN", "SORT"], + stagesNotExpected: ["COLLSCAN", "FETCH"] +}); + +// Verify that sort on a hashed field required a FETCH and a SORT stage. +validateFindCmdOutputAndPlan({ + filter: {a: 1, b: 1}, + project: {_id: 0, c: 1}, + sort: {c: 1}, + expectedOutput: [{c: 2}], + expectedStages: ["IXSCAN", "FETCH", "SORT"] +}); +})(); \ No newline at end of file diff --git a/jstests/core/hashed_index_with_arrays.js b/jstests/core/hashed_index_with_arrays.js new file mode 100644 index 00000000000..afbd241fd7d --- /dev/null +++ b/jstests/core/hashed_index_with_arrays.js @@ -0,0 +1,78 @@ +/** + * Test to verify the behaviour of compound hashed indexes. + * + * @tags: [requires_fcv_44] + */ +(function() { +"use strict"; + +const coll = db.hashed_index_with_arrays; +coll.drop(); + +for (let i = 0; i < 20; i++) { + assert.commandWorked( + coll.insert({a: i, b: {subObj: "string_" + (i % 13)}, c: NumberInt(i % 10)})); +} + +// Creation of compound hashed indexes work. +assert.commandWorked(coll.createIndex({a: 1, b: "hashed", c: 1})); +assert.commandWorked(coll.createIndex({a: "hashed", c: -1})); +assert.commandWorked(coll.createIndex({b: "hashed", a: -1, c: 1})); + +// None of the index fields can be an array. +assert.commandFailedWithCode(coll.insert({a: []}), 16766); +assert.commandFailedWithCode(coll.insert({b: []}), 16766); +assert.commandFailedWithCode(coll.insert({c: []}), 16766); + +// Test that having arrays along the path of the index is not allowed. +assert.commandWorked(coll.createIndex({"field1.field2.0.field4": "hashed", "field2.0.field4": 1})); + +// Hashed field path cannot have arrays. +assert.commandFailedWithCode(coll.insert({field1: []}), 16766); +assert.commandFailedWithCode(coll.insert({field1: {field2: []}}), 16766); +assert.commandFailedWithCode(coll.insert({field1: {field2: {0: []}}}), 16766); +assert.commandFailedWithCode(coll.insert({field1: [{field2: {0: []}}]}), 16766); +assert.commandFailedWithCode(coll.insert({field1: {field2: {0: {field4: []}}}}), 16766); + +// Range field path cannot have arrays. +assert.commandFailedWithCode(coll.insert({field2: []}), 16766); +assert.commandFailedWithCode(coll.insert({field2: {0: [0]}}), 16766); + +// Verify that updates gets rejected when a document is modified to contain array along the path. +assert.commandFailedWithCode(coll.update({}, {field2: []}), 16766); +assert.commandFailedWithCode(coll.update({}, {field2: {0: {field4: []}}}), 16766); +assert.commandFailedWithCode(coll.update({}, {field1: []}), 16766); +assert.commandFailedWithCode(coll.update({_id: "missing"}, {field1: []}, {upsert: true}), [16766]); + +// Verify inserts and updates work when there are no arrays along path. +assert.commandWorked(coll.insert({field1: {field2: {0: {otherField: []}}}})); +assert.commandWorked(coll.insert({field1: {field2: {0: {field4: 1}}}})); +assert.commandWorked(coll.update({}, {field1: {field2: {0: {field4: 1}}}})); +assert.commandWorked( + coll.update({_id: "missing"}, {field1: {field2: {0: {field4: 1}}}}, {upsert: true})); + +/** + * Tests for sparse indexes. + */ +// Creation of compound hashed indexes work. +assert.commandWorked(coll.dropIndexes()); +assert.commandWorked(coll.createIndex({"a.b": 1, c: "hashed", d: -1}, {sparse: true})); +assert.commandWorked(coll.createIndex({"a.c": "hashed", d: -1}, {sparse: true})); +assert.commandWorked(coll.createIndex({b: "hashed", d: -1, c: 1}, {sparse: true})); + +// Any arrays not allowed for sparse index. +assert.commandFailedWithCode(coll.insert({b: []}), 16766); +assert.commandFailedWithCode(coll.insert({c: [1]}), 16766); +assert.commandFailedWithCode(coll.insert({a: []}), 16766); + +/** + * Tests for partial indexes. + */ +assert.commandWorked(coll.dropIndexes()); +assert.commandWorked( + coll.createIndex({a: "hashed", b: 1}, {partialFilterExpression: {b: {$gt: 5}}})); +assert.commandFailedWithCode(coll.insert({a: [1], b: 6}), 16766); + +// Array insertion allowed when the document doesn't match the partial filter predication. +assert.commandWorked(coll.insert({a: [1], b: 1})); +})(); \ No newline at end of file diff --git a/jstests/core/hashed_partial_and_sparse_index.js b/jstests/core/hashed_partial_and_sparse_index.js new file mode 100644 index 00000000000..d42ddfc8f86 --- /dev/null +++ b/jstests/core/hashed_partial_and_sparse_index.js @@ -0,0 +1,73 @@ +/** + * Tests to verify that the queries return correct results in the presence of partial hashed + * index and sparse index. The test verifies compound hashed index with hashed prefix and non-hashed + * prefix. + * + * @tags: [requires_fcv_44] + */ +(function() { +"use strict"; + +load("jstests/aggregation/extras/utils.js"); // For arrayEq(). +load("jstests/libs/analyze_plan.js"); // For assertStagesForExplainOfCommand(). + +const coll = db.hashed_partial_index; +coll.drop(); +assert.commandWorked(coll.insert({})); +assert.commandWorked(coll.insert({a: null})); +assert.commandWorked(coll.insert({a: 1})); +assert.commandWorked(coll.insert({b: 4})); +assert.commandWorked(coll.insert({a: 1, b: 6})); + +/** + * Runs find command with the 'filter' and validates that the output returned matches + * 'expectedOutput'. Also runs explain() command on the same find command and validates that all + * the 'expectedStages' are present in the plan returned. + */ +function validateFindCmdOutputAndPlan({filter, expectedStages, expectedOutput}) { + const cmdObj = {find: coll.getName(), filter: filter, projection: {_id: 0}}; + if (expectedOutput) { + const res = assert.commandWorked(coll.runCommand(cmdObj)); + const ouputArray = new DBCommandCursor(coll.getDB(), res).toArray(); + + // We ignore the order since hashed index order is not predictable. + assert(arrayEq(expectedOutput, ouputArray), ouputArray); + } + assertStagesForExplainOfCommand({coll: coll, cmdObj: cmdObj, expectedStages: expectedStages}); +} + +/** + * Tests for sparse indexes. + */ +assert.commandWorked(coll.createIndex({a: 1, b: "hashed", c: -1}, {sparse: true})); + +// Verify index not used for null/missing queries with sparse index. +validateFindCmdOutputAndPlan({filter: {a: null}, expectedStages: ["COLLSCAN"]}); +validateFindCmdOutputAndPlan({filter: {a: {$exists: false}}, expectedStages: ["COLLSCAN"]}); + +// Verify index can be used for non-null queries with sparse index. +validateFindCmdOutputAndPlan({ + filter: {a: {$exists: true}}, + expectedOutput: [{a: null}, {a: 1}, {a: 1, b: 6}], + expectedStages: ["IXSCAN", "FETCH"] +}); + +validateFindCmdOutputAndPlan( + {filter: {a: 1, b: 6}, expectedOutput: [{a: 1, b: 6}], expectedStages: ["IXSCAN", "FETCH"]}); + +/** + * Tests for partial indexes. + */ +[{b: "hashed", c: 1}, {b: 1, c: "hashed", d: 1}].forEach((index) => { + assert.commandWorked(coll.dropIndexes()); + assert.commandWorked(coll.createIndex(index, {partialFilterExpression: {b: {$gt: 5}}})); + + // Verify that index is not used if the query predicate doesn't match the + // 'partialFilterExpression'. + validateFindCmdOutputAndPlan({filter: {b: 4}, expectedStages: ["COLLSCAN"]}); + + // Verify that index is used if the query predicate matches the 'partialFilterExpression'. + validateFindCmdOutputAndPlan( + {filter: {b: 6}, expectedOutput: [{a: 1, b: 6}], expectedStages: ["IXSCAN", "FETCH"]}); +}); +})(); \ No newline at end of file diff --git a/jstests/libs/analyze_plan.js b/jstests/libs/analyze_plan.js index 74063ee5cd3..b408ea50fb2 100644 --- a/jstests/libs/analyze_plan.js +++ b/jstests/libs/analyze_plan.js @@ -401,3 +401,22 @@ function assertCoveredQueryAndCount({collection, query, project, count}) { "Winning plan for count was not covered: " + tojson(explain.queryPlanner.winningPlan)); assertExplainCount({explainResults: explain, expectedCount: count}); } + +/** + * Runs explain() operation on 'cmdObj' and verifies that all the stages in 'expectedStages' are + * present exactly once in the plan returned. When 'stagesNotExpected' array is passed, also + * verifies that none of those stages are present in the explain() plan. + */ +function assertStagesForExplainOfCommand({coll, cmdObj, expectedStages, stagesNotExpected}) { + const plan = assert.commandWorked(coll.runCommand({explain: cmdObj})); + const winningPlan = plan.queryPlanner.winningPlan; + for (let expectedStage of expectedStages) { + assert(planHasStage(coll.getDB(), winningPlan, expectedStage), + "Could not find stage " + expectedStage + ". Plan: " + tojson(plan)); + } + for (let stage of (stagesNotExpected || [])) { + assert(!planHasStage(coll.getDB(), winningPlan, stage), + "Found stage " + stage + " when not expected. Plan: " + tojson(plan)); + } + return plan; +} diff --git a/src/mongo/db/query/SConscript b/src/mongo/db/query/SConscript index f286bca9ccc..950e4722834 100644 --- a/src/mongo/db/query/SConscript +++ b/src/mongo/db/query/SConscript @@ -282,6 +282,7 @@ env.CppUnitTest( "query_planner_array_test.cpp", "query_planner_collation_test.cpp", "query_planner_geo_test.cpp", + "query_planner_hashed_index_test.cpp", "query_planner_partialidx_test.cpp", "query_planner_index_test.cpp", "query_planner_operator_test.cpp", diff --git a/src/mongo/db/query/query_planner_hashed_index_test.cpp b/src/mongo/db/query/query_planner_hashed_index_test.cpp new file mode 100644 index 00000000000..72612af05f5 --- /dev/null +++ b/src/mongo/db/query/query_planner_hashed_index_test.cpp @@ -0,0 +1,815 @@ +/** + * Copyright (C) 2019-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/hasher.h" +#include "mongo/db/query/collation/collator_interface_mock.h" +#include "mongo/db/query/query_planner_test_fixture.h" + +namespace mongo { +/** + * A specialization of the QueryPlannerTest fixture which makes it easy to present the planner + * with a view of the available hashed indexes. + */ +class QueryPlannerHashedTest : public QueryPlannerTest { +protected: + void setUp() final { + QueryPlannerTest::setUp(); + + // We're interested in testing plans that use a hashed index, so don't generate collection + // scans. + params.options &= ~QueryPlannerParams::INCLUDE_COLLSCAN; + } + + /** + * Returns string of the format "[,,true,true]". + */ + template + std::string getHashedBound(T value) { + auto hashedValue = BSONElementHasher::hash64(BSON("" << value).firstElement(), 0); + return str::stream() << "[ " << hashedValue << ", " << hashedValue << ", true, true]"; + } +}; + +// +// Range queries. +// +TEST_F(QueryPlannerHashedTest, RangeQueryWhenHashedFieldIsAPrefix) { + addIndex(BSON("x" + << "hashed" + << "y" << 1 << "z" << -1)); + + // Range query on hashed field cannot use index. + runQuery(fromjson("{'x': {$gt: 0, $lt: 9}}")); + assertHasOnlyCollscan(); + + // Range query on hashed field cannot use index even if range would lead to empty bounds. + runQuery(fromjson("{x: {$lte: 5, $gte: 10}}")); + assertHasOnlyCollscan(); + + // Range query on non-hashed field can use index, if there is an equality match on the hashed + // prefix. + runQuery(fromjson("{x: 2, y: {$gt: 0, $lte: 9}}")); + + // Verify that fetch stage only has a filter on hashed field and correct bounds are used. + assertSolutionExists( + "{fetch: {filter: {x: 2}, node: {ixscan:{pattern: {x: 'hashed', y: 1, z: -1}, bounds: " + "{x: [" + + getHashedBound(2) + "], y: [[0,9,false,true]], z: [['MaxKey','MinKey',true,true]] }}}}}"); +} + +TEST_F(QueryPlannerHashedTest, RangeQueryWhenNonHashedFieldIsAPrefix) { + addIndex(BSON("x" << 1 << "y" + << "hashed" + << "z" << -1)); + + // Range query on non-hashed field can use index. + runQuery(fromjson("{x: {$gte: 0, $lt: 9}}")); + + // Verify that fetch stage has no filter and correct bounds are used. + assertSolutionExists( + "{fetch: {filter: null, node: {ixscan:{pattern: {x: 1, y: 'hashed', z: " + "-1}, bounds: {x: [[0,9,true,false]],y: [['MinKey','MaxKey',true,true]], z: " + "[['MaxKey','MinKey',true,true]] }}}}}"); + + // Range query on hashed field cannot use index for hashed component. + runQuery(fromjson("{x: {$gte: 0, $lt: 9}, y: {$lte: 5, $gte: 10}}")); + + // Verify that fetch stage only has a filter on hashed field and validate the bounds. + assertSolutionExists( + "{fetch: {filter: {y: {$lte: 5, $gte: 10}}, node: {ixscan:{pattern: {x: 1, y: " + "'hashed', z: -1}, bounds: {x: [[0,9,true,false]],y: [['MinKey','MaxKey',true,true]], z: " + "[['MaxKey','MinKey',true,true]] }}}}}"); +} + +// +// Tests related to object comparison. +// +TEST_F(QueryPlannerHashedTest, DottedFieldInIndex) { + addIndex(BSON("x.a" + << "hashed" + << "y" << 1 << "z" << -1)); + + // Cannot use index when the query is on a prefix of the index field. + runQuery(fromjson("{x: {a: 1}}")); + assertHasOnlyCollscan(); + + // Can use index when query is on the index field. + runQuery(fromjson("{'x.a' : 1}")); + assertSolutionExists( + "{fetch: {filter: {'x.a': 1}, node: {ixscan:{pattern: {'x.a': 'hashed', y: " + "1, z: -1}, bounds: {'x.a': [" + + getHashedBound(1) + + "], y: [['MinKey','MaxKey',true,true]], z: [['MaxKey','MinKey',true,true]] }}}}}"); +} + +TEST_F(QueryPlannerHashedTest, QueryOnObject) { + addIndex(BSON("x" + << "hashed" + << "y" << 1 << "z" << -1)); + + // Can use index when the query predicate is an object. + runQuery(fromjson("{x: {a: 1}, y: 1}")); + assertSolutionExists( + "{fetch: {filter: {x: {$eq : {a: 1}}}, node: {ixscan:{pattern: {'x': 'hashed', y: " + "1, z: -1}, bounds: {'x': [" + + getHashedBound(fromjson("{a: 1}")) + + "], y: [[1,1,true,true]], z: [['MaxKey','MinKey',true,true]] }}}}}"); + + // Cannot use index to query on a sub-object on an index field. + runQuery(fromjson("{'x.a' : 1}")); + assertHasOnlyCollscan(); +} + +// +// Null comparison and existence tests for compound hashed index. +// +TEST_F(QueryPlannerHashedTest, ExistsTrueQueries) { + // $exists:true query on hashed prefix field cannot use index. + addIndex(BSON("x" + << "hashed" + << "y" << 1 << "z" << -1)); + runQuery(fromjson("{x: {$exists: true}}")); + assertHasOnlyCollscan(); + + // $exists:true query on non-hashed prefix field can use index. + addIndex(BSON("x" << 1 << "y" + << "hashed" + << "z" << -1)); + runQuery(fromjson("{x: {$exists: true}}")); + assertSolutionExists( + "{fetch: {filter: {x: {$exists: true}}, node: {ixscan: {pattern: {x: 1, y: 'hashed', z:-1}," + "bounds: {x: [['MinKey','MaxKey',true,true]],y: [['MinKey','MaxKey',true,true]], z: " + "[['MaxKey','MinKey',true,true]] }}}}}"); +} + +TEST_F(QueryPlannerHashedTest, ExistsFalseQueries) { + // $exists:false query on hashed prefix field cannot use index. + // TODO SERVER-44011: This can be optimised to use index scan on 'null'. + addIndex(BSON("x" + << "hashed" + << "y" << 1 << "z" << -1)); + runQuery(fromjson("{x: {$exists: false}}")); + assertHasOnlyCollscan(); + + // $exists:false query on non-hashed prefix field cannot use index. + // TODO SERVER-44011: This can be optimised to use index. + addIndex(BSON("x" << 1 << "y" + << "hashed" + << "z" << -1)); + runQuery(fromjson("{x: {$exists: false}}")); + assertHasOnlyCollscan(); +} + +TEST_F(QueryPlannerHashedTest, NotEqualsNullQueries) { + // $not queries on a hashed prefix field cannot use index. + addIndex(BSON("x" + << "hashed" + << "y" << 1 << "z" << -1)); + runQuery(fromjson("{x: {$ne: null}}")); + assertHasOnlyCollscan(); + + runQuery(fromjson("{x: {$not: {$eq: null}}}")); + assertHasOnlyCollscan(); + + // $not queries on a non-hashed prefix field cannot use index. + // TODO SERVER-44011: This can be optimised to use index. + addIndex(BSON("x" << 1 << "y" + << "hashed" + << "z" << -1)); + runQuery(fromjson("{x: {$ne: null}}")); + assertHasOnlyCollscan(); + + runQuery(fromjson("{x: {$not: {$eq: null}}}")); + assertHasOnlyCollscan(); +} + +TEST_F(QueryPlannerHashedTest, EqualsNullQueries) { + // When hashed field is a prefix, $eq:null queries can use index. The bounds will be point + // intervals of hashed value of 'undefined' and 'null'. + addIndex(BSON("x" + << "hashed" + << "y" << 1 << "z" << -1)); + runQuery(fromjson("{x: {$eq: null}}")); + assertSolutionExists( + "{fetch: {filter: {x: {$eq: null}}, node: {ixscan:{pattern: {x: 'hashed', y: 1, z: " + "-1},bounds: {x: [" + + getHashedBound(BSONUndefined) + "," + getHashedBound(BSONNULL) + + "],y: [['MinKey','MaxKey',true,true]], z: [['MaxKey','MinKey',true,true]]" + "}}}}}"); + + // $eq:null queries on a non-hashed field can use the index. The bounds will be point intervals + // of 'undefined' and 'null'. + addIndex(BSON("x" << 1 << "y" + << "hashed" + << "z" << -1)); + runQuery(fromjson("{x: {$eq: null}}")); + assertSolutionExists( + "{fetch: {filter: {x: {$eq: null}}, node: {ixscan:{pattern: {x: 1, y: 'hashed', z: -1}, " + "bounds: {x:[[undefined, undefined, true, true],[null, null, true, true]], y: " + "[['MinKey','MaxKey',true,true]], z: [['MaxKey','MinKey',true,true]]" + "}}}}}"); +} + +// +// Tests with $or operator. +// +TEST_F(QueryPlannerHashedTest, OrWithMultipleEqualityPredicatesOnHashedPrefixUsesSingleIndexScan) { + addIndex(BSON("a" + << "hashed" + << "b" << 1)); + runQuery(fromjson("{$or: [{a: 5}, {a: 10}]}")); + + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {$or: [{a: 5}, {a: 10}]}, node: {ixscan:{pattern: {a: 'hashed', b:1}, " + "bounds: {a:[" + + getHashedBound(5) + "," + getHashedBound(10) + + "], b: [['MinKey','MaxKey',true,true]] }}}}}"); +} + +TEST_F(QueryPlannerHashedTest, + OrWithMultipleEqualityPredicatesOnNonHashedPrefixUsesSingleIndexScan) { + addIndex(BSON("a" << 1 << "b" + << "hashed")); + runQuery(fromjson("{$or: [{a: 5}, {a: 10}]}")); + + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: null, node: {ixscan: {pattern: {a: 1, b: 'hashed'}, " + "bounds: {a:[[5,5,true,true],[10,10,true,true]], b: [['MinKey','MaxKey',true,true]] }}}}}"); +} + +TEST_F(QueryPlannerHashedTest, OrWithOneRegularAndOneHashedIndexPathUsesTwoIndexes) { + addIndex(BSON("a" + << "hashed" + << "c" << 1)); + addIndex(BSON("b" << 1)); + runQuery(fromjson("{$or: [{a: 5}, {b: 10}]}")); + + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: null, node: {or: {nodes: [" + "{fetch: {filter: {$or: [{a: 5}]}, node: {ixscan: {filter: null, pattern: " + "{a: 'hashed', c:1}, bounds: {a: [" + + getHashedBound(5) + + "]}}}}}, {ixscan: {filter: null, pattern: {b: 1}," + "bounds: {b: [[10,10,true,true]]}}}]}}}}"); +} + +TEST_F(QueryPlannerHashedTest, OrPushdownWithHashedPrefix) { + addIndex(BSON("a" + << "hashed" + << "b" << 1)); + addIndex(BSON("a" + << "hashed" + << "c" << 1)); + runQuery(fromjson("{a: 1, $or: [{b: 2}, {c: 3}]}")); + assertNumSolutions(3U); + + // Verify that three different solution, with index1, index2 and a union of index1 solution, + // index2 solution are generated. + assertSolutionExists( + "{fetch: {filter: {$and : [{a: {$eq: 1}}, {$or: [{b: {$eq: 2}}, {c: {$eq: 3}}]}]}, node: " + "{ixscan: {pattern: {a: 'hashed', b: 1}, filter: null, bounds: {a: [" + + getHashedBound(1) + "], b: [['MinKey','MaxKey',true,true]]}}}}}"); + + assertSolutionExists( + "{fetch: { filter: {$and : [{a: {$eq: 1}}, {$or: [{b: {$eq: 2}}, {c: {$eq: 3}}]}]}, node: " + "{ixscan: {pattern: {a: 'hashed', c: 1}, filter: null, bounds: {a: [" + + getHashedBound(1) + "], c: [['MinKey','MaxKey',true,true]]}}}}}"); + + assertSolutionExists( + "{or: {nodes: [" + "{fetch: { filter: {a: {$eq: 1}}, node: {ixscan: {pattern: {a: 'hashed', b: " + "1}, filter: null, bounds: {a: [" + + getHashedBound(1) + + "], b: [[2,2,true,true]]}}}}}," // First sub-plan end. + "{fetch: { filter: {a: {$eq: 1}}, node: {ixscan: {pattern: {a: 'hashed', c: 1}, filter: " + "null, bounds: {a: [" + + getHashedBound(1) + + "], c: [[3,3,true,true]]}}}}}" // Second sub-plan end. + "]}}"); +} + +TEST_F(QueryPlannerHashedTest, OrPushdownWithNonHashedPrefix) { + addIndex(BSON("a" << 1 << "b" + << "hashed")); + addIndex(BSON("a" + << "hashed" + << "c" << 1)); + runQuery(fromjson("{a: 1, $or: [{b: 2}, {c: 3}]}")); + assertNumSolutions(3U); + + // Verify that three different solution, with index1, index2 and a union of index1 solution, + // index2 solution are generated. + assertSolutionExists( + "{fetch: {filter: {$or: [{b: {$eq: 2}}, {c: {$eq: 3}}]}, node: " + "{ixscan: {pattern: {a: 1, b: 'hashed'}, filter: null, bounds: {a: [[1,1,true,true]], b: " + "[['MinKey','MaxKey',true,true]]}}}}}"); + + assertSolutionExists( + "{fetch: {filter: {$and: [{a: {$eq: 1}}, {$or: [{b: {$eq: 2}}, {c: {$eq: 3}}]}]}, node: " + "{ixscan: {pattern: {a: 'hashed', c: 1}, filter: null, bounds: {a: [" + + getHashedBound(1) + "], c: [['MinKey','MaxKey',true,true]]}}}}}"); + + assertSolutionExists( + "{or: {nodes: [{fetch: {filter: {b: {$eq: 2}}, node: " + "{ixscan: {pattern: {a: 1, b: 'hashed'}, filter: null, bounds: {a: [[1,1,true,true]], " + "b: [" + + getHashedBound(2) + "]}}}}}, " + + "{fetch: {filter: {a: {$eq: 1}}, node: {ixscan: {pattern: {a: 'hashed', c: 1}, filter: " + "null, bounds: {a: [" + + getHashedBound(1) + "], c: [[3,3,true,true]]}}}}}]}}"); +} + +// +// Covered projections. +// +TEST_F(QueryPlannerHashedTest, CannotCoverQueryWhenHashedFieldIsPrefix) { + addIndex(BSON("x" + << "hashed" + << "y" << 1)); + + // Verify that a query doesn't get covered when the query is on a hashed field, even if the + // projection doesn't include the hashed field. This is to avoid the possibility of hash + // collision. If two different fields produce the same hash value, there is no way to + // distinguish them without fetching the document. + runQueryAsCommand(fromjson("{filter: {x : {$eq: 5}}, projection:{_id: 0, y: 1}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{proj: {spec: {_id: 0, y: 1}, node: {fetch: {filter: {x : {$eq: 5}}, node: {ixscan: " + "{filter: null, pattern: {x: 'hashed', y: 1}," + "bounds: {x:[" + + getHashedBound(5) + "], y: [['MinKey','MaxKey',true,true]] }}}}}}}"); + + // Verify that queries cannot be covered with hashed field is a prefix, despite query and + // projection not using hashed fields. + runQueryAsCommand(fromjson("{filter: {y : {$eq: 5}}, projection:{_id: 0, y: 1}}")); + assertNumSolutions(1U); + assertSolutionExists("{proj: {spec: {_id: 0, y: 1}, node: {cscan: {dir: 1}} }}"); +} + +TEST_F(QueryPlannerHashedTest, CanCoverQueryWhenHashedFieldIsNotPrefix) { + addIndex(BSON("x" << 1 << "y" + << "hashed" + << "z" << -1)); + + // Verify that query gets covered when neither query nor project use hashed field. + runQueryAsCommand(fromjson("{filter: {x: {$gt: 24, $lt: 27}}, projection:{_id: 0, z: 1}}")); + + assertNumSolutions(1U); + assertSolutionExists( + "{proj: {spec: {_id: 0, z: 1}, node: {ixscan: " + "{filter: null, pattern: {x: 1, y: 'hashed', z: -1}," + "bounds: {x: [[24,27,false,false]], y: [['MinKey','MaxKey',true,true]], z: " + "[['MaxKey','MinKey',true,true]] }}}}}"); + + // Verify that query doesn't get covered when query is on a hashed field. + runQueryAsCommand(fromjson("{filter: {x: 1, y:1}, projection:{_id: 0, z: 1}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{proj: {spec: {_id: 0, z: 1}, node: {fetch: {filter: {y : {$eq: 1}}, node: {ixscan: " + "{filter: null, pattern: {x: 1, y: 'hashed', z: -1}," + "bounds: {x: [[1,1,true,true]], y:[" + + getHashedBound(1) + "] , z: [['MaxKey','MinKey',true,true]]} }} }} }}"); + + // Verify that the query doesn't get covered when projection is on a hashed field. + runQueryAsCommand(fromjson("{filter: {x: 1}, projection:{_id: 0, y: 1}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{proj: {spec: {_id: 0, y: 1}, node: {fetch: {filter: null, node: {ixscan: " + "{filter: null, pattern: {x: 1, y: 'hashed', z: -1}," + "bounds: {x: [[1,1,true,true]], y: [['MinKey','MaxKey',true,true]], z: " + "[['MaxKey','MinKey',true,true]]} }} }} }}"); +} + +// +// Sorting tests. +// +TEST_F(QueryPlannerHashedTest, SortWhenHashedFieldIsPrefix) { + addIndex(BSON("x" + << "hashed" + << "y" << -1 << "z" << 1)); + + // Verify that a list of exact match predicates on hashed field (prefix) and sort with an + // immediate range field can use 'SORT_MERGE'. + runQueryAsCommand(fromjson("{filter: {x: {$in: [1, 2]}}, sort: {y: 1}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {x: {$in: [1,2]}}, node: {mergeSort: {nodes: [{ixscan: {pattern: {x: " + "'hashed', y: -1, z: 1}, bounds: {x: [" + + getHashedBound(1) + + "], y: [['MinKey','MaxKey',true,true]], z: [['MaxKey','MinKey',true,true]]}}}," + "{ixscan: {pattern: {x: 'hashed', y: -1, z: 1}, bounds: {x: [" + + getHashedBound(2) + + "], y: [['MinKey','MaxKey',true,true]], z: [['MaxKey','MinKey',true,true]]}}}]}}}}"); + + // Verify that an exact match predicate on hashed field (prefix) and sort with an immediate + // range field can use 'SORT_MERGE'. + runQueryAsCommand(fromjson("{filter: {x: 1}, sort: {y: -1}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {x: {$eq: 1}}, node: {mergeSort: {nodes: [{ixscan: {pattern: {x: " + "'hashed', y: -1, z: 1}, bounds: {x: [" + + getHashedBound(1) + + "], y: [['MaxKey','MinKey',true,true]], z: [['MinKey','MaxKey',true,true]] }}}] }}}}"); + + // Sort on any index field other than the one immediately following the hashed field will use a + // blocking sort. + runQueryAsCommand(fromjson("{filter: {x: 3}, sort: {z: 1}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{sort: {pattern: {z:1}, limit: 0, node: {sortKeyGen: {node: {fetch: {filter: {x: {$eq: " + "3}}, node: {ixscan: {pattern: {x: 'hashed', y: -1, z: 1}, bounds: {x: [" + + getHashedBound(3) + + "], y: [['MaxKey','MinKey',true,true]], z: [['MinKey','MaxKey',true,true]]} }} }} }} }}"); +} + +TEST_F(QueryPlannerHashedTest, SortWhenNonHashedFieldIsPrefix) { + addIndex(BSON("x" << 1 << "y" << -1 << "z" + << "hashed" + << "a" << 1)); + + // Verify that a list of exact match predicates on range field (prefix) and sort with an + // immediate range field can use 'SORT_MERGE'. + runQueryAsCommand(fromjson("{filter: {x: {$in: [1, 2]}}, sort: {y: 1}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: null, node: {mergeSort: {nodes: [{ixscan: {pattern: {x: " + "1, y: -1, z: 'hashed', a: 1}, bounds: {x: [[2,2,true,true]], y: " + "[['MinKey','MaxKey',true,true]], z: [['MaxKey','MinKey',true,true]], a: " + "[['MaxKey','MinKey',true,true]]}}}," + "{ixscan: {pattern: {x: 1, y: -1, z: 'hashed', a: 1}, bounds: {x: [[1,1,true,true]], y: " + "[['MinKey','MaxKey',true,true]], z: [['MaxKey','MinKey',true,true]], a: " + "[['MaxKey','MinKey',true,true]]}}}]}}}}"); + + // Verify that an exact match predicate on range field (prefix) and sort with an immediate range + // field doesn't require any additional sort stages. The entire operation can be answered by the + // index. + runQueryAsCommand(fromjson("{filter: {x: 1}, sort: {y: -1}, projection: {_id: 0, a: 1}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{proj: {spec: {_id: 0, a: 1}, node: {ixscan: {pattern: {x: " + "1, y: -1, z: 'hashed', a: 1}, bounds: {x: [[1,1,true,true]], y: " + "[['MaxKey','MinKey',true,true]], z: [['MinKey','MaxKey',true,true]], a: " + "[['MinKey','MaxKey',true,true]]}}}}}"); + + // Verify that query predicate and sort on non-hashed fields can be answered without fetching + // the document, but require a sort stage, if the 'sort' field is not immediately after 'query' + // field in the index. + runQueryAsCommand(fromjson("{filter: {x: 3}, sort: {a: 1}, projection: {_id: 0, y: 1}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{proj: {spec: {_id: 0, y: 1}, node: {sort: {pattern: {a: 1}, limit: 0, node: {sortKeyGen: " + "{node: {ixscan: {pattern: {x: 1, y: -1, z: 'hashed', a: 1}, bounds: {x: " + "[[3,3,true,true]], y: [['MaxKey','MinKey',true,true]], z: " + "[['MinKey','MaxKey',true,true]], a: [['MinKey','MaxKey',true,true]]} }} }} }} }} "); + + // Verify that sort on a hashed field requires a fetch and a blocking sort stage. + runQueryAsCommand(fromjson("{filter: {x: 3}, sort: {z: 1}, projection: {_id: 0, y: 1}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{proj: {spec: {_id: 0, y: 1}, node: {sort: {pattern: {z:1}, limit: 0, node: {sortKeyGen: " + "{node: {fetch: {filter: null, node: {ixscan: {pattern: {x: 1, y: -1, z: 'hashed', a: 1}, " + "bounds: {x: [[3,3,true,true]], y: [['MaxKey','MinKey',true,true]], z: " + "[['MinKey','MaxKey',true,true]], a: [['MinKey','MaxKey',true,true]]} }} }} }} }} }}"); +} + +// +// Partial index tests. +// +TEST_F(QueryPlannerHashedTest, PartialIndexCanAnswerPredicateOnFilteredFieldWithHashedPrefix) { + auto filterObj = fromjson("{x: {$gt: 0}}"); + auto filterExpr = QueryPlannerTest::parseMatchExpression(filterObj); + addIndex(BSON("x" + << "hashed" + << "y" << 1), + filterExpr.get()); + + runQuery(fromjson("{x: 5}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {x : {$eq: 5}}, node: " + "{ixscan: {filter: null, pattern: {x: 'hashed', y: 1}," + "bounds: {x:[" + + getHashedBound(5) + "], y: [['MinKey','MaxKey',true,true]] }}}}}"); + + runQuery(fromjson("{x: 5, y: {$lt: 1}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {x : {$eq: 5}}, node: " + "{ixscan: {filter: null, pattern: {x: 'hashed', y: 1}," + "bounds: {x:[" + + getHashedBound(5) + "], y: [[-Infinity,1,true,false]] }}}}}"); +} + +TEST_F(QueryPlannerHashedTest, PartialIndexCanAnswerPredicateOnFilteredFieldWithNonHashedPrefix) { + auto filterObj = fromjson("{x: {$gt: 0}}"); + auto filterExpr = QueryPlannerTest::parseMatchExpression(filterObj); + addIndex(BSON("x" << 1 << "y" + << "hashed"), + filterExpr.get()); + + runQuery(fromjson("{x: {$gte: 5}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: null, node: " + "{ixscan: {filter: null, pattern: {x: 1, y: 'hashed'}," + "bounds: {x:[[5,Infinity,true,true]], y: [['MinKey','MaxKey',true,true]] }}}}}"); + + runQuery(fromjson("{x: {$gte: 5}, y: 1}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {y : {$eq: 1}}, node: " + "{ixscan: {filter: null, pattern: {x: 1, y: 'hashed'}," + "bounds: {x: [[5,Infinity,true,true]], y:[" + + getHashedBound(1) + "] }}}}}"); +} + +TEST_F(QueryPlannerHashedTest, PartialIndexDoesNotAnswerPredicatesExcludedByFilter) { + auto filterObj = fromjson("{x: {$gt: 0}}"); + auto filterExpr = QueryPlannerTest::parseMatchExpression(filterObj); + + // Hashed prefix. + addIndex(BSON("x" + << "hashed" + << "y" << 1), + filterExpr.get()); + + runQuery(fromjson("{x: {$eq: -1}}")); + assertHasOnlyCollscan(); + + runQuery(fromjson("{x: {$eq: 0}}")); + assertHasOnlyCollscan(); + + // Non-hashed prefix. + addIndex(BSON("x" << 1 << "y" + << "hashed"), + filterExpr.get()); + + runQuery(fromjson("{x: {$eq: -1}}")); + assertHasOnlyCollscan(); + + runQuery(fromjson("{x: {$eq: 0}}")); + assertHasOnlyCollscan(); +} + +// +// Hinting with hashed index tests. +// +TEST_F(QueryPlannerHashedTest, ChooseHashedIndexHint) { + addIndex(BSON("x" + << "hashed" + << "y" << 1)); + + addIndex(BSON("x" << 1)); + runQueryAsCommand(fromjson("{filter: {x: 3}, hint: {x: 'hashed', y: 1}}")); + + + assertNumSolutions(1U); + assertSolutionExists("{fetch: {node: {ixscan: {pattern: {x: 'hashed', y: 1}}}}}"); +} + +TEST_F(QueryPlannerHashedTest, ChooseHashedIndexHintWithOr) { + addIndex(BSON("x" + << "hashed" + << "y" << 1)); + + addIndex(BSON("y" << 1)); + + runQueryAsCommand(fromjson("{filter: {$or: [{x: 1}, {y: 1}]}, hint: {x: 'hashed', y: 1}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {$or: [{x: 1}, {y: 1}]}, node: {ixscan: {pattern: {x: 'hashed', y: 1}, " + "bounds: {x:[['MinKey','MaxKey',true,true]], y: [['MinKey','MaxKey',true,true]]}}}}}"); +} + +TEST_F(QueryPlannerHashedTest, HintWhenHashedIndexDoesNotExist) { + addIndex(BSON("x" + << "hashed" + << "y" << 1)); + + runInvalidQueryHint(fromjson("{x: {$eq: 1}}"), + BSON("x" + << "hashed")); +} + +TEST_F(QueryPlannerHashedTest, TypeOfWithHashedIndex) { + // Hashed prefix. + addIndex(BSON("x" + << "hashed" + << "y" << 1)); + runQuery(fromjson("{x: {$type: 'number'}}")); + assertHasOnlyCollscan(); + + // Non-hashed prefix. + addIndex(BSON("x" << 1 << "y" + << "hashed")); + + runQuery(fromjson("{x: {$type: 'number'}}")); + + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {node: {ixscan: {pattern: {x: 1, y: 'hashed'}, " + "bounds: {x:[[NaN,Infinity,true,true]], y: [['MinKey','MaxKey',true,true]]}}}}}"); +} + +// +// Collation tests. +// +TEST_F(QueryPlannerHashedTest, + StringComparisonWithUnequalCollatorsAndHashedIndexResultsInCollscan) { + CollatorInterfaceMock alwaysEqualCollator(CollatorInterfaceMock::MockType::kAlwaysEqual); + addIndex(BSON("a" + << "hashed" + << "b" << 1), + &alwaysEqualCollator); + + runQueryAsCommand( + fromjson("{find: 'testns', filter: {a: {$lt: 'foo'}}, collation: {locale: 'reverse'}}")); + assertHasOnlyCollscan(); +} + +TEST_F(QueryPlannerHashedTest, StringComparisonWithEqualCollatorsAndHashedIndexUsesIndex) { + CollatorInterfaceMock reverseStringCollator(CollatorInterfaceMock::MockType::kReverseString); + addIndex(BSON("a" + << "hashed" + << "b" << 1), + &reverseStringCollator); + + runQueryAsCommand( + fromjson("{find: 'testns', filter: {a: {$eq: 'foo'}}, collation: {locale: 'reverse'}}")); + + assertNumSolutions(1U); + + // Verify that the bounds generated is based on a hash of reversed 'foo'. + assertSolutionExists( + "{fetch: {node: {ixscan: {pattern: {a: 'hashed', b: 1}, " + "bounds: {a:[" + + getHashedBound("oof") + "], b: [['MinKey','MaxKey',true,true]]}}}}}"); +} + +TEST_F(QueryPlannerHashedTest, NonStringComparisonWithUnequalCollators) { + CollatorInterfaceMock alwaysEqualCollator(CollatorInterfaceMock::MockType::kAlwaysEqual); + addIndex(BSON("a" + << "hashed" + << "b" << 1), + &alwaysEqualCollator); + + runQueryAsCommand(fromjson("{find: 'testns', filter: {a: 2}, collation: {locale: 'reverse'}}")); + assertNumSolutions(1U); + + // Verify that we use an index scan even if the collators doesn't match. + assertSolutionExists( + "{fetch: {node: {ixscan: {pattern: {a: 'hashed', b: 1}, " + "bounds: {a:[" + + getHashedBound(2) + "], b: [['MinKey','MaxKey',true,true]]}}}}}"); +} + +// +// Tests to verify index usage for query with operators like $type, $regex, limit, skip etc. +// +TEST_F(QueryPlannerHashedTest, QueryWithPrefixRegex) { + // Prefix regex cannot use index when prefix field is hashed. + addIndex(BSON("x" + << "hashed" + << "y" << 1 << "z" << -1)); + runQuery(fromjson("{x: /^foo/}")); + assertHasOnlyCollscan(); + + // Prefix regex can use index when prefix field is not hashed. + addIndex(BSON("x" << 1 << "y" + << "hashed")); + runQuery(fromjson("{x: /^foo/}")); + + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: null, node: {ixscan: {pattern: {x: 1, y: 'hashed'}," + "bounds: {x: [['foo','fop',true,false], [/^foo/,/^foo/,true,true]], y: " + "[['MinKey','MaxKey',true,true]] }}}}}"); +} + +TEST_F(QueryPlannerHashedTest, BasicSkip) { + addIndex(BSON("x" + << "hashed" + << "y" << 1)); + + runQueryAsCommand(fromjson("{filter: {x: 5}, skip: 8}")); + assertNumSolutions(1U); + + // Verify that the plan has 'skip' stage and uses index. + assertSolutionExists( + "{skip: {n: 8, node: {fetch: {filter: { x : {$eq: 5}}, node: " + "{ixscan: {filter: null, pattern: {x: 'hashed', y: 1}," + "bounds: {x:[" + + getHashedBound(5) + "], y: [['MinKey','MaxKey',true,true]] }}}}}}}"); +} + +TEST_F(QueryPlannerHashedTest, BasicLimit) { + addIndex(BSON("x" + << "hashed" + << "y" << 1)); + + runQueryAsCommand(fromjson("{filter: {x: 5}, limit: 5}")); + assertNumSolutions(1U); + + // Verify that the plan has 'limit' stage and uses index. + assertSolutionExists( + "{limit: {n: 5, node: {fetch: {filter: { x : {$eq: 5}}, node: " + "{ixscan: {filter: null, pattern: {x: 'hashed', y: 1}," + "bounds: {x:[" + + getHashedBound(5) + "], y: [['MinKey','MaxKey',true,true]] }}}}}}}"); +} + +TEST_F(QueryPlannerHashedTest, MinMaxParameter) { + addIndex(BSON("x" + << "hashed" + << "y" << 1)); + + runInvalidQueryHintMinMax(BSONObj(), + BSON("x" + << "hashed" + << "y" << 1), + fromjson("{x: 1}"), // min. + BSONObj()); + runInvalidQueryHintMinMax(BSONObj(), + BSON("x" + << "hashed" + << "y" << 1), + BSONObj(), + fromjson("{x: 1}") // max. + ); + + runQueryAsCommand( + fromjson("{filter: {x: 5}, hint: {x: 'hashed', y: 1}, min: {x: NumberLong(1), y: 2}, " + "max: {x: NumberLong(2), y: 2}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {x: {$eq: 5}}, node: {ixscan: {filter: null, pattern: {x: 'hashed', y: " + "1} }}}}"); + + + addIndex(BSON("x" << 1 << "y" + << "hashed")); + runInvalidQueryHintMinMax(BSONObj(), + BSON("x" << 1 << "y" + << "hashed"), + fromjson("{x: 1}"), // min. + BSONObj()); + runInvalidQueryHintMinMax(BSONObj(), + BSON("x" << 1 << "y" + << "hashed"), + BSONObj(), + fromjson("{x: 1}") // max. + ); + + runQueryAsCommand( + fromjson("{filter: {x: 5}, hint: {x: 1, y: 'hashed'}, min: {x: NumberLong(1), y: 2}, " + "max: {x: NumberLong(2), y: 2}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {x : {$eq: 5}}, node: {ixscan: {filter: null, pattern: {x: 1, y: " + "'hashed'} }}}}"); +} + +TEST_F(QueryPlannerHashedTest, ExprEqCanUseIndex) { + addIndex(BSON("x" + << "hashed" + << "y" << 1)); + runQuery(fromjson("{$expr: {$eq: ['$x', 1]}}")); + + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {$and: [{x: {$_internalExprEq: 1}}, {$expr: {$eq: ['$x', {$const: " + "1}]}}]}, node: {ixscan: {pattern: {x: 'hashed', y: 1}, bounds: {x : [" + + getHashedBound(1) + "], y: [['MinKey','MaxKey',true,true]] } }}}}"); +} +} // namespace mongo diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp index 320f54978a4..20a2746ec1a 100644 --- a/src/mongo/db/query/query_solution.cpp +++ b/src/mongo/db/query/query_solution.cpp @@ -557,10 +557,17 @@ bool IndexScanNode::hasField(const string& field) const { return false; } - // Custom index access methods may return non-exact key data - this function is currently - // used for covering exact key data only. - if (IndexNames::BTREE != IndexNames::findPluginName(index.keyPattern)) { - return false; + // Compound hashed indexes can be covered when the projection is not on the hashed field. Other + // custom index access methods may return non-exact key data - this function is currently used + // for covering exact key data only. + auto indexPluginName = IndexNames::findPluginName(index.keyPattern); + switch (IndexNames::nameToType(indexPluginName)) { + case IndexType::INDEX_BTREE: + case IndexType::INDEX_HASHED: + break; + default: + // All other index types provide no fields. + return false; } // If the index has a non-simple collation and we have collation keys inside 'field', then this @@ -585,7 +592,9 @@ bool IndexScanNode::hasField(const string& field) const { // and that path has no multikey components. We can't cover a field that has multikey // components because the index keys contain individual array elements, and we can't // reconstitute the array from the index keys in the right order. - if (field == elt.fieldName() && + // In order for the field to be provided by the scan, it must be ascending (1) or + // descending (-1). It cannot be a special index type (e.g. "hashed"). + if (field == elt.fieldName() && elt.isNumber() && (!index.multikey || index.multikeyPaths[keyPatternFieldIndex].empty())) { return true; } -- cgit v1.2.1