author | Josef Ahmad <josef.ahmad@mongodb.com> | 2021-11-18 08:01:04 +0000
---|---|---
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-11-18 08:26:32 +0000
commit | 1eb92bc5fb8b853200bd0d9b515136db81d301b5 (patch) |
tree | c028c1136f0570b06ae43e2523e9f253b1180dfc /jstests/libs/clustered_collections |
parent | bd6b08daed4a016f5758970f1ecb760087ba62d9 (diff) |
download | mongo-1eb92bc5fb8b853200bd0d9b515136db81d301b5.tar.gz |
SERVER-60702 Enable bounded collscans with arbitrary cluster keys
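Per the commit title, bounded collection scans now apply to clustered collections with arbitrary cluster keys rather than only the default `_id` key: a predicate on the cluster key is turned into `minRecord`/`maxRecord` bounds on the `COLLSCAN` stage instead of a full scan. As a rough sketch of the behavior the new tests assert (not part of the patch; the collection and field names are invented, and it assumes a build with `featureFlagClusteredIndexes` enabled and support for non-`_id` cluster keys):

```js
// Create a collection clustered on an arbitrary key 'ts' and fill it.
db.createCollection("events", {clusteredIndex: {key: {ts: 1}, unique: true}});
for (let i = 0; i < 100; i++) {
    assert.commandWorked(db.events.insert({ts: i, payload: -i}));
}

// A range predicate on the cluster key should now produce a bounded COLLSCAN:
// the stage carries minRecord/maxRecord rather than examining all 100 documents.
const expl = db.events.find({ts: {$gte: 20, $lte: 40}}).explain("executionStats");
printjson(expl.queryPlanner.winningPlan);  // expect COLLSCAN with minRecord: 20, maxRecord: 40
```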
Diffstat (limited to 'jstests/libs/clustered_collections')

 jstests/libs/clustered_collections/clustered_collection_bounded_scan_common.js | 133
 jstests/libs/clustered_collections/clustered_collection_hint_common.js         | 281
 jstests/libs/clustered_collections/clustered_collection_util.js                | 188

3 files changed, 602 insertions, 0 deletions
diff --git a/jstests/libs/clustered_collections/clustered_collection_bounded_scan_common.js b/jstests/libs/clustered_collections/clustered_collection_bounded_scan_common.js
new file mode 100644
index 00000000000..6a47a8d2f3c
--- /dev/null
+++ b/jstests/libs/clustered_collections/clustered_collection_bounded_scan_common.js
@@ -0,0 +1,133 @@
+/**
+ * Validate bounded collection scans on a clustered collection.
+ */
+
+const testClusteredCollectionBoundedScan = function(coll, clusterKey) {
+    "use strict";
+    load("jstests/libs/analyze_plan.js");
+    load("jstests/libs/collection_drop_recreate.js");
+
+    const batchSize = 100;
+    const clusterKeyFieldName = Object.keys(clusterKey)[0];
+
+    function initAndPopulate(coll, clusterKey) {
+        const clusterKeyFieldName = Object.keys(clusterKey)[0];
+        assertDropCollection(coll.getDB(), coll.getName());
+        assert.commandWorked(coll.getDB().createCollection(
+            coll.getName(), {clusteredIndex: {key: clusterKey, unique: true}}));
+
+        const bulk = coll.initializeUnorderedBulkOp();
+        for (let i = 0; i < batchSize; i++) {
+            bulk.insert({[clusterKeyFieldName]: i, a: -i});
+        }
+        assert.commandWorked(bulk.execute());
+        assert.eq(coll.find().itcount(), batchSize);
+    }
+    function testEq() {
+        initAndPopulate(coll, clusterKey);
+
+        const expl = assert.commandWorked(coll.getDB().runCommand({
+            explain: {find: coll.getName(), filter: {[clusterKeyFieldName]: 5}},
+            verbosity: "executionStats"
+        }));
+
+        assert(getPlanStage(expl, "COLLSCAN"));
+        assert.eq(5, getPlanStage(expl, "COLLSCAN").minRecord);
+        assert.eq(5, getPlanStage(expl, "COLLSCAN").maxRecord);
+
+        assert.eq(1, expl.executionStats.executionStages.nReturned);
+        // Expect nReturned + 1 documents examined by design - additional cursor 'next' beyond
+        // the range.
+        assert.eq(2, expl.executionStats.executionStages.docsExamined);
+    }
+    function testLT(op, val, expectedNReturned, expectedDocsExamined) {
+        initAndPopulate(coll, clusterKey);
+
+        const expl = assert.commandWorked(coll.getDB().runCommand({
+            explain: {find: coll.getName(), filter: {[clusterKeyFieldName]: {[op]: val}}},
+            verbosity: "executionStats"
+        }));
+
+        assert(getPlanStage(expl, "COLLSCAN"));
+        assert(getPlanStage(expl, "COLLSCAN").hasOwnProperty("maxRecord"));
+        assert(!getPlanStage(expl, "COLLSCAN").hasOwnProperty("minRecord"));
+        assert.eq(10, getPlanStage(expl, "COLLSCAN").maxRecord);
+
+        assert.eq(expectedNReturned, expl.executionStats.executionStages.nReturned);
+        assert.eq(expectedDocsExamined, expl.executionStats.executionStages.docsExamined);
+    }
+    function testGT(op, val, expectedNReturned, expectedDocsExamined) {
+        initAndPopulate(coll, clusterKey);
+
+        const expl = assert.commandWorked(coll.getDB().runCommand({
+            explain: {find: coll.getName(), filter: {[clusterKeyFieldName]: {[op]: val}}},
+            verbosity: "executionStats"
+        }));
+
+        assert(getPlanStage(expl, "COLLSCAN"));
+        assert(!getPlanStage(expl, "COLLSCAN").hasOwnProperty("maxRecord"));
+        assert(getPlanStage(expl, "COLLSCAN").hasOwnProperty("minRecord"));
+        assert.eq(89, getPlanStage(expl, "COLLSCAN").minRecord);
+
+        assert.eq(expectedNReturned, expl.executionStats.executionStages.nReturned);
+        assert.eq(expectedDocsExamined, expl.executionStats.executionStages.docsExamined);
+    }
+    function testRange(min, minVal, max, maxVal, expectedNReturned, expectedDocsExamined) {
+        initAndPopulate(coll, clusterKey);
+
+        const expl = assert.commandWorked(coll.getDB().runCommand({
+            explain: {
+                find: coll.getName(),
+                filter: {[clusterKeyFieldName]: {[min]: minVal, [max]: maxVal}}
+            },
+            verbosity: "executionStats"
+        }));
+
+        assert(getPlanStage(expl, "COLLSCAN"));
+        assert(getPlanStage(expl, "COLLSCAN").hasOwnProperty("maxRecord"));
+        assert(getPlanStage(expl, "COLLSCAN").hasOwnProperty("minRecord"));
+        assert.eq(minVal, getPlanStage(expl, "COLLSCAN").minRecord);
+        assert.eq(maxVal, getPlanStage(expl, "COLLSCAN").maxRecord);
+
+        assert.eq(expectedNReturned, expl.executionStats.executionStages.nReturned);
+        assert.eq(expectedDocsExamined, expl.executionStats.executionStages.docsExamined);
+    }
+    function testNonClusterKeyScan() {
+        initAndPopulate(coll, clusterKey);
+
+        const expl = assert.commandWorked(coll.getDB().runCommand({
+            explain: {find: coll.getName(), filter: {a: {$gt: -10}}},
+            verbosity: "executionStats"
+        }));
+
+        assert(getPlanStage(expl, "COLLSCAN"));
+        assert(!getPlanStage(expl, "COLLSCAN").hasOwnProperty("maxRecord"));
+        assert(!getPlanStage(expl, "COLLSCAN").hasOwnProperty("minRecord"));
+        assert.eq(10, expl.executionStats.executionStages.nReturned);
+    }
+
+    function testBoundedScans(coll, clusterKey) {
+        testEq();
+        // Expect docsExamined == nReturned + 2 due to the collection scan bounds being always
+        // inclusive and due to the by-design additional cursor 'next' beyond the range.
+        testLT("$lt", 10, 10, 12);
+        // Expect docsExamined == nReturned + 1 due to the by-design additional cursor 'next'
+        // beyond the range.
+        testLT("$lte", 10, 11, 12);
+        // Expect docsExamined == nReturned + 1 due to the collection scan bounds being always
+        // inclusive. Note that unlike the 'testLT' cases, there's no additional cursor 'next'
+        // beyond the range because we hit EOF.
+        testGT("$gt", 89, 10, 11);
+        // Expect docsExamined == nReturned.
+        testGT("$gte", 89, 11, 11);
+        // docsExamined reflects the fact that collection scan bounds are always inclusive and
+        // that by design we do an additional cursor 'next' beyond the range.
+        testRange("$gt", 20, "$lt", 40, 19, 22);
+        testRange("$gte", 20, "$lt", 40, 20, 22);
+        testRange("$gt", 20, "$lte", 40, 20, 22);
+        testRange("$gte", 20, "$lte", 40, 21, 22);
+        testNonClusterKeyScan();
+    }
+
+    return testBoundedScans(coll, clusterKey);
+};
diff --git a/jstests/libs/clustered_collections/clustered_collection_hint_common.js b/jstests/libs/clustered_collections/clustered_collection_hint_common.js
new file mode 100644
index 00000000000..a8927d75028
--- /dev/null
+++ b/jstests/libs/clustered_collections/clustered_collection_hint_common.js
@@ -0,0 +1,281 @@
+/**
+ * Validate $hint on a clustered collection.
+ */
+
+const testClusteredCollectionHint = function(coll, clusterKey, clusterKeyName) {
+    "use strict";
+    load("jstests/libs/analyze_plan.js");
+    load("jstests/libs/collection_drop_recreate.js");
+
+    const clusterKeyFieldName = Object.keys(clusterKey)[0];
+    const batchSize = 100;
+
+    function validateHint(coll, {expectedNReturned, cmd, expectedWinningPlanStats = {}}) {
+        const explain = assert.commandWorked(coll.runCommand({explain: cmd}));
+        assert.eq(explain.executionStats.nReturned, expectedNReturned, tojson(explain));
+
+        const actualWinningPlan = getWinningPlan(explain.queryPlanner);
+        const stageOfInterest = getPlanStage(actualWinningPlan, expectedWinningPlanStats.stage);
+        assert.neq(null, stageOfInterest);
+
+        for (const [key, value] of Object.entries(expectedWinningPlanStats)) {
+            assert(stageOfInterest[key], tojson(explain));
+            assert.eq(stageOfInterest[key], value, tojson(explain));
+        }
+
+        // Explicitly check that the plan is not bounded by default.
+        if (!expectedWinningPlanStats.hasOwnProperty("minRecord")) {
+            assert(!actualWinningPlan["minRecord"], tojson(explain));
+        }
+        if (!expectedWinningPlanStats.hasOwnProperty("maxRecord")) {
+            assert(!actualWinningPlan["maxRecord"], tojson(explain));
+        }
+    }
+
+    function testHint(coll, clusterKey, clusterKeyName) {
+        // Create clustered collection.
+        assertDropCollection(coll.getDB(), coll.getName());
+        assert.commandWorked(coll.getDB().createCollection(
+            coll.getName(), {clusteredIndex: {key: {[clusterKeyFieldName]: 1}, unique: true}}));
+
+        // Create an index that the query planner would consider preferable to using the cluster
+        // key for point predicates on 'a'.
+        const idxA = {a: -1};
+        assert.commandWorked(coll.createIndex(idxA));
+
+        // Populate collection.
+        const bulk = coll.initializeUnorderedBulkOp();
+        for (let i = 0; i < batchSize; i++) {
+            bulk.insert({[clusterKeyFieldName]: i, a: -i});
+        }
+        assert.commandWorked(bulk.execute());
+        assert.eq(coll.find().itcount(), batchSize);
+
+        const collName = coll.getName();
+
+        // Basic find with hints on cluster key.
+        validateHint(coll, {
+            expectedNReturned: batchSize,
+            cmd: {
+                find: collName,
+                hint: clusterKey,
+            },
+            expectedWinningPlanStats: {
+                stage: "COLLSCAN",
+                direction: "forward",
+            }
+        });
+        validateHint(coll, {
+            expectedNReturned: batchSize,
+            cmd: {
+                find: collName,
+                hint: clusterKeyName,
+            },
+            expectedWinningPlanStats: {
+                stage: "COLLSCAN",
+                direction: "forward",
+            }
+        });
+        validateHint(coll, {
+            expectedNReturned: 1,
+            cmd: {
+                find: collName,
+                filter: {a: -2},
+                hint: clusterKey,
+            },
+            expectedWinningPlanStats: {
+                stage: "COLLSCAN",
+                direction: "forward",
+            }
+        });
+        validateHint(coll, {
+            expectedNReturned: 1,
+            cmd: {
+                find: collName,
+                filter: {a: -2},
+                hint: clusterKeyName,
+            },
+            expectedWinningPlanStats: {
+                stage: "COLLSCAN",
+                direction: "forward",
+            }
+        });
+
+        // Find with hints on cluster key that generate bounded collection scans.
+        const arbitraryDocId = 12;
+        validateHint(coll, {
+            expectedNReturned: 1,
+            cmd: {
+                find: collName,
+                filter: {[clusterKeyFieldName]: arbitraryDocId},
+                hint: clusterKey,
+            },
+            expectedWinningPlanStats: {
+                stage: "COLLSCAN",
+                direction: "forward",
+                minRecord: arbitraryDocId,
+                maxRecord: arbitraryDocId
+            }
+        });
+        validateHint(coll, {
+            expectedNReturned: 1,
+            cmd: {
+                find: collName,
+                filter: {[clusterKeyFieldName]: arbitraryDocId},
+                hint: clusterKeyName,
+            },
+            expectedWinningPlanStats: {
+                stage: "COLLSCAN",
+                direction: "forward",
+                minRecord: arbitraryDocId,
+                maxRecord: arbitraryDocId
+            }
+        });
+        validateHint(coll, {
+            expectedNReturned: arbitraryDocId,
+            cmd: {
+                find: collName,
+                filter: {[clusterKeyFieldName]: {$lt: arbitraryDocId}},
+                hint: clusterKey,
+            },
+            expectedWinningPlanStats:
+                {stage: "COLLSCAN", direction: "forward", maxRecord: arbitraryDocId}
+        });
+        validateHint(coll, {
+            expectedNReturned: batchSize - arbitraryDocId,
+            cmd: {
+                find: collName,
+                filter: {[clusterKeyFieldName]: {$gte: arbitraryDocId}},
+                hint: clusterKey,
+            },
+            expectedWinningPlanStats:
+                {stage: "COLLSCAN", direction: "forward", minRecord: arbitraryDocId}
+        });
+
+        // Find with $natural hints.
+        validateHint(coll, {
+            expectedNReturned: batchSize,
+            cmd: {
+                find: collName,
+                hint: {$natural: -1},
+            },
+            expectedWinningPlanStats: {
+                stage: "COLLSCAN",
+                direction: "backward",
+            }
+        });
+        validateHint(coll, {
+            expectedNReturned: batchSize,
+            cmd: {
+                find: collName,
+                hint: {$natural: 1},
+            },
+            expectedWinningPlanStats: {
+                stage: "COLLSCAN",
+                direction: "forward",
+            }
+        });
+        validateHint(coll, {
+            expectedNReturned: 1,
+            cmd: {
+                find: collName,
+                filter: {a: -2},
+                hint: {$natural: -1},
+            },
+            expectedWinningPlanStats: {
+                stage: "COLLSCAN",
+                direction: "backward",
+            }
+        });
+
+        // Find on a standard index.
+        validateHint(coll, {
+            expectedNReturned: batchSize,
+            cmd: {find: collName, hint: idxA},
+            expectedWinningPlanStats: {
+                stage: "IXSCAN",
+                keyPattern: idxA,
+            }
+        });
+
+        // Update with hint on cluster key.
+        validateHint(coll, {
+            expectedNReturned: 0,
+            cmd: {
+                update: collName,
+                updates: [{q: {[clusterKeyFieldName]: 3}, u: {$inc: {a: -2}}, hint: clusterKey}]
+            },
+            expectedWinningPlanStats: {
+                stage: "COLLSCAN",
+                direction: "forward",
+            }
+        });
+
+        // Update with reverse $natural hint.
+        validateHint(coll, {
+            expectedNReturned: 0,
+            cmd: {
+                update: collName,
+                updates:
+                    [{q: {[clusterKeyFieldName]: 80}, u: {$inc: {a: 80}}, hint: {$natural: -1}}]
+            },
+            expectedWinningPlanStats: {
+                stage: "COLLSCAN",
+                direction: "backward",
+            }
+        });
+
+        // Update with hint on secondary index.
+        validateHint(coll, {
+            expectedNReturned: 0,
+            cmd: {update: collName, updates: [{q: {a: -2}, u: {$set: {a: 2}}, hint: idxA}]},
+            expectedWinningPlanStats: {
+                stage: "IXSCAN",
+                keyPattern: idxA,
+            }
+        });
+
+        // Delete with hint on cluster key.
+        validateHint(coll, {
+            expectedNReturned: 0,
+            cmd: {
+                delete: collName,
+                deletes: [{q: {[clusterKeyFieldName]: 2}, limit: 0, hint: clusterKey}]
+            },
+            expectedWinningPlanStats: {
+                stage: "COLLSCAN",
+                direction: "forward",
+            }
+        });
+
+        // Delete with reverse $natural hint.
+        validateHint(coll, {
+            expectedNReturned: 0,
+            cmd: {
+                delete: collName,
+                deletes: [{q: {[clusterKeyFieldName]: 30}, limit: 0, hint: {$natural: -1}}]
+            },
+            expectedWinningPlanStats: {
+                stage: "COLLSCAN",
+                direction: "backward",
+            }
+        });
+
+        // Delete with hint on standard index.
+        validateHint(coll, {
+            expectedNReturned: 0,
+            cmd: {delete: collName, deletes: [{q: {a: -5}, limit: 0, hint: idxA}]},
+            expectedWinningPlanStats: {
+                stage: "IXSCAN",
+                keyPattern: idxA,
+            }
+        });
+
+        // Reverse 'hint' on the cluster key is illegal.
+        assert.commandFailedWithCode(
            coll.getDB().runCommand({find: coll.getName(), hint: {[clusterKeyFieldName]: -1}}),
+            ErrorCodes.BadValue);
+    }
+
+    return testHint(coll, clusterKey, clusterKeyName);
+};
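Note that `validateHint` checks both directions: every field in `expectedWinningPlanStats` must match the winning plan's stage, and `minRecord`/`maxRecord` must be absent unless explicitly expected. A hypothetical invocation (names illustrative, not from this commit): the third argument is the clustered index's name, which per `constructFullCreateOptions` in the utility file below defaults to `"_id_"` for an `_id` key and `"<field>_1"` otherwise:

```js
load("jstests/libs/clustered_collections/clustered_collection_hint_common.js");

// Hint either by key pattern or by the implicit clustered index name.
testClusteredCollectionHint(testDB.coll, {_id: 1}, "_id_");
testClusteredCollectionHint(testDB.coll, {ts: 1}, "ts_1");
```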
diff --git a/jstests/libs/clustered_collections/clustered_collection_util.js b/jstests/libs/clustered_collections/clustered_collection_util.js
new file mode 100644
index 00000000000..f00f89e7f47
--- /dev/null
+++ b/jstests/libs/clustered_collections/clustered_collection_util.js
@@ -0,0 +1,188 @@
+/**
+ * Utilities for testing clustered collections.
+ */
+
+load("jstests/libs/analyze_plan.js");
+load("jstests/libs/collection_drop_recreate.js");
+
+var ClusteredCollectionUtil = class {
+    static areClusteredIndexesEnabled(conn) {
+        const clusteredIndexesEnabled =
+            assert
+                .commandWorked(conn.adminCommand({getParameter: 1, featureFlagClusteredIndexes: 1}))
+                .featureFlagClusteredIndexes.value;
+
+        if (!clusteredIndexesEnabled) {
+            return false;
+        }
+        return true;
+    }
+
+    // Returns a copy of the 'createOptions' used to create the clustered collection with default
+    // values for fields absent in the user provided 'createOptions'.
+    static constructFullCreateOptions(createOptions) {
+        const fullCreateOptions = Object.extend({}, createOptions, /* deep copy */ true);
+
+        // If the createOptions don't specify the name, expect the default.
+        if (!createOptions.clusteredIndex.name) {
+            const clusterKey = Object.keys(createOptions.clusteredIndex.key)[0];
+            if (clusterKey == "_id") {
+                fullCreateOptions.clusteredIndex.name = "_id_";
+            } else {
+                fullCreateOptions.clusteredIndex.name = clusterKey + "_1";
+            }
+        }
+
+        // If the createOptions don't specify 'v', expect the default.
+        if (!createOptions.clusteredIndex.v) {
+            fullCreateOptions.clusteredIndex.v = 2;
+        }
+
+        return fullCreateOptions;
+    }
+
+    // Provided the createOptions used to create the collection, validates the output from
+    // listCollections contains the correct information about the clusteredIndex.
+    static validateListCollections(db, collName, createOptions) {
+        const fullCreateOptions = ClusteredCollectionUtil.constructFullCreateOptions(createOptions);
+        const listColls =
+            assert.commandWorked(db.runCommand({listCollections: 1, filter: {name: collName}}));
+        const listCollsOptions = listColls.cursor.firstBatch[0].options;
+        assert(listCollsOptions.clusteredIndex);
+        assert.docEq(listCollsOptions.clusteredIndex, fullCreateOptions.clusteredIndex);
+    }
+
+    // The clusteredIndex should appear in listIndexes with an additional "clustered" field.
+    static validateListIndexes(db, collName, createOptions) {
+        const fullCreateOptions = ClusteredCollectionUtil.constructFullCreateOptions(createOptions);
+        const listIndexes = assert.commandWorked(db[collName].runCommand("listIndexes"));
+        const expectedListIndexesOutput =
+            Object.extend({clustered: true}, fullCreateOptions.clusteredIndex);
+        assert.docEq(listIndexes.cursor.firstBatch[0], expectedListIndexesOutput);
+    }
+
+    static testBasicClusteredCollection(db, collName, clusterKey) {
+        const lengths = [100, 1024, 1024 * 1024, 3 * 1024 * 1024];
+        const coll = db[collName];
+        const clusterKeyString = new String(clusterKey);
+
+        assert.commandWorked(db.createCollection(
+            collName, {clusteredIndex: {key: {[clusterKey]: 1}, unique: true}}));
+
+        // Expect that duplicates are rejected.
+        for (let len of lengths) {
+            let id = 'x'.repeat(len);
+            assert.commandWorked(coll.insert({[clusterKey]: id}));
+            assert.commandFailedWithCode(coll.insert({[clusterKey]: id}), ErrorCodes.DuplicateKey);
+            assert.eq(1, coll.find({[clusterKey]: id}).itcount());
+        }
+
+        // Updates should work.
+        for (let len of lengths) {
+            let id = 'x'.repeat(len);
+
+            // Validate the below for _id-clustered collection only until the following ticket is
+            // addressed:
+            // * TODO SERVER-60734 replacement updates should preserve the cluster key
+            if (clusterKey == "_id") {
+                assert.commandWorked(coll.update({[clusterKey]: id}, {a: len}));
+
+                assert.eq(1, coll.find({[clusterKey]: id}).itcount());
+                assert.eq(len, coll.findOne({[clusterKey]: id})['a']);
+            }
+        }
+
+        // This section is based on jstests/core/timeseries/clustered_index_crud.js with
+        // specific additions for general-purpose (non-timeseries) clustered collections.
+        assert.commandWorked(coll.insert({[clusterKey]: 0, a: 1}));
+        assert.commandWorked(coll.insert({[clusterKey]: 1, a: 1}));
+        assert.eq(1, coll.find({[clusterKey]: 0}).itcount());
+        assert.commandWorked(coll.insert({[clusterKey]: "", a: 2}));
+        assert.eq(1, coll.find({[clusterKey]: ""}).itcount());
+        assert.commandWorked(coll.insert({[clusterKey]: NumberLong("9223372036854775807"), a: 3}));
+        assert.eq(1, coll.find({[clusterKey]: NumberLong("9223372036854775807")}).itcount());
+        assert.commandWorked(coll.insert({[clusterKey]: {a: 1, b: 1}, a: 4}));
+        assert.eq(1, coll.find({[clusterKey]: {a: 1, b: 1}}).itcount());
+        assert.commandWorked(coll.insert({[clusterKey]: {a: {b: 1}, c: 1}, a: 5}));
+        assert.commandWorked(coll.insert({[clusterKey]: -1, a: 6}));
+        assert.eq(1, coll.find({[clusterKey]: -1}).itcount());
+        assert.commandWorked(coll.insert({[clusterKey]: "123456789012", a: 7}));
+        assert.eq(1, coll.find({[clusterKey]: "123456789012"}).itcount());
+        if (clusterKey == "_id") {
+            assert.commandWorked(coll.insert({a: 8}));
+        } else {
+            // Missing required cluster key field.
+            assert.commandFailedWithCode(coll.insert({a: 8}), 2);
+            assert.commandWorked(coll.insert({[clusterKey]: "withFieldA", a: 8}));
+        }
+        assert.eq(1, coll.find({a: 8}).itcount());
+        assert.commandWorked(coll.insert({[clusterKey]: null, a: 9}));
+        assert.eq(1, coll.find({[clusterKey]: null}).itcount());
+        assert.commandWorked(coll.insert({[clusterKey]: 'x'.repeat(99), a: 10}));
+
+        if (clusterKey == "_id") {
+            assert.commandWorked(coll.insert({}));
+        } else {
+            // Missing required cluster key field.
+            assert.commandFailedWithCode(coll.insert({}), 2);
+            assert.commandWorked(coll.insert({[clusterKey]: 'missingFieldA'}));
+        }
+        // Can build a secondary index with a 3MB RecordId doc.
+        assert.commandWorked(coll.createIndex({a: 1}));
+        // Can drop the secondary index.
+        assert.commandWorked(coll.dropIndex({a: 1}));
+
+        // This key is too large.
+        assert.commandFailedWithCode(
+            coll.insert({[clusterKey]: 'x'.repeat(8 * 1024 * 1024), a: 11}), 5894900);
+
+        // Look up using the secondary index on {a: 1}.
+        assert.commandWorked(coll.createIndex({a: 1}));
+
+        // TODO remove the branch once SERVER-60734 "replacement updates should preserve the
+        // cluster key" is resolved.
+        if (clusterKey == "_id") {
+            assert.eq(1, coll.find({a: null}).itcount());
+        } else {
+            assert.eq(5, coll.find({a: null}).itcount());
+        }
+        assert.eq(0, coll.find({a: 0}).itcount());
+        assert.eq(2, coll.find({a: 1}).itcount());
+        assert.eq(1, coll.find({a: 2}).itcount());
+        assert.eq(1, coll.find({a: 8}).itcount());
+        assert.eq(1, coll.find({a: 9}).itcount());
+        assert.eq(null, coll.findOne({a: 9})[clusterKeyString]);
+        assert.eq(1, coll.find({a: 10}).itcount());
+        assert.eq(99, coll.findOne({a: 10})[clusterKeyString].length);
+
+        // TODO make it unconditional once SERVER-60734 "replacement updates should preserve the
+        // cluster key" is resolved.
+        if (clusterKey == "_id") {
+            for (let len of lengths) {
+                // Secondary index lookups for documents with large RecordId's.
+                assert.eq(1, coll.find({a: len}).itcount());
+                assert.eq(len, coll.findOne({a: len})[clusterKeyString].length);
+            }
+        }
+
+        // No support for numeric type differentiation.
+        assert.commandWorked(coll.insert({[clusterKey]: 42.0}));
+        assert.commandFailedWithCode(coll.insert({[clusterKey]: 42}), ErrorCodes.DuplicateKey);
+        assert.commandFailedWithCode(coll.insert({[clusterKey]: NumberLong("42")}),
+                                     ErrorCodes.DuplicateKey);
+        assert.eq(1, coll.find({[clusterKey]: 42.0}).itcount());
+        assert.eq(1, coll.find({[clusterKey]: 42}).itcount());
+        assert.eq(1, coll.find({[clusterKey]: NumberLong("42")}).itcount());
+        coll.drop();
+    }
+
+    static waitForTTL(db) {
+        // The 'ttl.passes' metric is incremented when the TTL monitor starts processing the
+        // indexes, so we wait for it to be incremented twice to know that the TTL monitor finished
+        // processing the indexes at least once.
+        const ttlPasses = db.serverStatus().metrics.ttl.passes;
+        assert.soon(function() {
+            return db.serverStatus().metrics.ttl.passes > ttlPasses + 1;
+        });
+    }
+};
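A hedged sketch of how the utility class might be used by a test (the connection and collection names are invented); the validate helpers compare catalog output against the caller's `createOptions` after `constructFullCreateOptions` fills in the defaults (`name`, `v: 2`):

```js
load("jstests/libs/clustered_collections/clustered_collection_util.js");

if (!ClusteredCollectionUtil.areClusteredIndexesEnabled(db.getMongo())) {
    jsTestLog("Skipping test: featureFlagClusteredIndexes is disabled");
} else {
    const createOptions = {clusteredIndex: {key: {_id: 1}, unique: true}};
    assert.commandWorked(db.createCollection("clustered_coll", createOptions));

    // listCollections/listIndexes should reflect the clustered index, with the
    // defaults (name: "_id_", v: 2) filled in.
    ClusteredCollectionUtil.validateListCollections(db, "clustered_coll", createOptions);
    ClusteredCollectionUtil.validateListIndexes(db, "clustered_coll", createOptions);

    // CRUD smoke test, including large (up to 3MB) cluster key values.
    ClusteredCollectionUtil.testBasicClusteredCollection(db, "basic_coll", "_id");
}
```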