From d7cdf424817fc8a1d7e6572ebbd0f7fb13fb0939 Mon Sep 17 00:00:00 2001 From: Max Hirschhorn Date: Tue, 18 Feb 2020 15:54:27 -0500 Subject: SERVER-46059 Run CheckOrphansDeleted hook in concurrency_sharded*.yml. --- .../concurrency_sharded_causal_consistency.yml | 1 + ...ncy_sharded_causal_consistency_and_balancer.yml | 1 + ...y_sharded_clusterwide_ops_add_remove_shards.yml | 1 + ...currency_sharded_kill_primary_with_balancer.yml | 1 + ...ncy_sharded_local_read_write_multi_stmt_txn.yml | 1 + ...cal_read_write_multi_stmt_txn_with_balancer.yml | 1 + .../suites/concurrency_sharded_multi_stmt_txn.yml | 1 + ...urrency_sharded_multi_stmt_txn_kill_primary.yml | 1 + ...cy_sharded_multi_stmt_txn_terminate_primary.yml | 1 + ...rrency_sharded_multi_stmt_txn_with_balancer.yml | 1 + ...rency_sharded_multi_stmt_txn_with_stepdowns.yml | 1 + .../suites/concurrency_sharded_replication.yml | 1 + ...ncurrency_sharded_replication_with_balancer.yml | 1 + ...ncy_sharded_terminate_primary_with_balancer.yml | 1 + .../suites/concurrency_sharded_with_stepdowns.yml | 1 + ...urrency_sharded_with_stepdowns_and_balancer.yml | 1 + buildscripts/resmokelib/testing/hooks/orphans.py | 35 +++++++++ jstests/hooks/run_check_orphans_are_deleted.js | 36 +++++++++ jstests/libs/check_orphans_are_deleted_helpers.js | 89 ++++++++++++++++++++++ .../override_methods/check_orphans_are_deleted.js | 63 +-------------- 20 files changed, 180 insertions(+), 59 deletions(-) create mode 100644 buildscripts/resmokelib/testing/hooks/orphans.py create mode 100644 jstests/hooks/run_check_orphans_are_deleted.js create mode 100644 jstests/libs/check_orphans_are_deleted_helpers.js diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_causal_consistency.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_causal_consistency.yml index aa638310b1f..30aca1fe7fb 100644 --- a/buildscripts/resmokeconfig/suites/concurrency_sharded_causal_consistency.yml +++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_causal_consistency.yml @@ -128,6 +128,7 @@ executor: - class: CheckReplDBHashInBackground - class: CheckReplDBHash - class: ValidateCollections + - class: CheckOrphansDeleted - class: CleanupConcurrencyWorkloads fixture: class: ShardedClusterFixture diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_causal_consistency_and_balancer.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_causal_consistency_and_balancer.yml index 7d5b72c4dba..e9607623bb2 100644 --- a/buildscripts/resmokeconfig/suites/concurrency_sharded_causal_consistency_and_balancer.yml +++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_causal_consistency_and_balancer.yml @@ -135,6 +135,7 @@ executor: - class: CheckReplDBHashInBackground - class: CheckReplDBHash - class: ValidateCollections + - class: CheckOrphansDeleted - class: CleanupConcurrencyWorkloads fixture: class: ShardedClusterFixture diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_clusterwide_ops_add_remove_shards.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_clusterwide_ops_add_remove_shards.yml index 31bffac4895..18a8c11bcc0 100644 --- a/buildscripts/resmokeconfig/suites/concurrency_sharded_clusterwide_ops_add_remove_shards.yml +++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_clusterwide_ops_add_remove_shards.yml @@ -24,6 +24,7 @@ executor: - class: CheckReplDBHashInBackground - class: CheckReplDBHash - class: ValidateCollections + - class: CheckOrphansDeleted - class: CleanupConcurrencyWorkloads fixture: class: ShardedClusterFixture diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_kill_primary_with_balancer.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_kill_primary_with_balancer.yml index 5f4ba529ead..4d8c23d555d 100644 --- a/buildscripts/resmokeconfig/suites/concurrency_sharded_kill_primary_with_balancer.yml +++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_kill_primary_with_balancer.yml @@ -216,6 +216,7 @@ executor: kill: true - class: CheckReplDBHash - class: ValidateCollections + - class: CheckOrphansDeleted - class: CleanupConcurrencyWorkloads fixture: class: ShardedClusterFixture diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_local_read_write_multi_stmt_txn.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_local_read_write_multi_stmt_txn.yml index f2569bb136e..00f8b7c72a9 100644 --- a/buildscripts/resmokeconfig/suites/concurrency_sharded_local_read_write_multi_stmt_txn.yml +++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_local_read_write_multi_stmt_txn.yml @@ -180,6 +180,7 @@ executor: - class: CheckReplDBHashInBackground - class: CheckReplDBHash - class: ValidateCollections + - class: CheckOrphansDeleted - class: CleanupConcurrencyWorkloads fixture: class: ShardedClusterFixture diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_local_read_write_multi_stmt_txn_with_balancer.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_local_read_write_multi_stmt_txn_with_balancer.yml index 009bce0e23b..b7b46989ae7 100644 --- a/buildscripts/resmokeconfig/suites/concurrency_sharded_local_read_write_multi_stmt_txn_with_balancer.yml +++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_local_read_write_multi_stmt_txn_with_balancer.yml @@ -181,6 +181,7 @@ executor: - class: CheckReplDBHashInBackground - class: CheckReplDBHash - class: ValidateCollections + - class: CheckOrphansDeleted - class: CleanupConcurrencyWorkloads fixture: class: ShardedClusterFixture diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn.yml index 8a67591e1fb..799fb689c33 100644 --- a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn.yml +++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn.yml @@ -172,6 +172,7 @@ executor: - class: CheckReplDBHashInBackground - class: CheckReplDBHash - class: ValidateCollections + - class: CheckOrphansDeleted - class: CleanupConcurrencyWorkloads fixture: class: ShardedClusterFixture diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_kill_primary.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_kill_primary.yml index a229e2f8f73..467d9301be2 100644 --- a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_kill_primary.yml +++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_kill_primary.yml @@ -258,6 +258,7 @@ executor: wait_for_mongos_retarget: true - class: CheckReplDBHash - class: ValidateCollections + - class: CheckOrphansDeleted - class: CleanupConcurrencyWorkloads fixture: class: ShardedClusterFixture diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_terminate_primary.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_terminate_primary.yml index d6019a37da3..e607314c232 100644 --- a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_terminate_primary.yml +++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_terminate_primary.yml @@ -258,6 +258,7 @@ executor: wait_for_mongos_retarget: true - class: CheckReplDBHash - class: ValidateCollections + - class: CheckOrphansDeleted - class: CleanupConcurrencyWorkloads fixture: class: ShardedClusterFixture diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_with_balancer.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_with_balancer.yml index 8ab3b284880..6f719225970 100644 --- a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_with_balancer.yml +++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_with_balancer.yml @@ -170,6 +170,7 @@ executor: - class: CheckReplDBHashInBackground - class: CheckReplDBHash - class: ValidateCollections + - class: CheckOrphansDeleted - class: CleanupConcurrencyWorkloads fixture: class: ShardedClusterFixture diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_with_stepdowns.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_with_stepdowns.yml index 890318c23f6..40468ac7d7c 100644 --- a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_with_stepdowns.yml +++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_with_stepdowns.yml @@ -236,6 +236,7 @@ executor: wait_for_mongos_retarget: true - class: CheckReplDBHash - class: ValidateCollections + - class: CheckOrphansDeleted - class: CleanupConcurrencyWorkloads fixture: class: ShardedClusterFixture diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_replication.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_replication.yml index 14c9052ad89..139378e716c 100644 --- a/buildscripts/resmokeconfig/suites/concurrency_sharded_replication.yml +++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_replication.yml @@ -131,6 +131,7 @@ executor: - class: CheckReplDBHashInBackground - class: CheckReplDBHash - class: ValidateCollections + - class: CheckOrphansDeleted - class: CleanupConcurrencyWorkloads fixture: class: ShardedClusterFixture diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_replication_with_balancer.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_replication_with_balancer.yml index 03278dff1fb..2c09db46c15 100644 --- a/buildscripts/resmokeconfig/suites/concurrency_sharded_replication_with_balancer.yml +++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_replication_with_balancer.yml @@ -134,6 +134,7 @@ executor: - class: CheckReplDBHashInBackground - class: CheckReplDBHash - class: ValidateCollections + - class: CheckOrphansDeleted - class: CleanupConcurrencyWorkloads fixture: class: ShardedClusterFixture diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_terminate_primary_with_balancer.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_terminate_primary_with_balancer.yml index 0f986352f23..7f725da7915 100644 --- a/buildscripts/resmokeconfig/suites/concurrency_sharded_terminate_primary_with_balancer.yml +++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_terminate_primary_with_balancer.yml @@ -216,6 +216,7 @@ executor: terminate: true - class: CheckReplDBHash - class: ValidateCollections + - class: CheckOrphansDeleted - class: CleanupConcurrencyWorkloads fixture: class: ShardedClusterFixture diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns.yml index e09d0b79d61..a95dbe5a88d 100644 --- a/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns.yml +++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns.yml @@ -206,6 +206,7 @@ executor: wait_for_mongos_retarget: true - class: CheckReplDBHash - class: ValidateCollections + - class: CheckOrphansDeleted - class: CleanupConcurrencyWorkloads fixture: class: ShardedClusterFixture diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns_and_balancer.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns_and_balancer.yml index 2c723371ed6..93ce8151755 100644 --- a/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns_and_balancer.yml +++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns_and_balancer.yml @@ -212,6 +212,7 @@ executor: wait_for_mongos_retarget: true - class: CheckReplDBHash - class: ValidateCollections + - class: CheckOrphansDeleted - class: CleanupConcurrencyWorkloads fixture: class: ShardedClusterFixture diff --git a/buildscripts/resmokelib/testing/hooks/orphans.py b/buildscripts/resmokelib/testing/hooks/orphans.py new file mode 100644 index 00000000000..873c1e59239 --- /dev/null +++ b/buildscripts/resmokelib/testing/hooks/orphans.py @@ -0,0 +1,35 @@ +"""Test hook for verifying orphan documents are eventually deleted in a sharded cluster.""" + +import os.path + +from . import jsfile +from ..fixtures import shardedcluster + + +class CheckOrphansDeleted(jsfile.DataConsistencyHook): + """Check if the range deleter failed to delete any orphan documents.""" + + def __init__(self, hook_logger, fixture, shell_options=None): + """Initialize CheckOrphansDeleted.""" + + if not isinstance(fixture, shardedcluster.ShardedClusterFixture): + raise ValueError(f"'fixture' must be an instance of ShardedClusterFixture, but got" + f" {fixture.__class__.__name__}") + + description = "Check orphan documents are eventually deleted" + js_filename = os.path.join("jstests", "hooks", "run_check_orphans_are_deleted.js") + super().__init__(hook_logger, fixture, js_filename, description, + shell_options=shell_options) + + def after_test(self, test, test_report): + """Run the run_check_orphans_are_deleted.js hook.""" + + # We temporarily disable the balancer so more work isn't generated for the range deleter + # while the hook is running. + if self.fixture.enable_balancer: + self.fixture.stop_balancer() + + super().after_test(test, test_report) + + if self.fixture.enable_balancer: + self.fixture.start_balancer() diff --git a/jstests/hooks/run_check_orphans_are_deleted.js b/jstests/hooks/run_check_orphans_are_deleted.js new file mode 100644 index 00000000000..b3745ffe646 --- /dev/null +++ b/jstests/hooks/run_check_orphans_are_deleted.js @@ -0,0 +1,36 @@ +'use strict'; + +/** + * Asserts that no shard in the cluster contains any orphan documents. + * + * Note: This hook won't find documents which don't have the full shard key. + */ +(function() { +load('jstests/libs/check_orphans_are_deleted_helpers.js'); // For CheckOrphansAreDeletedHelpers. +load('jstests/libs/discover_topology.js'); // For Topology and DiscoverTopology. + +assert.neq(typeof db, 'undefined', 'No `db` object, is the shell connected to a server?'); + +const conn = db.getMongo(); +const topology = DiscoverTopology.findConnectedNodes(conn); + +if (topology.type !== Topology.kShardedCluster) { + throw new Error('Orphan documents check must be run against a sharded cluster, but got: ' + + tojson(topology)); +} + +for (let shardName of Object.keys(topology.shards)) { + const shard = topology.shards[shardName]; + let shardPrimary; + + if (shard.type === Topology.kStandalone) { + shardPrimary = shard.mongod; + } else if (shard.type === Topology.kReplicaSet) { + shardPrimary = shard.primary; + } else { + throw new Error('Unrecognized topology format: ' + tojson(topology)); + } + + CheckOrphansAreDeletedHelpers.runCheck(db.getMongo(), new Mongo(shardPrimary), shardName); +} +})(); diff --git a/jstests/libs/check_orphans_are_deleted_helpers.js b/jstests/libs/check_orphans_are_deleted_helpers.js new file mode 100644 index 00000000000..12b8b918fa6 --- /dev/null +++ b/jstests/libs/check_orphans_are_deleted_helpers.js @@ -0,0 +1,89 @@ +'use strict'; + +var CheckOrphansAreDeletedHelpers = (function() { + function runCheck(mongosConn, shardConn, shardId) { + const configDB = shardConn.getDB('config'); + + let migrationCoordinatorDocs = []; + assert.soon( + () => { + try { + migrationCoordinatorDocs = configDB.migrationCoordinators.find().toArray(); + return migrationCoordinatorDocs.length === 0; + } catch (exp) { + // Primary purpose is to stabilize shell repl set monitor to recognize the + // current primary. + print('caught exception while checking migration coordinators, ' + + 'will retry again unless timed out: ' + tojson(exp)); + } + }, + () => { + return 'timed out waiting for migrationCoordinators to be empty @ ' + shardId + + ', last known contents: ' + tojson(migrationCoordinatorDocs); + }, + 5 * 60 * 1000, + 1000); + + mongosConn.getDB('config').collections.find({dropped: false}).forEach(collDoc => { + const ns = collDoc._id; + const tempNsArray = ns.split('.'); + const dbName = tempNsArray.shift(); + const collName = tempNsArray.join('.'); + + // It is possible for a test to drop the shard key index. We skip running the check for + // orphan documents being deleted from that collection if it doesn't have a shard key + // index. + const hintRes = shardConn.getDB(dbName).runCommand({ + find: collName, + hint: collDoc.key, + limit: 1, + singleBatch: true, + }); + + if (hintRes.ok !== 1) { + assert( + /hint provided does not correspond to an existing index/.test(hintRes.errmsg), + () => { + return 'expected query failure due to bad hint: ' + tojson(hintRes); + }); + print('Failed to find shard key index on ' + ns + + ' so skipping check for orphan documents being deleted'); + return; + } + + print('Checking that orphan documents on shard ' + shardId + + ' have been deleted from namespace ' + ns); + + let rangeDeletions = []; + assert.soon( + () => { + rangeDeletions = configDB.rangeDeletions.find({nss: ns}).toArray(); + return rangeDeletions.length === 0; + }, + () => { + return 'timed out waiting for rangeDeletions on ' + ns + ' to be empty @ ' + + shardId + ', last known contents: ' + tojson(rangeDeletions); + }); + + const coll = shardConn.getDB(dbName)[collName]; + mongosConn.getDB('config') + .chunks.find({ns: ns, shard: {$ne: shardId}}) + .forEach(chunkDoc => { + // Use $min/$max so this will also work with hashed and compound shard keys. + const orphans = coll.find({}) + .hint(collDoc.key) + .min(chunkDoc.min) + .max(chunkDoc.max) + .toArray(); + assert.eq(0, + orphans.length, + 'found orphans @ ' + shardId + ' within chunk: ' + tojson(chunkDoc) + + ', orphans: ' + tojson(orphans)); + }); + }); + } + + return { + runCheck: runCheck, + }; +})(); diff --git a/jstests/libs/override_methods/check_orphans_are_deleted.js b/jstests/libs/override_methods/check_orphans_are_deleted.js index f2d329df80b..0f27e86dc1b 100644 --- a/jstests/libs/override_methods/check_orphans_are_deleted.js +++ b/jstests/libs/override_methods/check_orphans_are_deleted.js @@ -1,3 +1,5 @@ +load('jstests/libs/check_orphans_are_deleted_helpers.js'); // For CheckOrphansAreDeletedHelpers. + /** * Asserts that all shards in the sharded cluster doesn't own any orphan documents. * Requires all shards and config server to have primary that is reachable. @@ -13,63 +15,6 @@ ShardingTest.prototype.checkOrphansAreDeleted = function() { print('Running check orphans against cluster with mongos: ' + this.s.host); - let runCheck = function(mongosConn, shardConn, shardId) { - let configDB = shardConn.getDB('config'); - - let migrationCoordinatorDocs = []; - assert.soon( - () => { - try { - migrationCoordinatorDocs = configDB.migrationCoordinators.find().toArray(); - return migrationCoordinatorDocs.length == 0; - } catch (exp) { - // Primary purpose is to stabilize shell repl set monitor to recognize the - // current primary. - print('caught exception while checking migration coordinators, ' + - 'will retry again unless timed out: ' + tojson(exp)); - } - }, - () => { - return 'timed out waiting for migrationCoordinators to be empty @ ' + shardId + - ', last known contents: ' + tojson(migrationCoordinatorDocs); - }, - 5 * 60 * 1000, - 1000); - - let rangeDeletions = []; - assert.soon( - () => { - rangeDeletions = configDB.rangeDeletions.find().toArray(); - return rangeDeletions.length == 0; - }, - () => { - return 'timed out waiting for rangeDeletions to be empty @ ' + shardId + - ', last known contents: ' + tojson(rangeDeletions); - }); - - mongosConn.getDB('config').collections.find({dropped: false}).forEach(collDoc => { - let tempNsArray = collDoc._id.split('.'); - let dbName = tempNsArray.shift(); - let collName = tempNsArray.join('.'); - - let coll = shardConn.getDB(dbName)[collName]; - mongosConn.getDB('config') - .chunks.find({ns: collDoc._id, shard: {$ne: shardId}}) - .forEach(chunkDoc => { - // Use $min/$max so this will also work with hashed and compound shard keys. - let orphans = coll.find({}) - .hint(collDoc.key) - .min(chunkDoc.min) - .max(chunkDoc.max) - .toArray(); - assert.eq(0, - orphans.length, - 'found orphans @ ' + shardId + ' within chunk: ' + tojson(chunkDoc) + - ', orphans: ' + tojson(orphans)); - }); - }); - }; - let getConn = function(connStr) { try { return new Mongo(connStr); @@ -98,7 +43,7 @@ ShardingTest.prototype.checkOrphansAreDeleted = function() { if (shardConn != null) { authutil.asCluster(shardConn, keyFile, () => { - runCheck(mongosConn, shardConn, shardDoc._id); + CheckOrphansAreDeletedHelpers.runCheck(mongosConn, shardConn, shardDoc._id); }); } }); @@ -112,7 +57,7 @@ ShardingTest.prototype.checkOrphansAreDeleted = function() { if (shardConn != null) { shardConn.host = shardDoc.host; - runCheck(mongosConn, shardConn, shardDoc._id); + CheckOrphansAreDeletedHelpers.runCheck(mongosConn, shardConn, shardDoc._id); } }); } -- cgit v1.2.1