diff options
author | Jason Zhang <jason.zhang@mongodb.com> | 2021-03-17 19:47:10 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-04-10 14:04:56 +0000 |
commit | 5b9f84421440daef351224597fd8ecc594c2f70e (patch) | |
tree | e6e547e39ac7d36bcc4ee2c60acdd8fca7b686a5 | |
parent | dfde12485c163e88c91a7e67e768a51804033e56 (diff) | |
download | mongo-5b9f84421440daef351224597fd8ecc594c2f70e.tar.gz |
SERVER-53251 Add dbhash check to tenant migration passthrough
(cherry picked from commit 5a69247372a7f1374664996c5360da277545a5a8)
9 files changed, 182 insertions, 75 deletions
diff --git a/buildscripts/resmokelib/testing/hooks/dbhash_tenant_migration.py b/buildscripts/resmokelib/testing/hooks/dbhash_tenant_migration.py new file mode 100644 index 00000000000..9c4f24b1a34 --- /dev/null +++ b/buildscripts/resmokelib/testing/hooks/dbhash_tenant_migration.py @@ -0,0 +1,21 @@ +"""Test hook for verifying data consistency between the donor and recipient primaries in a tenant migration.""" + +import os.path + +from buildscripts.resmokelib.testing.hooks import jsfile + + +class CheckTenantMigrationDBHash(jsfile.DataConsistencyHook): + """Check if the dbhashes match. + + This includes dbhashes for all non-local databases and non-replicated system collections that + match on the primaries of the donor and recipient + """ + + def __init__( # pylint: disable=super-init-not-called + self, hook_logger, fixture, shell_options=None): + """Initialize CheckTenantMigrationDBHash.""" + description = "Check dbhashes of donor and recipient primaries" + js_filename = os.path.join("jstests", "hooks", "run_check_tenant_migration_dbhash.js") + jsfile.JSHook.__init__( # pylint: disable=non-parent-init-called + self, hook_logger, fixture, js_filename, description, shell_options=shell_options) diff --git a/buildscripts/resmokelib/testing/hooks/tenant_migration.py b/buildscripts/resmokelib/testing/hooks/tenant_migration.py index a21a75a0bba..d900779df8c 100644 --- a/buildscripts/resmokelib/testing/hooks/tenant_migration.py +++ b/buildscripts/resmokelib/testing/hooks/tenant_migration.py @@ -13,6 +13,7 @@ from buildscripts.resmokelib import errors from buildscripts.resmokelib import utils from buildscripts.resmokelib.testing.fixtures import interface as fixture_interface from buildscripts.resmokelib.testing.fixtures import tenant_migration +from buildscripts.resmokelib.testing.hooks import dbhash_tenant_migration from buildscripts.resmokelib.testing.hooks import interface @@ -36,7 +37,7 @@ class ContinuousTenantMigration(interface.Hook): # pylint: disable=too-many-ins if not isinstance(fixture, tenant_migration.TenantMigrationFixture): raise ValueError("The ContinuousTenantMigration hook requires a TenantMigrationFixture") self._tenant_migration_fixture = fixture - self._tenant_id = shell_options["global_vars"]["TestData"]["tenantId"] + self._shell_options = shell_options self._tenant_migration_thread = None @@ -46,7 +47,7 @@ class ContinuousTenantMigration(interface.Hook): # pylint: disable=too-many-ins raise ValueError("No TenantMigrationFixture to run migrations on") self.logger.info("Starting the tenant migration thread.") self._tenant_migration_thread = _TenantMigrationThread( - self.logger, self._tenant_migration_fixture, self._tenant_id) + self.logger, self._tenant_migration_fixture, self._shell_options, test_report) self._tenant_migration_thread.start() def after_suite(self, test_report): @@ -213,13 +214,15 @@ class _TenantMigrationThread(threading.Thread): # pylint: disable=too-many-inst FAIL_TO_PARSE_ERR_CODE = 9 NO_SUCH_KEY_ERR_CODE = 4 - def __init__(self, logger, tenant_migration_fixture, tenant_id): + def __init__(self, logger, tenant_migration_fixture, shell_options, test_report): """Initialize _TenantMigrationThread.""" - threading.Thread.__init__(self, name="TenantMigrationThread") + threading.Thread.__init__(self, name=self.THREAD_NAME) self.daemon = True self.logger = logger self._tenant_migration_fixture = tenant_migration_fixture - self._tenant_id = tenant_id + self._tenant_id = shell_options["global_vars"]["TestData"]["tenantId"] + self._test_report = test_report + self._shell_options = shell_options self.__lifecycle = TenantMigrationLifeCycle() # Event set when the thread has been stopped using the 'stop()' method. @@ -314,6 +317,10 @@ class _TenantMigrationThread(threading.Thread): # pylint: disable=too-many-inst """Wait until stop or timeout.""" self._is_stopped_evt.wait(timeout) + def short_name(self): + """Return the name of the thread.""" + return self.THREAD_NAME + def _check_thread(self): """Throw an error if the thread is not running.""" if not self.is_alive(): @@ -361,6 +368,24 @@ class _TenantMigrationThread(threading.Thread): # pylint: disable=too-many-inst read_preference = {"mode": "primary"} if random.randint(0, 1) else {"mode": "secondary"} return _TenantMigrationOptions(donor_rs, recipient_rs, self._tenant_id, read_preference) + def _check_tenant_migration_dbhash(self, migration_opts): + # Set the donor connection string, recipient connection string, and migration uuid string for the tenant migration dbhash check script. + self._shell_options[ + "global_vars"]["TestData"]["donorConnectionString"] = migration_opts.get_donor_primary( + ).get_internal_connection_string() + self._shell_options["global_vars"]["TestData"][ + "recipientConnectionString"] = migration_opts.get_recipient_primary( + ).get_internal_connection_string() + self._shell_options["global_vars"]["TestData"][ + "migrationIdString"] = migration_opts.migration_id.__str__() + + dbhash_test_case = dbhash_tenant_migration.CheckTenantMigrationDBHash( + self.logger, self._tenant_migration_fixture, self._shell_options) + dbhash_test_case.before_suite(self._test_report) + dbhash_test_case.before_test(self, self._test_report) + dbhash_test_case.after_test(self, self._test_report) + dbhash_test_case.after_suite(self._test_report) + def _run_migration(self, migration_opts): # noqa: D205,D400 """Run a tenant migration based on 'migration_opts', wait for the migration decision and garbage collection. Return true if the migration commits and false otherwise. @@ -374,6 +399,9 @@ class _TenantMigrationThread(threading.Thread): # pylint: disable=too-many-inst # Garbage collect the migration prior to throwing error to avoid migration conflict # in the next test. if is_committed: + # Once we have committed a migration, run a dbhash check before rerouting commands. + self._check_tenant_migration_dbhash(migration_opts) + # If the migration committed, to avoid routing commands incorrectly, wait for the # donor/proxy to reroute at least one command before doing garbage collection. Stop # waiting when the test finishes. diff --git a/jstests/hooks/run_check_tenant_migration_dbhash.js b/jstests/hooks/run_check_tenant_migration_dbhash.js new file mode 100644 index 00000000000..f826dfa12af --- /dev/null +++ b/jstests/hooks/run_check_tenant_migration_dbhash.js @@ -0,0 +1,26 @@ +// Does a dbhash check between the donor and recipient primaries during a tenant migration. +'use strict'; + +(function() { +load('jstests/libs/discover_topology.js'); // For Topology and DiscoverTopology. +load("jstests/replsets/libs/tenant_migration_util.js"); + +const excludedDBs = ["testTenantMigration"]; +const testDBName = "testTenantMigration"; +const dbhashCollName = "dbhashCheck"; +const tenantId = TestData.tenantId; +const migrationId = UUID(TestData.migrationIdString); + +const donorRst = new ReplSetTest(TestData.donorConnectionString); +const recipientRst = new ReplSetTest(TestData.recipientConnectionString); + +const donorPrimary = donorRst.getPrimary(); +const donorPrimaryDB = donorPrimary.getDB(testDBName); + +// We assume every db is under the tenant being migrated. +TenantMigrationUtil.checkTenantDBHashes(donorRst, recipientRst, tenantId, excludedDBs); + +// Mark that we have completed the dbhash check. +assert.commandWorked( + donorPrimaryDB[dbhashCollName].insert([{_id: migrationId}], {writeConcern: {w: "majority"}})); +})(); diff --git a/jstests/libs/override_methods/inject_tenant_prefix.js b/jstests/libs/override_methods/inject_tenant_prefix.js index 3793b94f991..7b8d8f49b72 100644 --- a/jstests/libs/override_methods/inject_tenant_prefix.js +++ b/jstests/libs/override_methods/inject_tenant_prefix.js @@ -410,8 +410,8 @@ Mongo.prototype.runCommandRetryOnTenantMigrationErrors = function( numAttempts++; let resObj; if (this.reroutingMongo) { - resObj = reroutingRunCommandFunc(); this.recordRerouteDueToTenantMigration(); + resObj = reroutingRunCommandFunc(); } else { resObj = originalRunCommandFunc(); } @@ -507,6 +507,23 @@ Mongo.prototype.runCommandRetryOnTenantMigrationErrors = function( this.migrationStateDoc = this.getTenantMigrationStateDoc(); this.reroutingMongo = connect(this.migrationStateDoc.recipientConnectionString).getMongo(); + + // After getting a TenantMigrationCommitted error, wait for the python test fixture + // to do a dbhash check on the donor and recipient primaries before we retry the + // command on the recipient. + assert.soon(() => { + let findRes = assert.commandWorked(originalRunCommand.apply(this, [ + "testTenantMigration", + { + find: "dbhashCheck", + filter: {_id: this.migrationStateDoc._id}, + }, + 0 + ])); + + const docs = findRes.cursor.firstBatch; + return docs[0] != null; + }); } else if (migrationAbortedErr) { jsTestLog(`Got TenantMigrationAborted for command against database ${ dbNameWithTenantId} after trying ${numAttempts} times: ${tojson(resObj)}`); diff --git a/jstests/replsets/libs/tenant_migration_test.js b/jstests/replsets/libs/tenant_migration_test.js index 35338ec6002..296b9eb0cd1 100644 --- a/jstests/replsets/libs/tenant_migration_test.js +++ b/jstests/replsets/libs/tenant_migration_test.js @@ -300,7 +300,8 @@ function TenantMigrationTest({ // If the migration has been successfully committed, check the db hashes for the tenantId // between the donor and recipient. if (stateRes.state === TenantMigrationTest.State.kCommitted) { - this.checkTenantDBHashes(tenantId); + TenantMigrationUtil.checkTenantDBHashes( + this.getDonorRst(), this.getRecipientRst(), tenantId); } return stateRes; @@ -515,7 +516,8 @@ function TenantMigrationTest({ */ this.verifyRecipientDB = function( tenantId, dbName, collName, migrationCommitted = true, data = loadDummyData()) { - const shouldMigrate = migrationCommitted && this.isNamespaceForTenant(tenantId, dbName); + const shouldMigrate = + migrationCommitted && TenantMigrationUtil.isNamespaceForTenant(tenantId, dbName); jsTestLog(`Verifying that data in collection ${collName} of DB ${dbName} was ${ (shouldMigrate ? "" : "not")} migrated to the recipient`); @@ -560,13 +562,6 @@ function TenantMigrationTest({ }; /** - * Determines if a database name belongs to the given tenant. - */ - this.isNamespaceForTenant = function(tenantId, dbName) { - return dbName.startsWith(`${tenantId}_`); - }; - - /** * Returns the TenantMigrationAccessBlocker associated with the given tenantId on the * node. */ @@ -656,62 +651,6 @@ function TenantMigrationTest({ }; /** - * Compares the hashes for DBs that belong to the specified tenant between the donor and - * recipient primaries. - */ - this.checkTenantDBHashes = function( - tenantId, excludedDBs = [], msgPrefix = 'checkTenantDBHashes', ignoreUUIDs = false) { - // Always skip db hash checks for the local database. - excludedDBs = [...excludedDBs, "local"]; - - const donorPrimary = this.getDonorRst().getPrimary(); - const recipientPrimary = this.getRecipientRst().getPrimary(); - - // Filter out all dbs that don't belong to the tenant. - let combinedDBNames = [...donorPrimary.getDBNames(), ...recipientPrimary.getDBNames()]; - combinedDBNames = - combinedDBNames.filter(dbName => (this.isNamespaceForTenant(tenantId, dbName) && - !excludedDBs.includes(dbName))); - combinedDBNames = new Set(combinedDBNames); - - for (const dbName of combinedDBNames) { - // Pass in an empty array for the secondaries, since we only wish to compare the DB - // hashes between the donor and recipient primary in this test. - const donorDBHash = - assert.commandWorked(this.getDonorRst().getHashes(dbName, []).primary); - const recipientDBHash = - assert.commandWorked(this.getRecipientRst().getHashes(dbName, []).primary); - - const donorCollections = Object.keys(donorDBHash.collections); - const donorCollInfos = new CollInfos(donorPrimary, 'donorPrimary', dbName); - donorCollInfos.filter(donorCollections); - - const recipientCollections = Object.keys(recipientDBHash.collections); - const recipientCollInfos = new CollInfos(recipientPrimary, 'recipientPrimary', dbName); - recipientCollInfos.filter(recipientCollections); - - print(`checking db hash between donor: ${donorPrimary} and recipient: ${ - recipientPrimary}`); - - const collectionPrinted = new Set(); - const success = DataConsistencyChecker.checkDBHash(donorDBHash, - donorCollInfos, - recipientDBHash, - recipientCollInfos, - msgPrefix, - ignoreUUIDs, - true, /* syncingHasIndexes */ - collectionPrinted); - if (!success) { - print(`checkTenantDBHashes dumping donor and recipient primary oplogs`); - this.getDonorRst().dumpOplog(donorPrimary, {}, 100); - this.getRecipientRst().dumpOplog(recipientPrimary, {}, 100); - } - assert(success, 'dbhash mismatch between donor and recipient primaries'); - } - }; - - /** * Shuts down the donor and recipient sets, only if they were not passed in as parameters. * If they were passed in, the test that initialized them should be responsible for shutting * them down. diff --git a/jstests/replsets/libs/tenant_migration_util.js b/jstests/replsets/libs/tenant_migration_util.js index 1713ba4327f..dc3f5e33d50 100644 --- a/jstests/replsets/libs/tenant_migration_util.js +++ b/jstests/replsets/libs/tenant_migration_util.js @@ -239,6 +239,77 @@ var TenantMigrationUtil = (function() { return mtab.numBlockedWrites; } + /** + * Determines if a database name belongs to the given tenant. + */ + function isNamespaceForTenant(tenantId, dbName) { + return dbName.startsWith(`${tenantId}_`); + } + + /** + * Compares the hashes for DBs that belong to the specified tenant between the donor and + * recipient primaries. + */ + function checkTenantDBHashes(donorRst, + recipientRst, + tenantId, + excludedDBs = [], + msgPrefix = 'checkTenantDBHashes', + ignoreUUIDs = false) { + // Always skip db hash checks for the config, admin, and local database. + excludedDBs = [...excludedDBs, "config", "admin", "local"]; + + const donorPrimary = donorRst.getPrimary(); + const recipientPrimary = recipientRst.getPrimary(); + + // Filter out all dbs that don't belong to the tenant. + let combinedDBNames = [...donorPrimary.getDBNames(), ...recipientPrimary.getDBNames()]; + combinedDBNames = combinedDBNames.filter( + dbName => (isNamespaceForTenant(tenantId, dbName) && !excludedDBs.includes(dbName))); + combinedDBNames = new Set(combinedDBNames); + + for (const dbName of combinedDBNames) { + // Pass in an empty array for the secondaries, since we only wish to compare the DB + // hashes between the donor and recipient primary in this test. + const donorDBHash = assert.commandWorked(donorRst.getHashes(dbName, []).primary); + const recipientDBHash = + assert.commandWorked(recipientRst.getHashes(dbName, []).primary); + + const donorCollections = Object.keys(donorDBHash.collections); + const donorCollInfos = new CollInfos(donorPrimary, 'donorPrimary', dbName); + donorCollInfos.filter(donorCollections); + + const recipientCollections = Object.keys(recipientDBHash.collections); + const recipientCollInfos = new CollInfos(recipientPrimary, 'recipientPrimary', dbName); + recipientCollInfos.filter(recipientCollections); + + // TODO (SERVER-55343): Investigate temp collection behavior during tenant migrations. + donorCollInfos.collInfosRes = + donorCollInfos.collInfosRes.filter(info => !info.options.temp); + recipientCollInfos.collInfosRes = + recipientCollInfos.collInfosRes.filter(info => !info.options.temp); + + print(`checking db hash between donor: ${donorPrimary} and recipient: ${ + recipientPrimary}`); + + const collectionPrinted = new Set(); + const success = DataConsistencyChecker.checkDBHash(donorDBHash, + donorCollInfos, + recipientDBHash, + recipientCollInfos, + msgPrefix, + ignoreUUIDs, + true, /* syncingHasIndexes */ + collectionPrinted); + if (!success) { + print(`checkTenantDBHashes dumping donor and recipient primary oplogs`); + donorRst.dumpOplog(donorPrimary, {}, 100); + recipientRst.dumpOplog(recipientPrimary, {}, 100); + } + assert(success, 'dbhash mismatch between donor and recipient primaries'); + } + } + return { kExternalKeysNs, getExternalKeys, @@ -255,6 +326,8 @@ var TenantMigrationUtil = (function() { isMigrationCompleted, getTenantMigrationAccessBlocker, getNumBlockedReads, - getNumBlockedWrites + getNumBlockedWrites, + isNamespaceForTenant, + checkTenantDBHashes }; })(); diff --git a/jstests/replsets/tenant_migration_filters_tenant_id.js b/jstests/replsets/tenant_migration_filters_tenant_id.js index 1af890cc888..2d91862bebc 100644 --- a/jstests/replsets/tenant_migration_filters_tenant_id.js +++ b/jstests/replsets/tenant_migration_filters_tenant_id.js @@ -9,6 +9,7 @@ load("jstests/libs/uuid_util.js"); load("jstests/replsets/libs/tenant_migration_test.js"); +load("jstests/replsets/libs/tenant_migration_util.js"); const tenantMigrationTest = new TenantMigrationTest({name: jsTestName()}); if (!tenantMigrationTest.isFeatureFlagEnabled()) { @@ -28,7 +29,7 @@ const makeBaseTenantId = () => { const runTest = (baseTenantId, dbName, shouldMatch) => { jsTestLog(`Running tenant migration with dbName ${dbName} and tenantId ${baseTenantId}`); - assert.eq(shouldMatch, tenantMigrationTest.isNamespaceForTenant(baseTenantId, dbName)); + assert.eq(shouldMatch, TenantMigrationUtil.isNamespaceForTenant(baseTenantId, dbName)); tenantMigrationTest.insertDonorDB(dbName, collName); // Run a migration with the base tenant ID. diff --git a/jstests/replsets/tenant_migration_resume_collection_cloner_after_rename.js b/jstests/replsets/tenant_migration_resume_collection_cloner_after_rename.js index a949e25e08f..1458c5f3fa6 100644 --- a/jstests/replsets/tenant_migration_resume_collection_cloner_after_rename.js +++ b/jstests/replsets/tenant_migration_resume_collection_cloner_after_rename.js @@ -103,7 +103,8 @@ assert.commandWorked(migrationThread.returnData()); recipientColl = newRecipientPrimary.getDB(dbName).getCollection(collNameRenamed); assert.eq(4, recipientColl.find().itcount()); assert.eq(recipientColl.find().sort({_id: 1}).toArray(), docs); -tenantMigrationTest.checkTenantDBHashes(tenantId); +TenantMigrationUtil.checkTenantDBHashes( + tenantMigrationTest.getDonorRst(), tenantMigrationTest.getRecipientRst(), tenantId); tenantMigrationTest.stop(); recipientRst.stopSet(); diff --git a/jstests/replsets/tenant_migration_resume_oplog_application.js b/jstests/replsets/tenant_migration_resume_oplog_application.js index 5a8a65dc676..9d665cb705f 100644 --- a/jstests/replsets/tenant_migration_resume_oplog_application.js +++ b/jstests/replsets/tenant_migration_resume_oplog_application.js @@ -102,7 +102,8 @@ resultsArr = appliedNoOps.toArray(); assert.eq(3, appliedNoOps.count(), appliedNoOps); assert.eq(docsToApply[2], resultsArr[2].o2.o, resultsArr); -tenantMigrationTest.checkTenantDBHashes(tenantId); +TenantMigrationUtil.checkTenantDBHashes( + tenantMigrationTest.getDonorRst(), tenantMigrationTest.getRecipientRst(), tenantId); tenantMigrationTest.stop(); recipientRst.stopSet(); })(); |