summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJason Zhang <jason.zhang@mongodb.com>2021-03-17 19:47:10 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-04-10 14:04:56 +0000
commit5b9f84421440daef351224597fd8ecc594c2f70e (patch)
treee6e547e39ac7d36bcc4ee2c60acdd8fca7b686a5
parentdfde12485c163e88c91a7e67e768a51804033e56 (diff)
downloadmongo-5b9f84421440daef351224597fd8ecc594c2f70e.tar.gz
SERVER-53251 Add dbhash check to tenant migration passthrough
(cherry picked from commit 5a69247372a7f1374664996c5360da277545a5a8)
-rw-r--r--buildscripts/resmokelib/testing/hooks/dbhash_tenant_migration.py21
-rw-r--r--buildscripts/resmokelib/testing/hooks/tenant_migration.py38
-rw-r--r--jstests/hooks/run_check_tenant_migration_dbhash.js26
-rw-r--r--jstests/libs/override_methods/inject_tenant_prefix.js19
-rw-r--r--jstests/replsets/libs/tenant_migration_test.js69
-rw-r--r--jstests/replsets/libs/tenant_migration_util.js75
-rw-r--r--jstests/replsets/tenant_migration_filters_tenant_id.js3
-rw-r--r--jstests/replsets/tenant_migration_resume_collection_cloner_after_rename.js3
-rw-r--r--jstests/replsets/tenant_migration_resume_oplog_application.js3
9 files changed, 182 insertions, 75 deletions
diff --git a/buildscripts/resmokelib/testing/hooks/dbhash_tenant_migration.py b/buildscripts/resmokelib/testing/hooks/dbhash_tenant_migration.py
new file mode 100644
index 00000000000..9c4f24b1a34
--- /dev/null
+++ b/buildscripts/resmokelib/testing/hooks/dbhash_tenant_migration.py
@@ -0,0 +1,21 @@
+"""Test hook for verifying data consistency between the donor and recipient primaries in a tenant migration."""
+
+import os.path
+
+from buildscripts.resmokelib.testing.hooks import jsfile
+
+
+class CheckTenantMigrationDBHash(jsfile.DataConsistencyHook):
+ """Check if the dbhashes match.
+
+ This includes dbhashes for all non-local databases and non-replicated system collections that
+ match on the primaries of the donor and recipient
+ """
+
+ def __init__( # pylint: disable=super-init-not-called
+ self, hook_logger, fixture, shell_options=None):
+ """Initialize CheckTenantMigrationDBHash."""
+ description = "Check dbhashes of donor and recipient primaries"
+ js_filename = os.path.join("jstests", "hooks", "run_check_tenant_migration_dbhash.js")
+ jsfile.JSHook.__init__( # pylint: disable=non-parent-init-called
+ self, hook_logger, fixture, js_filename, description, shell_options=shell_options)
diff --git a/buildscripts/resmokelib/testing/hooks/tenant_migration.py b/buildscripts/resmokelib/testing/hooks/tenant_migration.py
index a21a75a0bba..d900779df8c 100644
--- a/buildscripts/resmokelib/testing/hooks/tenant_migration.py
+++ b/buildscripts/resmokelib/testing/hooks/tenant_migration.py
@@ -13,6 +13,7 @@ from buildscripts.resmokelib import errors
from buildscripts.resmokelib import utils
from buildscripts.resmokelib.testing.fixtures import interface as fixture_interface
from buildscripts.resmokelib.testing.fixtures import tenant_migration
+from buildscripts.resmokelib.testing.hooks import dbhash_tenant_migration
from buildscripts.resmokelib.testing.hooks import interface
@@ -36,7 +37,7 @@ class ContinuousTenantMigration(interface.Hook): # pylint: disable=too-many-ins
if not isinstance(fixture, tenant_migration.TenantMigrationFixture):
raise ValueError("The ContinuousTenantMigration hook requires a TenantMigrationFixture")
self._tenant_migration_fixture = fixture
- self._tenant_id = shell_options["global_vars"]["TestData"]["tenantId"]
+ self._shell_options = shell_options
self._tenant_migration_thread = None
@@ -46,7 +47,7 @@ class ContinuousTenantMigration(interface.Hook): # pylint: disable=too-many-ins
raise ValueError("No TenantMigrationFixture to run migrations on")
self.logger.info("Starting the tenant migration thread.")
self._tenant_migration_thread = _TenantMigrationThread(
- self.logger, self._tenant_migration_fixture, self._tenant_id)
+ self.logger, self._tenant_migration_fixture, self._shell_options, test_report)
self._tenant_migration_thread.start()
def after_suite(self, test_report):
@@ -213,13 +214,15 @@ class _TenantMigrationThread(threading.Thread): # pylint: disable=too-many-inst
FAIL_TO_PARSE_ERR_CODE = 9
NO_SUCH_KEY_ERR_CODE = 4
- def __init__(self, logger, tenant_migration_fixture, tenant_id):
+ def __init__(self, logger, tenant_migration_fixture, shell_options, test_report):
"""Initialize _TenantMigrationThread."""
- threading.Thread.__init__(self, name="TenantMigrationThread")
+ threading.Thread.__init__(self, name=self.THREAD_NAME)
self.daemon = True
self.logger = logger
self._tenant_migration_fixture = tenant_migration_fixture
- self._tenant_id = tenant_id
+ self._tenant_id = shell_options["global_vars"]["TestData"]["tenantId"]
+ self._test_report = test_report
+ self._shell_options = shell_options
self.__lifecycle = TenantMigrationLifeCycle()
# Event set when the thread has been stopped using the 'stop()' method.
@@ -314,6 +317,10 @@ class _TenantMigrationThread(threading.Thread): # pylint: disable=too-many-inst
"""Wait until stop or timeout."""
self._is_stopped_evt.wait(timeout)
+ def short_name(self):
+ """Return the name of the thread."""
+ return self.THREAD_NAME
+
def _check_thread(self):
"""Throw an error if the thread is not running."""
if not self.is_alive():
@@ -361,6 +368,24 @@ class _TenantMigrationThread(threading.Thread): # pylint: disable=too-many-inst
read_preference = {"mode": "primary"} if random.randint(0, 1) else {"mode": "secondary"}
return _TenantMigrationOptions(donor_rs, recipient_rs, self._tenant_id, read_preference)
+ def _check_tenant_migration_dbhash(self, migration_opts):
+ # Set the donor connection string, recipient connection string, and migration uuid string for the tenant migration dbhash check script.
+ self._shell_options[
+ "global_vars"]["TestData"]["donorConnectionString"] = migration_opts.get_donor_primary(
+ ).get_internal_connection_string()
+ self._shell_options["global_vars"]["TestData"][
+ "recipientConnectionString"] = migration_opts.get_recipient_primary(
+ ).get_internal_connection_string()
+ self._shell_options["global_vars"]["TestData"][
+ "migrationIdString"] = migration_opts.migration_id.__str__()
+
+ dbhash_test_case = dbhash_tenant_migration.CheckTenantMigrationDBHash(
+ self.logger, self._tenant_migration_fixture, self._shell_options)
+ dbhash_test_case.before_suite(self._test_report)
+ dbhash_test_case.before_test(self, self._test_report)
+ dbhash_test_case.after_test(self, self._test_report)
+ dbhash_test_case.after_suite(self._test_report)
+
def _run_migration(self, migration_opts): # noqa: D205,D400
"""Run a tenant migration based on 'migration_opts', wait for the migration decision and
garbage collection. Return true if the migration commits and false otherwise.
@@ -374,6 +399,9 @@ class _TenantMigrationThread(threading.Thread): # pylint: disable=too-many-inst
# Garbage collect the migration prior to throwing error to avoid migration conflict
# in the next test.
if is_committed:
+ # Once we have committed a migration, run a dbhash check before rerouting commands.
+ self._check_tenant_migration_dbhash(migration_opts)
+
# If the migration committed, to avoid routing commands incorrectly, wait for the
# donor/proxy to reroute at least one command before doing garbage collection. Stop
# waiting when the test finishes.
diff --git a/jstests/hooks/run_check_tenant_migration_dbhash.js b/jstests/hooks/run_check_tenant_migration_dbhash.js
new file mode 100644
index 00000000000..f826dfa12af
--- /dev/null
+++ b/jstests/hooks/run_check_tenant_migration_dbhash.js
@@ -0,0 +1,26 @@
+// Does a dbhash check between the donor and recipient primaries during a tenant migration.
+'use strict';
+
+(function() {
+load('jstests/libs/discover_topology.js'); // For Topology and DiscoverTopology.
+load("jstests/replsets/libs/tenant_migration_util.js");
+
+const excludedDBs = ["testTenantMigration"];
+const testDBName = "testTenantMigration";
+const dbhashCollName = "dbhashCheck";
+const tenantId = TestData.tenantId;
+const migrationId = UUID(TestData.migrationIdString);
+
+const donorRst = new ReplSetTest(TestData.donorConnectionString);
+const recipientRst = new ReplSetTest(TestData.recipientConnectionString);
+
+const donorPrimary = donorRst.getPrimary();
+const donorPrimaryDB = donorPrimary.getDB(testDBName);
+
+// We assume every db is under the tenant being migrated.
+TenantMigrationUtil.checkTenantDBHashes(donorRst, recipientRst, tenantId, excludedDBs);
+
+// Mark that we have completed the dbhash check.
+assert.commandWorked(
+ donorPrimaryDB[dbhashCollName].insert([{_id: migrationId}], {writeConcern: {w: "majority"}}));
+})();
diff --git a/jstests/libs/override_methods/inject_tenant_prefix.js b/jstests/libs/override_methods/inject_tenant_prefix.js
index 3793b94f991..7b8d8f49b72 100644
--- a/jstests/libs/override_methods/inject_tenant_prefix.js
+++ b/jstests/libs/override_methods/inject_tenant_prefix.js
@@ -410,8 +410,8 @@ Mongo.prototype.runCommandRetryOnTenantMigrationErrors = function(
numAttempts++;
let resObj;
if (this.reroutingMongo) {
- resObj = reroutingRunCommandFunc();
this.recordRerouteDueToTenantMigration();
+ resObj = reroutingRunCommandFunc();
} else {
resObj = originalRunCommandFunc();
}
@@ -507,6 +507,23 @@ Mongo.prototype.runCommandRetryOnTenantMigrationErrors = function(
this.migrationStateDoc = this.getTenantMigrationStateDoc();
this.reroutingMongo =
connect(this.migrationStateDoc.recipientConnectionString).getMongo();
+
+ // After getting a TenantMigrationCommitted error, wait for the python test fixture
+ // to do a dbhash check on the donor and recipient primaries before we retry the
+ // command on the recipient.
+ assert.soon(() => {
+ let findRes = assert.commandWorked(originalRunCommand.apply(this, [
+ "testTenantMigration",
+ {
+ find: "dbhashCheck",
+ filter: {_id: this.migrationStateDoc._id},
+ },
+ 0
+ ]));
+
+ const docs = findRes.cursor.firstBatch;
+ return docs[0] != null;
+ });
} else if (migrationAbortedErr) {
jsTestLog(`Got TenantMigrationAborted for command against database ${
dbNameWithTenantId} after trying ${numAttempts} times: ${tojson(resObj)}`);
diff --git a/jstests/replsets/libs/tenant_migration_test.js b/jstests/replsets/libs/tenant_migration_test.js
index 35338ec6002..296b9eb0cd1 100644
--- a/jstests/replsets/libs/tenant_migration_test.js
+++ b/jstests/replsets/libs/tenant_migration_test.js
@@ -300,7 +300,8 @@ function TenantMigrationTest({
// If the migration has been successfully committed, check the db hashes for the tenantId
// between the donor and recipient.
if (stateRes.state === TenantMigrationTest.State.kCommitted) {
- this.checkTenantDBHashes(tenantId);
+ TenantMigrationUtil.checkTenantDBHashes(
+ this.getDonorRst(), this.getRecipientRst(), tenantId);
}
return stateRes;
@@ -515,7 +516,8 @@ function TenantMigrationTest({
*/
this.verifyRecipientDB = function(
tenantId, dbName, collName, migrationCommitted = true, data = loadDummyData()) {
- const shouldMigrate = migrationCommitted && this.isNamespaceForTenant(tenantId, dbName);
+ const shouldMigrate =
+ migrationCommitted && TenantMigrationUtil.isNamespaceForTenant(tenantId, dbName);
jsTestLog(`Verifying that data in collection ${collName} of DB ${dbName} was ${
(shouldMigrate ? "" : "not")} migrated to the recipient`);
@@ -560,13 +562,6 @@ function TenantMigrationTest({
};
/**
- * Determines if a database name belongs to the given tenant.
- */
- this.isNamespaceForTenant = function(tenantId, dbName) {
- return dbName.startsWith(`${tenantId}_`);
- };
-
- /**
* Returns the TenantMigrationAccessBlocker associated with the given tenantId on the
* node.
*/
@@ -656,62 +651,6 @@ function TenantMigrationTest({
};
/**
- * Compares the hashes for DBs that belong to the specified tenant between the donor and
- * recipient primaries.
- */
- this.checkTenantDBHashes = function(
- tenantId, excludedDBs = [], msgPrefix = 'checkTenantDBHashes', ignoreUUIDs = false) {
- // Always skip db hash checks for the local database.
- excludedDBs = [...excludedDBs, "local"];
-
- const donorPrimary = this.getDonorRst().getPrimary();
- const recipientPrimary = this.getRecipientRst().getPrimary();
-
- // Filter out all dbs that don't belong to the tenant.
- let combinedDBNames = [...donorPrimary.getDBNames(), ...recipientPrimary.getDBNames()];
- combinedDBNames =
- combinedDBNames.filter(dbName => (this.isNamespaceForTenant(tenantId, dbName) &&
- !excludedDBs.includes(dbName)));
- combinedDBNames = new Set(combinedDBNames);
-
- for (const dbName of combinedDBNames) {
- // Pass in an empty array for the secondaries, since we only wish to compare the DB
- // hashes between the donor and recipient primary in this test.
- const donorDBHash =
- assert.commandWorked(this.getDonorRst().getHashes(dbName, []).primary);
- const recipientDBHash =
- assert.commandWorked(this.getRecipientRst().getHashes(dbName, []).primary);
-
- const donorCollections = Object.keys(donorDBHash.collections);
- const donorCollInfos = new CollInfos(donorPrimary, 'donorPrimary', dbName);
- donorCollInfos.filter(donorCollections);
-
- const recipientCollections = Object.keys(recipientDBHash.collections);
- const recipientCollInfos = new CollInfos(recipientPrimary, 'recipientPrimary', dbName);
- recipientCollInfos.filter(recipientCollections);
-
- print(`checking db hash between donor: ${donorPrimary} and recipient: ${
- recipientPrimary}`);
-
- const collectionPrinted = new Set();
- const success = DataConsistencyChecker.checkDBHash(donorDBHash,
- donorCollInfos,
- recipientDBHash,
- recipientCollInfos,
- msgPrefix,
- ignoreUUIDs,
- true, /* syncingHasIndexes */
- collectionPrinted);
- if (!success) {
- print(`checkTenantDBHashes dumping donor and recipient primary oplogs`);
- this.getDonorRst().dumpOplog(donorPrimary, {}, 100);
- this.getRecipientRst().dumpOplog(recipientPrimary, {}, 100);
- }
- assert(success, 'dbhash mismatch between donor and recipient primaries');
- }
- };
-
- /**
* Shuts down the donor and recipient sets, only if they were not passed in as parameters.
* If they were passed in, the test that initialized them should be responsible for shutting
* them down.
diff --git a/jstests/replsets/libs/tenant_migration_util.js b/jstests/replsets/libs/tenant_migration_util.js
index 1713ba4327f..dc3f5e33d50 100644
--- a/jstests/replsets/libs/tenant_migration_util.js
+++ b/jstests/replsets/libs/tenant_migration_util.js
@@ -239,6 +239,77 @@ var TenantMigrationUtil = (function() {
return mtab.numBlockedWrites;
}
+ /**
+ * Determines if a database name belongs to the given tenant.
+ */
+ function isNamespaceForTenant(tenantId, dbName) {
+ return dbName.startsWith(`${tenantId}_`);
+ }
+
+ /**
+ * Compares the hashes for DBs that belong to the specified tenant between the donor and
+ * recipient primaries.
+ */
+ function checkTenantDBHashes(donorRst,
+ recipientRst,
+ tenantId,
+ excludedDBs = [],
+ msgPrefix = 'checkTenantDBHashes',
+ ignoreUUIDs = false) {
+ // Always skip db hash checks for the config, admin, and local database.
+ excludedDBs = [...excludedDBs, "config", "admin", "local"];
+
+ const donorPrimary = donorRst.getPrimary();
+ const recipientPrimary = recipientRst.getPrimary();
+
+ // Filter out all dbs that don't belong to the tenant.
+ let combinedDBNames = [...donorPrimary.getDBNames(), ...recipientPrimary.getDBNames()];
+ combinedDBNames = combinedDBNames.filter(
+ dbName => (isNamespaceForTenant(tenantId, dbName) && !excludedDBs.includes(dbName)));
+ combinedDBNames = new Set(combinedDBNames);
+
+ for (const dbName of combinedDBNames) {
+ // Pass in an empty array for the secondaries, since we only wish to compare the DB
+ // hashes between the donor and recipient primary in this test.
+ const donorDBHash = assert.commandWorked(donorRst.getHashes(dbName, []).primary);
+ const recipientDBHash =
+ assert.commandWorked(recipientRst.getHashes(dbName, []).primary);
+
+ const donorCollections = Object.keys(donorDBHash.collections);
+ const donorCollInfos = new CollInfos(donorPrimary, 'donorPrimary', dbName);
+ donorCollInfos.filter(donorCollections);
+
+ const recipientCollections = Object.keys(recipientDBHash.collections);
+ const recipientCollInfos = new CollInfos(recipientPrimary, 'recipientPrimary', dbName);
+ recipientCollInfos.filter(recipientCollections);
+
+ // TODO (SERVER-55343): Investigate temp collection behavior during tenant migrations.
+ donorCollInfos.collInfosRes =
+ donorCollInfos.collInfosRes.filter(info => !info.options.temp);
+ recipientCollInfos.collInfosRes =
+ recipientCollInfos.collInfosRes.filter(info => !info.options.temp);
+
+ print(`checking db hash between donor: ${donorPrimary} and recipient: ${
+ recipientPrimary}`);
+
+ const collectionPrinted = new Set();
+ const success = DataConsistencyChecker.checkDBHash(donorDBHash,
+ donorCollInfos,
+ recipientDBHash,
+ recipientCollInfos,
+ msgPrefix,
+ ignoreUUIDs,
+ true, /* syncingHasIndexes */
+ collectionPrinted);
+ if (!success) {
+ print(`checkTenantDBHashes dumping donor and recipient primary oplogs`);
+ donorRst.dumpOplog(donorPrimary, {}, 100);
+ recipientRst.dumpOplog(recipientPrimary, {}, 100);
+ }
+ assert(success, 'dbhash mismatch between donor and recipient primaries');
+ }
+ }
+
return {
kExternalKeysNs,
getExternalKeys,
@@ -255,6 +326,8 @@ var TenantMigrationUtil = (function() {
isMigrationCompleted,
getTenantMigrationAccessBlocker,
getNumBlockedReads,
- getNumBlockedWrites
+ getNumBlockedWrites,
+ isNamespaceForTenant,
+ checkTenantDBHashes
};
})();
diff --git a/jstests/replsets/tenant_migration_filters_tenant_id.js b/jstests/replsets/tenant_migration_filters_tenant_id.js
index 1af890cc888..2d91862bebc 100644
--- a/jstests/replsets/tenant_migration_filters_tenant_id.js
+++ b/jstests/replsets/tenant_migration_filters_tenant_id.js
@@ -9,6 +9,7 @@
load("jstests/libs/uuid_util.js");
load("jstests/replsets/libs/tenant_migration_test.js");
+load("jstests/replsets/libs/tenant_migration_util.js");
const tenantMigrationTest = new TenantMigrationTest({name: jsTestName()});
if (!tenantMigrationTest.isFeatureFlagEnabled()) {
@@ -28,7 +29,7 @@ const makeBaseTenantId = () => {
const runTest = (baseTenantId, dbName, shouldMatch) => {
jsTestLog(`Running tenant migration with dbName ${dbName} and tenantId ${baseTenantId}`);
- assert.eq(shouldMatch, tenantMigrationTest.isNamespaceForTenant(baseTenantId, dbName));
+ assert.eq(shouldMatch, TenantMigrationUtil.isNamespaceForTenant(baseTenantId, dbName));
tenantMigrationTest.insertDonorDB(dbName, collName);
// Run a migration with the base tenant ID.
diff --git a/jstests/replsets/tenant_migration_resume_collection_cloner_after_rename.js b/jstests/replsets/tenant_migration_resume_collection_cloner_after_rename.js
index a949e25e08f..1458c5f3fa6 100644
--- a/jstests/replsets/tenant_migration_resume_collection_cloner_after_rename.js
+++ b/jstests/replsets/tenant_migration_resume_collection_cloner_after_rename.js
@@ -103,7 +103,8 @@ assert.commandWorked(migrationThread.returnData());
recipientColl = newRecipientPrimary.getDB(dbName).getCollection(collNameRenamed);
assert.eq(4, recipientColl.find().itcount());
assert.eq(recipientColl.find().sort({_id: 1}).toArray(), docs);
-tenantMigrationTest.checkTenantDBHashes(tenantId);
+TenantMigrationUtil.checkTenantDBHashes(
+ tenantMigrationTest.getDonorRst(), tenantMigrationTest.getRecipientRst(), tenantId);
tenantMigrationTest.stop();
recipientRst.stopSet();
diff --git a/jstests/replsets/tenant_migration_resume_oplog_application.js b/jstests/replsets/tenant_migration_resume_oplog_application.js
index 5a8a65dc676..9d665cb705f 100644
--- a/jstests/replsets/tenant_migration_resume_oplog_application.js
+++ b/jstests/replsets/tenant_migration_resume_oplog_application.js
@@ -102,7 +102,8 @@ resultsArr = appliedNoOps.toArray();
assert.eq(3, appliedNoOps.count(), appliedNoOps);
assert.eq(docsToApply[2], resultsArr[2].o2.o, resultsArr);
-tenantMigrationTest.checkTenantDBHashes(tenantId);
+TenantMigrationUtil.checkTenantDBHashes(
+ tenantMigrationTest.getDonorRst(), tenantMigrationTest.getRecipientRst(), tenantId);
tenantMigrationTest.stop();
recipientRst.stopSet();
})();