summaryrefslogtreecommitdiff
path: root/buildscripts
diff options
context:
space:
mode:
authorCheahuychou Mao <cheahuychou.mao@mongodb.com>2020-09-15 16:32:24 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-09-22 19:47:25 +0000
commitf3eadbd5f8cf6d08a20ea8689f6c58a3b6036b98 (patch)
treee36b4cb50d1b59e7f188f5ee21eb1bc54f627ff7 /buildscripts
parent9ec023a45dff4f8b3b837bee2c99442f0eec5316 (diff)
downloadmongo-f3eadbd5f8cf6d08a20ea8689f6c58a3b6036b98.tar.gz
SERVER-50787 Update donorStartMigration command to have a "polling" interface
Diffstat (limited to 'buildscripts')
-rw-r--r--buildscripts/resmokelib/testing/hooks/tenant_migration.py38
1 files changed, 25 insertions, 13 deletions
diff --git a/buildscripts/resmokelib/testing/hooks/tenant_migration.py b/buildscripts/resmokelib/testing/hooks/tenant_migration.py
index ddcfc674f21..28e38192a61 100644
--- a/buildscripts/resmokelib/testing/hooks/tenant_migration.py
+++ b/buildscripts/resmokelib/testing/hooks/tenant_migration.py
@@ -69,8 +69,9 @@ class ContinuousTenantMigration(interface.Hook): # pylint: disable=too-many-ins
class _TenantMigrationThread(threading.Thread): # pylint: disable=too-many-instance-attributes
- MAX_SLEEP_SECONDS = 0.1
- MAX_BLOCK_TIME_MS = 5 * 1000
+ MAX_SLEEP_SECS = 0.1
+ MAX_BLOCK_TIME_MILLISECS = 5 * 1000
+ DONOR_START_MIGRATION_POLL_INTERVAL_SECS = 0.1
TENANT_MIGRATION_ABORTED_ERROR_CODE = 325
def __init__(self, logger, rs_fixtures, db_prefix):
@@ -107,10 +108,10 @@ class _TenantMigrationThread(threading.Thread): # pylint: disable=too-many-inst
self.join()
def _enable_abort(self, donor_primary_client, donor_primary_port, donor_primary_rs_name):
- # Configure the failpoint to make the migration abort after the migration has been blocking
- # reads and writes for a randomly generated number of seconds (< MAX_BLOCK_TIME_MS). Must
- # be called with _disable_abort at the start and end of each test so that each test uses
- # its own randomly generated block time.
+ # Configure the failpoint to make the migration abort after the migration has been
+ # blocking reads and writes for a randomly generated number of milliseconds
+ # (< MAX_BLOCK_TIME_MILLISECS). Must be called with _disable_abort at the start and
+ # end of each test so that each test uses its own randomly generated block time.
try:
donor_primary_client.admin.command(
bson.SON([("configureFailPoint", "abortTenantMigrationAfterBlockingStarts"),
@@ -118,7 +119,7 @@ class _TenantMigrationThread(threading.Thread): # pylint: disable=too-many-inst
("data",
bson.SON([("blockTimeMS",
random.uniform(
- 0, _TenantMigrationThread.MAX_BLOCK_TIME_MS))]))]))
+ 0, _TenantMigrationThread.MAX_BLOCK_TIME_MILLISECS))]))]))
except pymongo.errors.OperationFailure as err:
self.logger.exception(
"Unable to enable the failpoint to make migrations abort on donor primary on port "
@@ -146,20 +147,31 @@ class _TenantMigrationThread(threading.Thread): # pylint: disable=too-many-inst
donor_primary = rs_fixture.get_primary()
donor_primary_client = donor_primary.mongo_client()
- time.sleep(random.uniform(0, _TenantMigrationThread.MAX_SLEEP_SECONDS))
+ time.sleep(random.uniform(0, _TenantMigrationThread.MAX_SLEEP_SECS))
self.logger.info(
"Starting a tenant migration with donor primary on port %d of replica set '%s'.",
donor_primary.port, rs_fixture.replset_name)
+ cmd_obj = {
+ "donorStartMigration": 1, "migrationId": bson.Binary(uuid.uuid4().bytes, 4),
+ "recipientConnectionString": "dummySet/dummyHost:1234",
+ "databasePrefix": self._db_prefix, "readPreference": {"mode": "primary"}
+ }
+
try:
self._enable_abort(donor_primary_client, donor_primary.port, rs_fixture.replset_name)
- donor_primary_client.admin.command({
- "donorStartMigration": 1, "migrationId": bson.Binary(
- uuid.uuid4().bytes, 4), "recipientConnectionString": "dummySet/dummyHost:1234",
- "databasePrefix": self._db_prefix, "readPreference": {"mode": "primary"}
- }, bson.codec_options.CodecOptions(uuid_representation=bson.binary.UUID_SUBTYPE))
+ while True:
+ # Keep polling the migration state until the migration completes, otherwise we might
+ # end up disabling 'abortTenantMigrationAfterBlockingStarts' before the migration
+ # enters the blocking state and aborts.
+ res = donor_primary_client.admin.command(
+ cmd_obj,
+ bson.codec_options.CodecOptions(uuid_representation=bson.binary.UUID_SUBTYPE))
+ if (not res["ok"] or res["state"] == "committed" or res["state"] == "aborted"):
+ break
+ time.sleep(_TenantMigrationThread.DONOR_START_MIGRATION_POLL_INTERVAL_SECS)
except pymongo.errors.OperationFailure as err:
if err.code == _TenantMigrationThread.TENANT_MIGRATION_ABORTED_ERROR_CODE:
self.logger.exception(