diff options
Diffstat (limited to 'buildscripts/resmokelib/testing/hooks/tenant_migration.py')
-rw-r--r-- | buildscripts/resmokelib/testing/hooks/tenant_migration.py | 131 |
1 files changed, 70 insertions, 61 deletions
diff --git a/buildscripts/resmokelib/testing/hooks/tenant_migration.py b/buildscripts/resmokelib/testing/hooks/tenant_migration.py index 4c6100d6153..1996142e4be 100644 --- a/buildscripts/resmokelib/testing/hooks/tenant_migration.py +++ b/buildscripts/resmokelib/testing/hooks/tenant_migration.py @@ -206,8 +206,6 @@ class _TenantMigrationThread(threading.Thread): # pylint: disable=too-many-inst MIGRATION_STATE_POLL_INTERVAL_SECS = 0.1 NO_SUCH_MIGRATION_ERR_CODE = 327 INTERNAL_ERR_CODE = 1 - ILLEGAL_OPERATION_ERR_CODE = 20 - INVALID_NS_ERR_CODE = 73 def __init__(self, logger, tenant_migration_fixture, tenant_id): """Initialize _TenantMigrationThread.""" @@ -328,16 +326,52 @@ class _TenantMigrationThread(threading.Thread): # pylint: disable=too-many-inst return _TenantMigrationOptions(donor_rs, recipient_rs, self._tenant_id, read_preference) def _run_migration(self, migration_opts): # noqa: D205,D400 - """Run donorStartMigration to start a tenant migration based on 'migration_opts', wait for - the migration decision. Returns true if the migration commits and false otherwise. + """Run a tenant migration based on 'migration_opts', wait for the migration decision and + garbage collection. Return true if the migration commits and false otherwise. """ - donor_primary = migration_opts.get_donor_primary() - donor_primary_client = donor_primary.mongo_client() + try: + # Clean up any orphaned tenant databases on the recipient allow next migration to start. + self._drop_tenant_databases(migration_opts.recipient_rs) + res = self._start_and_wait_for_migration(migration_opts) + is_committed = res["state"] == "committed" - self.logger.info( - "Starting tenant migration with donor primary on port %d of replica set '%s'.", - donor_primary.port, migration_opts.get_donor_name()) + # Garbage collect the migration prior to throwing error to avoid migration conflict + # in the next test. + if is_committed: + # If the migration committed, to avoid routing commands incorrectly, wait for the + # donor/proxy to reroute at least one command before doing garbage collection. Stop + # waiting when the test finishes. + self._wait_for_reroute_or_test_completion(migration_opts) + self._forget_migration(migration_opts) + self._wait_for_migration_garbage_collection(migration_opts) + if not res["ok"]: + raise errors.ServerFailure("Tenant migration with donor replica set '" + + migration_opts.get_donor_name() + "' failed: " + + str(res)) + + if is_committed: + return True + + abort_reason = res["abortReason"] + if self._is_fail_point_abort_reason(abort_reason): + self.logger.info("Tenant migration with donor replica set '" + + migration_opts.get_donor_name() + "' aborted due to failpoint: " + + str(res)) + return False + raise errors.ServerFailure("Tenant migration with donor replica set '" + + migration_opts.get_donor_name() + + "' aborted due to an error: " + str(res)) + except pymongo.errors.PyMongoError: + self.logger.exception( + "Error running tenant migration with donor primary on replica set '%s'.", + migration_opts.get_donor_name()) + raise + + def _start_and_wait_for_migration(self, migration_opts): # noqa: D205,D400 + """Run donorStartMigration to start a tenant migration based on 'migration_opts', wait for + the migration decision and return the last response for donorStartMigration. + """ cmd_obj = { "donorStartMigration": 1, @@ -354,60 +388,35 @@ class _TenantMigrationThread(threading.Thread): # pylint: disable=too-many-inst "recipientCertificateForDonor": get_certificate_and_private_key("jstests/libs/tenant_migration_recipient.pem"), } - is_committed = False - - try: - # Clean up any orphaned tenant databases on the recipient allow next migration to start. - self._drop_tenant_databases(migration_opts.recipient_rs) + donor_primary = migration_opts.get_donor_primary() + donor_primary_client = donor_primary.mongo_client() - while True: - # Keep polling the migration state until the migration completes. - res = donor_primary_client.admin.command( - cmd_obj, - bson.codec_options.CodecOptions(uuid_representation=bson.binary.UUID_SUBTYPE)) - - if res["state"] == "committed": - self.logger.info("Tenant migration with donor primary on port " + - str(donor_primary.port) + " of replica set '" + - migration_opts.get_donor_name() + "' has committed.") - is_committed = True - break - elif res["state"] == "aborted": - abort_reason = res["abortReason"] - if self._is_fail_point_abort_reason(abort_reason): - self.logger.info("Tenant migration with donor primary on port " + - str(donor_primary.port) + " of replica set '" + - migration_opts.get_donor_name() + - "' has aborted due to failpoint: " + str(res)) - break - else: - raise errors.ServerFailure("Tenant migration with donor primary on port " + - str(donor_primary.port) + " of replica set '" + - migration_opts.get_donor_name() + - "' has aborted due to an error: " + str(res)) - elif not res["ok"]: - raise errors.ServerFailure("Tenant migration with donor primary on port " + - str(donor_primary.port) + " of replica set '" + - migration_opts.get_donor_name() + "' has failed: " + - str(res)) - - time.sleep(self.MIGRATION_STATE_POLL_INTERVAL_SECS) - - # Garbage collect the migration. - if is_committed: - # If the migration committed, to avoid routing commands incorrectly, wait for the - # donor/proxy to reroute at least one command before doing garbage collection. Stop - # waiting when the test finishes. - self._wait_for_reroute_or_test_completion(migration_opts) - self._forget_migration(migration_opts) - self._wait_for_migration_garbage_collection(migration_opts) + self.logger.info( + "Starting tenant migration on donor primary on port %d of replica set '%s'.", + donor_primary.port, migration_opts.get_donor_name()) - return is_committed - except pymongo.errors.PyMongoError: - self.logger.exception( - "Error running tenant migration with donor primary on port %d of replica set '%s'.", - donor_primary.port, migration_opts.get_donor_name()) - raise + while True: + # Keep polling the migration state until the migration completes. + res = donor_primary_client.admin.command( + cmd_obj, + bson.codec_options.CodecOptions(uuid_representation=bson.binary.UUID_SUBTYPE)) + + if res["state"] == "committed": + self.logger.info("Tenant migration with donor primary on port " + + str(donor_primary.port) + " of replica set '" + + migration_opts.get_donor_name() + "' has committed.") + return res + if res["state"] == "aborted": + self.logger.info("Tenant migration with donor primary on port " + + str(donor_primary.port) + " of replica set '" + + migration_opts.get_donor_name() + "' has aborted: " + str(res)) + return res + if not res["ok"]: + self.logger.info("Tenant migration with donor primary on port " + + str(donor_primary.port) + " of replica set '" + + migration_opts.get_donor_name() + "' has failed: " + str(res)) + return res + time.sleep(self.MIGRATION_STATE_POLL_INTERVAL_SECS) def _forget_migration(self, migration_opts): """Run donorForgetMigration to garbage collection the tenant migration denoted by migration_opts'.""" |