summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCheahuychou Mao <mao.cheahuychou@gmail.com>2021-03-18 20:49:51 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-03-19 19:42:54 +0000
commit67c23bccf42fd86557ef47e911b8c6cf57648f4f (patch)
treebbce9efccfc005273e49fcd677c10ebd5e685281
parent4e410825f29880ff9502d41e3874b9ec8cef49cd (diff)
downloadmongo-67c23bccf42fd86557ef47e911b8c6cf57648f4f.tar.gz
SERVER-55349 Make TenantMigrationThread garbage collect migration before validating abort reason
-rw-r--r--buildscripts/resmokelib/testing/hooks/tenant_migration.py131
1 files changed, 70 insertions, 61 deletions
diff --git a/buildscripts/resmokelib/testing/hooks/tenant_migration.py b/buildscripts/resmokelib/testing/hooks/tenant_migration.py
index 4c6100d6153..1996142e4be 100644
--- a/buildscripts/resmokelib/testing/hooks/tenant_migration.py
+++ b/buildscripts/resmokelib/testing/hooks/tenant_migration.py
@@ -206,8 +206,6 @@ class _TenantMigrationThread(threading.Thread): # pylint: disable=too-many-inst
MIGRATION_STATE_POLL_INTERVAL_SECS = 0.1
NO_SUCH_MIGRATION_ERR_CODE = 327
INTERNAL_ERR_CODE = 1
- ILLEGAL_OPERATION_ERR_CODE = 20
- INVALID_NS_ERR_CODE = 73
def __init__(self, logger, tenant_migration_fixture, tenant_id):
"""Initialize _TenantMigrationThread."""
@@ -328,16 +326,52 @@ class _TenantMigrationThread(threading.Thread): # pylint: disable=too-many-inst
return _TenantMigrationOptions(donor_rs, recipient_rs, self._tenant_id, read_preference)
def _run_migration(self, migration_opts): # noqa: D205,D400
- """Run donorStartMigration to start a tenant migration based on 'migration_opts', wait for
- the migration decision. Returns true if the migration commits and false otherwise.
+ """Run a tenant migration based on 'migration_opts', wait for the migration decision and
+ garbage collection. Return true if the migration commits and false otherwise.
"""
- donor_primary = migration_opts.get_donor_primary()
- donor_primary_client = donor_primary.mongo_client()
+ try:
+ # Clean up any orphaned tenant databases on the recipient to allow the next migration to start.
+ self._drop_tenant_databases(migration_opts.recipient_rs)
+ res = self._start_and_wait_for_migration(migration_opts)
+ is_committed = res["state"] == "committed"
- self.logger.info(
- "Starting tenant migration with donor primary on port %d of replica set '%s'.",
- donor_primary.port, migration_opts.get_donor_name())
+ # Garbage collect the migration prior to throwing error to avoid migration conflict
+ # in the next test.
+ if is_committed:
+ # If the migration committed, to avoid routing commands incorrectly, wait for the
+ # donor/proxy to reroute at least one command before doing garbage collection. Stop
+ # waiting when the test finishes.
+ self._wait_for_reroute_or_test_completion(migration_opts)
+ self._forget_migration(migration_opts)
+ self._wait_for_migration_garbage_collection(migration_opts)
+ if not res["ok"]:
+ raise errors.ServerFailure("Tenant migration with donor replica set '" +
+ migration_opts.get_donor_name() + "' failed: " +
+ str(res))
+
+ if is_committed:
+ return True
+
+ abort_reason = res["abortReason"]
+ if self._is_fail_point_abort_reason(abort_reason):
+ self.logger.info("Tenant migration with donor replica set '" +
+ migration_opts.get_donor_name() + "' aborted due to failpoint: " +
+ str(res))
+ return False
+ raise errors.ServerFailure("Tenant migration with donor replica set '" +
+ migration_opts.get_donor_name() +
+ "' aborted due to an error: " + str(res))
+ except pymongo.errors.PyMongoError:
+ self.logger.exception(
+ "Error running tenant migration with donor primary on replica set '%s'.",
+ migration_opts.get_donor_name())
+ raise
+
+ def _start_and_wait_for_migration(self, migration_opts): # noqa: D205,D400
+ """Run donorStartMigration to start a tenant migration based on 'migration_opts', wait for
+ the migration decision and return the last response for donorStartMigration.
+ """
cmd_obj = {
"donorStartMigration":
1,
@@ -354,60 +388,35 @@ class _TenantMigrationThread(threading.Thread): # pylint: disable=too-many-inst
"recipientCertificateForDonor":
get_certificate_and_private_key("jstests/libs/tenant_migration_recipient.pem"),
}
- is_committed = False
-
- try:
- # Clean up any orphaned tenant databases on the recipient allow next migration to start.
- self._drop_tenant_databases(migration_opts.recipient_rs)
+ donor_primary = migration_opts.get_donor_primary()
+ donor_primary_client = donor_primary.mongo_client()
- while True:
- # Keep polling the migration state until the migration completes.
- res = donor_primary_client.admin.command(
- cmd_obj,
- bson.codec_options.CodecOptions(uuid_representation=bson.binary.UUID_SUBTYPE))
-
- if res["state"] == "committed":
- self.logger.info("Tenant migration with donor primary on port " +
- str(donor_primary.port) + " of replica set '" +
- migration_opts.get_donor_name() + "' has committed.")
- is_committed = True
- break
- elif res["state"] == "aborted":
- abort_reason = res["abortReason"]
- if self._is_fail_point_abort_reason(abort_reason):
- self.logger.info("Tenant migration with donor primary on port " +
- str(donor_primary.port) + " of replica set '" +
- migration_opts.get_donor_name() +
- "' has aborted due to failpoint: " + str(res))
- break
- else:
- raise errors.ServerFailure("Tenant migration with donor primary on port " +
- str(donor_primary.port) + " of replica set '" +
- migration_opts.get_donor_name() +
- "' has aborted due to an error: " + str(res))
- elif not res["ok"]:
- raise errors.ServerFailure("Tenant migration with donor primary on port " +
- str(donor_primary.port) + " of replica set '" +
- migration_opts.get_donor_name() + "' has failed: " +
- str(res))
-
- time.sleep(self.MIGRATION_STATE_POLL_INTERVAL_SECS)
-
- # Garbage collect the migration.
- if is_committed:
- # If the migration committed, to avoid routing commands incorrectly, wait for the
- # donor/proxy to reroute at least one command before doing garbage collection. Stop
- # waiting when the test finishes.
- self._wait_for_reroute_or_test_completion(migration_opts)
- self._forget_migration(migration_opts)
- self._wait_for_migration_garbage_collection(migration_opts)
+ self.logger.info(
+ "Starting tenant migration on donor primary on port %d of replica set '%s'.",
+ donor_primary.port, migration_opts.get_donor_name())
- return is_committed
- except pymongo.errors.PyMongoError:
- self.logger.exception(
- "Error running tenant migration with donor primary on port %d of replica set '%s'.",
- donor_primary.port, migration_opts.get_donor_name())
- raise
+ while True:
+ # Keep polling the migration state until the migration completes.
+ res = donor_primary_client.admin.command(
+ cmd_obj,
+ bson.codec_options.CodecOptions(uuid_representation=bson.binary.UUID_SUBTYPE))
+
+ if res["state"] == "committed":
+ self.logger.info("Tenant migration with donor primary on port " +
+ str(donor_primary.port) + " of replica set '" +
+ migration_opts.get_donor_name() + "' has committed.")
+ return res
+ if res["state"] == "aborted":
+ self.logger.info("Tenant migration with donor primary on port " +
+ str(donor_primary.port) + " of replica set '" +
+ migration_opts.get_donor_name() + "' has aborted: " + str(res))
+ return res
+ if not res["ok"]:
+ self.logger.info("Tenant migration with donor primary on port " +
+ str(donor_primary.port) + " of replica set '" +
+ migration_opts.get_donor_name() + "' has failed: " + str(res))
+ return res
+ time.sleep(self.MIGRATION_STATE_POLL_INTERVAL_SECS)
def _forget_migration(self, migration_opts):
"""Run donorForgetMigration to garbage collect the tenant migration denoted by 'migration_opts'."""