summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Mikhail Shchatko <mikhail.shchatko@mongodb.com> 2021-08-18 13:54:37 +0300
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-08-19 14:15:58 +0000
commit a415c11489de4d83f33b7b1c30c642e33fafbcfe (patch)
tree d25696b9c8f6be1b116138a0fc19b70815696368
parent ecb7d0f01755310c28bec2ca9d15fdd75ab1178d (diff)
download mongo-a415c11489de4d83f33b7b1c30c642e33fafbcfe.tar.gz
SERVER-59414 Retry curator setup in powercycle
(cherry picked from commit ccdc9b7703e1d367d8986e0bfa3fdeb8d870ccdb)
-rwxr-xr-x buildscripts/remote_operations.py 115
-rwxr-xr-x buildscripts/tests/test_remote_operations.py 4
-rw-r--r-- etc/evergreen.yml 39
-rwxr-xr-x pytests/powertest.py 6
4 files changed, 92 insertions, 72 deletions
diff --git a/buildscripts/remote_operations.py b/buildscripts/remote_operations.py
index 93798ae7ac1..21ed080d363 100755
--- a/buildscripts/remote_operations.py
+++ b/buildscripts/remote_operations.py
@@ -60,14 +60,16 @@ class RemoteOperations(object): # pylint: disable=too-many-instance-attributes
def __init__( # pylint: disable=too-many-arguments
self, user_host, ssh_connection_options=None, ssh_options=None, scp_options=None,
- retries=0, retry_sleep=0, debug=False, shell_binary="/bin/bash", use_shell=False):
+ op_retry_count=0, access_retry_count=0, retry_sleep=0, debug=False,
+ shell_binary="/bin/bash", use_shell=False):
"""Initialize RemoteOperations."""
self.user_host = user_host
self.ssh_connection_options = ssh_connection_options if ssh_connection_options else ""
self.ssh_options = ssh_options if ssh_options else ""
self.scp_options = scp_options if scp_options else ""
- self.retries = retries
+ self.op_retry_count = op_retry_count
+ self.access_retry_count = access_retry_count
self.retry_sleep = retry_sleep
self.debug = debug
self.shell_binary = shell_binary
@@ -87,19 +89,15 @@ class RemoteOperations(object): # pylint: disable=too-many-instance-attributes
buff_stdout, _ = process.communicate()
return process.poll(), buff_stdout
- def _remote_access(self):
- """Check if a remote session is possible."""
- cmd = "ssh {} {} {} date".format(self.ssh_connection_options, self.ssh_options,
- self.user_host)
+ def _call_retries(self, cmd, retry_count):
attempt_num = 0
- buff = ""
while True:
ret, buff = self._call(cmd)
# Ignore any connection errors before sshd has fully initialized.
if not ret and not any(ssh_error in buff for ssh_error in _SSH_CONNECTION_ERRORS):
return ret, buff
attempt_num += 1
- if attempt_num > self.retries:
+ if attempt_num > retry_count:
break
if self.debug:
print("Failed remote attempt {}, retrying in {} seconds".format(
@@ -107,7 +105,16 @@ class RemoteOperations(object): # pylint: disable=too-many-instance-attributes
time.sleep(self.retry_sleep)
return ret, buff
+ def _remote_access(self):
+ """Check if a remote session is possible."""
+ cmd = "ssh {} {} {} date".format(self.ssh_connection_options, self.ssh_options,
+ self.user_host)
+ return self._call_retries(cmd, self.access_retry_count)
+
def _perform_operation(self, cmd):
+ if self.op_retry_count:
+ return self._call_retries(cmd, self.op_retry_count)
+
return self._call(cmd)
def access_established(self):
@@ -231,44 +238,55 @@ def main(): # pylint: disable=too-many-branches,too-many-statements
shell_options = optparse.OptionGroup(parser, "Shell options")
copy_options = optparse.OptionGroup(parser, "Copy options")
- parser.add_option("--userHost", dest="user_host", default=None,
- help=("User and remote host to execute commands on [REQUIRED]."
- " Examples, 'user@1.2.3.4' or 'user@myhost.com'."))
-
- parser.add_option("--operation", dest="operation", default="shell", choices=_OPERATIONS,
- help=("Remote operation to perform, choose one of '{}',"
- " defaults to '%default'.".format(", ".join(_OPERATIONS))))
-
- control_options.add_option("--sshConnectionOptions", dest="ssh_connection_options",
- default=None, action="append",
- help=("SSH connection options which are common to ssh and scp."
- " More than one option can be specified either"
- " in one quoted string or by specifying"
- " this option more than once. Example options:"
- " '-i $HOME/.ssh/access.pem -o ConnectTimeout=10"
- " -o ConnectionAttempts=10'"))
-
- control_options.add_option("--sshOptions", dest="ssh_options", default=None, action="append",
- help=("SSH specific options."
- " More than one option can be specified either"
- " in one quoted string or by specifying"
- " this option more than once. Example options:"
- " '-t' or '-T'"))
-
- control_options.add_option("--scpOptions", dest="scp_options", default=None, action="append",
- help=("SCP specific options."
- " More than one option can be specified either"
- " in one quoted string or by specifying"
- " this option more than once. Example options:"
- " '-l 5000'"))
-
- control_options.add_option("--retries", dest="retries", type=int, default=0,
- help=("Number of retries to attempt for operation,"
- " defaults to '%default'."))
-
- control_options.add_option("--retrySleep", dest="retry_sleep", type=int, default=10,
- help=("Number of seconds to wait between retries,"
- " defaults to '%default'."))
+ parser.add_option(
+ "--userHost", dest="user_host", default=None,
+ help=("User and remote host to execute commands on [REQUIRED]."
+ " Examples, 'user@1.2.3.4' or 'user@myhost.com'."))
+
+ parser.add_option(
+ "--operation", dest="operation", default="shell", choices=_OPERATIONS,
+ help=("Remote operation to perform, choose one of '{}',"
+ " defaults to '%default'.".format(", ".join(_OPERATIONS))))
+
+ control_options.add_option(
+ "--sshConnectionOptions", dest="ssh_connection_options", default=None, action="append",
+ help=("SSH connection options which are common to ssh and scp."
+ " More than one option can be specified either"
+ " in one quoted string or by specifying"
+ " this option more than once. Example options:"
+ " '-i $HOME/.ssh/access.pem -o ConnectTimeout=10"
+ " -o ConnectionAttempts=10'"))
+
+ control_options.add_option(
+ "--sshOptions", dest="ssh_options", default=None, action="append",
+ help=("SSH specific options."
+ " More than one option can be specified either"
+ " in one quoted string or by specifying"
+ " this option more than once. Example options:"
+ " '-t' or '-T'"))
+
+ control_options.add_option(
+ "--scpOptions", dest="scp_options", default=None, action="append",
+ help=("SCP specific options."
+ " More than one option can be specified either"
+ " in one quoted string or by specifying"
+ " this option more than once. Example options:"
+ " '-l 5000'"))
+
+ control_options.add_option(
+ "--op_retry_count", dest="op_retry_count", type=int, default=0,
+ help=("Number of retries to attempt for operation,"
+ " defaults to '%default'."))
+
+ control_options.add_option(
+ "--access_retry_count", dest="access_retry_count", type=int, default=0,
+ help=("Number of retries to attempt remote access,"
+ " defaults to '%default'."))
+
+ control_options.add_option(
+ "--retrySleep", dest="retry_sleep", type=int, default=10,
+ help=("Number of seconds to wait between retries,"
+ " defaults to '%default'."))
control_options.add_option("--debug", dest="debug", action="store_true", default=False,
help="Provides debug output.")
@@ -347,8 +365,9 @@ def main(): # pylint: disable=too-many-branches,too-many-statements
remote_op = RemoteOperations(
user_host=options.user_host, ssh_connection_options=ssh_connection_options,
- ssh_options=ssh_options, scp_options=scp_options, retries=options.retries,
- retry_sleep=options.retry_sleep, debug=options.debug)
+ ssh_options=ssh_options, scp_options=scp_options, op_retry_count=options.op_retry_count,
+ access_retry_count=options.access_retry_count, retry_sleep=options.retry_sleep,
+ debug=options.debug)
ret_code, buff = remote_op.operation(options.operation, operation_param, operation_dir)
if options.verbose:
print("Return code: {} for command {}".format(ret_code, sys.argv))
diff --git a/buildscripts/tests/test_remote_operations.py b/buildscripts/tests/test_remote_operations.py
index f119e4ffb8a..2fe1dfa0cc7 100755
--- a/buildscripts/tests/test_remote_operations.py
+++ b/buildscripts/tests/test_remote_operations.py
@@ -49,14 +49,14 @@ class RemoteOperationConnection(RemoteOperationsTestCase):
self.assertIsNotNone(buff)
# Invalid host with retries
- remote_op = rop.RemoteOperations(user_host="badhost2", retries=3)
+ remote_op = rop.RemoteOperations(user_host="badhost2", access_retry_count=3)
ret, buff = remote_op.access_info()
self.assertFalse(remote_op.access_established())
self.assertNotEqual(0, ret)
self.assertIsNotNone(buff)
# Invalid host with retries & retry_sleep
- remote_op = rop.RemoteOperations(user_host="badhost3", retries=3, retry_sleep=1)
+ remote_op = rop.RemoteOperations(user_host="badhost3", access_retry_count=3, retry_sleep=1)
ret, buff = remote_op.access_info()
self.assertFalse(remote_op.access_established())
self.assertNotEqual(0, ret)
diff --git a/etc/evergreen.yml b/etc/evergreen.yml
index c28e46c4204..552478fd955 100644
--- a/etc/evergreen.yml
+++ b/etc/evergreen.yml
@@ -1802,14 +1802,14 @@ functions:
--verbose \
--userHost $USER@$private_ip_address \
--sshConnectionOptions "$ssh_connection_options" \
- --retries ${ssh_retries|0} \
+ --access_retry_count ${ssh_retries|0} \
--commands "$cmd"
$python buildscripts/remote_operations.py \
--verbose \
--userHost $USER@$private_ip_address \
--operation "copy_from" \
--sshConnectionOptions "$ssh_connection_options" \
- --retries ${ssh_retries|0} \
+ --access_retry_count ${ssh_retries|0} \
--file ec2_monitor_files.tgz
sleep 30
done
@@ -1877,7 +1877,7 @@ functions:
--userHost $USER@${private_ip_address} \
--operation "copy_to" \
--sshConnectionOptions "$ssh_connection_options" \
- --retries ${ssh_retries|0} \
+ --access_retry_count ${ssh_retries|0} \
--file buildscripts/mount_drives.sh
- command: shell.exec
@@ -1914,7 +1914,7 @@ functions:
--verbose \
--userHost $USER@${private_ip_address} \
--sshConnectionOptions "$ssh_connection_options" \
- --retries ${ssh_retries|0} \
+ --access_retry_count ${ssh_retries|0} \
--commands "$cmds"
- command: shell.exec
@@ -1941,7 +1941,7 @@ functions:
--verbose \
--userHost $USER@${private_ip_address} \
--sshConnectionOptions "$ssh_connection_options" \
- --retries ${ssh_retries|0} \
+ --access_retry_count ${ssh_retries|0} \
--commands "$cmds"
- command: shell.exec
@@ -1964,7 +1964,7 @@ functions:
--userHost $USER@${private_ip_address} \
--operation "copy_to" \
--sshConnectionOptions "$ssh_connection_options" \
- --retries ${ssh_retries|0} \
+ --access_retry_count ${ssh_retries|0} \
$file_param \
--remoteDir ${remote_dir}
@@ -1988,7 +1988,7 @@ functions:
--verbose \
--userHost $USER@${private_ip_address} \
--sshConnectionOptions "$ssh_connection_options" \
- --retries ${ssh_retries|0} \
+ --access_retry_count ${ssh_retries|0} \
--commands "$cmds"
- command: shell.exec
@@ -2026,7 +2026,7 @@ functions:
--verbose \
--userHost $USER@${private_ip_address} \
--sshConnectionOptions "$ssh_connection_options" \
- --retries ${ssh_retries|0} \
+ --access_retry_count ${ssh_retries|0} \
--commands "$cmds"
- command: shell.exec
@@ -2051,7 +2051,7 @@ functions:
--verbose \
--userHost $USER@${private_ip_address} \
--sshConnectionOptions "$ssh_connection_options" \
- --retries ${ssh_retries|3} \
+ --access_retry_count ${ssh_retries|3} \
--commands "$cmds"
- command: shell.exec
@@ -2093,7 +2093,8 @@ functions:
--verbose \
--userHost $USER@${private_ip_address} \
--sshConnectionOptions "$ssh_connection_options" \
- --retries ${ssh_retries|0} \
+ --op_retry_count 2 \
+ --access_retry_count ${ssh_retries|0} \
--commands "$cmds"
- command: shell.exec
@@ -2164,7 +2165,7 @@ functions:
--verbose \
--userHost $USER@${private_ip_address} \
--sshConnectionOptions "$ssh_connection_options" \
- --retries ${ssh_retries|0} \
+ --access_retry_count ${ssh_retries|0} \
--commands "$cmds"
fi
@@ -2187,7 +2188,7 @@ functions:
--verbose \
--userHost $USER@${private_ip_address} \
--sshConnectionOptions "$ssh_connection_options" \
- --retries ${ssh_retries|0} \
+ --access_retry_count ${ssh_retries|0} \
--commands "$cmds"
- *copy_ec2_monitor_files
@@ -2240,7 +2241,7 @@ functions:
--verbose \
--userHost $USER@${private_ip_address} \
--sshConnectionOptions "$ssh_connection_options" \
- --retries ${ssh_retries|0} \
+ --access_retry_count ${ssh_retries|0} \
--commands "$cmd"
"copy EC2 artifacts": &copy_ec2_artifacts
@@ -2260,7 +2261,7 @@ functions:
--userHost $USER@${private_ip_address} \
--operation "copy_from" \
--sshConnectionOptions "$ssh_connection_options" \
- --retries ${ssh_retries|0} \
+ --access_retry_count ${ssh_retries|0} \
--file ec2_artifacts.tgz
"cleanup EC2 instance": &cleanup_ec2_instance
@@ -2302,7 +2303,7 @@ functions:
--verbose \
--userHost $USER@${private_ip_address} \
--sshConnectionOptions "$ssh_connection_options" \
- --retries ${ssh_retries} \
+ --access_retry_count ${ssh_retries} \
--commands "$cmds" \
--commandDir $remote_dir
@@ -2323,7 +2324,7 @@ functions:
--userHost $USER@${private_ip_address} \
--operation "copy_from" \
--sshConnectionOptions "$ssh_connection_options" \
- --retries ${ssh_retries} \
+ --access_retry_count ${ssh_retries} \
--file "$remote_dir/*.core" \
--file "$remote_dir/*.mdmp"
# Since both type of core files do not exist on the same host, this command
@@ -2863,7 +2864,7 @@ functions:
--userHost $USER@${private_ip_address} \
--operation "copy_to" \
--sshConnectionOptions "$ssh_connection_options" \
- --retries ${ssh_retries} \
+ --access_retry_count ${ssh_retries} \
$file_param \
--remoteDir $remote_dir
fi
@@ -2881,7 +2882,7 @@ functions:
--verbose \
--userHost $USER@${private_ip_address} \
--sshConnectionOptions "$ssh_connection_options" \
- --retries ${ssh_retries} \
+ --access_retry_count ${ssh_retries} \
--commands "$cmds"
$python buildscripts/remote_operations.py \
@@ -2889,7 +2890,7 @@ functions:
--userHost $USER@${private_ip_address} \
--operation "copy_from" \
--sshConnectionOptions "$ssh_connection_options" \
- --retries ${ssh_retries} \
+ --access_retry_count ${ssh_retries} \
--file "$remote_dir/debugger*.*" \
--file "$remote_dir/*.$core_ext"
fi
diff --git a/pytests/powertest.py b/pytests/powertest.py
index d78faac5bcc..7659817db81 100755
--- a/pytests/powertest.py
+++ b/pytests/powertest.py
@@ -1111,14 +1111,14 @@ class LocalToRemoteOperations(object):
"""
def __init__( # pylint: disable=too-many-arguments
- self, user_host, retries=2, retry_sleep=30, ssh_connection_options=None,
+ self, user_host, access_retry_count=2, retry_sleep=30, ssh_connection_options=None,
ssh_options=None, shell_binary="/bin/bash", use_shell=False):
"""Initialize LocalToRemoteOperations."""
self.remote_op = remote_operations.RemoteOperations( # pylint: disable=undefined-variable
user_host=user_host, ssh_connection_options=ssh_connection_options,
- ssh_options=ssh_options, retries=retries, retry_sleep=retry_sleep, debug=True,
- shell_binary=shell_binary, use_shell=use_shell)
+ ssh_options=ssh_options, access_retry_count=access_retry_count, retry_sleep=retry_sleep,
+ debug=True, shell_binary=shell_binary, use_shell=use_shell)
def shell(self, cmds, remote_dir=None):
"""Return tuple (ret, output) from performing remote shell operation."""