summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ironic_python_agent/config.py15
-rw-r--r--ironic_python_agent/extensions/standby.py28
-rw-r--r--ironic_python_agent/tests/unit/extensions/test_standby.py82
-rw-r--r--releasenotes/notes/image-download-retries-65ac31fe4328e438.yaml11
4 files changed, 118 insertions, 18 deletions
diff --git a/ironic_python_agent/config.py b/ironic_python_agent/config.py
index 5724a675..c73e6fc9 100644
--- a/ironic_python_agent/config.py
+++ b/ironic_python_agent/config.py
@@ -239,6 +239,21 @@ cli_opts = [
'enforce token validation. The configuration provided '
'by the conductor MAY override this and force this '
'setting to be changed to True in memory.'),
+ cfg.IntOpt('image_download_connection_timeout', min=1,
+ default=APARAMS.get(
+ 'ipa-image-download-connection-timeout', 60),
+ help='The connection timeout (in seconds) when downloading '
+ 'an image. Does not affect the whole download.'),
+ cfg.IntOpt('image_download_connection_retries', min=0,
+ default=APARAMS.get('ipa-image-download-connection-retries', 2),
+ help='How many times to retry the connection when downloading '
+ 'an image. Also retries on failure HTTP statuses.'),
+ cfg.IntOpt('image_download_connection_retry_interval', min=0,
+ default=APARAMS.get(
+ 'ipa-image-download-connection-retry-interval', 5),
+ help='Interval (in seconds) between two attempts to establish '
+ 'connection when downloading an image.'),
+
]
CONF.register_cli_opts(cli_opts)
diff --git a/ironic_python_agent/extensions/standby.py b/ironic_python_agent/extensions/standby.py
index 289e1cc0..bd0ade60 100644
--- a/ironic_python_agent/extensions/standby.py
+++ b/ironic_python_agent/extensions/standby.py
@@ -70,12 +70,28 @@ def _download_with_proxy(image_info, url, image_id):
os.environ['no_proxy'] = no_proxy
proxies = image_info.get('proxies', {})
verify, cert = utils.get_ssl_client_options(CONF)
- resp = requests.get(url, stream=True, proxies=proxies,
- verify=verify, cert=cert)
- if resp.status_code != 200:
- msg = ('Received status code {} from {}, expected 200. Response '
- 'body: {}').format(resp.status_code, url, resp.text)
- raise errors.ImageDownloadError(image_id, msg)
+ resp = None
+ for attempt in range(CONF.image_download_connection_retries + 1):
+ try:
+ resp = requests.get(url, stream=True, proxies=proxies,
+ verify=verify, cert=cert,
+ timeout=CONF.image_download_connection_timeout)
+ if resp.status_code != 200:
+ msg = ('Received status code {} from {}, expected 200. '
+ 'Response body: {}').format(resp.status_code, url,
+ resp.text)
+ raise errors.ImageDownloadError(image_id, msg)
+ except (errors.ImageDownloadError, requests.RequestException) as e:
+ if (attempt == CONF.image_download_connection_retries
+ # NOTE(dtantsur): do not retry 4xx status codes
+ or (resp and resp.status_code < 500)):
+ raise
+ else:
+ LOG.warning('Unable to connect to %s, retrying. Error: %s',
+ url, e)
+ time.sleep(CONF.image_download_connection_retry_interval)
+ else:
+ break
return resp
diff --git a/ironic_python_agent/tests/unit/extensions/test_standby.py b/ironic_python_agent/tests/unit/extensions/test_standby.py
index d4030f09..b0cc081b 100644
--- a/ironic_python_agent/tests/unit/extensions/test_standby.py
+++ b/ironic_python_agent/tests/unit/extensions/test_standby.py
@@ -18,6 +18,7 @@ from unittest import mock
from ironic_lib import exception
from oslo_concurrency import processutils
+import requests
from ironic_python_agent import errors
from ironic_python_agent.extensions import standby
@@ -369,7 +370,8 @@ class TestStandbyExtension(base.IronicAgentTest):
standby._download_image(image_info)
requests_mock.assert_called_once_with(image_info['urls'][0],
cert=None, verify=True,
- stream=True, proxies={})
+ stream=True, proxies={},
+ timeout=60)
write = file_mock.write
write.assert_any_call('some')
write.assert_any_call('content')
@@ -400,7 +402,8 @@ class TestStandbyExtension(base.IronicAgentTest):
self.assertEqual(no_proxy, os.environ['no_proxy'])
requests_mock.assert_called_once_with(image_info['urls'][0],
cert=None, verify=True,
- stream=True, proxies=proxies)
+ stream=True, proxies=proxies,
+ timeout=60)
write = file_mock.write
write.assert_any_call('some')
write.assert_any_call('content')
@@ -1111,7 +1114,8 @@ class TestStandbyExtension(base.IronicAgentTest):
'/dev/foo')
requests_mock.assert_called_once_with(image_info['urls'][0],
cert=None, verify=True,
- stream=True, proxies={})
+ stream=True, proxies={},
+ timeout=60)
expected_calls = [mock.call('some'), mock.call('content')]
file_mock.write.assert_has_calls(expected_calls)
fix_gpt_mock.assert_called_once_with('/dev/foo', node_uuid=None)
@@ -1136,7 +1140,8 @@ class TestStandbyExtension(base.IronicAgentTest):
image_info, '/dev/foo')
requests_mock.assert_called_once_with(image_info['urls'][0],
cert=None, verify=True,
- stream=True, proxies={})
+ stream=True, proxies={},
+ timeout=60)
# Assert write was only called once and failed!
file_mock.write.assert_called_once_with('some')
@@ -1254,11 +1259,13 @@ class TestImageDownload(base.IronicAgentTest):
self.assertEqual(content, list(image_download))
requests_mock.assert_called_once_with(image_info['urls'][0],
cert=None, verify=True,
- stream=True, proxies={})
+ stream=True, proxies={},
+ timeout=60)
self.assertEqual(image_info['checksum'],
image_download._hash_algo.hexdigest())
- def test_download_image_fail(self, requests_mock, time_mock):
+ @mock.patch('time.sleep', autospec=True)
+ def test_download_image_fail(self, sleep_mock, requests_mock, time_mock):
response = requests_mock.return_value
response.status_code = 401
response.text = 'Unauthorized'
@@ -1272,7 +1279,56 @@ class TestImageDownload(base.IronicAgentTest):
standby.ImageDownload, image_info)
requests_mock.assert_called_once_with(image_info['urls'][0],
cert=None, verify=True,
- stream=True, proxies={})
+ stream=True, proxies={},
+ timeout=60)
+ self.assertFalse(sleep_mock.called)
+
+ @mock.patch('time.sleep', autospec=True)
+ def test_download_image_retries(self, sleep_mock, requests_mock,
+ time_mock):
+ response = requests_mock.return_value
+ response.status_code = 500
+ response.text = 'Oops'
+ time_mock.return_value = 0.0
+ image_info = _build_fake_image_info()
+ msg = ('Error downloading image: Download of image fake_id failed: '
+ 'URL: http://example.org; time: .* seconds. Error: '
+ 'Received status code 500 from http://example.org, expected '
+ '200. Response body: Oops')
+ self.assertRaisesRegex(errors.ImageDownloadError, msg,
+ standby.ImageDownload, image_info)
+ requests_mock.assert_called_with(image_info['urls'][0],
+ cert=None, verify=True,
+ stream=True, proxies={},
+ timeout=60)
+ self.assertEqual(3, requests_mock.call_count)
+ sleep_mock.assert_called_with(5)
+ self.assertEqual(2, sleep_mock.call_count)
+
+ @mock.patch('time.sleep', autospec=True)
+ def test_download_image_retries_success(self, sleep_mock, requests_mock,
+ md5_mock):
+ content = ['SpongeBob', 'SquarePants']
+ fail_response = mock.Mock()
+ fail_response.status_code = 500
+ fail_response.text = " "
+ response = mock.Mock()
+ response.status_code = 200
+ response.iter_content.return_value = content
+ requests_mock.side_effect = [requests.Timeout, fail_response, response]
+
+ image_info = _build_fake_image_info()
+ md5_mock.return_value.hexdigest.return_value = image_info['checksum']
+ image_download = standby.ImageDownload(image_info)
+
+ self.assertEqual(content, list(image_download))
+ requests_mock.assert_called_with(image_info['urls'][0],
+ cert=None, verify=True,
+ stream=True, proxies={},
+ timeout=60)
+ self.assertEqual(3, requests_mock.call_count)
+ sleep_mock.assert_called_with(5)
+ self.assertEqual(2, sleep_mock.call_count)
def test_download_image_and_checksum(self, requests_mock, md5_mock):
content = ['SpongeBob', 'SquarePants']
@@ -1293,9 +1349,9 @@ class TestImageDownload(base.IronicAgentTest):
self.assertEqual(content, list(image_download))
requests_mock.assert_has_calls([
mock.call('http://example.com/checksum', cert=None, verify=True,
- stream=True, proxies={}),
+ stream=True, proxies={}, timeout=60),
mock.call(image_info['urls'][0], cert=None, verify=True,
- stream=True, proxies={}),
+ stream=True, proxies={}, timeout=60),
])
self.assertEqual(fake_cs, image_download._hash_algo.hexdigest())
@@ -1323,9 +1379,9 @@ foobar irrelevant file.img
self.assertEqual(content, list(image_download))
requests_mock.assert_has_calls([
mock.call('http://example.com/checksum', cert=None, verify=True,
- stream=True, proxies={}),
+ stream=True, proxies={}, timeout=60),
mock.call(image_info['urls'][0], cert=None, verify=True,
- stream=True, proxies={}),
+ stream=True, proxies={}, timeout=60),
])
self.assertEqual(fake_cs, image_download._hash_algo.hexdigest())
@@ -1378,7 +1434,9 @@ foobar irrelevant file.img
response = mock.Mock()
response.status_code = 200
response.iter_content.return_value = content
- requests_mock.side_effect = [cs_response, response]
+ # 3 retries on status code
+ requests_mock.side_effect = [cs_response, cs_response, cs_response,
+ response]
image_info = _build_fake_image_info(
'http://example.com/path/image.img')
diff --git a/releasenotes/notes/image-download-retries-65ac31fe4328e438.yaml b/releasenotes/notes/image-download-retries-65ac31fe4328e438.yaml
new file mode 100644
index 00000000..7f787a56
--- /dev/null
+++ b/releasenotes/notes/image-download-retries-65ac31fe4328e438.yaml
@@ -0,0 +1,11 @@
+---
+fixes:
+ - |
+ Provides timeout and retries when establishing a connection to download
+ an image in the ``standby`` extension. Reduces probability of an image
+ download getting stuck in the event of network problems.
+
+ The default timeout is 60 seconds and can be set via the
+ ``ipa-image-download-connection-timeout`` kernel parameter. The default
+ number of retries is 2 and can be set via the
+ ``ipa-image-download-connection-retries`` parameter.