summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZuul <zuul@review.opendev.org>2020-07-02 13:32:25 +0000
committerGerrit Code Review <review@openstack.org>2020-07-02 13:32:26 +0000
commitfff4ac26d9256c290371d1441b89f67a2a7100d0 (patch)
treea02ad60dc2b6a8ff6e877fe41786eb80e9f69f64
parentef0bcb7e116b10c0dcd7fc415f69dcd8bc91c31d (diff)
parent0e34f02b856aa56b3b101ff6f6e9d9ff2ac69d2d (diff)
downloadironic-python-agent-fff4ac26d9256c290371d1441b89f67a2a7100d0.tar.gz
Merge "Add full download retries" into stable/train
-rw-r--r--ironic_python_agent/extensions/standby.py65
-rw-r--r--ironic_python_agent/tests/unit/extensions/test_standby.py41
-rw-r--r--releasenotes/notes/agent-fully-retries-image-downloads-67409a493c6d08ae.yaml6
3 files changed, 83 insertions, 29 deletions
diff --git a/ironic_python_agent/extensions/standby.py b/ironic_python_agent/extensions/standby.py
index 3a496ab1..80988e44 100644
--- a/ironic_python_agent/extensions/standby.py
+++ b/ironic_python_agent/extensions/standby.py
@@ -405,16 +405,30 @@ def _download_image(image_info):
"""
starttime = time.time()
image_location = _image_location(image_info)
- image_download = ImageDownload(image_info, time_obj=starttime)
-
- with open(image_location, 'wb') as f:
+ for attempt in range(CONF.image_download_connection_retries + 1):
try:
- for chunk in image_download:
- f.write(chunk)
- except Exception as e:
- msg = 'Unable to write image to {}. Error: {}'.format(
- image_location, str(e))
- raise errors.ImageDownloadError(image_info['id'], msg)
+ image_download = ImageDownload(image_info, time_obj=starttime)
+
+ with open(image_location, 'wb') as f:
+ try:
+ for chunk in image_download:
+ f.write(chunk)
+ except Exception as e:
+ msg = 'Unable to write image to {}. Error: {}'.format(
+ image_location, str(e))
+ raise errors.ImageDownloadError(image_info['id'], msg)
+ except errors.ImageDownloadError as e:
+ if attempt == CONF.image_download_connection_retries:
+ raise
+ else:
+ LOG.warning('Image download failed, %(attempt)s of %(total)s: '
+ '%(error)s',
+ {'attempt': attempt,
+ 'total': CONF.image_download_connection_retries,
+ 'error': e})
+ time.sleep(CONF.image_download_connection_retry_interval)
+ else:
+ break
totaltime = time.time() - starttime
LOG.info("Image downloaded from {} in {} seconds".format(image_location,
@@ -549,16 +563,31 @@ class StandbyExtension(base.BaseAgentExtension):
match the checksum as reported by glance in image_info.
"""
starttime = time.time()
- image_download = ImageDownload(image_info, time_obj=starttime)
-
- with open(device, 'wb+') as f:
+ total_retries = CONF.image_download_connection_retries
+ for attempt in range(total_retries + 1):
try:
- for chunk in image_download:
- f.write(chunk)
- except Exception as e:
- msg = 'Unable to write image to device {}. Error: {}'.format(
- device, str(e))
- raise errors.ImageDownloadError(image_info['id'], msg)
+ image_download = ImageDownload(image_info, time_obj=starttime)
+
+ with open(device, 'wb+') as f:
+ try:
+ for chunk in image_download:
+ f.write(chunk)
+ except Exception as e:
+ msg = ('Unable to write image to device {}. '
+ 'Error: {}').format(device, str(e))
+ raise errors.ImageDownloadError(image_info['id'], msg)
+ except errors.ImageDownloadError as e:
+ if attempt == CONF.image_download_connection_retries:
+ raise
+ else:
+ LOG.warning('Image download failed, %(attempt)s of '
+ '%(total)s: %(error)s',
+ {'attempt': attempt,
+ 'total': total_retries,
+ 'error': e})
+ time.sleep(CONF.image_download_connection_retry_interval)
+ else:
+ break
totaltime = time.time() - starttime
LOG.info("Image streamed onto device {} in {} "
diff --git a/ironic_python_agent/tests/unit/extensions/test_standby.py b/ironic_python_agent/tests/unit/extensions/test_standby.py
index adb2e083..394c2bb1 100644
--- a/ironic_python_agent/tests/unit/extensions/test_standby.py
+++ b/ironic_python_agent/tests/unit/extensions/test_standby.py
@@ -1079,6 +1079,8 @@ class TestStandbyExtension(base.IronicAgentTest):
@mock.patch('requests.get', autospec=True)
def test_stream_raw_image_onto_device_write_error(self, requests_mock,
open_mock, md5_mock):
+ self.config(image_download_connection_timeout=1)
+ self.config(image_download_connection_retry_interval=0)
image_info = _build_fake_image_info()
response = requests_mock.return_value
response.status_code = 200
@@ -1092,12 +1094,20 @@ class TestStandbyExtension(base.IronicAgentTest):
self.assertRaises(errors.ImageDownloadError,
self.agent_extension._stream_raw_image_onto_device,
image_info, '/dev/foo')
- requests_mock.assert_called_once_with(image_info['urls'][0],
- cert=None, verify=True,
- stream=True, proxies={},
- timeout=60)
- # Assert write was only called once and failed!
- file_mock.write.assert_called_once_with('some')
+ calls = [mock.call('http://example.org', cert=None, proxies={},
+ stream=True, timeout=1, verify=True),
+ mock.call().iter_content(mock.ANY),
+ mock.call('http://example.org', cert=None, proxies={},
+ stream=True, timeout=1, verify=True),
+ mock.call().iter_content(mock.ANY),
+ mock.call('http://example.org', cert=None, proxies={},
+ stream=True, timeout=1, verify=True),
+ mock.call().iter_content(mock.ANY)]
+ requests_mock.assert_has_calls(calls)
+ write_calls = [mock.call('some'),
+ mock.call('some'),
+ mock.call('some')]
+ file_mock.write.assert_has_calls(write_calls)
@mock.patch('ironic_lib.disk_utils.get_disk_identifier',
lambda dev: 'ROOT')
@@ -1147,6 +1157,7 @@ class TestStandbyExtension(base.IronicAgentTest):
return self
self.config(image_download_connection_timeout=1)
+ self.config(image_download_connection_retry_interval=0)
image_info = _build_fake_image_info()
file_mock = mock.Mock()
open_mock.return_value.__enter__.return_value = file_mock
@@ -1160,11 +1171,19 @@ class TestStandbyExtension(base.IronicAgentTest):
self.agent_extension._stream_raw_image_onto_device,
image_info,
'/dev/foo')
- requests_mock.assert_called_once_with(image_info['urls'][0],
- cert=None, verify=True,
- stream=True, proxies={},
- timeout=1)
- file_mock.write.assert_called_once_with('meow')
+
+ calls = [mock.call(image_info['urls'][0], cert=None, verify=True,
+ stream=True, proxies={}, timeout=1),
+ mock.call(image_info['urls'][0], cert=None, verify=True,
+ stream=True, proxies={}, timeout=1),
+ mock.call(image_info['urls'][0], cert=None, verify=True,
+ stream=True, proxies={}, timeout=1)]
+ requests_mock.assert_has_calls(calls)
+
+ write_calls = [mock.call('meow'),
+ mock.call('meow'),
+ mock.call('meow')]
+ file_mock.write.assert_has_calls(write_calls)
self.assertFalse(fix_gpt_mock.called)
def test__message_format_partition_bios(self):
diff --git a/releasenotes/notes/agent-fully-retries-image-downloads-67409a493c6d08ae.yaml b/releasenotes/notes/agent-fully-retries-image-downloads-67409a493c6d08ae.yaml
new file mode 100644
index 00000000..660ec4c8
--- /dev/null
+++ b/releasenotes/notes/agent-fully-retries-image-downloads-67409a493c6d08ae.yaml
@@ -0,0 +1,6 @@
+---
+fixes:
+ - |
+ Fixes deployment failures when the image download is interrupted
+ mid-stream while the contents are being downloaded. Previously retries
+ were limited to only opening the initial connection.