diff options
author | Zuul <zuul@review.opendev.org> | 2020-07-02 13:32:25 +0000 |
---|---|---|
committer | Gerrit Code Review <review@openstack.org> | 2020-07-02 13:32:26 +0000 |
commit | fff4ac26d9256c290371d1441b89f67a2a7100d0 (patch) | |
tree | a02ad60dc2b6a8ff6e877fe41786eb80e9f69f64 | |
parent | ef0bcb7e116b10c0dcd7fc415f69dcd8bc91c31d (diff) | |
parent | 0e34f02b856aa56b3b101ff6f6e9d9ff2ac69d2d (diff) | |
download | ironic-python-agent-fff4ac26d9256c290371d1441b89f67a2a7100d0.tar.gz |
Merge "Add full download retries" into stable/train
3 files changed, 83 insertions, 29 deletions
diff --git a/ironic_python_agent/extensions/standby.py b/ironic_python_agent/extensions/standby.py index 3a496ab1..80988e44 100644 --- a/ironic_python_agent/extensions/standby.py +++ b/ironic_python_agent/extensions/standby.py @@ -405,16 +405,30 @@ def _download_image(image_info): """ starttime = time.time() image_location = _image_location(image_info) - image_download = ImageDownload(image_info, time_obj=starttime) - - with open(image_location, 'wb') as f: + for attempt in range(CONF.image_download_connection_retries + 1): try: - for chunk in image_download: - f.write(chunk) - except Exception as e: - msg = 'Unable to write image to {}. Error: {}'.format( - image_location, str(e)) - raise errors.ImageDownloadError(image_info['id'], msg) + image_download = ImageDownload(image_info, time_obj=starttime) + + with open(image_location, 'wb') as f: + try: + for chunk in image_download: + f.write(chunk) + except Exception as e: + msg = 'Unable to write image to {}. Error: {}'.format( + image_location, str(e)) + raise errors.ImageDownloadError(image_info['id'], msg) + except errors.ImageDownloadError as e: + if attempt == CONF.image_download_connection_retries: + raise + else: + LOG.warning('Image download failed, %(attempt)s of %(total)s: ' + '%(error)s', + {'attempt': attempt, + 'total': CONF.image_download_connection_retries, + 'error': e}) + time.sleep(CONF.image_download_connection_retry_interval) + else: + break totaltime = time.time() - starttime LOG.info("Image downloaded from {} in {} seconds".format(image_location, @@ -549,16 +563,31 @@ class StandbyExtension(base.BaseAgentExtension): match the checksum as reported by glance in image_info. """ starttime = time.time() - image_download = ImageDownload(image_info, time_obj=starttime) - - with open(device, 'wb+') as f: + total_retries = CONF.image_download_connection_retries + for attempt in range(total_retries + 1): try: - for chunk in image_download: - f.write(chunk) - except Exception as e: - msg = 'Unable to write image to device {}. Error: {}'.format( - device, str(e)) - raise errors.ImageDownloadError(image_info['id'], msg) + image_download = ImageDownload(image_info, time_obj=starttime) + + with open(device, 'wb+') as f: + try: + for chunk in image_download: + f.write(chunk) + except Exception as e: + msg = ('Unable to write image to device {}. ' + 'Error: {}').format(device, str(e)) + raise errors.ImageDownloadError(image_info['id'], msg) + except errors.ImageDownloadError as e: + if attempt == CONF.image_download_connection_retries: + raise + else: + LOG.warning('Image download failed, %(attempt)s of ' + '%(total)s: %(error)s', + {'attempt': attempt, + 'total': total_retries, + 'error': e}) + time.sleep(CONF.image_download_connection_retry_interval) + else: + break totaltime = time.time() - starttime LOG.info("Image streamed onto device {} in {} " diff --git a/ironic_python_agent/tests/unit/extensions/test_standby.py b/ironic_python_agent/tests/unit/extensions/test_standby.py index adb2e083..394c2bb1 100644 --- a/ironic_python_agent/tests/unit/extensions/test_standby.py +++ b/ironic_python_agent/tests/unit/extensions/test_standby.py @@ -1079,6 +1079,8 @@ class TestStandbyExtension(base.IronicAgentTest): @mock.patch('requests.get', autospec=True) def test_stream_raw_image_onto_device_write_error(self, requests_mock, open_mock, md5_mock): + self.config(image_download_connection_timeout=1) + self.config(image_download_connection_retry_interval=0) image_info = _build_fake_image_info() response = requests_mock.return_value response.status_code = 200 @@ -1092,12 +1094,20 @@ class TestStandbyExtension(base.IronicAgentTest): self.assertRaises(errors.ImageDownloadError, self.agent_extension._stream_raw_image_onto_device, image_info, '/dev/foo') - requests_mock.assert_called_once_with(image_info['urls'][0], - cert=None, verify=True, - stream=True, proxies={}, - timeout=60) - # Assert write was only called once and failed! - file_mock.write.assert_called_once_with('some') + calls = [mock.call('http://example.org', cert=None, proxies={}, + stream=True, timeout=1, verify=True), + mock.call().iter_content(mock.ANY), + mock.call('http://example.org', cert=None, proxies={}, + stream=True, timeout=1, verify=True), + mock.call().iter_content(mock.ANY), + mock.call('http://example.org', cert=None, proxies={}, + stream=True, timeout=1, verify=True), + mock.call().iter_content(mock.ANY)] + requests_mock.assert_has_calls(calls) + write_calls = [mock.call('some'), + mock.call('some'), + mock.call('some')] + file_mock.write.assert_has_calls(write_calls) @mock.patch('ironic_lib.disk_utils.get_disk_identifier', lambda dev: 'ROOT') @@ -1147,6 +1157,7 @@ class TestStandbyExtension(base.IronicAgentTest): return self self.config(image_download_connection_timeout=1) + self.config(image_download_connection_retry_interval=0) image_info = _build_fake_image_info() file_mock = mock.Mock() open_mock.return_value.__enter__.return_value = file_mock @@ -1160,11 +1171,19 @@ class TestStandbyExtension(base.IronicAgentTest): self.agent_extension._stream_raw_image_onto_device, image_info, '/dev/foo') - requests_mock.assert_called_once_with(image_info['urls'][0], - cert=None, verify=True, - stream=True, proxies={}, - timeout=1) - file_mock.write.assert_called_once_with('meow') + + calls = [mock.call(image_info['urls'][0], cert=None, verify=True, + stream=True, proxies={}, timeout=1), + mock.call(image_info['urls'][0], cert=None, verify=True, + stream=True, proxies={}, timeout=1), + mock.call(image_info['urls'][0], cert=None, verify=True, + stream=True, proxies={}, timeout=1)] + requests_mock.assert_has_calls(calls) + + write_calls = [mock.call('meow'), + mock.call('meow'), + mock.call('meow')] + file_mock.write.assert_has_calls(write_calls) self.assertFalse(fix_gpt_mock.called) def test__message_format_partition_bios(self): diff --git a/releasenotes/notes/agent-fully-retries-image-downloads-67409a493c6d08ae.yaml b/releasenotes/notes/agent-fully-retries-image-downloads-67409a493c6d08ae.yaml new file mode 100644 index 00000000..660ec4c8 --- /dev/null +++ b/releasenotes/notes/agent-fully-retries-image-downloads-67409a493c6d08ae.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + Fixes deployment failures when the image download is interrupted + mid-stream while the contents are being downloaded. Previously retries + were limited to only opening the initial connection. |