diff options
-rw-r--r-- | ironic_python_agent/burnin.py | 84 | ||||
-rw-r--r-- | ironic_python_agent/extensions/standby.py | 5 | ||||
-rw-r--r-- | ironic_python_agent/tests/unit/extensions/test_standby.py | 3 | ||||
-rw-r--r-- | ironic_python_agent/tests/unit/test_burnin.py | 77 | ||||
-rw-r--r-- | lower-constraints.txt | 4 | ||||
-rw-r--r-- | releasenotes/notes/add-smart-test-to-disk-burnin-d02d31e23e5efa9a.yaml | 7 | ||||
-rw-r--r-- | releasenotes/notes/fix-rescan-device-7b00c6836b687ce8.yaml | 4 | ||||
-rw-r--r-- | releasenotes/notes/qemu-write-zeros-2edbf3152c57e2b6.yaml | 6 | ||||
-rw-r--r-- | releasenotes/source/xena.rst | 6 | ||||
-rw-r--r-- | requirements.txt | 4 |
10 files changed, 188 insertions, 12 deletions
diff --git a/ironic_python_agent/burnin.py b/ironic_python_agent/burnin.py index c97eea96..841dbfb1 100644 --- a/ironic_python_agent/burnin.py +++ b/ironic_python_agent/burnin.py @@ -10,6 +10,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import json import time from ironic_lib import utils @@ -84,6 +85,83 @@ def stress_ng_vm(node): raise errors.CommandExecutionError(error_msg) +def _smart_test_status(device): + """Get the SMART test status of a device + + :param device: The device to check. + :raises: CommandExecutionError if the execution of smartctl fails. + :returns: A string with the SMART test status of the device and + None if the status is not available. + """ + args = ['smartctl', '-ja', device.name] + try: + out, _ = utils.execute(*args) + smart_info = json.loads(out) + if smart_info: + return smart_info['ata_smart_data'][ + 'self_test']['status']['string'] + except (processutils.ProcessExecutionError, OSError, KeyError) as e: + LOG.error('SMART test on %(device)s failed with ' + '%(err)s', {'device': device.name, 'err': e}) + return None + + +def _run_smart_test(devices): + """Launch a SMART test on the passed devices + + :param devices: A list of device objects to check. + :raises: CommandExecutionError if the execution of smartctl fails. + :raises: CleaningError if the SMART test on any of the devices fails. + """ + failed_devices = [] + for device in devices: + args = ['smartctl', '-t', 'long', device.name] + LOG.info('SMART self test command: %s', + ' '.join(map(str, args))) + try: + utils.execute(*args) + except (processutils.ProcessExecutionError, OSError) as e: + LOG.error("Starting SMART test on %(device)s failed with: " + "%(err)s", {'device': device.name, 'err': e}) + failed_devices.append(device.name) + if failed_devices: + error_msg = ("fio (disk) failed to start SMART self test on %s", + ', '.join(failed_devices)) + raise errors.CleaningError(error_msg) + + # wait for the test to finish and report the test results + failed_devices = [] + while True: + for device in list(devices): + status = _smart_test_status(device) + if status is None: + devices.remove(device) + continue + if "in progress" in status: + msg = "SMART test still running on %s ..." % device.name + LOG.debug(msg) + continue + if "completed without error" in status: + msg = "%s passed SMART test" % device.name + LOG.info(msg) + devices.remove(device) + continue + failed_devices.append(device.name) + LOG.warning("%(device)s failed SMART test with: %(err)s", + {'device': device.name, 'err': status}) + devices.remove(device) + if not devices: + break + LOG.info("SMART tests still running ...") + time.sleep(30) + + # fail the clean step if the SMART test has failed + if failed_devices: + msg = ('fio (disk) SMART test failed for %s' % ' '.join( + map(str, failed_devices))) + raise errors.CleaningError(msg) + + def fio_disk(node): """Burn-in the disks with fio @@ -118,6 +196,12 @@ def fio_disk(node): LOG.error(error_msg) raise errors.CommandExecutionError(error_msg) + # if configured, run a smart self test on all devices and fail the + # step if any of the devices reports an error + smart_test = info.get('agent_burnin_fio_disk_smart_test', False) + if smart_test: + _run_smart_test(devices) + def _do_fio_network(writer, runtime, partner): diff --git a/ironic_python_agent/extensions/standby.py b/ironic_python_agent/extensions/standby.py index 623423b9..1e7855c4 100644 --- a/ironic_python_agent/extensions/standby.py +++ b/ironic_python_agent/extensions/standby.py @@ -200,12 +200,13 @@ def _write_whole_disk_image(image, image_info, device): disk_utils.udev_settle() command = ['qemu-img', 'convert', - '-t', 'directsync', '-O', 'host_device', '-W', + '-t', 'directsync', '-S', '0', '-O', 'host_device', '-W', image, device] LOG.info('Writing image with command: %s', ' '.join(command)) try: disk_utils.convert_image(image, device, out_format='host_device', - cache='directsync', out_of_order=True) + cache='directsync', out_of_order=True, + sparse_size='0') except processutils.ProcessExecutionError as e: raise errors.ImageWriteError(device, e.exit_code, e.stdout, e.stderr) diff --git a/ironic_python_agent/tests/unit/extensions/test_standby.py b/ironic_python_agent/tests/unit/extensions/test_standby.py index 559a15bb..bce88a00 100644 --- a/ironic_python_agent/tests/unit/extensions/test_standby.py +++ b/ironic_python_agent/tests/unit/extensions/test_standby.py @@ -182,7 +182,8 @@ class TestStandbyExtension(base.IronicAgentTest): convert_mock.assert_called_once_with(location, device, out_format='host_device', cache='directsync', - out_of_order=True) + out_of_order=True, + sparse_size='0') wipe_mock.assert_called_once_with(device, '') udev_mock.assert_called_once_with() rescan_mock.assert_called_once_with(device) diff --git a/ironic_python_agent/tests/unit/test_burnin.py b/ironic_python_agent/tests/unit/test_burnin.py index 2258352e..16aea375 100644 --- a/ironic_python_agent/tests/unit/test_burnin.py +++ b/ironic_python_agent/tests/unit/test_burnin.py @@ -21,6 +21,39 @@ from ironic_python_agent import hardware from ironic_python_agent.tests.unit import base +SMART_OUTPUT_JSON_COMPLETED = (""" +{ + "ata_smart_data": { + "self_test": { + "status": { + "value": 0, + "string": "completed without error", + "passed": true + }, + "polling_minutes": { + "short": 1, + "extended": 2, + "conveyance": 2 + } + } + } +} +""") + +SMART_OUTPUT_JSON_MISSING = (""" +{ + "ata_smart_data": { + "self_test": { + "status": { + "value": 0, + "passed": true + } + } + } +} +""") + + @mock.patch.object(utils, 'execute', autospec=True) class TestBurnin(base.IronicAgentTest): @@ -133,6 +166,50 @@ class TestBurnin(base.IronicAgentTest): '--loops', 5, '--runtime', 600, '--time_based', '--name', '/dev/sdj', '--name', '/dev/hdaa') + def test__smart_test_status(self, mock_execute): + device = hardware.BlockDevice('/dev/sdj', 'big', 1073741824, True) + mock_execute.return_value = ([SMART_OUTPUT_JSON_COMPLETED, 'err']) + + status = burnin._smart_test_status(device) + + mock_execute.assert_called_once_with('smartctl', '-ja', '/dev/sdj') + self.assertEqual(status, "completed without error") + + def test__smart_test_status_missing(self, mock_execute): + device = hardware.BlockDevice('/dev/sdj', 'big', 1073741824, True) + mock_execute.return_value = ([SMART_OUTPUT_JSON_MISSING, 'err']) + + status = burnin._smart_test_status(device) + + mock_execute.assert_called_once_with('smartctl', '-ja', '/dev/sdj') + self.assertIsNone(status) + + @mock.patch.object(burnin, '_smart_test_status', autospec=True) + @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) + def test_fio_disk_smart_test(self, mock_list, mock_status, mock_execute): + + node = {'driver_info': {'agent_burnin_fio_disk_smart_test': True}} + + mock_list.return_value = [ + hardware.BlockDevice('/dev/sdj', 'big', 1073741824, True), + hardware.BlockDevice('/dev/hdaa', 'small', 65535, False), + ] + mock_status.return_value = "completed without error" + mock_execute.return_value = (['out', 'err']) + + burnin.fio_disk(node) + + expected_calls = [ + mock.call('fio', '--rw', 'readwrite', '--bs', '4k', '--direct', 1, + '--ioengine', 'libaio', '--iodepth', '32', '--verify', + 'crc32c', '--verify_dump', 1, '--continue_on_error', + 'verify', '--loops', 4, '--runtime', 0, '--time_based', + '--name', '/dev/sdj', '--name', '/dev/hdaa'), + mock.call('smartctl', '-t', 'long', '/dev/sdj'), + mock.call('smartctl', '-t', 'long', '/dev/hdaa') + ] + mock_execute.assert_has_calls(expected_calls) + @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) def test_fio_disk_no_fio(self, mock_list, mock_execute): diff --git a/lower-constraints.txt b/lower-constraints.txt index 6e5fc9bc..8fbb7f13 100644 --- a/lower-constraints.txt +++ b/lower-constraints.txt @@ -6,7 +6,7 @@ cryptography==2.3 dogpile.cache==0.9.2 eventlet==0.18.2 importlib_metadata==1.7.0;python_version<'3.8' -ironic-lib==4.7.1 +ironic-lib==5.1.0 netifaces==0.10.4 openstacksdk==0.49.0 oslo.concurrency==3.26.0 @@ -14,7 +14,7 @@ oslo.config==5.2.0 oslo.log==3.36.0 oslo.serialization==2.18.0 oslo.service==1.24.0 -oslo.utils==3.33.0 +oslo.utils==3.34.0 oslotest==3.2.0 pbr==2.0.0 psutil==3.2.2 diff --git a/releasenotes/notes/add-smart-test-to-disk-burnin-d02d31e23e5efa9a.yaml b/releasenotes/notes/add-smart-test-to-disk-burnin-d02d31e23e5efa9a.yaml new file mode 100644 index 00000000..0e95b911 --- /dev/null +++ b/releasenotes/notes/add-smart-test-to-disk-burnin-d02d31e23e5efa9a.yaml @@ -0,0 +1,7 @@ +--- +features: + - | + Add 'agent_burnin_fio_disk_smart_test' option in driver-info for disk + burn-in. If set to True, this option will launch a parallel SMART self + test on all devices after the disk burn-in and fail the disk burn-in + clean step if any of the tests fail. diff --git a/releasenotes/notes/fix-rescan-device-7b00c6836b687ce8.yaml b/releasenotes/notes/fix-rescan-device-7b00c6836b687ce8.yaml index 05dee911..26de1102 100644 --- a/releasenotes/notes/fix-rescan-device-7b00c6836b687ce8.yaml +++ b/releasenotes/notes/fix-rescan-device-7b00c6836b687ce8.yaml @@ -1,5 +1,5 @@ --- fixes: - | - Fixes an issue where partitions are not visible due to a - incorrect call to have the partition table re-read. + Fixes an issue where partitions are not visible due to an incorrect call + when re-reading the partition table during a device rescan. diff --git a/releasenotes/notes/qemu-write-zeros-2edbf3152c57e2b6.yaml b/releasenotes/notes/qemu-write-zeros-2edbf3152c57e2b6.yaml new file mode 100644 index 00000000..8b79dc23 --- /dev/null +++ b/releasenotes/notes/qemu-write-zeros-2edbf3152c57e2b6.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + IPA instructs qemu-img to write image zeros to disk. + Doing this will cause it not to zero out the entire + block device which can be very costly on a slow HDD. diff --git a/releasenotes/source/xena.rst b/releasenotes/source/xena.rst index 1be85be3..b92ce439 100644 --- a/releasenotes/source/xena.rst +++ b/releasenotes/source/xena.rst @@ -1,6 +1,6 @@ -========================= -Xena Series Release Notes -========================= +========================================= +Xena Series (8.0.0 - 8.2.x) Release Notes +========================================= .. release-notes:: :branch: stable/xena diff --git a/requirements.txt b/requirements.txt index 69d886cd..cadb5c30 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,13 +10,13 @@ oslo.concurrency>=3.26.0 # Apache-2.0 oslo.log>=3.36.0 # Apache-2.0 oslo.serialization!=2.19.1,>=2.18.0 # Apache-2.0 oslo.service!=1.28.1,>=1.24.0 # Apache-2.0 -oslo.utils>=3.33.0 # Apache-2.0 +oslo.utils>=3.34.0 # Apache-2.0 Pint>=0.5 # BSD psutil>=3.2.2 # BSD pyudev>=0.18 # LGPLv2.1+ requests>=2.14.2 # Apache-2.0 stevedore>=1.20.0 # Apache-2.0 tenacity>=6.2.0 # Apache-2.0 -ironic-lib>=4.7.1 # Apache-2.0 +ironic-lib>=5.1.0 # Apache-2.0 Werkzeug>=1.0.1 # BSD License cryptography>=2.3 # BSD/Apache-2.0 |