diff options
author | Jacob Anders <janders@redhat.com> | 2021-11-22 15:59:50 +1000 |
---|---|---|
committer | Jacob Anders <janders@redhat.com> | 2022-03-15 19:00:25 +1000 |
commit | c5f7f18bcb2e1876b0712c691996346620ffc920 (patch) | |
tree | 8842a50d00f795bbff6f09241beeb8b4b394648d | |
parent | cf4644312095ff836c522569ba0a66666bf8857d (diff) | |
download | ironic-python-agent-c5f7f18bcb2e1876b0712c691996346620ffc920.tar.gz |
Improve efficiency of storage cleaning in mixed media envs
https://storyboard.openstack.org/#!/story/2008290 added support
for NVMe-native storage cleaning, greatly improving storage clean
times on NVMe-based nodes as well as reducing device wear.
This is a follow-up change which aims to make further improvements
to cleaning efficiency in mixed NVMe-HDD environments. This is
achieved by combining NVMe-native cleaning methods on NVMe devices
with traditional metadata cleaning on non-NVMe devices.
Story: 2009264
Task: 43498
Change-Id: I445d8f4aaa6cd191d2e540032aed3148fdbff341
-rw-r--r-- | ironic_python_agent/hardware.py | 91 | ||||
-rw-r--r-- | ironic_python_agent/tests/unit/test_hardware.py | 41 | ||||
-rw-r--r-- | releasenotes/notes/add-erase-devices-express-1df107c75f2b3627.yaml | 8 |
3 files changed, 124 insertions, 16 deletions
diff --git a/ironic_python_agent/hardware.py b/ironic_python_agent/hardware.py index ceafe31e..d02620c6 100644 --- a/ironic_python_agent/hardware.py +++ b/ironic_python_agent/hardware.py @@ -1347,34 +1347,45 @@ class GenericHardwareManager(HardwareManager): LOG.error(msg) raise errors.IncompatibleHardwareMethodError(msg) - def erase_devices_metadata(self, node, ports): - """Attempt to erase the disk devices metadata. - - :param node: Ironic node object - :param ports: list of Ironic port objects - :raises BlockDeviceEraseError: when there's an error erasing the - block device - """ + def _list_erasable_devices(self): block_devices = self.list_block_devices(include_partitions=True) # NOTE(coreywright): Reverse sort by device name so a partition (eg # sda1) is processed before it disappears when its associated disk (eg # sda) has its partition table erased and the kernel notified. block_devices.sort(key=lambda dev: dev.name, reverse=True) - erase_errors = {} + erasable_devices = [] for dev in block_devices: if self._is_virtual_media_device(dev): - LOG.info("Skipping metadata erase of virtual media device %s", + LOG.info("Skipping erasure of virtual media device %s", dev.name) continue if self._is_linux_raid_member(dev): - LOG.info("Skipping metadata erase of RAID member device %s", + LOG.info("Skipping erasure of RAID member device %s", dev.name) continue if self._is_read_only_device(dev): - LOG.info("Skipping metadata erase of read-only device %s", + LOG.info("Skipping erasure of read-only device %s", dev.name) continue + erasable_devices.append(dev) + + return erasable_devices + + def erase_devices_metadata(self, node, ports): + """Attempt to erase the disk devices metadata. 
+ :param node: Ironic node object + :param ports: list of Ironic port objects + :raises BlockDeviceEraseError: when there's an error erasing the + block device + """ + block_devices = self.list_block_devices(include_partitions=True) + # NOTE(coreywright): Reverse sort by device name so a partition (eg + # sda1) is processed before it disappears when its associated disk (eg + # sda) has its partition table erased and the kernel notified. + block_devices.sort(key=lambda dev: dev.name, reverse=True) + erase_errors = {} + for dev in self._list_erasable_devices(): try: disk_utils.destroy_disk_metadata(dev.name, node['uuid']) except processutils.ProcessExecutionError as e: @@ -1388,6 +1399,55 @@ class GenericHardwareManager(HardwareManager): for k, v in erase_errors.items()])) raise errors.BlockDeviceEraseError(excpt_msg) + def erase_devices_express(self, node, ports): + """Attempt to perform time-optimised disk erasure: + + for NVMe devices, perform NVMe Secure Erase if supported. For other + devices, perform metadata erasure + + :param node: Ironic node object + :param ports: list of Ironic port objects + :raises BlockDeviceEraseError: when there's an error erasing the + block device + """ + erase_errors = {} + info = node.get('driver_internal_info', {}) + if not self._list_erasable_devices: + LOG.debug("No erasable devices have been found.") + return + for dev in self._list_erasable_devices(): + try: + if self._is_nvme(dev): + execute_nvme_erase = info.get( + 'agent_enable_nvme_secure_erase', True) + if execute_nvme_erase and self._nvme_erase(dev): + continue + except errors.BlockDeviceEraseError as e: + LOG.error('Failed to securely erase device "%(dev)s". ' + 'Error: %(error)s, falling back to metadata ' + 'clean', {'dev': dev.name, 'error': e}) + secure_erase_error = e + try: + disk_utils.destroy_disk_metadata(dev.name, node['uuid']) + except processutils.ProcessExecutionError as e: + LOG.error('Failed to erase the metadata on device ' + '"%(dev)s". 
Error: %(error)s', + {'dev': dev.name, 'error': e}) + if secure_erase_error: + erase_errors[dev.name] = ( + "Secure erase failed: %s. " + "Fallback to metadata erase also failed: %s.", + secure_erase_error, e) + else: + erase_errors[dev.name] = e + + if erase_errors: + excpt_msg = ('Failed to conduct an express erase on ' + 'the device(s): %s' % '\n'.join('"%s": %s' % item + for item in + erase_errors.items())) + raise errors.BlockDeviceEraseError(excpt_msg) + def _find_pstore_mount_point(self): """Find the pstore mount point by scanning /proc/mounts. @@ -1941,6 +2001,13 @@ class GenericHardwareManager(HardwareManager): 'abortable': True }, { + 'step': 'erase_devices_express', + 'priority': 0, + 'interface': 'deploy', + 'reboot_requested': False, + 'abortable': True + }, + { 'step': 'erase_pstore', 'priority': 0, 'interface': 'deploy', diff --git a/ironic_python_agent/tests/unit/test_hardware.py b/ironic_python_agent/tests/unit/test_hardware.py index 7b19931b..e02c908f 100644 --- a/ironic_python_agent/tests/unit/test_hardware.py +++ b/ironic_python_agent/tests/unit/test_hardware.py @@ -130,6 +130,13 @@ class TestGenericHardwareManager(base.IronicAgentTest): 'abortable': True }, { + 'step': 'erase_devices_express', + 'priority': 0, + 'interface': 'deploy', + 'reboot_requested': False, + 'abortable': True + }, + { 'step': 'erase_pstore', 'priority': 0, 'interface': 'deploy', @@ -2096,6 +2103,32 @@ class TestGenericHardwareManager(base.IronicAgentTest): ]) @mock.patch.object(il_utils, 'execute', autospec=True) + @mock.patch.object(disk_utils, 'destroy_disk_metadata', autospec=True) + @mock.patch.object(hardware.GenericHardwareManager, + '_nvme_erase', autospec=True) + @mock.patch.object(hardware.GenericHardwareManager, + '_list_erasable_devices', autospec=True) + def test_erase_devices_express( + self, mock_list_erasable_devices, mock_nvme_erase, + mock_destroy_disk_metadata, mock_execute): + block_devices = [ + hardware.BlockDevice('/dev/sda', 'sata', 65535, False), 
+ hardware.BlockDevice('/dev/md0', 'raid-device', 32767, False), + hardware.BlockDevice('/dev/nvme0n1', 'nvme', 32767, False), + hardware.BlockDevice('/dev/nvme1n1', 'nvme', 32767, False) + ] + mock_list_erasable_devices.return_value = list(block_devices) + + self.hardware.erase_devices_express(self.node, []) + self.assertEqual([mock.call(self.hardware, block_devices[2]), + mock.call(self.hardware, block_devices[3])], + mock_nvme_erase.call_args_list) + self.assertEqual([mock.call('/dev/sda', self.node['uuid']), + mock.call('/dev/md0', self.node['uuid'])], + mock_destroy_disk_metadata.call_args_list) + mock_list_erasable_devices.assert_called_with(self.hardware) + + @mock.patch.object(il_utils, 'execute', autospec=True) @mock.patch.object(hardware.GenericHardwareManager, '_is_virtual_media_device', autospec=True) @mock.patch.object(hardware.GenericHardwareManager, @@ -2128,8 +2161,8 @@ class TestGenericHardwareManager(base.IronicAgentTest): mock.call('/dev/sda', self.node['uuid']), mock.call('/dev/md0', self.node['uuid'])], mock_metadata.call_args_list) - mock_list_devs.assert_called_once_with(self.hardware, - include_partitions=True) + mock_list_devs.assert_called_with(self.hardware, + include_partitions=True) self.assertEqual([mock.call(self.hardware, block_devices[0]), mock.call(self.hardware, block_devices[1]), mock.call(self.hardware, block_devices[4]), @@ -2176,8 +2209,8 @@ class TestGenericHardwareManager(base.IronicAgentTest): self.assertEqual([mock.call('/dev/sdb', self.node['uuid']), mock.call('/dev/sda', self.node['uuid'])], mock_metadata.call_args_list) - mock_list_devs.assert_called_once_with(self.hardware, - include_partitions=True) + mock_list_devs.assert_called_with(self.hardware, + include_partitions=True) self.assertEqual([mock.call(self.hardware, block_devices[1]), mock.call(self.hardware, block_devices[0])], mock__is_vmedia.call_args_list) diff --git a/releasenotes/notes/add-erase-devices-express-1df107c75f2b3627.yaml 
b/releasenotes/notes/add-erase-devices-express-1df107c75f2b3627.yaml new file mode 100644 index 00000000..790d1189 --- /dev/null +++ b/releasenotes/notes/add-erase-devices-express-1df107c75f2b3627.yaml @@ -0,0 +1,8 @@ +--- +features: + - | + Adds support for express cleaning mode where hardware-assisted, fast and + secure data erasure is performed on NVMe devices that support it, + while other devices fall back to erase_devices_metadata. + The goal of this feature is to enable express node cleaning in + environments with hybrid storage configuration (e.g. NVMe + HDD). |