author     Jacob Anders <janders@redhat.com>  2021-11-22 15:59:50 +1000
committer  Jacob Anders <janders@redhat.com>  2022-03-15 19:00:25 +1000
commit     c5f7f18bcb2e1876b0712c691996346620ffc920 (patch)
tree       8842a50d00f795bbff6f09241beeb8b4b394648d
parent     cf4644312095ff836c522569ba0a66666bf8857d (diff)
Improve efficiency of storage cleaning in mixed media envs
https://storyboard.openstack.org/#!/story/2008290 added support for
NVMe-native storage cleaning, greatly improving storage clean times on
NVMe-based nodes as well as reducing device wear. This is a follow-up
change which aims to make further improvements to cleaning efficiency
in mixed NVMe-HDD environments. This is achieved by combining
NVMe-native cleaning methods on NVMe devices with a traditional
metadata clean on non-NVMe devices.

Story: 2009264
Task: 43498
Change-Id: I445d8f4aaa6cd191d2e540032aed3148fdbff341
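In essence, the new clean step routes every erasable device to the fastest
safe erase available: an NVMe-native secure erase where the hardware
supports it, and a metadata wipe everywhere else, with the metadata wipe
doubling as the fallback when the secure erase fails. A minimal, runnable
sketch of that dispatch logic follows; the helper functions are
hypothetical stand-ins for IPA's _is_nvme/_nvme_erase and ironic-lib's
destroy_disk_metadata, not the real implementations:

    class BlockDeviceEraseError(Exception):
        pass

    def is_nvme(dev):
        # Stand-in: IPA inspects the block device; the name is enough here.
        return dev.startswith('/dev/nvme')

    def nvme_erase(dev):
        # Stand-in for the NVMe-native secure erase (`nvme format` under
        # the hood in IPA); pretend it always succeeds.
        return True

    def destroy_disk_metadata(dev, node_uuid):
        # Stand-in for ironic_lib.disk_utils.destroy_disk_metadata().
        print('metadata wipe of %s for node %s' % (dev, node_uuid))

    def express_erase(devices, node_uuid, nvme_secure_erase=True):
        errors = {}
        for dev in devices:
            secure_erase_error = None
            try:
                # Fast path: NVMe-native secure erase on supporting devices.
                if nvme_secure_erase and is_nvme(dev) and nvme_erase(dev):
                    continue
            except BlockDeviceEraseError as e:
                secure_erase_error = e  # fall through to the metadata wipe
            try:
                # Non-NVMe devices, and NVMe devices whose secure erase
                # failed, get the traditional metadata clean.
                destroy_disk_metadata(dev, node_uuid)
            except Exception as e:
                errors[dev] = (secure_erase_error, e)
        if errors:
            raise BlockDeviceEraseError(errors)

    express_erase(['/dev/nvme0n1', '/dev/sda'], 'fake-node-uuid')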
-rw-r--r--  ironic_python_agent/hardware.py                                    93
-rw-r--r--  ironic_python_agent/tests/unit/test_hardware.py                    41
-rw-r--r--  releasenotes/notes/add-erase-devices-express-1df107c75f2b3627.yaml  8
3 files changed, 126 insertions(+), 16 deletions(-)
diff --git a/ironic_python_agent/hardware.py b/ironic_python_agent/hardware.py
index ceafe31e..d02620c6 100644
--- a/ironic_python_agent/hardware.py
+++ b/ironic_python_agent/hardware.py
@@ -1347,34 +1347,45 @@ class GenericHardwareManager(HardwareManager):
             LOG.error(msg)
             raise errors.IncompatibleHardwareMethodError(msg)
 
-    def erase_devices_metadata(self, node, ports):
-        """Attempt to erase the disk devices metadata.
-
-        :param node: Ironic node object
-        :param ports: list of Ironic port objects
-        :raises BlockDeviceEraseError: when there's an error erasing the
-                 block device
-        """
+    def _list_erasable_devices(self):
         block_devices = self.list_block_devices(include_partitions=True)
         # NOTE(coreywright): Reverse sort by device name so a partition (eg
         # sda1) is processed before it disappears when its associated disk (eg
         # sda) has its partition table erased and the kernel notified.
         block_devices.sort(key=lambda dev: dev.name, reverse=True)
-        erase_errors = {}
+        erasable_devices = []
         for dev in block_devices:
             if self._is_virtual_media_device(dev):
-                LOG.info("Skipping metadata erase of virtual media device %s",
+                LOG.info("Skipping erasure of virtual media device %s",
                          dev.name)
                 continue
             if self._is_linux_raid_member(dev):
-                LOG.info("Skipping metadata erase of RAID member device %s",
+                LOG.info("Skipping erasure of RAID member device %s",
                          dev.name)
                 continue
             if self._is_read_only_device(dev):
-                LOG.info("Skipping metadata erase of read-only device %s",
+                LOG.info("Skipping erasure of read-only device %s",
                          dev.name)
                 continue
+            erasable_devices.append(dev)
+
+        return erasable_devices
+
+    def erase_devices_metadata(self, node, ports):
+        """Attempt to erase the disk devices metadata.
+
+        :param node: Ironic node object
+        :param ports: list of Ironic port objects
+        :raises BlockDeviceEraseError: when there's an error erasing the
+                 block device
+        """
+        block_devices = self.list_block_devices(include_partitions=True)
+        # NOTE(coreywright): Reverse sort by device name so a partition (eg
+        # sda1) is processed before it disappears when its associated disk (eg
+        # sda) has its partition table erased and the kernel notified.
+        block_devices.sort(key=lambda dev: dev.name, reverse=True)
+        erase_errors = {}
+        for dev in self._list_erasable_devices():
             try:
                 disk_utils.destroy_disk_metadata(dev.name, node['uuid'])
             except processutils.ProcessExecutionError as e:
@@ -1388,6 +1399,57 @@ class GenericHardwareManager(HardwareManager):
                                       for k, v in erase_errors.items()]))
             raise errors.BlockDeviceEraseError(excpt_msg)
 
+    def erase_devices_express(self, node, ports):
+        """Attempt to perform time-optimised disk erasure:
+
+        for NVMe devices, perform NVMe Secure Erase if supported. For other
+        devices, perform metadata erasure.
+
+        :param node: Ironic node object
+        :param ports: list of Ironic port objects
+        :raises BlockDeviceEraseError: when there's an error erasing the
+                 block device
+        """
+        erase_errors = {}
+        info = node.get('driver_internal_info', {})
+        erasable_devices = self._list_erasable_devices()
+        if not erasable_devices:
+            LOG.debug("No erasable devices have been found.")
+            return
+        for dev in erasable_devices:
+            secure_erase_error = None
+            try:
+                if self._is_nvme(dev):
+                    execute_nvme_erase = info.get(
+                        'agent_enable_nvme_secure_erase', True)
+                    if execute_nvme_erase and self._nvme_erase(dev):
+                        continue
+            except errors.BlockDeviceEraseError as e:
+                LOG.error('Failed to securely erase device "%(dev)s". '
+                          'Error: %(error)s, falling back to metadata '
+                          'clean', {'dev': dev.name, 'error': e})
+                secure_erase_error = e
+            try:
+                disk_utils.destroy_disk_metadata(dev.name, node['uuid'])
+            except processutils.ProcessExecutionError as e:
+                LOG.error('Failed to erase the metadata on device '
+                          '"%(dev)s". Error: %(error)s',
+                          {'dev': dev.name, 'error': e})
+                if secure_erase_error:
+                    erase_errors[dev.name] = (
+                        "Secure erase failed: %s. "
+                        "Fallback to metadata erase also failed: %s." %
+                        (secure_erase_error, e))
+                else:
+                    erase_errors[dev.name] = e
+
+        if erase_errors:
+            excpt_msg = ('Failed to conduct an express erase on '
+                         'the device(s): %s' % '\n'.join('"%s": %s' % item
+                                                         for item in
+                                                         erase_errors.items()))
+            raise errors.BlockDeviceEraseError(excpt_msg)
+
     def _find_pstore_mount_point(self):
         """Find the pstore mount point by scanning /proc/mounts.
@@ -1941,6 +2003,13 @@ class GenericHardwareManager(HardwareManager):
                 'abortable': True
             },
             {
+                'step': 'erase_devices_express',
+                'priority': 0,
+                'interface': 'deploy',
+                'reboot_requested': False,
+                'abortable': True
+            },
+            {
                 'step': 'erase_pstore',
                 'priority': 0,
                 'interface': 'deploy',
diff --git a/ironic_python_agent/tests/unit/test_hardware.py b/ironic_python_agent/tests/unit/test_hardware.py
index 7b19931b..e02c908f 100644
--- a/ironic_python_agent/tests/unit/test_hardware.py
+++ b/ironic_python_agent/tests/unit/test_hardware.py
@@ -130,6 +130,13 @@ class TestGenericHardwareManager(base.IronicAgentTest):
                 'abortable': True
             },
             {
+                'step': 'erase_devices_express',
+                'priority': 0,
+                'interface': 'deploy',
+                'reboot_requested': False,
+                'abortable': True
+            },
+            {
                 'step': 'erase_pstore',
                 'priority': 0,
                 'interface': 'deploy',
@@ -2096,6 +2103,32 @@ class TestGenericHardwareManager(base.IronicAgentTest):
         ])
 
     @mock.patch.object(il_utils, 'execute', autospec=True)
+    @mock.patch.object(disk_utils, 'destroy_disk_metadata', autospec=True)
+    @mock.patch.object(hardware.GenericHardwareManager,
+                       '_nvme_erase', autospec=True)
+    @mock.patch.object(hardware.GenericHardwareManager,
+                       '_list_erasable_devices', autospec=True)
+    def test_erase_devices_express(
+            self, mock_list_erasable_devices, mock_nvme_erase,
+            mock_destroy_disk_metadata, mock_execute):
+        block_devices = [
+            hardware.BlockDevice('/dev/sda', 'sata', 65535, False),
+            hardware.BlockDevice('/dev/md0', 'raid-device', 32767, False),
+            hardware.BlockDevice('/dev/nvme0n1', 'nvme', 32767, False),
+            hardware.BlockDevice('/dev/nvme1n1', 'nvme', 32767, False)
+        ]
+        mock_list_erasable_devices.return_value = list(block_devices)
+
+        self.hardware.erase_devices_express(self.node, [])
+        self.assertEqual([mock.call(self.hardware, block_devices[2]),
+                          mock.call(self.hardware, block_devices[3])],
+                         mock_nvme_erase.call_args_list)
+        self.assertEqual([mock.call('/dev/sda', self.node['uuid']),
+                          mock.call('/dev/md0', self.node['uuid'])],
+                         mock_destroy_disk_metadata.call_args_list)
+        mock_list_erasable_devices.assert_called_with(self.hardware)
+
+    @mock.patch.object(il_utils, 'execute', autospec=True)
     @mock.patch.object(hardware.GenericHardwareManager,
                        '_is_virtual_media_device', autospec=True)
     @mock.patch.object(hardware.GenericHardwareManager,
@@ -2128,8 +2161,8 @@ class TestGenericHardwareManager(base.IronicAgentTest):
                           mock.call('/dev/sda', self.node['uuid']),
                           mock.call('/dev/md0', self.node['uuid'])],
                          mock_metadata.call_args_list)
-        mock_list_devs.assert_called_once_with(self.hardware,
-                                               include_partitions=True)
+        mock_list_devs.assert_called_with(self.hardware,
+                                          include_partitions=True)
         self.assertEqual([mock.call(self.hardware, block_devices[0]),
                           mock.call(self.hardware, block_devices[1]),
                           mock.call(self.hardware, block_devices[4]),
@@ -2176,8 +2209,8 @@ class TestGenericHardwareManager(base.IronicAgentTest):
         self.assertEqual([mock.call('/dev/sdb', self.node['uuid']),
                           mock.call('/dev/sda', self.node['uuid'])],
                          mock_metadata.call_args_list)
-        mock_list_devs.assert_called_once_with(self.hardware,
-                                               include_partitions=True)
+        mock_list_devs.assert_called_with(self.hardware,
+                                          include_partitions=True)
         self.assertEqual([mock.call(self.hardware, block_devices[1]),
                           mock.call(self.hardware, block_devices[0])],
                          mock__is_vmedia.call_args_list)
diff --git a/releasenotes/notes/add-erase-devices-express-1df107c75f2b3627.yaml b/releasenotes/notes/add-erase-devices-express-1df107c75f2b3627.yaml
new file mode 100644
index 00000000..790d1189
--- /dev/null
+++ b/releasenotes/notes/add-erase-devices-express-1df107c75f2b3627.yaml
@@ -0,0 +1,8 @@
+---
+features:
+  - |
+    Adds support for an express cleaning mode in which hardware-assisted,
+    fast and secure data erasure is performed on NVMe devices that support
+    it, while other devices fall back to ``erase_devices_metadata``.
+    The goal of this feature is to enable express node cleaning in
+    environments with hybrid storage configurations (e.g. NVMe + HDD).
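
The new step is registered at priority 0, so it does not run during
automated cleaning by default; operators would invoke it through manual
cleaning. A hedged sketch of how that might look with openstacksdk,
assuming its baremetal proxy accepts a clean_steps argument on
set_node_provision_state (signatures may vary by SDK release; the cloud
profile and node name are placeholders):

    import openstack

    # Connect using a clouds.yaml profile; 'mycloud' is a placeholder.
    conn = openstack.connect(cloud='mycloud')

    # Manual cleaning: run only the express erase step added above.
    conn.baremetal.set_node_provision_state(
        'node-0', 'clean',
        clean_steps=[{'interface': 'deploy',
                      'step': 'erase_devices_express'}])

As the patch reads agent_enable_nvme_secure_erase from the node's
driver_internal_info (defaulting to True), setting it to False makes the
step skip the NVMe-native path and metadata-clean every device instead.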