summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArne Wiebalck <Arne.Wiebalck@cern.ch>2021-04-30 10:44:53 +0200
committerArne Wiebalck <Arne.Wiebalck@cern.ch>2021-05-01 10:36:58 +0200
commit5c222560f07fff9478b063d401f4415e1139f496 (patch)
tree2482232ebea52b138b1feef50255298a8087ceb5
parent6702fcaa43fda3384415f8e6c2848b8abc6ff15e (diff)
downloadironic-python-agent-5c222560f07fff9478b063d401f4415e1139f496.tar.gz
Burn-in: Add memory step
Add a clean step for memory burn-in via stress-ng. Get basic run parameters from the node's driver_info. Story: #2007523 Task: #42383 Change-Id: I33a83968c9f87cf795ec7ec922bce98b52c5181c
-rw-r--r--doc/source/admin/hardware_managers.rst3
-rw-r--r--ironic_python_agent/burnin.py32
-rw-r--r--ironic_python_agent/hardware.py15
-rw-r--r--ironic_python_agent/tests/unit/test_burnin.py35
-rw-r--r--ironic_python_agent/tests/unit/test_hardware.py7
-rw-r--r--releasenotes/notes/add_burnin_memory-4099ca42bd3b99db.yaml7
6 files changed, 99 insertions, 0 deletions
diff --git a/doc/source/admin/hardware_managers.rst b/doc/source/admin/hardware_managers.rst
index 51a6e3e3..2144cf9f 100644
--- a/doc/source/admin/hardware_managers.rst
+++ b/doc/source/admin/hardware_managers.rst
@@ -77,6 +77,9 @@ Clean steps
``deploy.burnin_cpu``
Stress-test the CPUs of a node via stress-ng for a configurable
amount of time. Disabled by default.
+``deploy.burnin_memory``
+ Stress-test the memory of a node via stress-ng for a configurable
+ amount of time. Disabled by default.
``deploy.erase_devices``
Securely erases all information from all recognized disk devices.
Relatively fast when secure ATA erase is available, otherwise can take
diff --git a/ironic_python_agent/burnin.py b/ironic_python_agent/burnin.py
index 5a9275e2..bd654547 100644
--- a/ironic_python_agent/burnin.py
+++ b/ironic_python_agent/burnin.py
@@ -46,3 +46,35 @@ def stress_ng_cpu(node):
{'err': e})
LOG.error(error_msg)
raise errors.CommandExecutionError(error_msg)
+
+
+def stress_ng_vm(node):
+    """Burn-in the memory with the vm stressor in stress-ng
+
+    Run stress-ng with a configurable number of workers on
+    a configurable amount of the available memory for
+    a configurable amount of time. Without config use
+    as many workers as CPUs, 98% of the memory and stress
+    it for 24 hours.
+
+    The settings are read from the node's ``driver_info``:
+    ``agent_burnin_vm_vm`` (number of workers, default 0),
+    ``agent_burnin_vm_vm-bytes`` (share of memory, default '98%') and
+    ``agent_burnin_vm_timeout`` (run time, default 86400).
+
+    :param node: Ironic node object
+    :raises: CommandExecutionError if the execution of stress-ng fails.
+    """
+    info = node.get('driver_info', {})
+    vm = info.get('agent_burnin_vm_vm', 0)
+    vm_bytes = info.get('agent_burnin_vm_vm-bytes', '98%')
+    timeout = info.get('agent_burnin_vm_timeout', 86400)
+
+    args = ('stress-ng', '--vm', vm, '--vm-bytes', vm_bytes,
+            '--timeout', timeout, '--metrics-brief')
+    LOG.debug('Burn-in stress_ng_vm command: %s', args)
+
+    try:
+        _, err = utils.execute(*args)
+        # stress-ng reports on stderr only
+        LOG.info(err)
+    except (processutils.ProcessExecutionError, OSError) as e:
+        # Interpolate the error into the message. Without the % operator
+        # error_msg is a (format, mapping) tuple rather than a string, and
+        # that tuple is what gets logged and raised.
+        error_msg = ("stress-ng (vm) failed with error %(err)s" %
+                     {'err': e})
+        LOG.error(error_msg)
+        raise errors.CommandExecutionError(error_msg)
diff --git a/ironic_python_agent/hardware.py b/ironic_python_agent/hardware.py
index 97c45449..0180adae 100644
--- a/ironic_python_agent/hardware.py
+++ b/ironic_python_agent/hardware.py
@@ -1402,6 +1402,14 @@ class GenericHardwareManager(HardwareManager):
"""
burnin.stress_ng_cpu(node)
+ def burnin_memory(self, node, ports):
+ """Burn-in the memory
+
+ :param node: Ironic node object
+ :param ports: list of Ironic port objects
+ """
+ # Delegates to the stress-ng vm stressor in the burnin module; only
+ # the node is forwarded, ports is not used by this step.
+ burnin.stress_ng_vm(node)
+
def _shred_block_device(self, node, block_device):
"""Erase a block device using shred.
@@ -1882,6 +1890,13 @@ class GenericHardwareManager(HardwareManager):
'reboot_requested': False,
'abortable': True
},
+ {
+ 'step': 'burnin_memory',
+ 'priority': 0,
+ 'interface': 'deploy',
+ 'reboot_requested': False,
+ 'abortable': True
+ },
]
def get_deploy_steps(self, node, ports):
diff --git a/ironic_python_agent/tests/unit/test_burnin.py b/ironic_python_agent/tests/unit/test_burnin.py
index d8339b6c..7f411b9f 100644
--- a/ironic_python_agent/tests/unit/test_burnin.py
+++ b/ironic_python_agent/tests/unit/test_burnin.py
@@ -54,3 +54,38 @@ class TestBurnin(base.IronicAgentTest):
self.assertRaises(errors.CommandExecutionError,
burnin.stress_ng_cpu, node)
+
+ def test_stress_ng_vm_default(self, mock_execute):
+
+ # Empty driver_info: stress_ng_vm has to fall back to its defaults.
+ # NOTE(review): mock_execute presumably patches utils.execute; the
+ # decorator is not visible in this hunk - confirm.
+ node = {'driver_info': {}}
+ # stress_ng_vm unpacks the result of the execute call into two values.
+ mock_execute.return_value = (['out', 'err'])
+
+ burnin.stress_ng_vm(node)
+
+ # Expected defaults: 0 workers, 98% of the memory, timeout of 86400.
+ mock_execute.assert_called_once_with(
+ 'stress-ng', '--vm', 0, '--vm-bytes', '98%',
+ '--timeout', 86400, '--metrics-brief')
+
+ def test_stress_ng_vm_non_default(self, mock_execute):
+
+ # All three settings are supplied through driver_info and must be
+ # passed to stress-ng unchanged.
+ node = {'driver_info': {'agent_burnin_vm_vm': 2,
+ 'agent_burnin_vm_vm-bytes': '25%',
+ 'agent_burnin_vm_timeout': 120}}
+ # stress_ng_vm unpacks the result of the execute call into two values.
+ mock_execute.return_value = (['out', 'err'])
+
+ burnin.stress_ng_vm(node)
+
+ mock_execute.assert_called_once_with(
+ 'stress-ng', '--vm', 2, '--vm-bytes', '25%',
+ '--timeout', 120, '--metrics-brief')
+
+ def test_stress_ng_vm_no_stress_ng(self, mock_execute):
+
+ node = {'driver_info': {}}
+ # Two consecutive outcomes for the mocked call: the first call returns
+ # normally, the second one raises ProcessExecutionError.
+ mock_execute.side_effect = (['out', 'err'],
+ processutils.ProcessExecutionError())
+
+ # Consumes the first (successful) side_effect entry.
+ burnin.stress_ng_vm(node)
+
+ # The second call hits the exception, which stress_ng_vm has to
+ # translate into CommandExecutionError.
+ self.assertRaises(errors.CommandExecutionError,
+ burnin.stress_ng_vm, node)
diff --git a/ironic_python_agent/tests/unit/test_hardware.py b/ironic_python_agent/tests/unit/test_hardware.py
index 019a14c0..5884344c 100644
--- a/ironic_python_agent/tests/unit/test_hardware.py
+++ b/ironic_python_agent/tests/unit/test_hardware.py
@@ -156,6 +156,13 @@ class TestGenericHardwareManager(base.IronicAgentTest):
'interface': 'deploy',
'reboot_requested': False,
'abortable': True
+ },
+ {
+ 'step': 'burnin_memory',
+ 'priority': 0,
+ 'interface': 'deploy',
+ 'reboot_requested': False,
+ 'abortable': True
}
]
clean_steps = self.hardware.get_clean_steps(self.node, [])
diff --git a/releasenotes/notes/add_burnin_memory-4099ca42bd3b99db.yaml b/releasenotes/notes/add_burnin_memory-4099ca42bd3b99db.yaml
new file mode 100644
index 00000000..8aeb854e
--- /dev/null
+++ b/releasenotes/notes/add_burnin_memory-4099ca42bd3b99db.yaml
@@ -0,0 +1,7 @@
+---
+features:
+ - |
+ Adds a burn-in cleaning step 'burnin_memory' to stress test memory for a
+ configurable amount of time with stress-ng. To use this step, stress-ng
+ needs to be installed on the RAM disk.
+