diff options
author | Arne Wiebalck <Arne.Wiebalck@cern.ch> | 2021-04-26 12:00:44 +0200 |
---|---|---|
committer | Arne Wiebalck <Arne.Wiebalck@cern.ch> | 2021-05-01 10:36:20 +0200 |
commit | 6702fcaa43fda3384415f8e6c2848b8abc6ff15e (patch) | |
tree | 067ddc65dfdb0f6b2a0c9e5c1f176bc2c42efba6 | |
parent | 9edb13d891de658124d11099e2287163a76f1050 (diff) | |
download | ironic-python-agent-6702fcaa43fda3384415f8e6c2848b8abc6ff15e.tar.gz |
Burn-in: Add CPU step
Add a clean step for CPU burn-in via stress-ng. Get basic
run parameters from the node's driver_info.
Story: #2007523
Task: #42382
Change-Id: I14fd4164991fb94263757244f716b6bfe8edf875
-rw-r--r-- | doc/source/admin/hardware_managers.rst | 3 | ||||
-rw-r--r-- | ironic_python_agent/burnin.py | 48 | ||||
-rw-r--r-- | ironic_python_agent/hardware.py | 18 | ||||
-rw-r--r-- | ironic_python_agent/tests/unit/test_burnin.py | 56 | ||||
-rw-r--r-- | ironic_python_agent/tests/unit/test_hardware.py | 7 | ||||
-rw-r--r-- | releasenotes/notes/add_burnin_cpu-9acbb36048246a6b.yaml | 7 |
6 files changed, 138 insertions, 1 deletion
diff --git a/doc/source/admin/hardware_managers.rst b/doc/source/admin/hardware_managers.rst index 579e6787..51a6e3e3 100644 --- a/doc/source/admin/hardware_managers.rst +++ b/doc/source/admin/hardware_managers.rst @@ -74,6 +74,9 @@ Known limitations: Clean steps ----------- +``deploy.burnin_cpu`` + Stress-test the CPUs of a node via stress-ng for a configurable + amount of time. Disabled by default. ``deploy.erase_devices`` Securely erases all information from all recognized disk devices. Relatively fast when secure ATA erase is available, otherwise can take diff --git a/ironic_python_agent/burnin.py b/ironic_python_agent/burnin.py new file mode 100644 index 00000000..5a9275e2 --- /dev/null +++ b/ironic_python_agent/burnin.py @@ -0,0 +1,48 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ironic_lib import utils +from oslo_concurrency import processutils +from oslo_log import log + +from ironic_python_agent import errors + +LOG = log.getLogger(__name__) + + +def stress_ng_cpu(node): + """Burn-in the CPU with stress-ng + + Run stress-ng on a configurable number of CPUs for + a configurable amount of time. Without config use + all CPUs and stress them for 24 hours. + + :param node: Ironic node object + :raises: CommandExecutionError if the execution of stress-ng fails. 
+ """ + info = node.get('driver_info', {}) + cpu = info.get('agent_burnin_cpu_cpu', 0) + timeout = info.get('agent_burnin_cpu_timeout', 86400) + + args = ('stress-ng', '--cpu', cpu, '--timeout', timeout, + '--metrics-brief') + LOG.debug('Burn-in stress_ng_cpu command: %s', args) + + try: + _, err = utils.execute(*args) + # stress-ng reports on stderr only + LOG.info(err) + except (processutils.ProcessExecutionError, OSError) as e: + error_msg = ("stress-ng (cpu) failed with error %(err)s", + {'err': e}) + LOG.error(error_msg) + raise errors.CommandExecutionError(error_msg) diff --git a/ironic_python_agent/hardware.py b/ironic_python_agent/hardware.py index abda1f1a..97c45449 100644 --- a/ironic_python_agent/hardware.py +++ b/ironic_python_agent/hardware.py @@ -38,6 +38,7 @@ import pyudev import stevedore import yaml +from ironic_python_agent import burnin from ironic_python_agent import encoding from ironic_python_agent import errors from ironic_python_agent.extensions import base as ext_base @@ -1393,6 +1394,14 @@ class GenericHardwareManager(HardwareManager): except OSError: os.remove(filepath) + def burnin_cpu(self, node, ports): + """Burn-in the CPU + + :param node: Ironic node object + :param ports: list of Ironic port objects + """ + burnin.stress_ng_cpu(node) + def _shred_block_device(self, node, block_device): """Erase a block device using shred. 
@@ -1865,7 +1874,14 @@ class GenericHardwareManager(HardwareManager): 'interface': 'raid', 'reboot_requested': False, 'abortable': True - } + }, + { + 'step': 'burnin_cpu', + 'priority': 0, + 'interface': 'deploy', + 'reboot_requested': False, + 'abortable': True + }, ] def get_deploy_steps(self, node, ports): diff --git a/ironic_python_agent/tests/unit/test_burnin.py b/ironic_python_agent/tests/unit/test_burnin.py new file mode 100644 index 00000000..d8339b6c --- /dev/null +++ b/ironic_python_agent/tests/unit/test_burnin.py @@ -0,0 +1,56 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +from unittest import mock + +from ironic_lib import utils +from oslo_concurrency import processutils + +from ironic_python_agent import burnin +from ironic_python_agent import errors +from ironic_python_agent.tests.unit import base + + +@mock.patch.object(utils, 'execute', autospec=True) +class TestBurnin(base.IronicAgentTest): + + def test_stress_ng_cpu_default(self, mock_execute): + + node = {'driver_info': {}} + mock_execute.return_value = (['out', 'err']) + + burnin.stress_ng_cpu(node) + + mock_execute.assert_called_once_with( + 'stress-ng', '--cpu', 0, '--timeout', 86400, '--metrics-brief') + + def test_stress_ng_cpu_non_default(self, mock_execute): + + node = {'driver_info': {'agent_burnin_cpu_cpu': 3, + 'agent_burnin_cpu_timeout': 2911}} + mock_execute.return_value = (['out', 'err']) + + burnin.stress_ng_cpu(node) + + mock_execute.assert_called_once_with( + 'stress-ng', '--cpu', 3, '--timeout', 2911, '--metrics-brief') + + def test_stress_ng_cpu_no_stress_ng(self, mock_execute): + + node = {'driver_info': {}} + mock_execute.side_effect = (['out', 'err'], + processutils.ProcessExecutionError()) + + burnin.stress_ng_cpu(node) + + self.assertRaises(errors.CommandExecutionError, + burnin.stress_ng_cpu, node) diff --git a/ironic_python_agent/tests/unit/test_hardware.py b/ironic_python_agent/tests/unit/test_hardware.py index 4135985e..019a14c0 100644 --- a/ironic_python_agent/tests/unit/test_hardware.py +++ b/ironic_python_agent/tests/unit/test_hardware.py @@ -149,6 +149,13 @@ class TestGenericHardwareManager(base.IronicAgentTest): 'interface': 'raid', 'reboot_requested': False, 'abortable': True + }, + { + 'step': 'burnin_cpu', + 'priority': 0, + 'interface': 'deploy', + 'reboot_requested': False, + 'abortable': True } ] clean_steps = self.hardware.get_clean_steps(self.node, []) diff --git a/releasenotes/notes/add_burnin_cpu-9acbb36048246a6b.yaml b/releasenotes/notes/add_burnin_cpu-9acbb36048246a6b.yaml new file mode 100644 index 00000000..9403a12c --- 
/dev/null +++ b/releasenotes/notes/add_burnin_cpu-9acbb36048246a6b.yaml @@ -0,0 +1,7 @@ +--- +features: + - | + Adds a burn-in cleaning step 'burnin_cpu' to stress test CPUs for a + configurable amount of time with stress-ng. To use this step, + stress-ng needs to be installed on the RAM disk. + |