summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Arne Wiebalck <Arne.Wiebalck@cern.ch> 2021-04-26 12:00:44 +0200
committer Arne Wiebalck <Arne.Wiebalck@cern.ch> 2021-05-01 10:36:20 +0200
commit 6702fcaa43fda3384415f8e6c2848b8abc6ff15e (patch)
tree 067ddc65dfdb0f6b2a0c9e5c1f176bc2c42efba6
parent 9edb13d891de658124d11099e2287163a76f1050 (diff)
download ironic-python-agent-6702fcaa43fda3384415f8e6c2848b8abc6ff15e.tar.gz
Burn-in: Add CPU step
Add a clean step for CPU burn-in via stress-ng. Get basic run parameters from the node's driver_info. Story: #2007523 Task: #42382 Change-Id: I14fd4164991fb94263757244f716b6bfe8edf875
-rw-r--r-- doc/source/admin/hardware_managers.rst 3
-rw-r--r-- ironic_python_agent/burnin.py 48
-rw-r--r-- ironic_python_agent/hardware.py 18
-rw-r--r-- ironic_python_agent/tests/unit/test_burnin.py 56
-rw-r--r-- ironic_python_agent/tests/unit/test_hardware.py 7
-rw-r--r-- releasenotes/notes/add_burnin_cpu-9acbb36048246a6b.yaml 7
6 files changed, 138 insertions, 1 deletions
diff --git a/doc/source/admin/hardware_managers.rst b/doc/source/admin/hardware_managers.rst
index 579e6787..51a6e3e3 100644
--- a/doc/source/admin/hardware_managers.rst
+++ b/doc/source/admin/hardware_managers.rst
@@ -74,6 +74,9 @@ Known limitations:
Clean steps
-----------
+``deploy.burnin_cpu``
+ Stress-test the CPUs of a node via stress-ng for a configurable
+ amount of time. Disabled by default.
``deploy.erase_devices``
Securely erases all information from all recognized disk devices.
Relatively fast when secure ATA erase is available, otherwise can take
diff --git a/ironic_python_agent/burnin.py b/ironic_python_agent/burnin.py
new file mode 100644
index 00000000..5a9275e2
--- /dev/null
+++ b/ironic_python_agent/burnin.py
@@ -0,0 +1,48 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ironic_lib import utils
+from oslo_concurrency import processutils
+from oslo_log import log
+
+from ironic_python_agent import errors
+
+LOG = log.getLogger(__name__)
+
+
+def stress_ng_cpu(node):
+    """Burn-in the CPU with stress-ng
+
+    Run stress-ng on 'agent_burnin_cpu_cpu' CPUs for
+    'agent_burnin_cpu_timeout' seconds (both from driver_info). Without
+    config use all CPUs and stress them for 24 hours.
+
+    :param node: Ironic node object
+    :raises: CommandExecutionError if the execution of stress-ng fails.
+    """
+    info = node.get('driver_info', {})
+    cpu = info.get('agent_burnin_cpu_cpu', 0)
+    timeout = info.get('agent_burnin_cpu_timeout', 86400)
+
+    args = ('stress-ng', '--cpu', cpu, '--timeout', timeout,
+            '--metrics-brief')
+    LOG.debug('Burn-in stress_ng_cpu command: %s', args)
+
+    try:
+        _, err = utils.execute(*args)
+        # stress-ng reports on stderr only
+        LOG.info(err)
+    except (processutils.ProcessExecutionError, OSError) as e:
+        # Interpolate now: log and exception need a string, not a tuple
+        error_msg = "stress-ng (cpu) failed with error %(err)s" % {'err': e}
+        LOG.error(error_msg)
+        raise errors.CommandExecutionError(error_msg)
diff --git a/ironic_python_agent/hardware.py b/ironic_python_agent/hardware.py
index abda1f1a..97c45449 100644
--- a/ironic_python_agent/hardware.py
+++ b/ironic_python_agent/hardware.py
@@ -38,6 +38,7 @@ import pyudev
import stevedore
import yaml
+from ironic_python_agent import burnin
from ironic_python_agent import encoding
from ironic_python_agent import errors
from ironic_python_agent.extensions import base as ext_base
@@ -1393,6 +1394,14 @@ class GenericHardwareManager(HardwareManager):
except OSError:
os.remove(filepath)
+ def burnin_cpu(self, node, ports):
+ """Burn-in the CPU by running burnin.stress_ng_cpu for the node
+
+ :param node: Ironic node object, handed on to burnin.stress_ng_cpu
+ :param ports: list of Ironic port objects (not used by this step)
+ """
+ burnin.stress_ng_cpu(node)
+
def _shred_block_device(self, node, block_device):
"""Erase a block device using shred.
@@ -1865,7 +1874,14 @@ class GenericHardwareManager(HardwareManager):
'interface': 'raid',
'reboot_requested': False,
'abortable': True
- }
+ },
+ {
+ 'step': 'burnin_cpu',
+ 'priority': 0,
+ 'interface': 'deploy',
+ 'reboot_requested': False,
+ 'abortable': True
+ },
]
def get_deploy_steps(self, node, ports):
diff --git a/ironic_python_agent/tests/unit/test_burnin.py b/ironic_python_agent/tests/unit/test_burnin.py
new file mode 100644
index 00000000..d8339b6c
--- /dev/null
+++ b/ironic_python_agent/tests/unit/test_burnin.py
@@ -0,0 +1,56 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from unittest import mock
+
+from ironic_lib import utils
+from oslo_concurrency import processutils
+
+from ironic_python_agent import burnin
+from ironic_python_agent import errors
+from ironic_python_agent.tests.unit import base
+
+
+@mock.patch.object(utils, 'execute', autospec=True)
+class TestBurnin(base.IronicAgentTest):
+
+    def test_stress_ng_cpu_default(self, mock_execute):
+        """Empty driver_info: the defaults (0 CPUs, 86400 s) are used."""
+        mock_execute.return_value = ['out', 'err']
+        bare_node = {'driver_info': {}}
+
+        burnin.stress_ng_cpu(bare_node)
+
+        expected = ('stress-ng', '--cpu', 0, '--timeout', 86400)
+        mock_execute.assert_called_once_with(*expected, '--metrics-brief')
+
+    def test_stress_ng_cpu_non_default(self, mock_execute):
+        """Values set in driver_info replace the defaults."""
+        mock_execute.return_value = ['out', 'err']
+        burnin_opts = {'agent_burnin_cpu_cpu': 3,
+                       'agent_burnin_cpu_timeout': 2911}
+
+        burnin.stress_ng_cpu({'driver_info': burnin_opts})
+
+        expected = ('stress-ng', '--cpu', 3, '--timeout', 2911)
+        mock_execute.assert_called_once_with(*expected, '--metrics-brief')
+
+    def test_stress_ng_cpu_no_stress_ng(self, mock_execute):
+        """Second execute call fails -> CommandExecutionError is raised."""
+        mock_execute.side_effect = [['out', 'err'],
+                                    processutils.ProcessExecutionError()]
+        node = {'driver_info': {}}
+
+        burnin.stress_ng_cpu(node)
+
+        with self.assertRaises(errors.CommandExecutionError):
+            burnin.stress_ng_cpu(node)
diff --git a/ironic_python_agent/tests/unit/test_hardware.py b/ironic_python_agent/tests/unit/test_hardware.py
index 4135985e..019a14c0 100644
--- a/ironic_python_agent/tests/unit/test_hardware.py
+++ b/ironic_python_agent/tests/unit/test_hardware.py
@@ -149,6 +149,13 @@ class TestGenericHardwareManager(base.IronicAgentTest):
'interface': 'raid',
'reboot_requested': False,
'abortable': True
+ },
+ {
+ 'step': 'burnin_cpu',
+ 'priority': 0,
+ 'interface': 'deploy',
+ 'reboot_requested': False,
+ 'abortable': True
}
]
clean_steps = self.hardware.get_clean_steps(self.node, [])
diff --git a/releasenotes/notes/add_burnin_cpu-9acbb36048246a6b.yaml b/releasenotes/notes/add_burnin_cpu-9acbb36048246a6b.yaml
new file mode 100644
index 00000000..9403a12c
--- /dev/null
+++ b/releasenotes/notes/add_burnin_cpu-9acbb36048246a6b.yaml
@@ -0,0 +1,7 @@
+---
+features:
+ - |
+ Adds a burn-in cleaning step 'burnin_cpu' to stress test CPUs for a
+ configurable amount of time with stress-ng. To use this step,
+ stress-ng needs to be installed on the RAM disk.
+