summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Arne Wiebalck <Arne.Wiebalck@cern.ch> 2021-12-01 12:22:43 +0100
committer Arne Wiebalck <Arne.Wiebalck@cern.ch> 2021-12-08 17:47:19 +0100
commit e751218059f86d788554883ef8a5db99efce44f4 (patch)
tree 5c76588383b961f777f008cff304cf1c1aff7f48
parent 87a42cc887a2c18855befeb9411ded759526de95 (diff)
download ironic-python-agent-e751218059f86d788554883ef8a5db99efce44f4.tar.gz
Burn-in: Add options for named log files
In order to ease logging of the various burn-in steps, this patch proposes options to define the output files for all burn-in steps: {'agent_burnin_cpu', 'agent_burnin_vm', 'agent_burnin_fio_network', 'agent_burnin_fio_disk'}_outputfile via a node's driver_info. Story: #2007523 Task: #44102 Change-Id: I327cae5949d38e738d3c535487b3795d00ad8f1e
-rw-r--r-- ironic_python_agent/burnin.py 40
-rw-r--r-- ironic_python_agent/tests/unit/test_burnin.py 91
-rw-r--r-- releasenotes/notes/add-named-logfiles-to-burnin-4388309bf7442d53.yaml 7
3 files changed, 117 insertions, 21 deletions
diff --git a/ironic_python_agent/burnin.py b/ironic_python_agent/burnin.py
index c97eea96..c081869c 100644
--- a/ironic_python_agent/burnin.py
+++ b/ironic_python_agent/burnin.py
@@ -38,9 +38,13 @@ def stress_ng_cpu(node):
info = node.get('driver_info', {})
cpu = info.get('agent_burnin_cpu_cpu', 0)
timeout = info.get('agent_burnin_cpu_timeout', 86400)
+ outputfile = info.get('agent_burnin_cpu_outputfile', None)
args = ('stress-ng', '--cpu', cpu, '--timeout', timeout,
'--metrics-brief')
+ if outputfile:
+ args += ('--log-file', outputfile,)
+
LOG.debug('Burn-in stress_ng_cpu command: %s', args)
try:
@@ -69,9 +73,13 @@ def stress_ng_vm(node):
vm = info.get('agent_burnin_vm_vm', 0)
vm_bytes = info.get('agent_burnin_vm_vm-bytes', '98%')
timeout = info.get('agent_burnin_vm_timeout', 86400)
+ outputfile = info.get('agent_burnin_vm_outputfile', None)
args = ('stress-ng', '--vm', vm, '--vm-bytes', vm_bytes,
'--timeout', timeout, '--metrics-brief')
+ if outputfile:
+ args += ('--log-file', outputfile,)
+
LOG.debug('Burn-in stress_ng_vm command: %s', args)
try:
@@ -97,11 +105,14 @@ def fio_disk(node):
# 4 iterations, same as badblock's default
loops = info.get('agent_burnin_fio_disk_loops', 4)
runtime = info.get('agent_burnin_fio_disk_runtime', 0)
+ outputfile = info.get('agent_burnin_fio_disk_outputfile', None)
args = ['fio', '--rw', 'readwrite', '--bs', '4k', '--direct', 1,
'--ioengine', 'libaio', '--iodepth', '32', '--verify',
'crc32c', '--verify_dump', 1, '--continue_on_error', 'verify',
'--loops', loops, '--runtime', runtime, '--time_based']
+ if outputfile:
+ args.extend(['--output-format', 'json', '--output', outputfile])
devices = hardware.list_all_block_devices()
for device in devices:
@@ -119,7 +130,7 @@ def fio_disk(node):
raise errors.CommandExecutionError(error_msg)
-def _do_fio_network(writer, runtime, partner):
+def _do_fio_network(writer, runtime, partner, outputfile):
args = ['fio', '--ioengine', 'net', '--port', '9000', '--fill_device', 1,
'--group_reporting', '--gtod_reduce', 1, '--numjobs', 16]
@@ -129,6 +140,8 @@ def _do_fio_network(writer, runtime, partner):
else:
xargs = ['--name', 'reader', '--rw', 'read', '--hostname', partner]
args.extend(xargs)
+ if outputfile:
+ args.extend(['--output-format', 'json', '--output', outputfile])
while True:
LOG.info('Burn-in fio network command: %s', ' '.join(map(str, args)))
@@ -137,13 +150,17 @@ def _do_fio_network(writer, runtime, partner):
# fio reports on stdout
LOG.info(out)
break
- except (processutils.ProcessExecutionError, OSError) as e:
+ except processutils.ProcessExecutionError as e:
error_msg = "fio (network) failed with error %s" % e
LOG.error(error_msg)
- # while the writer blocks in fio, the reader fails with
+ if writer:
+ raise errors.CommandExecutionError(error_msg)
+ # While the writer blocks in fio, the reader fails with
# 'Connection {refused, timeout}' errors if the partner
- # is not ready, so we need to wait explicitly
- if not writer and 'Connection' in str(e):
+ # is not ready, so we need to wait explicitly. Using the
+ # exit code accounts for both, logging to stderr as well
+ # as to a file.
+ if e.exit_code == 16:
LOG.info("fio (network): reader retrying in %s seconds ...",
NETWORK_READER_CYCLE)
time.sleep(NETWORK_READER_CYCLE)
@@ -171,6 +188,7 @@ def fio_network(node):
info = node.get('driver_info', {})
runtime = info.get('agent_burnin_fio_network_runtime', 21600)
+ outputfile = info.get('agent_burnin_fio_network_outputfile', None)
# get our role and identify our partner
config = info.get('agent_burnin_fio_network_config')
@@ -190,6 +208,14 @@ def fio_network(node):
error_msg = ("fio (network) failed to find partner")
raise errors.CleaningError(error_msg)
- _do_fio_network(role == 'writer', runtime, partner)
+ logfilename = None
+ if outputfile:
+ logfilename = outputfile + '.' + role
+ _do_fio_network(role == 'writer', runtime, partner, logfilename)
+
LOG.debug("fio (network): first direction done, swapping roles ...")
- _do_fio_network(not role == 'writer', runtime, partner)
+
+ if outputfile:
+ irole = "reader" if (role == "writer") else "writer"
+ logfilename = outputfile + '.' + irole
+ _do_fio_network(not role == 'writer', runtime, partner, logfilename)
diff --git a/ironic_python_agent/tests/unit/test_burnin.py b/ironic_python_agent/tests/unit/test_burnin.py
index 2258352e..12127576 100644
--- a/ironic_python_agent/tests/unit/test_burnin.py
+++ b/ironic_python_agent/tests/unit/test_burnin.py
@@ -36,14 +36,17 @@ class TestBurnin(base.IronicAgentTest):
def test_stress_ng_cpu_non_default(self, mock_execute):
- node = {'driver_info': {'agent_burnin_cpu_cpu': 3,
- 'agent_burnin_cpu_timeout': 2911}}
+ node = {'driver_info': {
+ 'agent_burnin_cpu_cpu': 3,
+ 'agent_burnin_cpu_timeout': 2911,
+ 'agent_burnin_cpu_outputfile': '/var/log/burnin.cpu'}}
mock_execute.return_value = (['out', 'err'])
burnin.stress_ng_cpu(node)
mock_execute.assert_called_once_with(
- 'stress-ng', '--cpu', 3, '--timeout', 2911, '--metrics-brief')
+ 'stress-ng', '--cpu', 3, '--timeout', 2911, '--metrics-brief',
+ '--log-file', '/var/log/burnin.cpu')
def test_stress_ng_cpu_no_stress_ng(self, mock_execute):
@@ -70,16 +73,19 @@ class TestBurnin(base.IronicAgentTest):
def test_stress_ng_vm_non_default(self, mock_execute):
- node = {'driver_info': {'agent_burnin_vm_vm': 2,
- 'agent_burnin_vm_vm-bytes': '25%',
- 'agent_burnin_vm_timeout': 120}}
+ node = {'driver_info': {
+ 'agent_burnin_vm_vm': 2,
+ 'agent_burnin_vm_vm-bytes': '25%',
+ 'agent_burnin_vm_timeout': 120,
+ 'agent_burnin_vm_outputfile': '/var/log/burnin.vm'}}
mock_execute.return_value = (['out', 'err'])
burnin.stress_ng_vm(node)
mock_execute.assert_called_once_with(
'stress-ng', '--vm', 2, '--vm-bytes', '25%',
- '--timeout', 120, '--metrics-brief')
+ '--timeout', 120, '--metrics-brief',
+ '--log-file', '/var/log/burnin.vm')
def test_stress_ng_vm_no_stress_ng(self, mock_execute):
@@ -115,8 +121,10 @@ class TestBurnin(base.IronicAgentTest):
@mock.patch.object(hardware, 'list_all_block_devices', autospec=True)
def test_fio_disk_no_default(self, mock_list, mock_execute):
- node = {'driver_info': {'agent_burnin_fio_disk_runtime': 600,
- 'agent_burnin_fio_disk_loops': 5}}
+ node = {'driver_info': {
+ 'agent_burnin_fio_disk_runtime': 600,
+ 'agent_burnin_fio_disk_loops': 5,
+ 'agent_burnin_fio_disk_outputfile': '/var/log/burnin.disk'}}
mock_list.return_value = [
hardware.BlockDevice('/dev/sdj', 'big', 1073741824, True),
@@ -130,8 +138,9 @@ class TestBurnin(base.IronicAgentTest):
'fio', '--rw', 'readwrite', '--bs', '4k', '--direct', 1,
'--ioengine', 'libaio', '--iodepth', '32', '--verify',
'crc32c', '--verify_dump', 1, '--continue_on_error', 'verify',
- '--loops', 5, '--runtime', 600, '--time_based', '--name',
- '/dev/sdj', '--name', '/dev/hdaa')
+ '--loops', 5, '--runtime', 600, '--time_based', '--output-format',
+ 'json', '--output', '/var/log/burnin.disk', '--name', '/dev/sdj',
+ '--name', '/dev/hdaa', )
@mock.patch.object(hardware, 'list_all_block_devices', autospec=True)
def test_fio_disk_no_fio(self, mock_list, mock_execute):
@@ -167,6 +176,33 @@ class TestBurnin(base.IronicAgentTest):
'--listen')]
mock_execute.assert_has_calls(expected_calls)
+ def test_fio_network_reader_w_logfile(self, mock_execute):
+
+ node = {'driver_info': {
+ 'agent_burnin_fio_network_runtime': 600,
+ 'agent_burnin_fio_network_config':
+ {'partner': 'host-002',
+ 'role': 'reader'},
+ 'agent_burnin_fio_network_outputfile': '/var/log/burnin.network'}}
+ mock_execute.return_value = (['out', 'err'])
+
+ burnin.fio_network(node)
+
+ expected_calls = [
+ mock.call('fio', '--ioengine', 'net', '--port', '9000',
+ '--fill_device', 1, '--group_reporting',
+ '--gtod_reduce', 1, '--numjobs', 16, '--name',
+ 'reader', '--rw', 'read', '--hostname', 'host-002',
+ '--output-format', 'json', '--output',
+ '/var/log/burnin.network.reader'),
+ mock.call('fio', '--ioengine', 'net', '--port', '9000',
+ '--fill_device', 1, '--group_reporting',
+ '--gtod_reduce', 1, '--numjobs', 16, '--name', 'writer',
+ '--rw', 'write', '--runtime', 600, '--time_based',
+ '--listen', '--output-format', 'json', '--output',
+ '/var/log/burnin.network.writer')]
+ mock_execute.assert_has_calls(expected_calls)
+
def test_fio_network_writer(self, mock_execute):
node = {'driver_info': {'agent_burnin_fio_network_runtime': 600,
@@ -189,6 +225,33 @@ class TestBurnin(base.IronicAgentTest):
'reader', '--rw', 'read', '--hostname', 'host-001')]
mock_execute.assert_has_calls(expected_calls)
+ def test_fio_network_writer_w_logfile(self, mock_execute):
+
+ node = {'driver_info': {
+ 'agent_burnin_fio_network_runtime': 600,
+ 'agent_burnin_fio_network_config':
+ {'partner': 'host-001',
+ 'role': 'writer'},
+ 'agent_burnin_fio_network_outputfile': '/var/log/burnin.network'}}
+ mock_execute.return_value = (['out', 'err'])
+
+ burnin.fio_network(node)
+
+ expected_calls = [
+ mock.call('fio', '--ioengine', 'net', '--port', '9000',
+ '--fill_device', 1, '--group_reporting',
+ '--gtod_reduce', 1, '--numjobs', 16, '--name', 'writer',
+ '--rw', 'write', '--runtime', 600, '--time_based',
+ '--listen', '--output-format', 'json', '--output',
+ '/var/log/burnin.network.writer'),
+ mock.call('fio', '--ioengine', 'net', '--port', '9000',
+ '--fill_device', 1, '--group_reporting',
+ '--gtod_reduce', 1, '--numjobs', 16, '--name',
+ 'reader', '--rw', 'read', '--hostname', 'host-001',
+ '--output-format', 'json', '--output',
+ '/var/log/burnin.network.reader')]
+ mock_execute.assert_has_calls(expected_calls)
+
def test_fio_network_no_fio(self, mock_execute):
node = {'driver_info': {'agent_burnin_fio_network_config':
@@ -226,11 +289,11 @@ class TestBurnin(base.IronicAgentTest):
{'partner': 'host-004', 'role': 'reader'}}}
# mock the infinite loop
mock_execute.side_effect = (processutils.ProcessExecutionError(
- 'Connection timeout'),
+ 'Connection timeout', exit_code=16),
processutils.ProcessExecutionError(
- 'Connection timeout'),
+ 'Connection timeout', exit_code=16),
processutils.ProcessExecutionError(
- 'Connection refused'),
+ 'Connection refused', exit_code=16),
['out', 'err'], # connected!
['out', 'err']) # reversed roles
diff --git a/releasenotes/notes/add-named-logfiles-to-burnin-4388309bf7442d53.yaml b/releasenotes/notes/add-named-logfiles-to-burnin-4388309bf7442d53.yaml
new file mode 100644
index 00000000..9391d69d
--- /dev/null
+++ b/releasenotes/notes/add-named-logfiles-to-burnin-4388309bf7442d53.yaml
@@ -0,0 +1,7 @@
+---
+features:
+ - |
+ Add options to have named output files for the burn-in logging:
+ {'agent_burnin_cpu', 'agent_burnin_vm', 'agent_burnin_fio_network',
+ 'agent_burnin_fio_disk'}_outputfile. This should ease collecting
+ the output of the burn-in steps for analysis.