diff options
author | Dmitry Tantsur <dtantsur@redhat.com> | 2016-05-24 10:04:12 +0200 |
---|---|---|
committer | Dmitry Tantsur <divius.inside@gmail.com> | 2016-05-25 09:43:11 +0000 |
commit | 2ec82a4dcf278c6f7fd94a41998d877a82f76579 (patch) | |
tree | 4e6564895f6ac37023839b322d08484871505efd | |
parent | 748a8b00e6f78f47d0a6f94c5beaea7aa53e11c7 (diff) | |
download | ironic-python-agent-2ec82a4dcf278c6f7fd94a41998d877a82f76579.tar.gz |
Wait for at least one suitable disk to appear on start up
Some kernel modules take substantial time to initialize. For example,
with the mpt2sas RAID driver, inspection and deployment randomly fail
due to IPA starting before the driver finishes initialization.
This problem is probably impossible to solve in a generic case, as
modern Linux environments do not have a notion of a "hardware is fully
initialized" moment. All hardware is essentially hotplug.
To solve it at least for the simplest case, this patch adds a wait loop
on start up waiting for at least one suitable disk to appear in inventory.
Note that root device hints are not considered, as the node might not
be known at that moment yet.
Change-Id: Id163ca28f7c140c302ea04947ded3f3c58b284de
Partial-Bug: #1582797
(cherry picked from commit c15ed6a48e36da26576242277aa531720fd15d2d)
-rw-r--r-- | ironic_python_agent/hardware.py | 23 | ||||
-rw-r--r-- | ironic_python_agent/tests/unit/test_agent.py | 6 | ||||
-rw-r--r-- | ironic_python_agent/tests/unit/test_hardware.py | 38 | ||||
-rw-r--r-- | releasenotes/notes/disk-wait-2e0e85e0947f80e9.yaml | 5 |
4 files changed, 71 insertions, 1 deletions
diff --git a/ironic_python_agent/hardware.py b/ironic_python_agent/hardware.py index b45d1191..3efb7671 100644 --- a/ironic_python_agent/hardware.py +++ b/ironic_python_agent/hardware.py @@ -16,6 +16,7 @@ import abc import functools import os import shlex +import time import netifaces from oslo_concurrency import processutils @@ -38,6 +39,9 @@ UNIT_CONVERTER = pint.UnitRegistry(filename=None) UNIT_CONVERTER.define('MB = []') UNIT_CONVERTER.define('GB = 1024 MB') +_DISK_WAIT_ATTEMPTS = 10 +_DISK_WAIT_DELAY = 3 + def _get_device_vendor(dev): """Get the vendor name of a given device.""" @@ -394,8 +398,27 @@ class GenericHardwareManager(HardwareManager): self.sys_path = '/sys' def evaluate_hardware_support(self): + # Do some initialization before we declare ourself ready + self._wait_for_disks() return HardwareSupport.GENERIC + def _wait_for_disks(self): + # Wait for at least one suitable disk to show up, otherwise neither + # inspection not deployment have any chances to succeed. + for attempt in range(_DISK_WAIT_ATTEMPTS): + try: + block_devices = self.list_block_devices() + utils.guess_root_disk(block_devices) + except errors.DeviceNotFound: + LOG.debug('Still waiting for at least one disk to appear, ' + 'attempt %d of %d', attempt + 1, _DISK_WAIT_ATTEMPTS) + time.sleep(_DISK_WAIT_DELAY) + else: + break + else: + LOG.warning('No disks detected in %d seconds', + _DISK_WAIT_DELAY * _DISK_WAIT_ATTEMPTS) + def _get_interface_info(self, interface_name): addr_path = '{0}/class/net/{1}/address'.format(self.sys_path, interface_name) diff --git a/ironic_python_agent/tests/unit/test_agent.py b/ironic_python_agent/tests/unit/test_agent.py index b51e6851..9885014a 100644 --- a/ironic_python_agent/tests/unit/test_agent.py +++ b/ironic_python_agent/tests/unit/test_agent.py @@ -127,6 +127,8 @@ class TestHeartbeater(test_base.BaseTestCase): self.assertEqual(2.7, self.heartbeater.error_delay) +@mock.patch.object(hardware.GenericHardwareManager, '_wait_for_disks', + lambda self: 
None) class TestBaseAgent(test_base.BaseTestCase): def setUp(self): @@ -294,6 +296,8 @@ class TestBaseAgent(test_base.BaseTestCase): self.agent.get_node_uuid) +@mock.patch.object(hardware.GenericHardwareManager, '_wait_for_disks', + lambda self: None) class TestAgentStandalone(test_base.BaseTestCase): def setUp(self): @@ -338,6 +342,8 @@ class TestAgentStandalone(test_base.BaseTestCase): self.assertFalse(self.agent.api_client.lookup_node.called) +@mock.patch.object(hardware.GenericHardwareManager, '_wait_for_disks', + lambda self: None) @mock.patch.object(socket, 'gethostbyname', autospec=True) @mock.patch.object(utils, 'execute', autospec=True) class TestAdvertiseAddress(test_base.BaseTestCase): diff --git a/ironic_python_agent/tests/unit/test_hardware.py b/ironic_python_agent/tests/unit/test_hardware.py index 5cb4a418..25782acd 100644 --- a/ironic_python_agent/tests/unit/test_hardware.py +++ b/ironic_python_agent/tests/unit/test_hardware.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import os +import time + import mock import netifaces -import os from oslo_concurrency import processutils from oslo_utils import units from oslotest import base as test_base @@ -915,6 +917,40 @@ class TestGenericHardwareManager(test_base.BaseTestCase): self.assertEqual('NEC', self.hardware.get_system_vendor_info().manufacturer) + @mock.patch.object(hardware.GenericHardwareManager, 'list_block_devices', + autospec=True) + @mock.patch.object(time, 'sleep', autospec=True) + @mock.patch.object(utils, 'guess_root_disk', autospec=True) + def test_evaluate_hw_waits_for_disks(self, mocked_root_dev, mocked_sleep, + mocked_block_dev): + mocked_root_dev.side_effect = [ + errors.DeviceNotFound('boom'), + None + ] + + result = self.hardware.evaluate_hardware_support() + + self.assertEqual(hardware.HardwareSupport.GENERIC, result) + mocked_root_dev.assert_called_with(mocked_block_dev.return_value) + self.assertEqual(2, mocked_root_dev.call_count) + mocked_sleep.assert_called_once_with(hardware._DISK_WAIT_DELAY) + + @mock.patch.object(hardware.GenericHardwareManager, 'list_block_devices', + autospec=True) + @mock.patch.object(time, 'sleep', autospec=True) + @mock.patch.object(utils, 'guess_root_disk', autospec=True) + def test_evaluate_hw_disks_timeout(self, mocked_root_dev, mocked_sleep, + mocked_block_dev): + mocked_root_dev.side_effect = errors.DeviceNotFound('boom') + + result = self.hardware.evaluate_hardware_support() + + self.assertEqual(hardware.HardwareSupport.GENERIC, result) + mocked_root_dev.assert_called_with(mocked_block_dev.return_value) + self.assertEqual(hardware._DISK_WAIT_ATTEMPTS, + mocked_root_dev.call_count) + mocked_sleep.assert_called_with(hardware._DISK_WAIT_DELAY) + @mock.patch.object(utils, 'execute', autospec=True) class TestModuleFunctions(test_base.BaseTestCase): diff --git a/releasenotes/notes/disk-wait-2e0e85e0947f80e9.yaml b/releasenotes/notes/disk-wait-2e0e85e0947f80e9.yaml new file mode 100644 index 00000000..3b5ea703 --- /dev/null +++ 
b/releasenotes/notes/disk-wait-2e0e85e0947f80e9.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - On start up wait up to 30 seconds for the first disk device suitable for + deployment to appear. This is to fix both inspection and deployment on + hardware that takes long to initialize (e.g. some RAID devices). |