summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Dmitry Tantsur <dtantsur@redhat.com> 2016-05-24 10:04:12 +0200
committer: Dmitry Tantsur <divius.inside@gmail.com> 2016-05-25 09:43:11 +0000
commit: 2ec82a4dcf278c6f7fd94a41998d877a82f76579 (patch)
tree: 4e6564895f6ac37023839b322d08484871505efd
parent: 748a8b00e6f78f47d0a6f94c5beaea7aa53e11c7 (diff)
download: ironic-python-agent-2ec82a4dcf278c6f7fd94a41998d877a82f76579.tar.gz
Wait for at least one suitable disk to appear on start up
Some kernel modules take substantial time to initialize. For example, with the mpt2sas RAID driver, inspection and deployment randomly fail because IPA starts before the driver finishes initialization. This problem is probably impossible to solve in the generic case, as modern Linux environments do not have a notion of a "hardware is fully initialized" moment. All hardware is essentially hotplug. To solve it at least for the simplest case, this patch adds a wait loop on start up that waits for at least one suitable disk to appear in the inventory. Note that root device hints are not considered, as the node might not be known at that moment yet.

Change-Id: Id163ca28f7c140c302ea04947ded3f3c58b284de
Partial-Bug: #1582797
(cherry picked from commit c15ed6a48e36da26576242277aa531720fd15d2d)
-rw-r--r-- ironic_python_agent/hardware.py 23
-rw-r--r-- ironic_python_agent/tests/unit/test_agent.py 6
-rw-r--r-- ironic_python_agent/tests/unit/test_hardware.py 38
-rw-r--r-- releasenotes/notes/disk-wait-2e0e85e0947f80e9.yaml 5
4 files changed, 71 insertions, 1 deletions
diff --git a/ironic_python_agent/hardware.py b/ironic_python_agent/hardware.py
index b45d1191..3efb7671 100644
--- a/ironic_python_agent/hardware.py
+++ b/ironic_python_agent/hardware.py
@@ -16,6 +16,7 @@ import abc
import functools
import os
import shlex
+import time
import netifaces
from oslo_concurrency import processutils
@@ -38,6 +39,9 @@ UNIT_CONVERTER = pint.UnitRegistry(filename=None)
UNIT_CONVERTER.define('MB = []')
UNIT_CONVERTER.define('GB = 1024 MB')
+_DISK_WAIT_ATTEMPTS = 10
+_DISK_WAIT_DELAY = 3
+
def _get_device_vendor(dev):
"""Get the vendor name of a given device."""
@@ -394,8 +398,27 @@ class GenericHardwareManager(HardwareManager):
self.sys_path = '/sys'
def evaluate_hardware_support(self):
+ # Do some initialization before we declare ourselves ready
+ self._wait_for_disks()
return HardwareSupport.GENERIC
+ def _wait_for_disks(self):
+ # Wait for at least one suitable disk to show up, otherwise neither
+ # inspection nor deployment has any chance to succeed.
+ for attempt in range(_DISK_WAIT_ATTEMPTS):
+ try:
+ block_devices = self.list_block_devices()
+ utils.guess_root_disk(block_devices)
+ except errors.DeviceNotFound:
+ LOG.debug('Still waiting for at least one disk to appear, '
+ 'attempt %d of %d', attempt + 1, _DISK_WAIT_ATTEMPTS)
+ time.sleep(_DISK_WAIT_DELAY)
+ else:
+ break
+ else:
+ LOG.warning('No disks detected in %d seconds',
+ _DISK_WAIT_DELAY * _DISK_WAIT_ATTEMPTS)
+
def _get_interface_info(self, interface_name):
addr_path = '{0}/class/net/{1}/address'.format(self.sys_path,
interface_name)
diff --git a/ironic_python_agent/tests/unit/test_agent.py b/ironic_python_agent/tests/unit/test_agent.py
index b51e6851..9885014a 100644
--- a/ironic_python_agent/tests/unit/test_agent.py
+++ b/ironic_python_agent/tests/unit/test_agent.py
@@ -127,6 +127,8 @@ class TestHeartbeater(test_base.BaseTestCase):
self.assertEqual(2.7, self.heartbeater.error_delay)
+@mock.patch.object(hardware.GenericHardwareManager, '_wait_for_disks',
+ lambda self: None)
class TestBaseAgent(test_base.BaseTestCase):
def setUp(self):
@@ -294,6 +296,8 @@ class TestBaseAgent(test_base.BaseTestCase):
self.agent.get_node_uuid)
+@mock.patch.object(hardware.GenericHardwareManager, '_wait_for_disks',
+ lambda self: None)
class TestAgentStandalone(test_base.BaseTestCase):
def setUp(self):
@@ -338,6 +342,8 @@ class TestAgentStandalone(test_base.BaseTestCase):
self.assertFalse(self.agent.api_client.lookup_node.called)
+@mock.patch.object(hardware.GenericHardwareManager, '_wait_for_disks',
+ lambda self: None)
@mock.patch.object(socket, 'gethostbyname', autospec=True)
@mock.patch.object(utils, 'execute', autospec=True)
class TestAdvertiseAddress(test_base.BaseTestCase):
diff --git a/ironic_python_agent/tests/unit/test_hardware.py b/ironic_python_agent/tests/unit/test_hardware.py
index 5cb4a418..25782acd 100644
--- a/ironic_python_agent/tests/unit/test_hardware.py
+++ b/ironic_python_agent/tests/unit/test_hardware.py
@@ -12,9 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import os
+import time
+
import mock
import netifaces
-import os
from oslo_concurrency import processutils
from oslo_utils import units
from oslotest import base as test_base
@@ -915,6 +917,40 @@ class TestGenericHardwareManager(test_base.BaseTestCase):
self.assertEqual('NEC',
self.hardware.get_system_vendor_info().manufacturer)
+ @mock.patch.object(hardware.GenericHardwareManager, 'list_block_devices',
+ autospec=True)
+ @mock.patch.object(time, 'sleep', autospec=True)
+ @mock.patch.object(utils, 'guess_root_disk', autospec=True)
+ def test_evaluate_hw_waits_for_disks(self, mocked_root_dev, mocked_sleep,
+ mocked_block_dev):
+ mocked_root_dev.side_effect = [
+ errors.DeviceNotFound('boom'),
+ None
+ ]
+
+ result = self.hardware.evaluate_hardware_support()
+
+ self.assertEqual(hardware.HardwareSupport.GENERIC, result)
+ mocked_root_dev.assert_called_with(mocked_block_dev.return_value)
+ self.assertEqual(2, mocked_root_dev.call_count)
+ mocked_sleep.assert_called_once_with(hardware._DISK_WAIT_DELAY)
+
+ @mock.patch.object(hardware.GenericHardwareManager, 'list_block_devices',
+ autospec=True)
+ @mock.patch.object(time, 'sleep', autospec=True)
+ @mock.patch.object(utils, 'guess_root_disk', autospec=True)
+ def test_evaluate_hw_disks_timeout(self, mocked_root_dev, mocked_sleep,
+ mocked_block_dev):
+ mocked_root_dev.side_effect = errors.DeviceNotFound('boom')
+
+ result = self.hardware.evaluate_hardware_support()
+
+ self.assertEqual(hardware.HardwareSupport.GENERIC, result)
+ mocked_root_dev.assert_called_with(mocked_block_dev.return_value)
+ self.assertEqual(hardware._DISK_WAIT_ATTEMPTS,
+ mocked_root_dev.call_count)
+ mocked_sleep.assert_called_with(hardware._DISK_WAIT_DELAY)
+
@mock.patch.object(utils, 'execute', autospec=True)
class TestModuleFunctions(test_base.BaseTestCase):
diff --git a/releasenotes/notes/disk-wait-2e0e85e0947f80e9.yaml b/releasenotes/notes/disk-wait-2e0e85e0947f80e9.yaml
new file mode 100644
index 00000000..3b5ea703
--- /dev/null
+++ b/releasenotes/notes/disk-wait-2e0e85e0947f80e9.yaml
@@ -0,0 +1,5 @@
+---
+fixes:
+ - On start up, wait up to 30 seconds for the first disk device suitable for
+ deployment to appear. This is to fix both inspection and deployment on
+ hardware that takes a long time to initialize (e.g. some RAID devices).