author     Dmitry Tantsur <dtantsur@protonmail.com>    2019-09-19 14:21:58 +0200
committer  Dmitry Tantsur <dtantsur@protonmail.com>    2019-09-20 15:24:28 +0200
commit     b2834e66619c5f968389fa6ab1cf3e5f99cbafa3 (patch)
tree       fda99a4e472c26cb87dbb12d65b886f0d2f5eac2
parent     9fa39045dab1c3b401efac740d14a7e88c6c41f5 (diff)
download   ironic-b2834e66619c5f968389fa6ab1cf3e5f99cbafa3.tar.gz
Allow retrying PXE boot if it takes too long
PXE is inherently unreliable and sometimes times out without an
obvious reason. This happens particularly often in resource-constrained
environments, such as the CI. This change allows an operator to
set a timeout, after which the boot is retried.
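To illustrate the idea behind the change, here is a simplified, hypothetical sketch (not code from this patch): a node's boot is considered stuck once its last recorded activity is older than the configured retry timeout, and only then is the boot retried.

```python
from datetime import datetime, timedelta

# Hypothetical helper mirroring the idea of this patch: the last recorded
# activity (e.g. an agent heartbeat) is compared against the retry timeout.
def boot_timed_out(last_activity_iso, retry_timeout, now=None):
    if last_activity_iso is None:
        return True  # nothing recorded at all
    last = datetime.strptime(last_activity_iso, "%Y-%m-%dT%H:%M:%S.%f")
    now = now or datetime.utcnow()
    return now - last > timedelta(seconds=retry_timeout)

# With a 600 second timeout, a node whose agent last heartbeated 900 seconds
# ago would get its PXE boot retried (powered off, PXE boot device re-set,
# powered back on).
print(boot_timed_out("2019-09-19T12:00:00.000000", 600,
                     now=datetime(2019, 9, 19, 12, 15, 0)))  # True
```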
The _add_nodes_filters method had to be refactored to avoid hitting
the complexity limit.
Change-Id: I34a11f52e8e98e5b64f2d21f7190468a9e4b030d
Story: #2005167
Task: #29901
-rw-r--r--  devstack/lib/ironic                                 | 16
-rw-r--r--  ironic/conductor/utils.py                           | 39
-rw-r--r--  ironic/conf/pxe.py                                  | 10
-rw-r--r--  ironic/db/sqlalchemy/api.py                         | 57
-rw-r--r--  ironic/drivers/modules/pxe.py                       | 10
-rw-r--r--  ironic/drivers/modules/pxe_base.py                  | 90
-rw-r--r--  ironic/tests/unit/db/test_nodes.py                  |  7
-rw-r--r--  ironic/tests/unit/drivers/modules/test_pxe.py       | 96
-rw-r--r--  releasenotes/notes/pxe-retry-762a00ba1089bd75.yaml  | 10
9 files changed, 279 insertions, 56 deletions
diff --git a/devstack/lib/ironic b/devstack/lib/ironic
index 910be51e5..cfd22a94c 100644
--- a/devstack/lib/ironic
+++ b/devstack/lib/ironic
@@ -88,9 +88,19 @@ IRONIC_USE_MOD_WSGI=$(trueorfalse $ENABLE_HTTPD_MOD_WSGI_SERVICES IRONIC_USE_MOD
 # Defaults to the (now confusingly named) IRONIC_USE_MOD_WSGI for backward compat
 IRONIC_USE_WSGI=$(trueorfalse $IRONIC_USE_MOD_WSGI IRONIC_USE_WSGI)
 
+# Whether DevStack will be setup for bare metal or VMs
+IRONIC_IS_HARDWARE=$(trueorfalse False IRONIC_IS_HARDWARE)
+
 # Deploy callback timeout can be changed from its default (1800), if required.
 IRONIC_CALLBACK_TIMEOUT=${IRONIC_CALLBACK_TIMEOUT:-}
 
+# Timeout before retrying PXE boot. Set low to help the CI.
+if [[ "$IRONIC_IS_HARDWARE" == False ]]; then
+    IRONIC_PXE_BOOT_RETRY_TIMEOUT=${IRONIC_PXE_BOOT_RETRY_TIMEOUT:-600}
+else
+    IRONIC_PXE_BOOT_RETRY_TIMEOUT=${IRONIC_PXE_BOOT_RETRY_TIMEOUT:-}
+fi
+
 # Ping timeout after the node becomes active
 IRONIC_PING_TIMEOUT=${IRONIC_PING_TIMEOUT:-}
 
@@ -327,9 +337,6 @@ IRONIC_HTTP_PORT=${IRONIC_HTTP_PORT:-3928}
 IRONIC_RPC_TRANSPORT=${IRONIC_RPC_TRANSPORT:-oslo}
 IRONIC_JSON_RPC_PORT=${IRONIC_JSON_RPC_PORT:-8089}
 
-# Whether DevStack will be setup for bare metal or VMs
-IRONIC_IS_HARDWARE=$(trueorfalse False IRONIC_IS_HARDWARE)
-
 # The first port in the range to bind the Virtual BMCs. The number of
 # ports that will be used depends on $IRONIC_VM_COUNT variable, e.g if
 # $IRONIC_VM_COUNT=3 the ports 6230, 6231 and 6232 will be used for the
@@ -1475,6 +1482,9 @@ function configure_ironic_conductor {
     iniset $IRONIC_CONF_FILE pxe tftp_server $IRONIC_TFTPSERVER_IP
     iniset $IRONIC_CONF_FILE pxe tftp_root $IRONIC_TFTPBOOT_DIR
     iniset $IRONIC_CONF_FILE pxe tftp_master_path $IRONIC_TFTPBOOT_DIR/master_images
+    if [[ -n "$IRONIC_PXE_BOOT_RETRY_TIMEOUT" ]]; then
+        iniset $IRONIC_CONF_FILE pxe boot_retry_timeout $IRONIC_PXE_BOOT_RETRY_TIMEOUT
+    fi
     if [[ "$IRONIC_TERMINAL_SSL" == "True" ]]; then
         # Make sure the cert directory exist
         sudo mkdir -p $IRONIC_TERMINAL_CERT_DIR
""" - if not fast_track_able(task): - return False - # use native datetime objects for conversion and compare - # slightly odd because py2 compatability :( - last = datetime.datetime.strptime( - task.node.driver_internal_info.get( - 'agent_last_heartbeat', - '1970-01-01T00:00:00.000000'), - "%Y-%m-%dT%H:%M:%S.%f") - # If we found nothing, we assume that the time is essentially epoch. - time_delta = datetime.timedelta(seconds=CONF.deploy.fast_track_timeout) - last_valid = timeutils.utcnow() - time_delta - # Checking the power state, because if we find the machine off due to - # any action, we can't actually fast track the node. :( - return (last_valid <= last + return (fast_track_able(task) + and value_within_timeout( + task.node.driver_internal_info.get('agent_last_heartbeat'), + CONF.deploy.fast_track_timeout) and task.driver.power.get_power_state(task) == states.POWER_ON) diff --git a/ironic/conf/pxe.py b/ironic/conf/pxe.py index 49882ebfe..05b955ab8 100644 --- a/ironic/conf/pxe.py +++ b/ironic/conf/pxe.py @@ -126,6 +126,16 @@ opts = [ default=0, help=_('Timeout value (in seconds) for downloading an image ' 'via iPXE. Defaults to 0 (no timeout)')), + cfg.IntOpt('boot_retry_timeout', + min=60, + help=_('Timeout (in seconds) after which PXE boot should be ' + 'retried. Must be less than [conductor]' + 'deploy_callback_timeout. Disabled by default.')), + cfg.IntOpt('boot_retry_check_interval', + default=90, min=1, + help=_('Interval (in seconds) between periodic checks on PXE ' + 'boot retry. Has no effect if boot_retry_timeout ' + 'is not set.')), cfg.StrOpt('ip_version', default='4', choices=[('4', _('IPv4')), diff --git a/ironic/db/sqlalchemy/api.py b/ironic/db/sqlalchemy/api.py index eee192c43..8ea75ba04 100644 --- a/ironic/db/sqlalchemy/api.py +++ b/ironic/db/sqlalchemy/api.py @@ -279,20 +279,28 @@ def _zip_matching(a, b, key): class Connection(api.Connection): """SqlAlchemy connection.""" + _NODE_QUERY_FIELDS = {'console_enabled', 'maintenance', 'driver', + 'resource_class', 'provision_state', 'uuid', 'id', + 'fault', 'conductor_group', 'owner'} + _NODE_IN_QUERY_FIELDS = {'%s_in' % field: field + for field in ('uuid', 'provision_state')} + _NODE_NON_NULL_FILTERS = {'associated': 'instance_uuid', + 'reserved': 'reservation', + 'with_power_state': 'power_state'} + _NODE_FILTERS = ({'chassis_uuid', 'reserved_by_any_of', + 'provisioned_before', 'inspection_started_before', + 'description_contains'} + | _NODE_QUERY_FIELDS + | set(_NODE_IN_QUERY_FIELDS) + | set(_NODE_NON_NULL_FILTERS)) + def __init__(self): pass def _validate_nodes_filters(self, filters): if filters is None: filters = dict() - supported_filters = {'console_enabled', 'maintenance', 'driver', - 'resource_class', 'provision_state', 'uuid', 'id', - 'chassis_uuid', 'associated', 'reserved', - 'reserved_by_any_of', 'provisioned_before', - 'inspection_started_before', 'fault', - 'conductor_group', 'owner', 'uuid_in', - 'with_power_state', 'description_contains'} - unsupported_filters = set(filters).difference(supported_filters) + unsupported_filters = set(filters).difference(self._NODE_FILTERS) if unsupported_filters: msg = _("SqlAlchemy API does not support " "filtering by %s") % ', '.join(unsupported_filters) @@ -301,26 +309,26 @@ class Connection(api.Connection): def _add_nodes_filters(self, query, filters): filters = self._validate_nodes_filters(filters) - for field in ['console_enabled', 'maintenance', 'driver', - 'resource_class', 'provision_state', 'uuid', 'id', - 'fault', 'conductor_group', 'owner']: + for field in 
diff --git a/ironic/conf/pxe.py b/ironic/conf/pxe.py
index 49882ebfe..05b955ab8 100644
--- a/ironic/conf/pxe.py
+++ b/ironic/conf/pxe.py
@@ -126,6 +126,16 @@
                default=0,
                help=_('Timeout value (in seconds) for downloading an image '
                       'via iPXE. Defaults to 0 (no timeout)')),
+    cfg.IntOpt('boot_retry_timeout',
+               min=60,
+               help=_('Timeout (in seconds) after which PXE boot should be '
+                      'retried. Must be less than [conductor]'
+                      'deploy_callback_timeout. Disabled by default.')),
+    cfg.IntOpt('boot_retry_check_interval',
+               default=90, min=1,
+               help=_('Interval (in seconds) between periodic checks on PXE '
+                      'boot retry. Has no effect if boot_retry_timeout '
+                      'is not set.')),
     cfg.StrOpt('ip_version',
                default='4',
                choices=[('4', _('IPv4')),
diff --git a/ironic/db/sqlalchemy/api.py b/ironic/db/sqlalchemy/api.py
index eee192c43..8ea75ba04 100644
--- a/ironic/db/sqlalchemy/api.py
+++ b/ironic/db/sqlalchemy/api.py
@@ -279,20 +279,28 @@ def _zip_matching(a, b, key):
 class Connection(api.Connection):
     """SqlAlchemy connection."""
 
+    _NODE_QUERY_FIELDS = {'console_enabled', 'maintenance', 'driver',
+                          'resource_class', 'provision_state', 'uuid', 'id',
+                          'fault', 'conductor_group', 'owner'}
+    _NODE_IN_QUERY_FIELDS = {'%s_in' % field: field
+                             for field in ('uuid', 'provision_state')}
+    _NODE_NON_NULL_FILTERS = {'associated': 'instance_uuid',
+                              'reserved': 'reservation',
+                              'with_power_state': 'power_state'}
+    _NODE_FILTERS = ({'chassis_uuid', 'reserved_by_any_of',
+                      'provisioned_before', 'inspection_started_before',
+                      'description_contains'}
+                     | _NODE_QUERY_FIELDS
+                     | set(_NODE_IN_QUERY_FIELDS)
+                     | set(_NODE_NON_NULL_FILTERS))
+
     def __init__(self):
         pass
 
     def _validate_nodes_filters(self, filters):
         if filters is None:
             filters = dict()
-        supported_filters = {'console_enabled', 'maintenance', 'driver',
-                             'resource_class', 'provision_state', 'uuid', 'id',
-                             'chassis_uuid', 'associated', 'reserved',
-                             'reserved_by_any_of', 'provisioned_before',
-                             'inspection_started_before', 'fault',
-                             'conductor_group', 'owner', 'uuid_in',
-                             'with_power_state', 'description_contains'}
-        unsupported_filters = set(filters).difference(supported_filters)
+        unsupported_filters = set(filters).difference(self._NODE_FILTERS)
         if unsupported_filters:
             msg = _("SqlAlchemy API does not support "
                     "filtering by %s") % ', '.join(unsupported_filters)
@@ -301,26 +309,26 @@
 
     def _add_nodes_filters(self, query, filters):
         filters = self._validate_nodes_filters(filters)
-        for field in ['console_enabled', 'maintenance', 'driver',
-                      'resource_class', 'provision_state', 'uuid', 'id',
-                      'fault', 'conductor_group', 'owner']:
+        for field in self._NODE_QUERY_FIELDS:
             if field in filters:
                 query = query.filter_by(**{field: filters[field]})
+        for key, field in self._NODE_IN_QUERY_FIELDS.items():
+            if key in filters:
+                query = query.filter(
+                    getattr(models.Node, field).in_(filters[key]))
+        for key, field in self._NODE_NON_NULL_FILTERS.items():
+            if key in filters:
+                column = getattr(models.Node, field)
+                if filters[key]:
+                    query = query.filter(column != sql.null())
+                else:
+                    query = query.filter(column == sql.null())
+
         if 'chassis_uuid' in filters:
             # get_chassis_by_uuid() to raise an exception if the chassis
             # is not found
             chassis_obj = self.get_chassis_by_uuid(filters['chassis_uuid'])
             query = query.filter_by(chassis_id=chassis_obj.id)
-        if 'associated' in filters:
-            if filters['associated']:
-                query = query.filter(models.Node.instance_uuid != sql.null())
-            else:
-                query = query.filter(models.Node.instance_uuid == sql.null())
-        if 'reserved' in filters:
-            if filters['reserved']:
-                query = query.filter(models.Node.reservation != sql.null())
-            else:
-                query = query.filter(models.Node.reservation == sql.null())
         if 'reserved_by_any_of' in filters:
             query = query.filter(models.Node.reservation.in_(
                 filters['reserved_by_any_of']))
@@ -334,13 +342,6 @@
                      - (datetime.timedelta(
                          seconds=filters['inspection_started_before'])))
             query = query.filter(models.Node.inspection_started_at < limit)
-        if 'uuid_in' in filters:
-            query = query.filter(models.Node.uuid.in_(filters['uuid_in']))
-        if 'with_power_state' in filters:
-            if filters['with_power_state']:
-                query = query.filter(models.Node.power_state != sql.null())
-            else:
-                query = query.filter(models.Node.power_state == sql.null())
         if 'description_contains' in filters:
            keyword = filters['description_contains']
            if keyword is not None:
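The refactored _add_nodes_filters drives the simple cases from class-level mappings instead of one if-block per filter, which is what keeps the method under the complexity limit and makes the new provision_state_in filter cheap to add. A standalone sketch of the same table-driven pattern (toy SQLAlchemy table, not ironic code):

```python
import sqlalchemy as sa

metadata = sa.MetaData()
nodes = sa.Table('nodes', metadata,
                 sa.Column('id', sa.Integer, primary_key=True),
                 sa.Column('uuid', sa.String(36)),
                 sa.Column('provision_state', sa.String(15)),
                 sa.Column('instance_uuid', sa.String(36)))

# '<field>_in' filters become IN clauses; "non-null" filters become
# IS / IS NOT NULL checks -- one loop each instead of one if-block per filter.
IN_QUERY_FIELDS = {'%s_in' % f: f for f in ('uuid', 'provision_state')}
NON_NULL_FILTERS = {'associated': 'instance_uuid'}


def build_clauses(filters):
    clauses = []
    for key, field in IN_QUERY_FIELDS.items():
        if key in filters:
            clauses.append(nodes.c[field].in_(filters[key]))
    for key, field in NON_NULL_FILTERS.items():
        if key in filters:
            column = nodes.c[field]
            clauses.append(column != sa.null() if filters[key]
                           else column == sa.null())
    return clauses


for clause in build_clauses({'provision_state_in': ['deploy wait',
                                                    'clean wait'],
                             'associated': True}):
    print(clause)
```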
diff --git a/ironic/drivers/modules/pxe.py b/ironic/drivers/modules/pxe.py
index d0c47caff..9fd7d78f7 100644
--- a/ironic/drivers/modules/pxe.py
+++ b/ironic/drivers/modules/pxe.py
@@ -17,7 +17,6 @@ PXE Boot Interface
 
 from ironic_lib import metrics_utils
 from oslo_log import log as logging
-from oslo_utils import strutils
 
 from ironic.common import boot_devices
 from ironic.common import dhcp_factory
@@ -174,14 +173,7 @@
                 pxe_config_template,
                 ipxe_enabled=CONF.pxe.ipxe_enabled)
 
-            persistent = False
-            value = node.driver_info.get('force_persistent_boot_device',
-                                         'Default')
-            if value in {'Always', 'Default', 'Never'}:
-                if value == 'Always':
-                    persistent = True
-            else:
-                persistent = strutils.bool_from_string(value, False)
+            persistent = self._persistent_ramdisk_boot(node)
             manager_utils.node_set_boot_device(task, boot_devices.PXE,
                                                persistent=persistent)
 
diff --git a/ironic/drivers/modules/pxe_base.py b/ironic/drivers/modules/pxe_base.py
index 39851f5ab..dfc4a0e4a 100644
--- a/ironic/drivers/modules/pxe_base.py
+++ b/ironic/drivers/modules/pxe_base.py
@@ -13,13 +13,24 @@ Base PXE Interface Methods
 """
 
+from futurist import periodics
 from ironic_lib import metrics_utils
+from oslo_config import cfg
 from oslo_log import log as logging
+from oslo_utils import strutils
 
+from ironic.common import boot_devices
 from ironic.common import exception
 from ironic.common.i18n import _
 from ironic.common import pxe_utils
+from ironic.common import states
+from ironic.conductor import task_manager
+from ironic.conductor import utils as manager_utils
 from ironic.drivers.modules import deploy_utils
+
+
+CONF = cfg.CONF
+
 LOG = logging.getLogger(__name__)
 
 METRICS = metrics_utils.get_metrics_logger(__name__)
@@ -99,3 +110,82 @@
             parameters
         """
         pxe_utils.parse_driver_info(task.node, mode='rescue')
+
+    def _persistent_ramdisk_boot(self, node):
+        """If the ramdisk should be configured as a persistent boot device."""
+        value = node.driver_info.get('force_persistent_boot_device', 'Default')
+        if value in {'Always', 'Default', 'Never'}:
+            return value == 'Always'
+        else:
+            return strutils.bool_from_string(value, False)
+
+    _RETRY_ALLOWED_STATES = {states.DEPLOYWAIT, states.CLEANWAIT,
+                             states.RESCUEWAIT}
+
+    @METRICS.timer('PXEBaseMixin._check_boot_timeouts')
+    @periodics.periodic(spacing=CONF.pxe.boot_retry_check_interval,
+                        enabled=bool(CONF.pxe.boot_retry_timeout))
+    def _check_boot_timeouts(self, manager, context):
+        """Periodically checks whether boot has timed out and retry it.
+
+        :param manager: conductor manager.
+        :param context: request context.
+        """
+        filters = {'provision_state_in': self._RETRY_ALLOWED_STATES,
+                   'reserved': False,
+                   'maintenance': False,
+                   'provisioned_before': CONF.pxe.boot_retry_timeout}
+        node_iter = manager.iter_nodes(filters=filters)
+
+        for node_uuid, driver, conductor_group in node_iter:
+            try:
+                lock_purpose = 'checking PXE boot status'
+                with task_manager.acquire(context, node_uuid,
+                                          shared=True,
+                                          purpose=lock_purpose) as task:
+                    self._check_boot_status(task)
+            except (exception.NodeLocked, exception.NodeNotFound):
+                continue
+
+    def _check_boot_status(self, task):
+        if not isinstance(task.driver.boot, PXEBaseMixin):
+            return
+
+        if not _should_retry_boot(task.node):
+            return
+
+        task.upgrade_lock(purpose='retrying PXE boot')
+
+        # Retry critical checks after acquiring the exclusive lock.
+        if (task.node.maintenance or task.node.provision_state
+                not in self._RETRY_ALLOWED_STATES
+                or not _should_retry_boot(task.node)):
+            return
+
+        LOG.info('Booting the ramdisk on node %(node)s is taking more than '
+                 '%(timeout)d seconds, retrying boot',
+                 {'node': task.node.uuid,
+                  'timeout': CONF.pxe.boot_retry_timeout})
+
+        manager_utils.node_power_action(task, states.POWER_OFF)
+        # NOTE(dtantsur): retry even persistent boot setting in case it did not
+        # work for some reason.
+        persistent = self._persistent_ramdisk_boot(task.node)
+        manager_utils.node_set_boot_device(task, boot_devices.PXE,
+                                           persistent=persistent)
+        manager_utils.node_power_action(task, states.POWER_ON)
+
+
+def _should_retry_boot(node):
+    # NOTE(dtantsur): this assumes IPA, do we need to make it generic?
+    for field in ('agent_last_heartbeat', 'last_power_state_change'):
+        if manager_utils.value_within_timeout(
+                node.driver_internal_info.get(field),
+                CONF.pxe.boot_retry_timeout):
+            # Alive and heartbeating, probably busy with something long
+            LOG.debug('Not retrying PXE boot for node %(node)s; its '
+                      '%(event)s happened less than %(timeout)d seconds ago',
+                      {'node': node.uuid, 'event': field,
+                       'timeout': CONF.pxe.boot_retry_timeout})
+            return False
+    return True
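_check_boot_timeouts above is declared as a futurist periodic task; the conductor discovers such decorated methods on enabled driver interfaces and runs them at the configured spacing, and the enabled= flag keeps the whole check switched off unless boot_retry_timeout is set. A minimal standalone sketch of that mechanism (toy class, not ironic code):

```python
import threading
import time

from futurist import periodics


class BootChecker(object):

    @periodics.periodic(spacing=1, enabled=True)
    def check(self):
        # In ironic this is where nodes stuck in *WAIT states would be
        # power-cycled back into PXE.
        print('checking for timed-out PXE boots...')


# PeriodicWorker.create() scans the object for @periodic-decorated methods,
# much like the conductor picks up _check_boot_timeouts.
worker = periodics.PeriodicWorker.create([BootChecker()])
thread = threading.Thread(target=worker.start)
thread.start()
time.sleep(3)
worker.stop()
thread.join()
```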
diff --git a/ironic/tests/unit/db/test_nodes.py b/ironic/tests/unit/db/test_nodes.py
index 11568b963..e61302be7 100644
--- a/ironic/tests/unit/db/test_nodes.py
+++ b/ironic/tests/unit/db/test_nodes.py
@@ -231,7 +231,8 @@ class DbNodeTestCase(base.DbTestCase):
 
         # node with provision_updated timeout
         node1 = utils.create_test_node(uuid=uuidutils.generate_uuid(),
-                                       provision_updated_at=past)
+                                       provision_updated_at=past,
+                                       provision_state=states.DEPLOYING)
         # node with None in provision_updated_at
         node2 = utils.create_test_node(uuid=uuidutils.generate_uuid(),
                                        provision_state=states.DEPLOYWAIT)
@@ -247,6 +248,10 @@ class DbNodeTestCase(base.DbTestCase):
                                              states.DEPLOYWAIT})
         self.assertEqual([node2.id], [r[0] for r in res])
 
+        res = self.dbapi.get_nodeinfo_list(
+            filters={'provision_state_in': [states.ACTIVE, states.DEPLOYING]})
+        self.assertEqual([node1.id], [r[0] for r in res])
+
     @mock.patch.object(timeutils, 'utcnow', autospec=True)
     def test_get_nodeinfo_list_inspection(self, mock_utcnow):
         past = datetime.datetime(2000, 1, 1, 0, 0)
diff --git a/ironic/tests/unit/drivers/modules/test_pxe.py b/ironic/tests/unit/drivers/modules/test_pxe.py
index bbc49e139..87e770e0b 100644
--- a/ironic/tests/unit/drivers/modules/test_pxe.py
+++ b/ironic/tests/unit/drivers/modules/test_pxe.py
@@ -21,6 +21,7 @@ import tempfile
 import mock
 from oslo_config import cfg
 from oslo_serialization import jsonutils as json
+from oslo_utils import timeutils
 from oslo_utils import uuidutils
 
 from ironic.common import boot_devices
@@ -1162,3 +1163,98 @@ class PXEValidateRescueTestCase(db_base.DbTestCase):
             self.assertRaisesRegex(exception.MissingParameterValue,
                                    'Missing.*rescue_kernel',
                                    task.driver.boot.validate_rescue, task)
+
+
+@mock.patch.object(ipxe.iPXEBoot, '__init__', lambda self: None)
+@mock.patch.object(pxe.PXEBoot, '__init__', lambda self: None)
+@mock.patch.object(manager_utils, 'node_set_boot_device', autospec=True)
+@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
+class PXEBootRetryTestCase(db_base.DbTestCase):
+
+    boot_interface = 'pxe'
+
+    def setUp(self):
+        super(PXEBootRetryTestCase, self).setUp()
+        self.config(enabled_boot_interfaces=['pxe', 'ipxe', 'fake'])
+        self.config(boot_retry_timeout=300, group='pxe')
+        self.node = obj_utils.create_test_node(
+            self.context,
+            driver='fake-hardware',
+            boot_interface=self.boot_interface,
+            provision_state=states.DEPLOYWAIT)
+
+    @mock.patch.object(pxe.PXEBoot, '_check_boot_status', autospec=True)
+    def test_check_boot_timeouts(self, mock_check_status, mock_power,
+                                 mock_boot_dev):
+        def _side_effect(iface, task):
+            self.assertEqual(self.node.uuid, task.node.uuid)
+
+        mock_check_status.side_effect = _side_effect
+        manager = mock.Mock(spec=['iter_nodes'])
+        manager.iter_nodes.return_value = [
+            (uuidutils.generate_uuid(), 'fake-hardware', ''),
+            (self.node.uuid, self.node.driver, self.node.conductor_group)
+        ]
+        iface = pxe.PXEBoot()
+        iface._check_boot_timeouts(manager, self.context)
+        mock_check_status.assert_called_once_with(iface, mock.ANY)
+
+    def test_check_boot_status_another_boot_interface(self, mock_power,
+                                                      mock_boot_dev):
+        with task_manager.acquire(self.context, self.node.uuid,
+                                  shared=True) as task:
+            task.driver.boot = fake.FakeBoot()
+            pxe.PXEBoot()._check_boot_status(task)
+            self.assertTrue(task.shared)
+        self.assertFalse(mock_power.called)
+        self.assertFalse(mock_boot_dev.called)
+
+    def test_check_boot_status_recent_power_change(self, mock_power,
+                                                   mock_boot_dev):
+        for field in ('agent_last_heartbeat', 'last_power_state_change'):
+            with task_manager.acquire(self.context, self.node.uuid,
+                                      shared=True) as task:
+                task.node.driver_internal_info = {
+                    field: str(timeutils.utcnow().isoformat())
+                }
+                task.driver.boot._check_boot_status(task)
+                self.assertTrue(task.shared)
+        self.assertFalse(mock_power.called)
+        self.assertFalse(mock_boot_dev.called)
+
+    def test_check_boot_status_maintenance(self, mock_power, mock_boot_dev):
+        self.node.maintenance = True
+        self.node.save()
+        with task_manager.acquire(self.context, self.node.uuid,
+                                  shared=True) as task:
+            task.driver.boot._check_boot_status(task)
+            self.assertFalse(task.shared)
+        self.assertFalse(mock_power.called)
+        self.assertFalse(mock_boot_dev.called)
+
+    def test_check_boot_status_wrong_state(self, mock_power, mock_boot_dev):
+        self.node.provision_state = states.DEPLOYING
+        self.node.save()
+        with task_manager.acquire(self.context, self.node.uuid,
+                                  shared=True) as task:
+            task.driver.boot._check_boot_status(task)
+            self.assertFalse(task.shared)
+        self.assertFalse(mock_power.called)
+        self.assertFalse(mock_boot_dev.called)
+
+    def test_check_boot_status_retry(self, mock_power, mock_boot_dev):
+        with task_manager.acquire(self.context, self.node.uuid,
+                                  shared=True) as task:
+            task.driver.boot._check_boot_status(task)
+            self.assertFalse(task.shared)
+            mock_power.assert_has_calls([
+                mock.call(task, states.POWER_OFF),
+                mock.call(task, states.POWER_ON)
+            ])
+            mock_boot_dev.assert_called_once_with(task, 'pxe',
+                                                  persistent=False)
+
+
+class iPXEBootRetryTestCase(PXEBootRetryTestCase):
+
+    boot_interface = 'ipxe'
diff --git a/releasenotes/notes/pxe-retry-762a00ba1089bd75.yaml b/releasenotes/notes/pxe-retry-762a00ba1089bd75.yaml
new file mode 100644
index 000000000..ca24b7a91
--- /dev/null
+++ b/releasenotes/notes/pxe-retry-762a00ba1089bd75.yaml
@@ -0,0 +1,10 @@
+---
+features:
+  - |
+    Allows retrying PXE/iPXE boot during deployment, cleaning and rescuing.
+    This feature is disabled by default and can be enabled by setting
+    ``[pxe]boot_retry_timeout`` to the timeout (in seconds) after which
+    the boot should be retried.
+
+    The new option ``[pxe]boot_retry_check_interval`` defines how often to
+    check the nodes for timeout and defaults to 90 seconds.
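The release note names the two knobs an operator actually touches. A hypothetical end-to-end check of the option wiring (standalone oslo.config usage, assuming a local ironic.conf that sets boot_retry_timeout in its [pxe] section; in ironic itself the options are registered by ironic/conf/pxe.py above):

```python
from oslo_config import cfg

CONF = cfg.ConfigOpts()
CONF.register_opts([cfg.IntOpt('boot_retry_timeout', min=60),
                    cfg.IntOpt('boot_retry_check_interval',
                               default=90, min=1)],
                   group='pxe')

# Assumes ./ironic.conf contains:
#   [pxe]
#   boot_retry_timeout = 600
CONF(['--config-file', 'ironic.conf'])

print(CONF.pxe.boot_retry_timeout)         # 600 -> retries enabled
print(CONF.pxe.boot_retry_check_interval)  # 90 unless overridden
```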