diff options
36 files changed, 885 insertions, 113 deletions
diff --git a/doc/source/admin/drivers/ibmc.rst b/doc/source/admin/drivers/ibmc.rst index 1bf9a3ba2..0f7fe1d90 100644 --- a/doc/source/admin/drivers/ibmc.rst +++ b/doc/source/admin/drivers/ibmc.rst @@ -312,6 +312,6 @@ boot_up_seq GET Query boot up sequence get_raid_controller_list GET Query RAID controller summary info ======================== ============ ====================================== -.. _Huawei iBMC: https://e.huawei.com/en/products/cloud-computing-dc/servers/accessories/ibmc +.. _Huawei iBMC: https://e.huawei.com/en/products/computing/kunpeng/accessories/ibmc .. _TLS: https://en.wikipedia.org/wiki/Transport_Layer_Security .. _HUAWEI iBMC Client library: https://pypi.org/project/python-ibmcclient/ diff --git a/doc/source/admin/metrics.rst b/doc/source/admin/metrics.rst index f435a50c5..733c6569b 100644 --- a/doc/source/admin/metrics.rst +++ b/doc/source/admin/metrics.rst @@ -17,8 +17,11 @@ These performance measurements, herein referred to as "metrics", can be emitted from the Bare Metal service, including ironic-api, ironic-conductor, and ironic-python-agent. By default, none of the services will emit metrics. -Configuring the Bare Metal Service to Enable Metrics -==================================================== +It is important to stress that not only statsd is supported for metrics +collection and transmission. This is covered later on in our documentation. + +Configuring the Bare Metal Service to Enable Metrics with Statsd +================================================================ Enabling metrics in ironic-api and ironic-conductor --------------------------------------------------- @@ -62,6 +65,30 @@ in the ironic configuration file as well:: agent_statsd_host = 198.51.100.2 agent_statsd_port = 8125 +.. Note:: + Use of a different metrics backend with the agent is not presently + supported. 
+ +Transmission to the Message Bus Notifier +======================================== + +Regardless of whether you're using Ceilometer, +`ironic-prometheus-exporter <https://docs.openstack.org/ironic-prometheus-exporter/latest/>`_, +or some scripting you wrote to consume the message bus notifications, +metrics data can be sent to the message bus notifier from the timer methods +*and* additional gauge counters by utilizing the ``[metrics]backend`` +configuration option and setting it to ``collector``. When this is the case, +information is cached locally and periodically sent along with the general sensor +data update to the messaging notifier, which can be consumed off of the message bus, +or via notifier plugin (such as is done with ironic-prometheus-exporter). + +.. NOTE:: + Transmission of timer data only works for the Conductor or ``single-process`` + Ironic service model. A separate webserver process presently does not have + the capability of triggering the call to retrieve and transmit the data. + +.. NOTE:: + This functionality requires ironic-lib version 5.4.0 or later to be installed. Types of Metrics Emitted ======================== @@ -79,6 +106,9 @@ additional load before enabling metrics. To see which metrics have changed names or have been removed between releases, refer to the `ironic release notes <https://docs.openstack.org/releasenotes/ironic/>`_. +Additional conductor metrics in the form of counts will also be generated in +limited locations where pertinent to the activity of the conductor. + .. note:: With the default statsd configuration, each timing metric may create additional metrics due to how statsd handles timing metrics. 
For more diff --git a/ironic/api/controllers/v1/versions.py b/ironic/api/controllers/v1/versions.py index aa8131570..f4cd26c0f 100644 --- a/ironic/api/controllers/v1/versions.py +++ b/ironic/api/controllers/v1/versions.py @@ -119,6 +119,7 @@ BASE_VERSION = 1 # v1.79: Change allocation behaviour to prefer node name match # v1.80: Marker to represent self service node creation/deletion # v1.81: Add node inventory +# v1.82: Add node sharding capability MINOR_0_JUNO = 0 MINOR_1_INITIAL_VERSION = 1 MINOR_2_AVAILABLE_STATE = 2 diff --git a/ironic/common/rpc_service.py b/ironic/common/rpc_service.py index b0eec7758..cb0f23c98 100644 --- a/ironic/common/rpc_service.py +++ b/ironic/common/rpc_service.py @@ -14,6 +14,7 @@ # License for the specific language governing permissions and limitations # under the License. +import datetime import signal import sys import time @@ -24,6 +25,7 @@ from oslo_log import log import oslo_messaging as messaging from oslo_service import service from oslo_utils import importutils +from oslo_utils import timeutils from ironic.common import context from ironic.common import rpc @@ -93,6 +95,26 @@ class RPCService(service.Service): 'transport': CONF.rpc_transport}) def stop(self): + initial_time = timeutils.utcnow() + extend_time = initial_time + datetime.timedelta( + seconds=CONF.hash_ring_reset_interval) + + try: + self.manager.del_host(deregister=self.deregister) + except Exception as e: + LOG.exception('Service error occurred when cleaning up ' + 'the RPC manager. 
Error: %s', e) + + if self.manager.get_online_conductor_count() > 1: + # Delay stopping the server until the hash ring has been + # reset on the cluster + stop_time = timeutils.utcnow() + if stop_time < extend_time: + stop_wait = max(0, (extend_time - stop_time).seconds) + LOG.info('Waiting %(stop_wait)s seconds for hash ring reset.', + {'stop_wait': stop_wait}) + time.sleep(stop_wait) + try: if self.rpcserver is not None: self.rpcserver.stop() @@ -100,11 +122,6 @@ class RPCService(service.Service): except Exception as e: LOG.exception('Service error occurred when stopping the ' 'RPC server. Error: %s', e) - try: - self.manager.del_host(deregister=self.deregister) - except Exception as e: - LOG.exception('Service error occurred when cleaning up ' - 'the RPC manager. Error: %s', e) super(RPCService, self).stop(graceful=True) LOG.info('Stopped RPC server for service %(service)s on host ' diff --git a/ironic/common/states.py b/ironic/common/states.py index 89b710189..f2238b41b 100644 --- a/ironic/common/states.py +++ b/ironic/common/states.py @@ -269,6 +269,9 @@ _FASTTRACK_LOOKUP_ALLOWED_STATES = (ENROLL, MANAGEABLE, AVAILABLE, FASTTRACK_LOOKUP_ALLOWED_STATES = frozenset(_FASTTRACK_LOOKUP_ALLOWED_STATES) """States where API lookups are permitted with fast track enabled.""" +FAILURE_STATES = frozenset((DEPLOYFAIL, CLEANFAIL, INSPECTFAIL, + RESCUEFAIL, UNRESCUEFAIL, ADOPTFAIL)) + ############## # Power states diff --git a/ironic/conductor/base_manager.py b/ironic/conductor/base_manager.py index 22ebd57f5..5c2e4ea95 100644 --- a/ironic/conductor/base_manager.py +++ b/ironic/conductor/base_manager.py @@ -334,6 +334,10 @@ class BaseConductorManager(object): self._started = False + def get_online_conductor_count(self): + """Return a count of currently online conductors""" + return len(self.dbapi.get_online_conductors()) + def _register_and_validate_hardware_interfaces(self, hardware_types): """Register and validate hardware interfaces for this conductor. 
diff --git a/ironic/conductor/cleaning.py b/ironic/conductor/cleaning.py index e59841a99..9e4edb809 100644 --- a/ironic/conductor/cleaning.py +++ b/ironic/conductor/cleaning.py @@ -248,12 +248,21 @@ def do_next_clean_step(task, step_index, disable_ramdisk=None): task.process_event(event) +def get_last_error(node): + last_error = _('By request, the clean operation was aborted') + if node.clean_step: + last_error += ( + _(' during or after the completion of step "%s"') + % conductor_steps.step_id(node.clean_step) + ) + return last_error + + @task_manager.require_exclusive_lock -def do_node_clean_abort(task, step_name=None): +def do_node_clean_abort(task): """Internal method to abort an ongoing operation. :param task: a TaskManager instance with an exclusive lock - :param step_name: The name of the clean step. """ node = task.node try: @@ -271,12 +280,13 @@ def do_node_clean_abort(task, step_name=None): set_fail_state=False) return + last_error = get_last_error(node) info_message = _('Clean operation aborted for node %s') % node.uuid - last_error = _('By request, the clean operation was aborted') - if step_name: - msg = _(' after the completion of step "%s"') % step_name - last_error += msg - info_message += msg + if node.clean_step: + info_message += ( + _(' during or after the completion of step "%s"') + % node.clean_step + ) node.last_error = last_error node.clean_step = None @@ -318,7 +328,7 @@ def continue_node_clean(task): target_state = None task.process_event('fail', target_state=target_state) - do_node_clean_abort(task, step_name) + do_node_clean_abort(task) return LOG.debug('The cleaning operation for node %(node)s was ' diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index ad45d2d74..74e3192cf 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -98,6 +98,8 @@ class ConductorManager(base_manager.BaseConductorManager): def __init__(self, host, topic): super(ConductorManager, self).__init__(host, topic) + # 
NOTE(TheJulia): This is less a metric-able count, but a means to + # sort out nodes and prioritise a subset (of non-responding nodes). self.power_state_sync_count = collections.defaultdict(int) @METRICS.timer('ConductorManager._clean_up_caches') @@ -1349,7 +1351,8 @@ class ConductorManager(base_manager.BaseConductorManager): callback=self._spawn_worker, call_args=(cleaning.do_node_clean_abort, task), err_handler=utils.provisioning_error_handler, - target_state=target_state) + target_state=target_state, + last_error=cleaning.get_last_error(node)) return if node.provision_state == states.RESCUEWAIT: @@ -1433,6 +1436,11 @@ class ConductorManager(base_manager.BaseConductorManager): finally: waiters.wait_for_all(futures) + # report a count of the nodes + METRICS.send_gauge( + 'ConductorManager.PowerSyncNodesCount', + len(nodes)) + def _sync_power_state_nodes_task(self, context, nodes): """Invokes power state sync on nodes from synchronized queue. @@ -1451,6 +1459,7 @@ class ConductorManager(base_manager.BaseConductorManager): can do here to avoid failing a brand new deploy to a node that we've locked here, though. """ + # FIXME(comstud): Since our initial state checks are outside # of the lock (to try to avoid the lock), some checks are # repeated after grabbing the lock so we can unlock quickly. @@ -1497,6 +1506,12 @@ class ConductorManager(base_manager.BaseConductorManager): LOG.info("During sync_power_state, node %(node)s was not " "found and presumed deleted by another process.", {'node': node_uuid}) + # TODO(TheJulia): The chance exists that we orphan a node + # in power_state_sync_count, albeit it is not much data, + # it could eventually cause the memory footprint to grow + # on an exceptionally large ironic deployment. We should + # make sure we clean it up at some point, but overall given + # minimal impact, it is definite low hanging fruit. except exception.NodeLocked: LOG.info("During sync_power_state, node %(node)s was " "already locked by another process. 
Skip.", @@ -1513,6 +1528,7 @@ class ConductorManager(base_manager.BaseConductorManager): # regular power state checking, maintenance is still a required # condition. filters={'maintenance': True, 'fault': faults.POWER_FAILURE}, + node_count_metric_name='ConductorManager.PowerSyncRecoveryNodeCount', ) def _power_failure_recovery(self, task, context): """Periodic task to check power states for nodes in maintenance. @@ -1855,6 +1871,7 @@ class ConductorManager(base_manager.BaseConductorManager): predicate=lambda n, m: n.conductor_affinity != m.conductor.id, limit=lambda: CONF.conductor.periodic_max_workers, shared_task=False, + node_count_metric_name='ConductorManager.SyncLocalStateNodeCount', ) def _sync_local_state(self, task, context): """Perform any actions necessary to sync local state. @@ -2640,14 +2657,63 @@ class ConductorManager(base_manager.BaseConductorManager): # Yield on every iteration eventlet.sleep(0) + def _sensors_conductor(self, context): + """Called to collect and send metrics "sensors" for the conductor.""" + # populate the message which will be sent to ceilometer + # or other data consumer + message = {'message_id': uuidutils.generate_uuid(), + 'timestamp': datetime.datetime.utcnow(), + 'hostname': self.host} + + try: + ev_type = 'ironic.metrics' + message['event_type'] = ev_type + '.update' + sensors_data = METRICS.get_metrics_data() + except AttributeError: + # TODO(TheJulia): Remove this at some point, but right now + # don't inherently break on version mismatches when people + # disregard requriements. + LOG.warning( + 'get_sensors_data has been configured to collect ' + 'conductor metrics, however the installed ironic-lib ' + 'library lacks the functionality. Please update ' + 'ironic-lib to a minimum of version 5.4.0.') + except Exception as e: + LOG.exception( + "An unknown error occured while attempting to collect " + "sensor data from within the conductor. 
Error: %(error)s", + {'error': e}) + else: + message['payload'] = ( + self._filter_out_unsupported_types(sensors_data)) + if message['payload']: + self.sensors_notifier.info( + context, ev_type, message) + @METRICS.timer('ConductorManager._send_sensor_data') - @periodics.periodic(spacing=CONF.conductor.send_sensor_data_interval, - enabled=CONF.conductor.send_sensor_data) + @periodics.periodic(spacing=CONF.sensor_data.interval, + enabled=CONF.sensor_data.send_sensor_data) def _send_sensor_data(self, context): """Periodically collects and transmits sensor data notifications.""" + if CONF.sensor_data.enable_for_conductor: + if CONF.sensor_data.workers == 1: + # Directly call the sensors_conductor when only one + # worker is permitted, so we collect data serially + # instead. + self._sensors_conductor(context) + else: + # Also, do not apply the general threshold limit to + # the self collection of "sensor" data from the conductor, + # as were not launching external processes, we're just reading + # from an internal data structure, if we can. + self._spawn_worker(self._sensors_conductor, context) + if not CONF.sensor_data.enable_for_nodes: + # NOTE(TheJulia): If node sensor data is not required, then + # skip the rest of this method. 
+ return filters = {} - if not CONF.conductor.send_sensor_data_for_undeployed_nodes: + if not CONF.sensor_data.enable_for_undeployed_nodes: filters['provision_state'] = states.ACTIVE nodes = queue.Queue() @@ -2655,7 +2721,7 @@ class ConductorManager(base_manager.BaseConductorManager): filters=filters): nodes.put_nowait(node_info) - number_of_threads = min(CONF.conductor.send_sensor_data_workers, + number_of_threads = min(CONF.sensor_data.workers, nodes.qsize()) futures = [] for thread_number in range(number_of_threads): @@ -2671,7 +2737,7 @@ class ConductorManager(base_manager.BaseConductorManager): break done, not_done = waiters.wait_for_all( - futures, timeout=CONF.conductor.send_sensor_data_wait_timeout) + futures, timeout=CONF.sensor_data.wait_timeout) if not_done: LOG.warning("%d workers for send sensors data did not complete", len(not_done)) @@ -2680,13 +2746,14 @@ class ConductorManager(base_manager.BaseConductorManager): """Filters out sensor data types that aren't specified in the config. Removes sensor data types that aren't specified in - CONF.conductor.send_sensor_data_types. + CONF.sensor_data.data_types. 
:param sensors_data: dict containing sensor types and the associated data :returns: dict with unsupported sensor types removed """ - allowed = set(x.lower() for x in CONF.conductor.send_sensor_data_types) + allowed = set(x.lower() for x in + CONF.sensor_data.data_types) if 'all' in allowed: return sensors_data diff --git a/ironic/conductor/periodics.py b/ironic/conductor/periodics.py index 70bc7bc93..b9c8f8844 100644 --- a/ironic/conductor/periodics.py +++ b/ironic/conductor/periodics.py @@ -18,6 +18,7 @@ import inspect import eventlet from futurist import periodics +from ironic_lib import metrics_utils from oslo_log import log from ironic.common import exception @@ -29,6 +30,9 @@ from ironic.drivers import base as driver_base LOG = log.getLogger(__name__) +METRICS = metrics_utils.get_metrics_logger(__name__) + + def periodic(spacing, enabled=True, **kwargs): """A decorator to define a periodic task. @@ -46,7 +50,7 @@ class Stop(Exception): def node_periodic(purpose, spacing, enabled=True, filters=None, predicate=None, predicate_extra_fields=(), limit=None, - shared_task=True): + shared_task=True, node_count_metric_name=None): """A decorator to define a periodic task to act on nodes. Defines a periodic task that fetches the list of nodes mapped to the @@ -84,6 +88,9 @@ def node_periodic(purpose, spacing, enabled=True, filters=None, iteration to determine the limit. :param shared_task: if ``True``, the task will have a shared lock. It is recommended to start with a shared lock and upgrade it only if needed. + :param node_count_metric_name: A string value to identify a metric + representing the count of matching nodes to be recorded upon the + completion of the periodic. 
""" node_type = collections.namedtuple( 'Node', @@ -116,10 +123,11 @@ def node_periodic(purpose, spacing, enabled=True, filters=None, else: local_limit = limit assert local_limit is None or local_limit > 0 - + node_count = 0 nodes = manager.iter_nodes(filters=filters, fields=predicate_extra_fields) for (node_uuid, *other) in nodes: + node_count += 1 if predicate is not None: node = node_type(node_uuid, *other) if accepts_manager: @@ -158,6 +166,11 @@ def node_periodic(purpose, spacing, enabled=True, filters=None, local_limit -= 1 if not local_limit: return + if node_count_metric_name: + # Send post-run metrics. + METRICS.send_gauge( + node_count_metric_name, + node_count) return wrapper diff --git a/ironic/conductor/task_manager.py b/ironic/conductor/task_manager.py index 509c9ce92..922e74cf6 100644 --- a/ironic/conductor/task_manager.py +++ b/ironic/conductor/task_manager.py @@ -527,7 +527,8 @@ class TaskManager(object): self.release_resources() def process_event(self, event, callback=None, call_args=None, - call_kwargs=None, err_handler=None, target_state=None): + call_kwargs=None, err_handler=None, target_state=None, + last_error=None): """Process the given event for the task's current state. :param event: the name of the event to process @@ -540,6 +541,8 @@ class TaskManager(object): prev_target_state) :param target_state: if specified, the target provision state for the node. Otherwise, use the target state from the fsm + :param last_error: last error to set on the node together with + the state transition. 
:raises: InvalidState if the event is not allowed by the associated state machine """ @@ -572,13 +575,15 @@ class TaskManager(object): # set up the async worker if callback: - # clear the error if we're going to start work in a callback - self.node.last_error = None + # update the error if we're going to start work in a callback + self.node.last_error = last_error if call_args is None: call_args = () if call_kwargs is None: call_kwargs = {} self.spawn_after(callback, *call_args, **call_kwargs) + elif last_error is not None: + self.node.last_error = last_error # publish the state transition by saving the Node self.node.save() diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py index c107f076f..cdf3a99ee 100644 --- a/ironic/conductor/utils.py +++ b/ironic/conductor/utils.py @@ -302,9 +302,11 @@ def node_power_action(task, new_state, timeout=None): # Set the target_power_state and clear any last_error, if we're # starting a new operation. This will expose to other processes - # and clients that work is in progress. - node['target_power_state'] = target_state - node['last_error'] = None + # and clients that work is in progress. Keep the last_error intact + # if the power action happens as a result of a failure. 
+ node.target_power_state = target_state + if node.provision_state not in states.FAILURE_STATES: + node.last_error = None node.timestamp_driver_internal_info('last_power_state_change') # NOTE(dtantsur): wipe token on shutting down, otherwise a reboot in # fast-track (or an accidentally booted agent) will cause subsequent diff --git a/ironic/conf/__init__.py b/ironic/conf/__init__.py index c1a893181..648395362 100644 --- a/ironic/conf/__init__.py +++ b/ironic/conf/__init__.py @@ -29,6 +29,7 @@ from ironic.conf import deploy from ironic.conf import dhcp from ironic.conf import dnsmasq from ironic.conf import drac +from ironic.conf import fake from ironic.conf import glance from ironic.conf import healthcheck from ironic.conf import ibmc @@ -44,6 +45,7 @@ from ironic.conf import neutron from ironic.conf import nova from ironic.conf import pxe from ironic.conf import redfish +from ironic.conf import sensor_data from ironic.conf import service_catalog from ironic.conf import snmp from ironic.conf import swift @@ -65,6 +67,7 @@ deploy.register_opts(CONF) drac.register_opts(CONF) dhcp.register_opts(CONF) dnsmasq.register_opts(CONF) +fake.register_opts(CONF) glance.register_opts(CONF) healthcheck.register_opts(CONF) ibmc.register_opts(CONF) @@ -80,6 +83,7 @@ neutron.register_opts(CONF) nova.register_opts(CONF) pxe.register_opts(CONF) redfish.register_opts(CONF) +sensor_data.register_opts(CONF) service_catalog.register_opts(CONF) snmp.register_opts(CONF) swift.register_opts(CONF) diff --git a/ironic/conf/conductor.py b/ironic/conf/conductor.py index 2161b9434..653e30f56 100644 --- a/ironic/conf/conductor.py +++ b/ironic/conf/conductor.py @@ -97,41 +97,6 @@ opts = [ cfg.IntOpt('node_locked_retry_interval', default=1, help=_('Seconds to sleep between node lock attempts.')), - cfg.BoolOpt('send_sensor_data', - default=False, - help=_('Enable sending sensor data message via the ' - 'notification bus')), - cfg.IntOpt('send_sensor_data_interval', - default=600, - min=1, - 
help=_('Seconds between conductor sending sensor data message ' - 'to ceilometer via the notification bus.')), - cfg.IntOpt('send_sensor_data_workers', - default=4, min=1, - help=_('The maximum number of workers that can be started ' - 'simultaneously for send data from sensors periodic ' - 'task.')), - cfg.IntOpt('send_sensor_data_wait_timeout', - default=300, - help=_('The time in seconds to wait for send sensors data ' - 'periodic task to be finished before allowing periodic ' - 'call to happen again. Should be less than ' - 'send_sensor_data_interval value.')), - cfg.ListOpt('send_sensor_data_types', - default=['ALL'], - help=_('List of comma separated meter types which need to be' - ' sent to Ceilometer. The default value, "ALL", is a ' - 'special value meaning send all the sensor data.')), - cfg.BoolOpt('send_sensor_data_for_undeployed_nodes', - default=False, - help=_('The default for sensor data collection is to only ' - 'collect data for machines that are deployed, however ' - 'operators may desire to know if there are failures ' - 'in hardware that is not presently in use. ' - 'When set to true, the conductor will collect sensor ' - 'information from all nodes when sensor data ' - 'collection is enabled via the send_sensor_data ' - 'setting.')), cfg.IntOpt('sync_local_state_interval', default=180, help=_('When conductors join or leave the cluster, existing ' diff --git a/ironic/conf/fake.py b/ironic/conf/fake.py new file mode 100644 index 000000000..8f6d75ee3 --- /dev/null +++ b/ironic/conf/fake.py @@ -0,0 +1,85 @@ +# +# Copyright 2022 Red Hat, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from oslo_config import cfg + +from ironic.common.i18n import _ + +opts = [ + cfg.StrOpt('power_delay', + default='0', + help=_('Delay in seconds for operations with the fake ' + 'power driver. Two comma-delimited values will ' + 'result in a delay with a triangular random ' + 'distribution, weighted on the first value.')), + cfg.StrOpt('boot_delay', + default='0', + help=_('Delay in seconds for operations with the fake ' + 'boot driver. Two comma-delimited values will ' + 'result in a delay with a triangular random ' + 'distribution, weighted on the first value.')), + cfg.StrOpt('deploy_delay', + default='0', + help=_('Delay in seconds for operations with the fake ' + 'deploy driver. Two comma-delimited values will ' + 'result in a delay with a triangular random ' + 'distribution, weighted on the first value.')), + cfg.StrOpt('vendor_delay', + default='0', + help=_('Delay in seconds for operations with the fake ' + 'vendor driver. Two comma-delimited values will ' + 'result in a delay with a triangular random ' + 'distribution, weighted on the first value.')), + cfg.StrOpt('management_delay', + default='0', + help=_('Delay in seconds for operations with the fake ' + 'management driver. Two comma-delimited values will ' + 'result in a delay with a triangular random ' + 'distribution, weighted on the first value.')), + cfg.StrOpt('inspect_delay', + default='0', + help=_('Delay in seconds for operations with the fake ' + 'inspect driver. 
Two comma-delimited values will ' + 'result in a delay with a triangular random ' + 'distribution, weighted on the first value.')), + cfg.StrOpt('raid_delay', + default='0', + help=_('Delay in seconds for operations with the fake ' + 'raid driver. Two comma-delimited values will ' + 'result in a delay with a triangular random ' + 'distribution, weighted on the first value.')), + cfg.StrOpt('bios_delay', + default='0', + help=_('Delay in seconds for operations with the fake ' + 'bios driver. Two comma-delimited values will ' + 'result in a delay with a triangular random ' + 'distribution, weighted on the first value.')), + cfg.StrOpt('storage_delay', + default='0', + help=_('Delay in seconds for operations with the fake ' + 'storage driver. Two comma-delimited values will ' + 'result in a delay with a triangular random ' + 'distribution, weighted on the first value.')), + cfg.StrOpt('rescue_delay', + default='0', + help=_('Delay in seconds for operations with the fake ' + 'rescue driver. Two comma-delimited values will ' + 'result in a delay with a triangular random ' + 'distribution, weighted on the first value.')), +] + + +def register_opts(conf): + conf.register_opts(opts, group='fake') diff --git a/ironic/conf/opts.py b/ironic/conf/opts.py index fd2e51534..a7ebcfb30 100644 --- a/ironic/conf/opts.py +++ b/ironic/conf/opts.py @@ -43,6 +43,7 @@ _opts = [ ('nova', ironic.conf.nova.list_opts()), ('pxe', ironic.conf.pxe.opts), ('redfish', ironic.conf.redfish.opts), + ('sensor_data', ironic.conf.sensor_data.opts), ('service_catalog', ironic.conf.service_catalog.list_opts()), ('snmp', ironic.conf.snmp.opts), ('swift', ironic.conf.swift.list_opts()), @@ -89,5 +90,8 @@ def update_opt_defaults(): 'openstack=WARNING', # Policy logging is not necessarily useless, but very verbose 'oslo_policy=WARNING', + # Concurrency lock logging is not bad, but exceptionally noisy + # and typically not needed in debugging Ironic itself. 
+ 'oslo_concurrency.lockutils=WARNING', ] ) diff --git a/ironic/conf/sensor_data.py b/ironic/conf/sensor_data.py new file mode 100644 index 000000000..8527113a6 --- /dev/null +++ b/ironic/conf/sensor_data.py @@ -0,0 +1,89 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from oslo_config import cfg + +from ironic.common.i18n import _ + +opts = [ + cfg.BoolOpt('send_sensor_data', + default=False, + deprecated_group='conductor', + deprecated_name='send_sensor_data', + help=_('Enable sending sensor data message via the ' + 'notification bus.')), + cfg.IntOpt('interval', + default=600, + min=1, + deprecated_group='conductor', + deprecated_name='send_sensor_data_interval', + help=_('Seconds between conductor sending sensor data message ' + 'via the notification bus. 
This was originally for ' + 'consumption via ceilometer, but the data may also ' + 'be consumed via a plugin like ' + 'ironic-prometheus-exporter or any other message bus ' + 'data collector.')), + cfg.IntOpt('workers', + default=4, min=1, + deprecated_group='conductor', + deprecated_name='send_sensor_data_workers', + help=_('The maximum number of workers that can be started ' + 'simultaneously for send data from sensors periodic ' + 'task.')), + cfg.IntOpt('wait_timeout', + default=300, + deprecated_group='conductor', + deprecated_name='send_sensor_data_wait_timeout', + help=_('The time in seconds to wait for send sensors data ' + 'periodic task to be finished before allowing periodic ' + 'call to happen again. Should be less than ' + 'send_sensor_data_interval value.')), + cfg.ListOpt('data_types', + default=['ALL'], + deprecated_group='conductor', + deprecated_name='send_sensor_data_types', + help=_('List of comma separated meter types which need to be ' + 'sent to Ceilometer. The default value, "ALL", is a ' + 'special value meaning send all the sensor data. ' + 'This setting only applies to baremetal sensor data ' + 'being processed through the conductor.')), + cfg.BoolOpt('enable_for_undeployed_nodes', + default=False, + deprecated_group='conductor', + deprecated_name='send_sensor_data_for_undeployed_nodes', + help=_('The default for sensor data collection is to only ' + 'collect data for machines that are deployed, however ' + 'operators may desire to know if there are failures ' + 'in hardware that is not presently in use. 
' + 'When set to true, the conductor will collect sensor ' + 'information from all nodes when sensor data ' + 'collection is enabled via the send_sensor_data ' + 'setting.')), + cfg.BoolOpt('enable_for_conductor', + default=True, + help=_('If to include sensor metric data for the Conductor ' + 'process itself in the message payload for sensor ' + 'data which allows operators to gather instance ' + 'counts of actions and states to better manage ' + 'the deployment.')), + cfg.BoolOpt('enable_for_nodes', + default=True, + help=_('If to transmit any sensor data for any nodes under ' + 'this conductor\'s management. This option superceeds ' + 'the ``send_sensor_data_for_undeployed_nodes`` ' + 'setting.')), +] + + +def register_opts(conf): + conf.register_opts(opts, group='sensor_data') diff --git a/ironic/db/sqlalchemy/api.py b/ironic/db/sqlalchemy/api.py index 9202aab09..93a211fc3 100644 --- a/ironic/db/sqlalchemy/api.py +++ b/ironic/db/sqlalchemy/api.py @@ -904,11 +904,13 @@ class Connection(api.Connection): if values['provision_state'] == states.INSPECTING: values['inspection_started_at'] = timeutils.utcnow() values['inspection_finished_at'] = None - elif (ref.provision_state == states.INSPECTING + elif ((ref.provision_state == states.INSPECTING + or ref.provision_state == states.INSPECTWAIT) and values['provision_state'] == states.MANAGEABLE): values['inspection_finished_at'] = timeutils.utcnow() values['inspection_started_at'] = None - elif (ref.provision_state == states.INSPECTING + elif ((ref.provision_state == states.INSPECTING + or ref.provision_state == states.INSPECTWAIT) and values['provision_state'] == states.INSPECTFAIL): values['inspection_started_at'] = None @@ -1832,6 +1834,9 @@ class Connection(api.Connection): max_to_migrate = max_count or total_to_migrate for model in sql_models: + use_node_id = False + if (not hasattr(model, 'id') and hasattr(model, 'node_id')): + use_node_id = True version = mapping[model.__name__][0] num_migrated = 0 with 
_session_for_write() as session: @@ -1845,13 +1850,27 @@ class Connection(api.Connection): # max_to_migrate objects. ids = [] for obj in query.slice(0, max_to_migrate): - ids.append(obj['id']) - num_migrated = ( - session.query(model). - filter(sql.and_(model.id.in_(ids), - model.version != version)). - update({model.version: version}, - synchronize_session=False)) + if not use_node_id: + ids.append(obj['id']) + else: + # BIOSSettings, NodeTrait, NodeTag do not have id + # columns, fallback to node_id as they both have + # it. + ids.append(obj['node_id']) + if not use_node_id: + num_migrated = ( + session.query(model). + filter(sql.and_(model.id.in_(ids), + model.version != version)). + update({model.version: version}, + synchronize_session=False)) + else: + num_migrated = ( + session.query(model). + filter(sql.and_(model.node_id.in_(ids), + model.version != version)). + update({model.version: version}, + synchronize_session=False)) else: num_migrated = ( session.query(model). diff --git a/ironic/drivers/modules/console_utils.py b/ironic/drivers/modules/console_utils.py index 6e08b6712..c5e9e857a 100644 --- a/ironic/drivers/modules/console_utils.py +++ b/ironic/drivers/modules/console_utils.py @@ -90,7 +90,7 @@ def _get_console_pid(node_uuid): with open(pid_path, 'r') as f: pid_str = f.readline() return int(pid_str) - except (IOError, ValueError): + except (IOError, ValueError, FileNotFoundError): raise exception.NoConsolePid(pid_path=pid_path) diff --git a/ironic/drivers/modules/fake.py b/ironic/drivers/modules/fake.py index dffd9065d..0a26efb4c 100644 --- a/ironic/drivers/modules/fake.py +++ b/ironic/drivers/modules/fake.py @@ -24,6 +24,9 @@ functionality between a power interface and a deploy interface, when both rely on separate vendor_passthru methods. 
""" +import random +import time + from oslo_log import log from ironic.common import boot_devices @@ -32,6 +35,7 @@ from ironic.common import exception from ironic.common.i18n import _ from ironic.common import indicator_states from ironic.common import states +from ironic.conf import CONF from ironic.drivers import base from ironic import objects @@ -39,6 +43,34 @@ from ironic import objects LOG = log.getLogger(__name__) +def parse_sleep_range(sleep_range): + if not sleep_range: + return 0, 0 + + sleep_split = sleep_range.split(',') + if len(sleep_split) == 1: + a = sleep_split[0] + b = sleep_split[0] + else: + a = sleep_split[0] + b = sleep_split[1] + return int(a), int(b) + + +def sleep(sleep_range): + earliest, latest = parse_sleep_range(sleep_range) + if earliest == 0 and latest == 0: + # no sleep + return + if earliest == latest: + # constant sleep + sleep = earliest + else: + # triangular random sleep, weighted towards the earliest + sleep = random.triangular(earliest, latest, earliest) + time.sleep(sleep) + + class FakePower(base.PowerInterface): """Example implementation of a simple power interface.""" @@ -49,12 +81,15 @@ class FakePower(base.PowerInterface): pass def get_power_state(self, task): + sleep(CONF.fake.power_delay) return task.node.power_state def reboot(self, task, timeout=None): + sleep(CONF.fake.power_delay) pass def set_power_state(self, task, power_state, timeout=None): + sleep(CONF.fake.power_delay) if power_state not in [states.POWER_ON, states.POWER_OFF, states.SOFT_REBOOT, states.SOFT_POWER_OFF]: raise exception.InvalidParameterValue( @@ -81,15 +116,19 @@ class FakeBoot(base.BootInterface): pass def prepare_ramdisk(self, task, ramdisk_params, mode='deploy'): + sleep(CONF.fake.boot_delay) pass def clean_up_ramdisk(self, task, mode='deploy'): + sleep(CONF.fake.boot_delay) pass def prepare_instance(self, task): + sleep(CONF.fake.boot_delay) pass def clean_up_instance(self, task): + sleep(CONF.fake.boot_delay) pass @@ -108,18 +147,23 @@ 
class FakeDeploy(base.DeployInterface): @base.deploy_step(priority=100) def deploy(self, task): + sleep(CONF.fake.deploy_delay) return None def tear_down(self, task): + sleep(CONF.fake.deploy_delay) return states.DELETED def prepare(self, task): + sleep(CONF.fake.deploy_delay) pass def clean_up(self, task): + sleep(CONF.fake.deploy_delay) pass def take_over(self, task): + sleep(CONF.fake.deploy_delay) pass @@ -140,6 +184,7 @@ class FakeVendorA(base.VendorInterface): @base.passthru(['POST'], description=_("Test if the value of bar is baz")) def first_method(self, task, http_method, bar): + sleep(CONF.fake.vendor_delay) return True if bar == 'baz' else False @@ -161,16 +206,19 @@ class FakeVendorB(base.VendorInterface): @base.passthru(['POST'], description=_("Test if the value of bar is kazoo")) def second_method(self, task, http_method, bar): + sleep(CONF.fake.vendor_delay) return True if bar == 'kazoo' else False @base.passthru(['POST'], async_call=False, description=_("Test if the value of bar is meow")) def third_method_sync(self, task, http_method, bar): + sleep(CONF.fake.vendor_delay) return True if bar == 'meow' else False @base.passthru(['POST'], require_exclusive_lock=False, description=_("Test if the value of bar is woof")) def fourth_method_shared_lock(self, task, http_method, bar): + sleep(CONF.fake.vendor_delay) return True if bar == 'woof' else False @@ -211,17 +259,21 @@ class FakeManagement(base.ManagementInterface): return [boot_devices.PXE] def set_boot_device(self, task, device, persistent=False): + sleep(CONF.fake.management_delay) if device not in self.get_supported_boot_devices(task): raise exception.InvalidParameterValue(_( "Invalid boot device %s specified.") % device) def get_boot_device(self, task): + sleep(CONF.fake.management_delay) return {'boot_device': boot_devices.PXE, 'persistent': False} def get_sensors_data(self, task): + sleep(CONF.fake.management_delay) return {} def get_supported_indicators(self, task, component=None): + 
sleep(CONF.fake.management_delay) indicators = { components.CHASSIS: { 'led-0': { @@ -248,6 +300,7 @@ class FakeManagement(base.ManagementInterface): if not component or component == c} def get_indicator_state(self, task, component, indicator): + sleep(CONF.fake.management_delay) indicators = self.get_supported_indicators(task) if component not in indicators: raise exception.InvalidParameterValue(_( @@ -271,6 +324,7 @@ class FakeInspect(base.InspectInterface): pass def inspect_hardware(self, task): + sleep(CONF.fake.inspect_delay) return states.MANAGEABLE @@ -282,9 +336,11 @@ class FakeRAID(base.RAIDInterface): def create_configuration(self, task, create_root_volume=True, create_nonroot_volumes=True): + sleep(CONF.fake.raid_delay) pass def delete_configuration(self, task): + sleep(CONF.fake.raid_delay) pass @@ -302,6 +358,7 @@ class FakeBIOS(base.BIOSInterface): 'to contain a dictionary with name/value pairs'), 'required': True}}) def apply_configuration(self, task, settings): + sleep(CONF.fake.bios_delay) # Note: the implementation of apply_configuration in fake interface # is just for testing purpose, for real driver implementation, please # refer to develop doc at https://docs.openstack.org/ironic/latest/ @@ -328,6 +385,7 @@ class FakeBIOS(base.BIOSInterface): @base.clean_step(priority=0) def factory_reset(self, task): + sleep(CONF.fake.bios_delay) # Note: the implementation of factory_reset in fake interface is # just for testing purpose, for real driver implementation, please # refer to develop doc at https://docs.openstack.org/ironic/latest/ @@ -340,6 +398,7 @@ class FakeBIOS(base.BIOSInterface): @base.clean_step(priority=0) def cache_bios_settings(self, task): + sleep(CONF.fake.bios_delay) # Note: the implementation of cache_bios_settings in fake interface # is just for testing purpose, for real driver implementation, please # refer to develop doc at https://docs.openstack.org/ironic/latest/ @@ -357,9 +416,11 @@ class FakeStorage(base.StorageInterface): 
return {} def attach_volumes(self, task): + sleep(CONF.fake.storage_delay) pass def detach_volumes(self, task): + sleep(CONF.fake.storage_delay) pass def should_write_image(self, task): @@ -376,7 +437,9 @@ class FakeRescue(base.RescueInterface): pass def rescue(self, task): + sleep(CONF.fake.rescue_delay) return states.RESCUE def unrescue(self, task): + sleep(CONF.fake.rescue_delay) return states.ACTIVE diff --git a/ironic/tests/unit/common/test_rpc_service.py b/ironic/tests/unit/common/test_rpc_service.py index 8483bfb22..09446ecf8 100644 --- a/ironic/tests/unit/common/test_rpc_service.py +++ b/ironic/tests/unit/common/test_rpc_service.py @@ -10,24 +10,28 @@ # License for the specific language governing permissions and limitations # under the License. +import datetime +import time from unittest import mock from oslo_config import cfg import oslo_messaging from oslo_service import service as base_service +from oslo_utils import timeutils from ironic.common import context from ironic.common import rpc from ironic.common import rpc_service from ironic.conductor import manager from ironic.objects import base as objects_base -from ironic.tests import base +from ironic.tests.unit.db import base as db_base +from ironic.tests.unit.db import utils as db_utils CONF = cfg.CONF @mock.patch.object(base_service.Service, '__init__', lambda *_, **__: None) -class TestRPCService(base.TestCase): +class TestRPCService(db_base.DbTestCase): def setUp(self): super(TestRPCService, self).setUp() @@ -35,6 +39,7 @@ class TestRPCService(base.TestCase): mgr_module = "ironic.conductor.manager" mgr_class = "ConductorManager" self.rpc_svc = rpc_service.RPCService(host, mgr_module, mgr_class) + self.rpc_svc.manager.dbapi = self.dbapi @mock.patch.object(manager.ConductorManager, 'prepare_host', autospec=True) @mock.patch.object(oslo_messaging, 'Target', autospec=True) @@ -108,3 +113,75 @@ class TestRPCService(base.TestCase): self.assertFalse(self.rpc_svc._started) self.assertIn("boom", 
self.rpc_svc._failure) self.assertRaises(SystemExit, self.rpc_svc.wait_for_start) + + @mock.patch.object(timeutils, 'utcnow', autospec=True) + @mock.patch.object(time, 'sleep', autospec=True) + def test_stop_instant(self, mock_sleep, mock_utcnow): + # del_host returns instantly + mock_utcnow.return_value = datetime.datetime(2023, 2, 2, 21, 10, 0) + conductor1 = db_utils.get_test_conductor(hostname='fake_host') + with mock.patch.object(self.dbapi, 'get_online_conductors', + autospec=True) as mock_cond_list: + mock_cond_list.return_value = [conductor1] + self.rpc_svc.stop() + + # single conductor so exit immediately without waiting + mock_sleep.assert_not_called() + + @mock.patch.object(timeutils, 'utcnow', autospec=True) + @mock.patch.object(time, 'sleep', autospec=True) + def test_stop_after_full_reset_interval(self, mock_sleep, mock_utcnow): + # del_host returns instantly + mock_utcnow.return_value = datetime.datetime(2023, 2, 2, 21, 10, 0) + conductor1 = db_utils.get_test_conductor(hostname='fake_host') + conductor2 = db_utils.get_test_conductor(hostname='other_fake_host') + with mock.patch.object(self.dbapi, 'get_online_conductors', + autospec=True) as mock_cond_list: + # multiple conductors, so wait for hash_ring_reset_interval + mock_cond_list.return_value = [conductor1, conductor2] + self.rpc_svc.stop() + + # wait the total CONF.hash_ring_reset_interval 15 seconds + mock_sleep.assert_has_calls([mock.call(15)]) + + @mock.patch.object(timeutils, 'utcnow', autospec=True) + @mock.patch.object(time, 'sleep', autospec=True) + def test_stop_after_remaining_interval(self, mock_sleep, mock_utcnow): + mock_utcnow.return_value = datetime.datetime(2023, 2, 2, 21, 10, 0) + conductor1 = db_utils.get_test_conductor(hostname='fake_host') + conductor2 = db_utils.get_test_conductor(hostname='other_fake_host') + + # del_host returns after 5 seconds + mock_utcnow.side_effect = [ + datetime.datetime(2023, 2, 2, 21, 10, 0), + datetime.datetime(2023, 2, 2, 21, 10, 5), + ] + with 
mock.patch.object(self.dbapi, 'get_online_conductors', + autospec=True) as mock_cond_list: + # multiple conductors, so wait for hash_ring_reset_interval + mock_cond_list.return_value = [conductor1, conductor2] + self.rpc_svc.stop() + + # wait the remaining 10 seconds + mock_sleep.assert_has_calls([mock.call(10)]) + + @mock.patch.object(timeutils, 'utcnow', autospec=True) + @mock.patch.object(time, 'sleep', autospec=True) + def test_stop_slow(self, mock_sleep, mock_utcnow): + mock_utcnow.return_value = datetime.datetime(2023, 2, 2, 21, 10, 0) + conductor1 = db_utils.get_test_conductor(hostname='fake_host') + conductor2 = db_utils.get_test_conductor(hostname='other_fake_host') + + # del_host returns after 16 seconds + mock_utcnow.side_effect = [ + datetime.datetime(2023, 2, 2, 21, 10, 0), + datetime.datetime(2023, 2, 2, 21, 10, 16), + ] + with mock.patch.object(self.dbapi, 'get_online_conductors', + autospec=True) as mock_cond_list: + # multiple conductors, so wait for hash_ring_reset_interval + mock_cond_list.return_value = [conductor1, conductor2] + self.rpc_svc.stop() + + # no wait required, CONF.hash_ring_reset_interval already exceeded + mock_sleep.assert_not_called() diff --git a/ironic/tests/unit/conductor/test_cleaning.py b/ironic/tests/unit/conductor/test_cleaning.py index a4c3d57b6..34e805deb 100644 --- a/ironic/tests/unit/conductor/test_cleaning.py +++ b/ironic/tests/unit/conductor/test_cleaning.py @@ -1138,12 +1138,12 @@ class DoNodeCleanTestCase(db_base.DbTestCase): class DoNodeCleanAbortTestCase(db_base.DbTestCase): @mock.patch.object(fake.FakeDeploy, 'tear_down_cleaning', autospec=True) - def _test__do_node_clean_abort(self, step_name, tear_mock): + def _test_do_node_clean_abort(self, clean_step, tear_mock): node = obj_utils.create_test_node( self.context, driver='fake-hardware', - provision_state=states.CLEANFAIL, + provision_state=states.CLEANWAIT, target_provision_state=states.AVAILABLE, - clean_step={'step': 'foo', 'abortable': True}, + 
clean_step=clean_step, driver_internal_info={ 'agent_url': 'some url', 'agent_secret_token': 'token', @@ -1153,11 +1153,11 @@ class DoNodeCleanAbortTestCase(db_base.DbTestCase): 'skip_current_clean_step': True}) with task_manager.acquire(self.context, node.uuid) as task: - cleaning.do_node_clean_abort(task, step_name=step_name) + cleaning.do_node_clean_abort(task) self.assertIsNotNone(task.node.last_error) tear_mock.assert_called_once_with(task.driver.deploy, task) - if step_name: - self.assertIn(step_name, task.node.last_error) + if clean_step: + self.assertIn(clean_step['step'], task.node.last_error) # assert node's clean_step and metadata was cleaned up self.assertEqual({}, task.node.clean_step) self.assertNotIn('clean_step_index', @@ -1173,11 +1173,12 @@ class DoNodeCleanAbortTestCase(db_base.DbTestCase): self.assertNotIn('agent_secret_token', task.node.driver_internal_info) - def test__do_node_clean_abort(self): - self._test__do_node_clean_abort(None) + def test_do_node_clean_abort_early(self): + self._test_do_node_clean_abort(None) - def test__do_node_clean_abort_with_step_name(self): - self._test__do_node_clean_abort('foo') + def test_do_node_clean_abort_with_step(self): + self._test_do_node_clean_abort({'step': 'foo', 'interface': 'deploy', + 'abortable': True}) @mock.patch.object(fake.FakeDeploy, 'tear_down_cleaning', autospec=True) def test__do_node_clean_abort_tear_down_fail(self, tear_mock): diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py index ded80718d..6a6f7e08f 100644 --- a/ironic/tests/unit/conductor/test_manager.py +++ b/ironic/tests/unit/conductor/test_manager.py @@ -26,6 +26,7 @@ from unittest import mock import eventlet from futurist import waiters +from ironic_lib import metrics as ironic_metrics from oslo_config import cfg import oslo_messaging as messaging from oslo_utils import uuidutils @@ -2734,7 +2735,8 @@ class DoProvisioningActionTestCase(mgr_utils.ServiceSetUpMixin, # Node will be 
moved to tgt_prov_state after cleaning, not tested here self.assertEqual(states.CLEANFAIL, node.provision_state) self.assertEqual(tgt_prov_state, node.target_provision_state) - self.assertIsNone(node.last_error) + self.assertEqual('By request, the clean operation was aborted', + node.last_error) mock_spawn.assert_called_with( self.service, cleaning.do_node_clean_abort, mock.ANY) @@ -4273,7 +4275,8 @@ class SensorsTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): def test__filter_out_unsupported_types_all(self): self._start_service() - CONF.set_override('send_sensor_data_types', ['All'], group='conductor') + CONF.set_override('data_types', ['All'], + group='sensor_data') fake_sensors_data = {"t1": {'f1': 'v1'}, "t2": {'f1': 'v1'}} actual_result = ( self.service._filter_out_unsupported_types(fake_sensors_data)) @@ -4282,7 +4285,8 @@ class SensorsTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): def test__filter_out_unsupported_types_part(self): self._start_service() - CONF.set_override('send_sensor_data_types', ['t1'], group='conductor') + CONF.set_override('data_types', ['t1'], + group='sensor_data') fake_sensors_data = {"t1": {'f1': 'v1'}, "t2": {'f1': 'v1'}} actual_result = ( self.service._filter_out_unsupported_types(fake_sensors_data)) @@ -4291,7 +4295,8 @@ class SensorsTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): def test__filter_out_unsupported_types_non(self): self._start_service() - CONF.set_override('send_sensor_data_types', ['t3'], group='conductor') + CONF.set_override('data_types', ['t3'], + group='sensor_data') fake_sensors_data = {"t1": {'f1': 'v1'}, "t2": {'f1': 'v1'}} actual_result = ( self.service._filter_out_unsupported_types(fake_sensors_data)) @@ -4305,7 +4310,8 @@ class SensorsTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): for i in range(5): nodes.put_nowait(('fake_uuid-%d' % i, 'fake-hardware', '', None)) self._start_service() - CONF.set_override('send_sensor_data', True, group='conductor') + 
CONF.set_override('send_sensor_data', True, + group='sensor_data') task = acquire_mock.return_value.__enter__.return_value task.node.maintenance = False @@ -4334,7 +4340,8 @@ class SensorsTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): nodes.put_nowait(('fake_uuid', 'fake-hardware', '', None)) self._start_service() self.service._shutdown = True - CONF.set_override('send_sensor_data', True, group='conductor') + CONF.set_override('send_sensor_data', True, + group='sensor_data') self.service._sensors_nodes_task(self.context, nodes) acquire_mock.return_value.__enter__.assert_not_called() @@ -4343,7 +4350,8 @@ class SensorsTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): nodes = queue.Queue() nodes.put_nowait(('fake_uuid', 'fake-hardware', '', None)) - CONF.set_override('send_sensor_data', True, group='conductor') + CONF.set_override('send_sensor_data', True, + group='sensor_data') self._start_service() @@ -4361,7 +4369,7 @@ class SensorsTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): nodes = queue.Queue() nodes.put_nowait(('fake_uuid', 'fake-hardware', '', None)) self._start_service() - CONF.set_override('send_sensor_data', True, group='conductor') + CONF.set_override('send_sensor_data', True, group='sensor_data') task = acquire_mock.return_value.__enter__.return_value task.node.maintenance = True @@ -4384,10 +4392,10 @@ class SensorsTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): mock_spawn): self._start_service() - CONF.set_override('send_sensor_data', True, group='conductor') + CONF.set_override('send_sensor_data', True, group='sensor_data') # NOTE(galyna): do not wait for threads to be finished in unittests - CONF.set_override('send_sensor_data_wait_timeout', 0, - group='conductor') + CONF.set_override('wait_timeout', 0, + group='sensor_data') _mapped_to_this_conductor_mock.return_value = True get_nodeinfo_list_mock.return_value = [('fake_uuid', 'fake', None)] self.service._send_sensor_data(self.context) @@ -4395,6 
+4403,37 @@ class SensorsTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): self.service._sensors_nodes_task, self.context, mock.ANY) + @mock.patch.object(queue, 'Queue', autospec=True) + @mock.patch.object(manager.ConductorManager, '_sensors_conductor', + autospec=True) + @mock.patch.object(manager.ConductorManager, '_spawn_worker', + autospec=True) + @mock.patch.object(manager.ConductorManager, '_mapped_to_this_conductor', + autospec=True) + @mock.patch.object(dbapi.IMPL, 'get_nodeinfo_list', autospec=True) + def test___send_sensor_data_disabled( + self, get_nodeinfo_list_mock, + _mapped_to_this_conductor_mock, + mock_spawn, mock_sensors_conductor, + mock_queue): + self._start_service() + + CONF.set_override('send_sensor_data', True, group='sensor_data') + CONF.set_override('enable_for_nodes', False, + group='sensor_data') + CONF.set_override('enable_for_conductor', False, + group='sensor_data') + # NOTE(galyna): do not wait for threads to be finished in unittests + CONF.set_override('wait_timeout', 0, + group='sensor_data') + _mapped_to_this_conductor_mock.return_value = True + get_nodeinfo_list_mock.return_value = [('fake_uuid', 'fake', None)] + self.service._send_sensor_data(self.context) + mock_sensors_conductor.assert_not_called() + # NOTE(TheJulia): Can't use the spawn worker since it records other, + # unrelated calls. So, queue works well here. 
+ mock_queue.assert_not_called() + @mock.patch('ironic.conductor.manager.ConductorManager._spawn_worker', autospec=True) @mock.patch.object(manager.ConductorManager, '_mapped_to_this_conductor', @@ -4407,24 +4446,66 @@ class SensorsTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): mock_spawn.reset_mock() number_of_workers = 8 - CONF.set_override('send_sensor_data', True, group='conductor') - CONF.set_override('send_sensor_data_workers', number_of_workers, - group='conductor') + CONF.set_override('send_sensor_data', True, group='sensor_data') + CONF.set_override('workers', number_of_workers, + group='sensor_data') # NOTE(galyna): do not wait for threads to be finished in unittests - CONF.set_override('send_sensor_data_wait_timeout', 0, - group='conductor') + CONF.set_override('wait_timeout', 0, + group='sensor_data') _mapped_to_this_conductor_mock.return_value = True get_nodeinfo_list_mock.return_value = [('fake_uuid', 'fake', None)] * 20 self.service._send_sensor_data(self.context) - self.assertEqual(number_of_workers, + self.assertEqual(number_of_workers + 1, mock_spawn.call_count) # TODO(TheJulia): At some point, we should add a test to validate that # a modified filter to return all nodes actually works, although # the way the sensor tests are written, the list is all mocked. 
+ @mock.patch('ironic.conductor.manager.ConductorManager._spawn_worker', + autospec=True) + @mock.patch.object(manager.ConductorManager, '_mapped_to_this_conductor', + autospec=True) + @mock.patch.object(dbapi.IMPL, 'get_nodeinfo_list', autospec=True) + def test___send_sensor_data_one_worker( + self, get_nodeinfo_list_mock, _mapped_to_this_conductor_mock, + mock_spawn): + self._start_service() + mock_spawn.reset_mock() + + number_of_workers = 1 + CONF.set_override('send_sensor_data', True, group='sensor_data') + CONF.set_override('workers', number_of_workers, + group='sensor_data') + # NOTE(galyna): do not wait for threads to be finished in unittests + CONF.set_override('wait_timeout', 0, + group='sensor_data') + + _mapped_to_this_conductor_mock.return_value = True + get_nodeinfo_list_mock.return_value = [('fake_uuid', 'fake', + None)] * 20 + self.service._send_sensor_data(self.context) + self.assertEqual(number_of_workers, + mock_spawn.call_count) + + @mock.patch.object(messaging.Notifier, 'info', autospec=True) + @mock.patch.object(ironic_metrics.MetricLogger, + 'get_metrics_data', autospec=True) + def test__sensors_conductor(self, mock_get_metrics, mock_notifier): + metric = {'metric': 'data'} + mock_get_metrics.return_value = metric + self._start_service() + self.service._sensors_conductor(self.context) + self.assertEqual(mock_notifier.call_count, 1) + self.assertEqual('ironic.metrics', mock_notifier.call_args.args[2]) + metrics_dict = mock_notifier.call_args.args[3] + self.assertEqual(metrics_dict.get('event_type'), + 'ironic.metrics.update') + self.assertDictEqual(metrics_dict.get('payload'), + metric) + @mgr_utils.mock_record_keepalive class BootDeviceTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): diff --git a/ironic/tests/unit/conductor/test_utils.py b/ironic/tests/unit/conductor/test_utils.py index a424e5132..a29da21a7 100644 --- a/ironic/tests/unit/conductor/test_utils.py +++ b/ironic/tests/unit/conductor/test_utils.py @@ -196,7 +196,8 @@ 
class NodePowerActionTestCase(db_base.DbTestCase): node = obj_utils.create_test_node(self.context, uuid=uuidutils.generate_uuid(), driver='fake-hardware', - power_state=states.POWER_OFF) + power_state=states.POWER_OFF, + last_error='failed before') task = task_manager.TaskManager(self.context, node.uuid) get_power_mock.return_value = states.POWER_OFF @@ -209,6 +210,27 @@ class NodePowerActionTestCase(db_base.DbTestCase): self.assertIsNone(node['target_power_state']) self.assertIsNone(node['last_error']) + @mock.patch.object(fake.FakePower, 'get_power_state', autospec=True) + def test_node_power_action_keep_last_error(self, get_power_mock): + """Test node_power_action to keep last_error for failed states.""" + node = obj_utils.create_test_node(self.context, + uuid=uuidutils.generate_uuid(), + driver='fake-hardware', + power_state=states.POWER_OFF, + provision_state=states.CLEANFAIL, + last_error='failed before') + task = task_manager.TaskManager(self.context, node.uuid) + + get_power_mock.return_value = states.POWER_OFF + + conductor_utils.node_power_action(task, states.POWER_ON) + + node.refresh() + get_power_mock.assert_called_once_with(mock.ANY, mock.ANY) + self.assertEqual(states.POWER_ON, node['power_state']) + self.assertIsNone(node['target_power_state']) + self.assertEqual('failed before', node['last_error']) + @mock.patch('ironic.objects.node.NodeSetPowerStateNotification', autospec=True) @mock.patch.object(fake.FakePower, 'get_power_state', autospec=True) diff --git a/ironic/tests/unit/db/test_api.py b/ironic/tests/unit/db/test_api.py index 6142fdfae..2396b1253 100644 --- a/ironic/tests/unit/db/test_api.py +++ b/ironic/tests/unit/db/test_api.py @@ -226,6 +226,11 @@ class UpdateToLatestVersionsTestCase(base.DbTestCase): for i in range(0, num_nodes): node = utils.create_test_node(version=version, uuid=uuidutils.generate_uuid()) + # Create entries on the tables so we force field upgrades + utils.create_test_node_trait(node_id=node.id, trait='foo', + 
version='0.0') + utils.create_test_bios_setting(node_id=node.id, version='1.0') + nodes.append(node.uuid) for uuid in nodes: node = self.dbapi.get_node_by_uuid(uuid) @@ -238,10 +243,15 @@ class UpdateToLatestVersionsTestCase(base.DbTestCase): return nodes = self._create_nodes(5) + # Check/migrate 2, 10 remain. + self.assertEqual( + (10, 2), self.dbapi.update_to_latest_versions(self.context, 2)) + # Check/migrate 10, 8 migrated, 8 remain. self.assertEqual( - (5, 2), self.dbapi.update_to_latest_versions(self.context, 2)) + (8, 8), self.dbapi.update_to_latest_versions(self.context, 10)) + # Just make sure it is still 0, 0 in case more things are added. self.assertEqual( - (3, 3), self.dbapi.update_to_latest_versions(self.context, 10)) + (0, 0), self.dbapi.update_to_latest_versions(self.context, 10)) for uuid in nodes: node = self.dbapi.get_node_by_uuid(uuid) self.assertEqual(self.node_ver, node.version) @@ -250,10 +260,19 @@ class UpdateToLatestVersionsTestCase(base.DbTestCase): if self.node_version_same: # can't test if we don't have diff versions of the node return - - nodes = self._create_nodes(5) + vm_count = 5 + nodes = self._create_nodes(vm_count) + # NOTE(TheJulia): Under current testing, 5 node will result in 10 + # records implicitly needing to be migrated. 
+ migrate_count = vm_count * 2 + self.assertEqual( + (migrate_count, migrate_count), + self.dbapi.update_to_latest_versions(self.context, + migrate_count)) self.assertEqual( - (5, 5), self.dbapi.update_to_latest_versions(self.context, 5)) + (0, 0), self.dbapi.update_to_latest_versions(self.context, + migrate_count)) + for uuid in nodes: node = self.dbapi.get_node_by_uuid(uuid) self.assertEqual(self.node_ver, node.version) diff --git a/ironic/tests/unit/db/test_nodes.py b/ironic/tests/unit/db/test_nodes.py index f7e858f9b..e7053d6f5 100644 --- a/ironic/tests/unit/db/test_nodes.py +++ b/ironic/tests/unit/db/test_nodes.py @@ -884,6 +884,53 @@ class DbNodeTestCase(base.DbTestCase): timeutils.normalize_time(result)) self.assertIsNone(res['inspection_started_at']) + @mock.patch.object(timeutils, 'utcnow', autospec=True) + def test_update_node_inspection_finished_at_inspecting(self, mock_utcnow): + mocked_time = datetime.datetime(2000, 1, 1, 0, 0) + mock_utcnow.return_value = mocked_time + node = utils.create_test_node(uuid=uuidutils.generate_uuid(), + inspection_finished_at=mocked_time, + provision_state=states.INSPECTING) + res = self.dbapi.update_node(node.id, + {'provision_state': states.MANAGEABLE}) + result = res['inspection_finished_at'] + self.assertEqual(mocked_time, + timeutils.normalize_time(result)) + self.assertIsNone(res['inspection_started_at']) + + @mock.patch.object(timeutils, 'utcnow', autospec=True) + def test_update_node_inspection_finished_at_inspectwait(self, + mock_utcnow): + mocked_time = datetime.datetime(2000, 1, 1, 0, 0) + mock_utcnow.return_value = mocked_time + node = utils.create_test_node(uuid=uuidutils.generate_uuid(), + inspection_finished_at=mocked_time, + provision_state=states.INSPECTWAIT) + res = self.dbapi.update_node(node.id, + {'provision_state': states.MANAGEABLE}) + result = res['inspection_finished_at'] + self.assertEqual(mocked_time, + timeutils.normalize_time(result)) + self.assertIsNone(res['inspection_started_at']) + + def 
test_update_node_inspection_started_at_inspecting(self): + mocked_time = datetime.datetime(2000, 1, 1, 0, 0) + node = utils.create_test_node(uuid=uuidutils.generate_uuid(), + inspection_started_at=mocked_time, + provision_state=states.INSPECTING) + res = self.dbapi.update_node(node.id, + {'provision_state': states.INSPECTFAIL}) + self.assertIsNone(res['inspection_started_at']) + + def test_update_node_inspection_started_at_inspectwait(self): + mocked_time = datetime.datetime(2000, 1, 1, 0, 0) + node = utils.create_test_node(uuid=uuidutils.generate_uuid(), + inspection_started_at=mocked_time, + provision_state=states.INSPECTWAIT) + res = self.dbapi.update_node(node.id, + {'provision_state': states.INSPECTFAIL}) + self.assertIsNone(res['inspection_started_at']) + def test_reserve_node(self): node = utils.create_test_node() self.dbapi.set_node_tags(node.id, ['tag1', 'tag2']) diff --git a/ironic/tests/unit/drivers/test_fake_hardware.py b/ironic/tests/unit/drivers/test_fake_hardware.py index 70460a6a4..637f52bf9 100644 --- a/ironic/tests/unit/drivers/test_fake_hardware.py +++ b/ironic/tests/unit/drivers/test_fake_hardware.py @@ -17,6 +17,8 @@ """Test class for Fake driver.""" +import time +from unittest import mock from ironic.common import boot_devices from ironic.common import boot_modes @@ -26,6 +28,7 @@ from ironic.common import indicator_states from ironic.common import states from ironic.conductor import task_manager from ironic.drivers import base as driver_base +from ironic.drivers.modules import fake from ironic.tests.unit.db import base as db_base from ironic.tests.unit.db import utils as db_utils @@ -164,3 +167,29 @@ class FakeHardwareTestCase(db_base.DbTestCase): self.assertEqual({}, self.driver.inspect.get_properties()) self.driver.inspect.validate(self.task) self.driver.inspect.inspect_hardware(self.task) + + def test_parse_sleep_range(self): + self.assertEqual((0, 0), fake.parse_sleep_range('0')) + self.assertEqual((0, 0), fake.parse_sleep_range('')) + 
self.assertEqual((1, 1), fake.parse_sleep_range('1')) + self.assertEqual((1, 10), fake.parse_sleep_range('1,10')) + self.assertEqual((10, 20), fake.parse_sleep_range('10, 20')) + + @mock.patch.object(time, 'sleep', autospec=True) + def test_sleep_zero(self, mock_sleep): + fake.sleep("0") + mock_sleep.assert_not_called() + + @mock.patch.object(time, 'sleep', autospec=True) + def test_sleep_one(self, mock_sleep): + fake.sleep("1") + mock_sleep.assert_called_once_with(1) + + @mock.patch.object(time, 'sleep', autospec=True) + def test_sleep_range(self, mock_sleep): + for i in range(100): + fake.sleep("1,10") + for call in mock_sleep.call_args_list: + v = call[0][0] + self.assertGreaterEqual(v, 1) + self.assertLessEqual(v, 10) diff --git a/releasenotes/notes/cleaning-error-5c13c33c58404b97.yaml b/releasenotes/notes/cleaning-error-5c13c33c58404b97.yaml new file mode 100644 index 000000000..270278f1b --- /dev/null +++ b/releasenotes/notes/cleaning-error-5c13c33c58404b97.yaml @@ -0,0 +1,8 @@ +--- +fixes: + - | + When aborting cleaning, the ``last_error`` field is no longer initially + empty. It is now populated on the state transition to ``clean failed``. + - | + When cleaning or deployment fails, the ``last_error`` field is no longer + temporary set to ``None`` while the power off action is running. diff --git a/releasenotes/notes/conductor-metric-collector-support-1b8b8c71f9f59da4.yaml b/releasenotes/notes/conductor-metric-collector-support-1b8b8c71f9f59da4.yaml new file mode 100644 index 000000000..dfa3b0f89 --- /dev/null +++ b/releasenotes/notes/conductor-metric-collector-support-1b8b8c71f9f59da4.yaml @@ -0,0 +1,39 @@ +--- +features: + - | + Adds the ability for Ironic to send conductor process metrics + for monitoring. This requires the use of a new ``[metrics]backend`` + option value of ``collector``. This data was previously only available + through the use of statsd. This requires ``ironic-lib`` version ``5.4.0`` + or newer. 
This capability can be disabled using the + ``[sensor_data]enable_for_conductor`` option if set to False. + - | + Adds a ``[sensor_data]enable_for_nodes`` configuration option + to allow operators to disable sending node metric data via the + message bus notifier. + - | + Adds a new gauge metric ``ConductorManager.PowerSyncNodesCount`` + which tracks the nodes considered for power state synchronization. + - Adds a new gauge metric ``ConductorManager.PowerSyncRecoveryNodeCount`` + which represents the number of nodes which are being evaluated for power + state recovery checking. + - Adds a new gauge metric ``ConductorManager.SyncLocalStateNodeCount`` + which represents the number of nodes being tracked locally by the + conductor. +issues: + - Sensor data notifications to the message bus, such as using the + ``[metrics]backend`` configuration option of ``collector`` on a dedicated + API service process or instance, are not presently supported. This + functionality requires a periodic task to trigger the transmission + of metrics messages to the message bus notifier. +deprecations: + - The setting values starting with ``send_sensor`` in the ``[conductor]`` + configuration group have been deprecated and moved to a ``[sensor_data]`` + configuration group. The names have been updated to shorter, operator + friendly names. +upgrades: + - Settings starting with ``sensor_data`` in the ``[conductor]`` + configuration group have been moved to a ``[sensor_data]`` configuration + group and have been renamed to have shorter value names. If configuration + values are not updated, the ``oslo.config`` library will emit a warning + in the logs.
diff --git a/releasenotes/notes/console-pid-file-6108d2775ef947fe.yaml b/releasenotes/notes/console-pid-file-6108d2775ef947fe.yaml new file mode 100644 index 000000000..427d04da8 --- /dev/null +++ b/releasenotes/notes/console-pid-file-6108d2775ef947fe.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + Fixes an issue where, when a node has console enabled but its pid + file is missing, the console could be neither disabled nor + restarted, which made the console feature unusable. diff --git a/releasenotes/notes/fakedelay-7eac23ad8881a736.yaml b/releasenotes/notes/fakedelay-7eac23ad8881a736.yaml new file mode 100644 index 000000000..fe02d33ff --- /dev/null +++ b/releasenotes/notes/fakedelay-7eac23ad8881a736.yaml @@ -0,0 +1,8 @@ +--- +features: + - | + There are now configurable random wait times for fake drivers in a new + ironic.conf [fake] section. Each supported driver has one configuration + option controlling the delay. These delays are applied to operations which + typically block in other drivers. This allows more realistic scenarios to + be arranged for performance and functional testing of ironic itself. diff --git a/releasenotes/notes/fix-inspectwait-finished-at-4b817af4bf4c30c2.yaml b/releasenotes/notes/fix-inspectwait-finished-at-4b817af4bf4c30c2.yaml new file mode 100644 index 000000000..167a7f4a5 --- /dev/null +++ b/releasenotes/notes/fix-inspectwait-finished-at-4b817af4bf4c30c2.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + Fixes a database API internal check to update the + ``inspection_finished_at`` field upon the completion of inspection.
diff --git a/releasenotes/notes/fix-online-version-migration-db432a7b239647fa.yaml b/releasenotes/notes/fix-online-version-migration-db432a7b239647fa.yaml new file mode 100644 index 000000000..824185aab --- /dev/null +++ b/releasenotes/notes/fix-online-version-migration-db432a7b239647fa.yaml @@ -0,0 +1,14 @@ +--- +fixes: + - | + Fixes an issue in the online upgrade logic where database models for + Node Traits and BIOS Settings resulted in an error when performing + the online data migration. This was because these tables were originally + created as extensions of the Nodes database table, and the schema + of the database was slightly different enough to result in an error + if there was data to migrate in these tables upon upgrade, + which would have occurred if an early BIOS Setting adopter had + data in the database prior to upgrading to the Yoga release of Ironic. + + The online upgrade parameter now substitutes an alternate primary key + name when applicable. diff --git a/releasenotes/notes/ironic-antelope-prelude-0b77964469f56b13.yaml b/releasenotes/notes/ironic-antelope-prelude-0b77964469f56b13.yaml new file mode 100644 index 000000000..98bf9c014 --- /dev/null +++ b/releasenotes/notes/ironic-antelope-prelude-0b77964469f56b13.yaml @@ -0,0 +1,14 @@ +--- +prelude: > + The Ironic team hereby announces the release of OpenStack 2023.1 + (Ironic 23.4.0). This represents the completion of a six month development + cycle, which primarily focused on internal and scaling improvements. + Those improvements included revamping the database layer to improve + performance and ensure compatibility with new versions of SQLAlchemy, + enhancing the ironic-conductor service to export application metrics to + prometheus via the ironic-prometheus-exporter, and the addition of a + new API concept of node sharding to help with scaling of services that + make frequent API calls to Ironic.
+ + The new Ironic release also comes with a slew of bugfixes for Ironic + services and hardware drivers. We sincerely hope you enjoy it! diff --git a/releasenotes/notes/lockutils-default-logging-8c38b8c0ac71043f.yaml b/releasenotes/notes/lockutils-default-logging-8c38b8c0ac71043f.yaml new file mode 100644 index 000000000..6ef3fd546 --- /dev/null +++ b/releasenotes/notes/lockutils-default-logging-8c38b8c0ac71043f.yaml @@ -0,0 +1,8 @@ +--- +other: + - | + The default logging level for the ``oslo_concurrency.lockutils`` + module logging has been changed to ``WARNING``. By default, the debug + logging was resulting in lots of noise. Operators wishing to view debug + logging for this module can utilize the ``[DEFAULT]default_log_levels`` + configuration option. diff --git a/releasenotes/notes/wait_hash_ring_reset-ef8bd548659e9906.yaml b/releasenotes/notes/wait_hash_ring_reset-ef8bd548659e9906.yaml new file mode 100644 index 000000000..cea3e28f3 --- /dev/null +++ b/releasenotes/notes/wait_hash_ring_reset-ef8bd548659e9906.yaml @@ -0,0 +1,13 @@ +--- +fixes: + - | + When a conductor service is stopped it will now continue to respond to RPC + requests until ``[DEFAULT]hash_ring_reset_interval`` has elapsed, allowing + a hash ring reset to complete on the cluster after conductor is + unregistered. This will improve the reliability of the cluster when scaling + down or rolling out updates. + + This delay only occurs when there is more than one online conductor, + to allow fast restarts on single-node ironic installs (bifrost, + metal3). + diff --git a/requirements.txt b/requirements.txt index 0c73e632e..2f4813baa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,7 +14,7 @@ WebOb>=1.7.1 # MIT python-cinderclient!=4.0.0,>=3.3.0 # Apache-2.0 python-glanceclient>=2.8.0 # Apache-2.0 keystoneauth1>=4.2.0 # Apache-2.0 -ironic-lib>=4.6.1 # Apache-2.0 +ironic-lib>=5.4.0 # Apache-2.0 python-swiftclient>=3.2.0 # Apache-2.0 pytz>=2013.6 # MIT stevedore>=1.29.0 # Apache-2.0 |