Diffstat (limited to 'nova/conductor/manager.py')
-rw-r--r-- | nova/conductor/manager.py | 73
1 file changed, 66 insertions(+), 7 deletions(-)
diff --git a/nova/conductor/manager.py b/nova/conductor/manager.py
index 3b43644d9a..4b34b8339c 100644
--- a/nova/conductor/manager.py
+++ b/nova/conductor/manager.py
@@ -21,8 +21,10 @@ import eventlet
 import functools
 import sys
 
+from keystoneauth1 import exceptions as ks_exc
 from oslo_config import cfg
 from oslo_db import exception as db_exc
+from oslo_limit import exception as limit_exceptions
 from oslo_log import log as logging
 import oslo_messaging as messaging
 from oslo_serialization import jsonutils
@@ -45,6 +47,7 @@ from nova import context as nova_context
 from nova import exception
 from nova.i18n import _
 from nova.image import glance
+from nova.limit import placement as placement_limits
 from nova import manager
 from nova.network import neutron
 from nova import notifications
@@ -232,7 +235,7 @@ class ComputeTaskManager:
     may involve coordinating activities on multiple compute nodes.
     """
 
-    target = messaging.Target(namespace='compute_task', version='1.23')
+    target = messaging.Target(namespace='compute_task', version='1.25')
 
     def __init__(self):
         self.compute_rpcapi = compute_rpcapi.ComputeAPI()
@@ -241,11 +244,42 @@ class ComputeTaskManager:
         self.network_api = neutron.API()
         self.servicegroup_api = servicegroup.API()
         self.query_client = query.SchedulerQueryClient()
-        self.report_client = report.SchedulerReportClient()
         self.notifier = rpc.get_notifier('compute')
         # Help us to record host in EventReporter
         self.host = CONF.host
 
+        try:
+            # Test our placement client during initialization
+            self.report_client
+        except (ks_exc.EndpointNotFound,
+                ks_exc.DiscoveryFailure,
+                ks_exc.RequestTimeout,
+                ks_exc.GatewayTimeout,
+                ks_exc.ConnectFailure) as e:
+            # Non-fatal, likely transient (although not definitely);
+            # continue startup but log the warning so that when things
+            # fail later, it will be clear why we can not do certain
+            # things.
+            LOG.warning('Unable to initialize placement client (%s); '
+                        'Continuing with startup, but some operations '
+                        'will not be possible.', e)
+        except (ks_exc.MissingAuthPlugin,
+                ks_exc.Unauthorized) as e:
+            # This is almost definitely fatal mis-configuration. The
+            # Unauthorized error might be transient, but it is
+            # probably reasonable to consider it fatal.
+            LOG.error('Fatal error initializing placement client; '
+                      'config is incorrect or incomplete: %s', e)
+            raise
+        except Exception as e:
+            # Unknown/unexpected errors here are fatal
+            LOG.error('Fatal error initializing placement client: %s', e)
+            raise
+
+    @property
+    def report_client(self):
+        return report.report_client_singleton()
+
     def reset(self):
         LOG.info('Reloading compute RPC API')
         compute_rpcapi.LAST_VERSION = None
@@ -1003,6 +1037,12 @@ class ComputeTaskManager:
             request_spec.requested_resources = res_req
             request_spec.request_level_params = req_lvl_params
 
+            # NOTE(gibi): as PCI devices is tracked in placement we
+            # need to generate request groups from InstancePCIRequests.
+            # This will append new RequestGroup objects to the
+            # request_spec.requested_resources list if needed
+            request_spec.generate_request_groups_from_pci_requests()
+
             # NOTE(cfriesen): Ensure that we restrict the scheduler to
             # the cell specified by the instance mapping.
             self._restrict_request_spec_to_cell(
@@ -1020,6 +1060,12 @@ class ComputeTaskManager:
                 scheduler_utils.populate_filter_properties(
                     filter_properties, selection)
                 (host, node) = (selection.service_host, selection.nodename)
+                LOG.debug(
+                    "Scheduler selected host: %s, node:%s",
+                    host,
+                    node,
+                    instance=instance
+                )
                 instance.availability_zone = (
                     availability_zones.get_host_availability_zone(
                         context, host))
@@ -1106,7 +1152,8 @@ class ComputeTaskManager:
                          injected_files, new_pass, orig_sys_metadata,
                          bdms, recreate, on_shared_storage,
                          preserve_ephemeral=False, host=None,
-                         request_spec=None):
+                         request_spec=None, reimage_boot_volume=False,
+                         target_state=None):
         # recreate=True means the instance is being evacuated from a failed
         # host to a new destination host. The 'recreate' variable name is
         # confusing, so rename it to evacuate here at the top, which is simpler
@@ -1202,6 +1249,12 @@ class ComputeTaskManager:
             request_spec.requested_resources = res_req
             request_spec.request_level_params = req_lvl_params
 
+            # NOTE(gibi): as PCI devices is tracked in placement we
+            # need to generate request groups from InstancePCIRequests.
+            # This will append new RequestGroup objects to the
+            # request_spec.requested_resources list if needed
+            request_spec.generate_request_groups_from_pci_requests()
+
             try:
                 # if this is a rebuild of instance on the same host with
                 # new image.
@@ -1303,7 +1356,9 @@ class ComputeTaskManager:
                 node=node, limits=limits,
                 request_spec=request_spec,
-                accel_uuids=accel_uuids)
+                accel_uuids=accel_uuids,
+                reimage_boot_volume=reimage_boot_volume,
+                target_state=target_state)
 
     def _validate_image_traits_for_rebuild(self, context, instance,
                                            image_ref):
         """Validates that the traits specified in the image can be satisfied
@@ -1632,7 +1687,11 @@ class ComputeTaskManager:
                 compute_utils.check_num_instances_quota(
                     context, instance.flavor, 0, 0,
                     orig_num_req=len(build_requests))
-            except exception.TooManyInstances as exc:
+                placement_limits.enforce_num_instances_and_flavor(
+                    context, context.project_id, instance.flavor,
+                    request_specs[0].is_bfv, 0, 0)
+            except (exception.TooManyInstances,
+                    limit_exceptions.ProjectOverLimit) as exc:
                 with excutils.save_and_reraise_exception():
                     self._cleanup_build_artifacts(context, exc, instances,
                                                   build_requests,
@@ -2037,8 +2096,8 @@ class ComputeTaskManager:
                     skipped_host(target_ctxt, host, image_ids)
                     continue
 
-            fetch_pool.spawn_n(wrap_cache_images, target_ctxt, host,
-                               image_ids)
+            utils.pass_context(fetch_pool.spawn_n, wrap_cache_images,
+                               target_ctxt, host, image_ids)
 
         # Wait until all those things finish
         fetch_pool.waitall()
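Note on the placement client change: the per-manager report.SchedulerReportClient() is replaced by a lazily-evaluated report_client property backed by report.report_client_singleton(), and __init__ now probes it once so keystoneauth1 failures are triaged at startup: transient network errors log a warning and startup continues, while missing or rejected credentials abort. A minimal standalone sketch of the same pattern, where _make_client() is a hypothetical stand-in for the real client constructor:

    import logging

    from keystoneauth1 import exceptions as ks_exc

    LOG = logging.getLogger(__name__)

    _CLIENT = None

    def _make_client():
        # Placeholder; nova builds its SchedulerReportClient here.
        return object()

    def report_client_singleton():
        # Construct once per process and reuse, so every conductor task
        # shares one client instead of each manager holding its own.
        global _CLIENT
        if _CLIENT is None:
            _CLIENT = _make_client()
        return _CLIENT

    class Manager:
        # Same triage as the diff: transient network trouble warns and
        # continues; missing or rejected credentials abort startup.
        TRANSIENT = (ks_exc.EndpointNotFound, ks_exc.DiscoveryFailure,
                     ks_exc.RequestTimeout, ks_exc.GatewayTimeout,
                     ks_exc.ConnectFailure)

        def __init__(self):
            try:
                self.report_client  # touching the property forces construction
            except self.TRANSIENT as e:
                LOG.warning('placement unavailable, continuing: %s', e)
            except (ks_exc.MissingAuthPlugin, ks_exc.Unauthorized):
                raise  # fatal misconfiguration

        @property
        def report_client(self):
            return report_client_singleton()

    Manager()  # probes placement once, at service startup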
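Note on the two generate_request_groups_from_pci_requests() call sites: with PCI devices now tracked in placement, each InstancePCIRequest must be turned into a request group appended to request_spec.requested_resources before scheduling. The sketch below shows the idea only; the dataclasses and the dict-shaped group are illustrative stand-ins, not the real fields of nova's InstancePCIRequest or RequestGroup objects:

    from dataclasses import dataclass, field

    @dataclass
    class PCIRequest:                 # stand-in for InstancePCIRequest
        count: int
        resource_class: str           # e.g. a CUSTOM_PCI_<vendor>_<product> class

    @dataclass
    class RequestSpec:                # stand-in for nova's RequestSpec
        pci_requests: list
        requested_resources: list = field(default_factory=list)

        def generate_request_groups_from_pci_requests(self):
            for req in self.pci_requests:
                # One group per PCI request: ask placement for `count`
                # units of the device's resource class, alongside any
                # groups the ports or flavor already contributed.
                self.requested_resources.append(
                    {'resources': {req.resource_class: req.count}})

    spec = RequestSpec(pci_requests=[PCIRequest(2, 'CUSTOM_PCI_8086_1563')])
    spec.generate_request_groups_from_pci_requests()
    print(spec.requested_resources)
    # -> [{'resources': {'CUSTOM_PCI_8086_1563': 2}}]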
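Note on the version bump from 1.23 to 1.25: rebuild_instance gains the reimage_boot_volume and target_state parameters, so the compute_task RPC interface needs two new minor versions. On the caller side, oslo.messaging clients conventionally drop kwargs the remote service is too old to accept; the sketch below assumes, without confirmation from this diff alone, that 1.24 added reimage_boot_volume and 1.25 added target_state:

    # Client-side compatibility shim in the style nova's rpcapi modules
    # use after a version bump: new kwargs are dropped when the remote
    # conductor only speaks an older version. The kwarg-to-version
    # mapping is an assumption for illustration; see
    # nova/conductor/rpcapi.py for the authoritative negotiation.
    def rebuild_instance(client, ctxt, **kwargs):
        version = '1.25'
        if not client.can_send_version(version):
            kwargs.pop('target_state')         # assumed 1.25 addition
            version = '1.24'
        if not client.can_send_version(version):
            kwargs.pop('reimage_boot_volume')  # assumed 1.24 addition
            version = '1.23'
        cctxt = client.prepare(version=version)
        cctxt.cast(ctxt, 'rebuild_instance', **kwargs)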
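Note on the quota change: the legacy check_num_instances_quota now runs alongside unified-limits enforcement via placement_limits.enforce_num_instances_and_flavor, and the except clause catches oslo.limit's ProjectOverLimit together with nova's TooManyInstances. A sketch of how oslo.limit enforcement behaves, using its public Enforcer API; the resource name 'servers' and the hardcoded usage are illustrative, and a real run needs keystone endpoint settings in the [oslo_limit] config section:

    from oslo_limit import exception as limit_exceptions
    from oslo_limit import limit

    def count_usage(project_id, resource_names):
        # Callback oslo.limit invokes to learn current usage; nova's
        # real callback counts instances/cores/ram (partly via
        # placement). Hardcoded here purely for illustration.
        return {name: 3 for name in resource_names}

    enforcer = limit.Enforcer(count_usage)
    try:
        # "Would one more server push this project over its registered
        # limit?" Raises ProjectOverLimit if so.
        enforcer.enforce('my-project-id', {'servers': 1})
    except limit_exceptions.ProjectOverLimit:
        # The conductor's equivalent path cleans up build artifacts and
        # re-raises via save_and_reraise_exception().
        pass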
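Note on utils.pass_context: under eventlet monkey-patching, threading.local is green-local, so a bare fetch_pool.spawn_n would run wrap_cache_images without the caller's oslo.context request context (losing the request-id logging fields, among other things). A sketch of the assumed shape of such a wrapper, capturing the current context and re-installing it inside the spawned greenthread:

    import eventlet
    eventlet.monkey_patch()

    from oslo_context import context as common_context

    def pass_context(runner, fn, *args, **kwargs):
        # Assumed shape of the helper: capture the caller's context now,
        # re-install it in the child greenthread before running fn.
        captured = common_context.get_current()

        def wrapped(*a, **kw):
            if captured is not None:
                captured.update_store()  # put it back in this thread's store
            return fn(*a, **kw)

        return runner(wrapped, *args, **kwargs)

    # Usage mirroring the diff: spawn into a pool without losing context.
    common_context.RequestContext(request_id='req-example')  # sets the store
    pool = eventlet.GreenPool(size=2)
    pass_context(pool.spawn_n,
                 lambda: print(common_context.get_current().request_id))
    pool.waitall()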