Diffstat (limited to 'nova/conductor/manager.py')
-rw-r--r--  nova/conductor/manager.py  73
1 file changed, 66 insertions, 7 deletions
diff --git a/nova/conductor/manager.py b/nova/conductor/manager.py
index 3b43644d9a..4b34b8339c 100644
--- a/nova/conductor/manager.py
+++ b/nova/conductor/manager.py
@@ -21,8 +21,10 @@ import eventlet
import functools
import sys
+from keystoneauth1 import exceptions as ks_exc
from oslo_config import cfg
from oslo_db import exception as db_exc
+from oslo_limit import exception as limit_exceptions
from oslo_log import log as logging
import oslo_messaging as messaging
from oslo_serialization import jsonutils
@@ -45,6 +47,7 @@ from nova import context as nova_context
from nova import exception
from nova.i18n import _
from nova.image import glance
+from nova.limit import placement as placement_limits
from nova import manager
from nova.network import neutron
from nova import notifications
@@ -232,7 +235,7 @@ class ComputeTaskManager:
may involve coordinating activities on multiple compute nodes.
"""
- target = messaging.Target(namespace='compute_task', version='1.23')
+ target = messaging.Target(namespace='compute_task', version='1.25')
def __init__(self):
self.compute_rpcapi = compute_rpcapi.ComputeAPI()
@@ -241,11 +244,42 @@ class ComputeTaskManager:
self.network_api = neutron.API()
self.servicegroup_api = servicegroup.API()
self.query_client = query.SchedulerQueryClient()
- self.report_client = report.SchedulerReportClient()
self.notifier = rpc.get_notifier('compute')
# Help us to record host in EventReporter
self.host = CONF.host
+ try:
+ # Test our placement client during initialization
+ self.report_client
+ except (ks_exc.EndpointNotFound,
+ ks_exc.DiscoveryFailure,
+ ks_exc.RequestTimeout,
+ ks_exc.GatewayTimeout,
+ ks_exc.ConnectFailure) as e:
+ # Non-fatal, likely transient (although not definitely);
+ # continue startup but log the warning so that when things
+ # fail later, it will be clear why we cannot do certain
+ # things.
+ LOG.warning('Unable to initialize placement client (%s); '
+ 'Continuing with startup, but some operations '
+ 'will not be possible.', e)
+ except (ks_exc.MissingAuthPlugin,
+ ks_exc.Unauthorized) as e:
+ # This is almost certainly a fatal misconfiguration. The
+ # Unauthorized error might be transient, but it is
+ # probably reasonable to consider it fatal.
+ LOG.error('Fatal error initializing placement client; '
+ 'config is incorrect or incomplete: %s', e)
+ raise
+ except Exception as e:
+ # Unknown/unexpected errors here are fatal
+ LOG.error('Fatal error initializing placement client: %s', e)
+ raise
+
+ @property
+ def report_client(self):
+ return report.report_client_singleton()
+
def reset(self):
LOG.info('Reloading compute RPC API')
compute_rpcapi.LAST_VERSION = None
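
The hunk above drops the SchedulerReportClient instance created in __init__ in favour of a report_client property backed by report.report_client_singleton(), and probes it once at start-up: transient placement failures only log a warning, while auth problems abort. A minimal sketch of that lazy-singleton-plus-probe pattern, using an illustrative _PlacementClient stand-in and a module-level cache rather than Nova's real report module:

from keystoneauth1 import exceptions as ks_exc


class _PlacementClient:
    """Illustrative stand-in for the real placement/report client."""
    def __init__(self):
        # A real client would discover the placement endpoint here and
        # can raise the keystoneauth1 exceptions handled below.
        pass


_CLIENT = None


def report_client_singleton():
    # One shared client per process, mirroring the helper the hunk
    # above switches to (names here are stand-ins, not Nova's API).
    global _CLIENT
    if _CLIENT is None:
        _CLIENT = _PlacementClient()
    return _CLIENT


class Manager:
    @property
    def report_client(self):
        # Resolved lazily on every access instead of being pinned in
        # __init__, so a failed start-up probe does not leave a broken
        # client cached on the manager.
        return report_client_singleton()

    def __init__(self):
        try:
            # Probe once at start-up so problems surface immediately.
            self.report_client
        except (ks_exc.EndpointNotFound, ks_exc.DiscoveryFailure,
                ks_exc.RequestTimeout, ks_exc.GatewayTimeout,
                ks_exc.ConnectFailure):
            # Likely transient: warn and continue starting up.
            pass
        except (ks_exc.MissingAuthPlugin, ks_exc.Unauthorized):
            # Almost certainly misconfiguration: fail fast.
            raise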
@@ -1003,6 +1037,12 @@ class ComputeTaskManager:
request_spec.requested_resources = res_req
request_spec.request_level_params = req_lvl_params
+ # NOTE(gibi): as PCI devices are tracked in placement, we
+ # need to generate request groups from InstancePCIRequests.
+ # This will append new RequestGroup objects to the
+ # request_spec.requested_resources list if needed
+ request_spec.generate_request_groups_from_pci_requests()
+
# NOTE(cfriesen): Ensure that we restrict the scheduler to
# the cell specified by the instance mapping.
self._restrict_request_spec_to_cell(
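
Two hunks in this patch call request_spec.generate_request_groups_from_pci_requests() before handing the spec to the scheduler. A simplified sketch of the idea, using PciRequest/RequestGroup dataclass stand-ins rather than Nova's real objects: each placement-tracked PCI request becomes a request group asking for that many units of the device's resource class, appended to requested_resources so placement can filter candidate hosts.

import dataclasses


@dataclasses.dataclass
class PciRequest:
    # Illustrative stand-in for an InstancePCIRequest.
    count: int
    resource_class: str  # e.g. "CUSTOM_PCI_8086_1563"


@dataclasses.dataclass
class RequestGroup:
    # Illustrative stand-in for the scheduler's RequestGroup object.
    resources: dict


def request_groups_from_pci_requests(pci_requests):
    # One group per PCI request: "count" units of its resource class.
    return [RequestGroup(resources={r.resource_class: r.count})
            for r in pci_requests]


requested_resources = []
requested_resources += request_groups_from_pci_requests(
    [PciRequest(count=2, resource_class="CUSTOM_PCI_8086_1563")])
print(requested_resources)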
@@ -1020,6 +1060,12 @@ class ComputeTaskManager:
scheduler_utils.populate_filter_properties(
filter_properties, selection)
(host, node) = (selection.service_host, selection.nodename)
+ LOG.debug(
+ "Scheduler selected host: %s, node:%s",
+ host,
+ node,
+ instance=instance
+ )
instance.availability_zone = (
availability_zones.get_host_availability_zone(
context, host))
@@ -1106,7 +1152,8 @@ class ComputeTaskManager:
injected_files, new_pass, orig_sys_metadata,
bdms, recreate, on_shared_storage,
preserve_ephemeral=False, host=None,
- request_spec=None):
+ request_spec=None, reimage_boot_volume=False,
+ target_state=None):
# recreate=True means the instance is being evacuated from a failed
# host to a new destination host. The 'recreate' variable name is
# confusing, so rename it to evacuate here at the top, which is simpler
@@ -1202,6 +1249,12 @@ class ComputeTaskManager:
request_spec.requested_resources = res_req
request_spec.request_level_params = req_lvl_params
+ # NOTE(gibi): as PCI devices are tracked in placement, we
+ # need to generate request groups from InstancePCIRequests.
+ # This will append new RequestGroup objects to the
+ # request_spec.requested_resources list if needed
+ request_spec.generate_request_groups_from_pci_requests()
+
try:
# if this is a rebuild of instance on the same host with
# new image.
@@ -1303,7 +1356,9 @@ class ComputeTaskManager:
node=node,
limits=limits,
request_spec=request_spec,
- accel_uuids=accel_uuids)
+ accel_uuids=accel_uuids,
+ reimage_boot_volume=reimage_boot_volume,
+ target_state=target_state)
def _validate_image_traits_for_rebuild(self, context, instance, image_ref):
"""Validates that the traits specified in the image can be satisfied
@@ -1632,7 +1687,11 @@ class ComputeTaskManager:
compute_utils.check_num_instances_quota(
context, instance.flavor, 0, 0,
orig_num_req=len(build_requests))
- except exception.TooManyInstances as exc:
+ placement_limits.enforce_num_instances_and_flavor(
+ context, context.project_id, instance.flavor,
+ request_specs[0].is_bfv, 0, 0)
+ except (exception.TooManyInstances,
+ limit_exceptions.ProjectOverLimit) as exc:
with excutils.save_and_reraise_exception():
self._cleanup_build_artifacts(context, exc, instances,
build_requests,
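
The hunk above runs the new unified-limits check from nova.limit.placement next to the legacy check_num_instances_quota recheck and routes either failure, TooManyInstances or oslo.limit's ProjectOverLimit, through the same cleanup. A minimal sketch of that pattern with stand-in check functions and exceptions (not Nova's real signatures):

class TooManyInstances(Exception):
    """Stand-in for the legacy quota failure."""


class ProjectOverLimit(Exception):
    """Stand-in for the oslo.limit / unified-limits failure."""


def check_legacy_quota(project_id, num_instances):
    if num_instances > 10:          # pretend the legacy quota is 10
        raise TooManyInstances()


def enforce_unified_limits(project_id, num_instances):
    if num_instances > 5:           # pretend the unified limit is 5
        raise ProjectOverLimit()


def cleanup_build_artifacts(exc):
    print("cleaning up build artifacts after: %r" % (exc,))


def recheck_limits_or_cleanup(project_id, num_instances):
    try:
        # Run both checks; either failure takes the same cleanup path.
        check_legacy_quota(project_id, num_instances)
        enforce_unified_limits(project_id, num_instances)
    except (TooManyInstances, ProjectOverLimit) as exc:
        cleanup_build_artifacts(exc)
        raise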
@@ -2037,8 +2096,8 @@ class ComputeTaskManager:
skipped_host(target_ctxt, host, image_ids)
continue
- fetch_pool.spawn_n(wrap_cache_images, target_ctxt, host,
- image_ids)
+ utils.pass_context(fetch_pool.spawn_n, wrap_cache_images,
+ target_ctxt, host, image_ids)
# Wait until all those things finish
fetch_pool.waitall()
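
The last hunk wraps fetch_pool.spawn_n in utils.pass_context so the target cell context travels into the worker that pre-caches images instead of being lost when the greenthread starts. A hedged sketch of that idea, with a thread-local store and a synchronous runner standing in for the real context store and eventlet's spawn_n:

import threading

_store = threading.local()


def pass_context(runner, func, *args, **kwargs):
    # Capture whatever context the caller has right now ...
    ctxt = getattr(_store, "context", None)

    def _with_context(*a, **kw):
        # ... and reinstall it inside the spawned task before it runs,
        # so logging and downstream calls keep the original request id.
        _store.context = ctxt
        return func(*a, **kw)

    return runner(_with_context, *args, **kwargs)


def run_now(fn, *args, **kwargs):
    # Stand-in for fetch_pool.spawn_n: just run the callable inline.
    return fn(*args, **kwargs)


def cache_images(host, image_ids):
    print(getattr(_store, "context"), host, image_ids)


_store.context = {"request_id": "req-example"}
pass_context(run_now, cache_images, "compute-1", ["image-a"])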