Diffstat (limited to 'nova/conductor/manager.py')
-rw-r--r-- | nova/conductor/manager.py | 73
1 file changed, 66 insertions(+), 7 deletions(-)
diff --git a/nova/conductor/manager.py b/nova/conductor/manager.py
index 3b43644d9a..4b34b8339c 100644
--- a/nova/conductor/manager.py
+++ b/nova/conductor/manager.py
@@ -21,8 +21,10 @@ import eventlet
 import functools
 import sys
 
+from keystoneauth1 import exceptions as ks_exc
 from oslo_config import cfg
 from oslo_db import exception as db_exc
+from oslo_limit import exception as limit_exceptions
 from oslo_log import log as logging
 import oslo_messaging as messaging
 from oslo_serialization import jsonutils
@@ -45,6 +47,7 @@ from nova import context as nova_context
 from nova import exception
 from nova.i18n import _
 from nova.image import glance
+from nova.limit import placement as placement_limits
 from nova import manager
 from nova.network import neutron
 from nova import notifications
@@ -232,7 +235,7 @@ class ComputeTaskManager:
     may involve coordinating activities on multiple compute nodes.
     """
 
-    target = messaging.Target(namespace='compute_task', version='1.23')
+    target = messaging.Target(namespace='compute_task', version='1.25')
 
     def __init__(self):
         self.compute_rpcapi = compute_rpcapi.ComputeAPI()
@@ -241,11 +244,42 @@ class ComputeTaskManager:
         self.network_api = neutron.API()
         self.servicegroup_api = servicegroup.API()
         self.query_client = query.SchedulerQueryClient()
-        self.report_client = report.SchedulerReportClient()
         self.notifier = rpc.get_notifier('compute')
         # Help us to record host in EventReporter
         self.host = CONF.host
 
+        try:
+            # Test our placement client during initialization
+            self.report_client
+        except (ks_exc.EndpointNotFound,
+                ks_exc.DiscoveryFailure,
+                ks_exc.RequestTimeout,
+                ks_exc.GatewayTimeout,
+                ks_exc.ConnectFailure) as e:
+            # Non-fatal, likely transient (although not definitely);
+            # continue startup but log the warning so that when things
+            # fail later, it will be clear why we can not do certain
+            # things.
+            LOG.warning('Unable to initialize placement client (%s); '
+                        'Continuing with startup, but some operations '
+                        'will not be possible.', e)
+        except (ks_exc.MissingAuthPlugin,
+                ks_exc.Unauthorized) as e:
+            # This is almost definitely fatal mis-configuration. The
+            # Unauthorized error might be transient, but it is
+            # probably reasonable to consider it fatal.
+            LOG.error('Fatal error initializing placement client; '
+                      'config is incorrect or incomplete: %s', e)
+            raise
+        except Exception as e:
+            # Unknown/unexpected errors here are fatal
+            LOG.error('Fatal error initializing placement client: %s', e)
+            raise
+
+    @property
+    def report_client(self):
+        return report.report_client_singleton()
+
     def reset(self):
         LOG.info('Reloading compute RPC API')
         compute_rpcapi.LAST_VERSION = None
@@ -1003,6 +1037,12 @@ class ComputeTaskManager:
             request_spec.requested_resources = res_req
             request_spec.request_level_params = req_lvl_params
 
+            # NOTE(gibi): as PCI devices is tracked in placement we
+            # need to generate request groups from InstancePCIRequests.
+            # This will append new RequestGroup objects to the
+            # request_spec.requested_resources list if needed
+            request_spec.generate_request_groups_from_pci_requests()
+
             # NOTE(cfriesen): Ensure that we restrict the scheduler to
             # the cell specified by the instance mapping.
             self._restrict_request_spec_to_cell(
@@ -1020,6 +1060,12 @@ class ComputeTaskManager:
                 scheduler_utils.populate_filter_properties(
                     filter_properties, selection)
                 (host, node) = (selection.service_host, selection.nodename)
+                LOG.debug(
+                    "Scheduler selected host: %s, node:%s",
+                    host,
+                    node,
+                    instance=instance
+                )
                 instance.availability_zone = (
                     availability_zones.get_host_availability_zone(
                         context, host))
@@ -1106,7 +1152,8 @@ class ComputeTaskManager:
                          injected_files, new_pass, orig_sys_metadata,
                          bdms, recreate, on_shared_storage,
                          preserve_ephemeral=False, host=None,
-                         request_spec=None):
+                         request_spec=None, reimage_boot_volume=False,
+                         target_state=None):
         # recreate=True means the instance is being evacuated from a failed
         # host to a new destination host. The 'recreate' variable name is
         # confusing, so rename it to evacuate here at the top, which is simpler
@@ -1202,6 +1249,12 @@ class ComputeTaskManager:
             request_spec.requested_resources = res_req
             request_spec.request_level_params = req_lvl_params
 
+            # NOTE(gibi): as PCI devices is tracked in placement we
+            # need to generate request groups from InstancePCIRequests.
+            # This will append new RequestGroup objects to the
+            # request_spec.requested_resources list if needed
+            request_spec.generate_request_groups_from_pci_requests()
+
             try:
                 # if this is a rebuild of instance on the same host with
                 # new image.
@@ -1303,7 +1356,9 @@ class ComputeTaskManager:
                 node=node, limits=limits,
                 request_spec=request_spec,
-                accel_uuids=accel_uuids)
+                accel_uuids=accel_uuids,
+                reimage_boot_volume=reimage_boot_volume,
+                target_state=target_state)
 
     def _validate_image_traits_for_rebuild(self, context, instance,
                                            image_ref):
         """Validates that the traits specified in the image can be satisfied
@@ -1632,7 +1687,11 @@ class ComputeTaskManager:
                 compute_utils.check_num_instances_quota(
                     context, instance.flavor, 0, 0,
                     orig_num_req=len(build_requests))
-            except exception.TooManyInstances as exc:
+                placement_limits.enforce_num_instances_and_flavor(
+                    context, context.project_id, instance.flavor,
+                    request_specs[0].is_bfv, 0, 0)
+            except (exception.TooManyInstances,
+                    limit_exceptions.ProjectOverLimit) as exc:
                 with excutils.save_and_reraise_exception():
                     self._cleanup_build_artifacts(context, exc, instances,
                                                   build_requests,
@@ -2037,8 +2096,8 @@ class ComputeTaskManager:
                     skipped_host(target_ctxt, host, image_ids)
                     continue
 
-            fetch_pool.spawn_n(wrap_cache_images, target_ctxt, host,
-                               image_ids)
+            utils.pass_context(fetch_pool.spawn_n, wrap_cache_images,
+                               target_ctxt, host, image_ids)
 
         # Wait until all those things finish
         fetch_pool.waitall()
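Note on the placement client change: the per-manager report.SchedulerReportClient() is replaced by a lazily-evaluated report_client property backed by report.report_client_singleton(), and __init__ now probes it once so keystoneauth1 failures are triaged at startup: transient network errors log a warning and startup continues, while missing or rejected credentials abort. A minimal standalone sketch of the same pattern, where _make_client() is a hypothetical stand-in for the real client constructor:

    import logging

    from keystoneauth1 import exceptions as ks_exc

    LOG = logging.getLogger(__name__)

    _CLIENT = None

    def _make_client():
        # Placeholder; nova builds its SchedulerReportClient here.
        return object()

    def report_client_singleton():
        # Construct once per process and reuse, so every conductor task
        # shares one client instead of each manager holding its own.
        global _CLIENT
        if _CLIENT is None:
            _CLIENT = _make_client()
        return _CLIENT

    class Manager:
        # Same triage as the diff: transient network trouble warns and
        # continues; missing or rejected credentials abort startup.
        TRANSIENT = (ks_exc.EndpointNotFound, ks_exc.DiscoveryFailure,
                     ks_exc.RequestTimeout, ks_exc.GatewayTimeout,
                     ks_exc.ConnectFailure)

        def __init__(self):
            try:
                self.report_client  # touching the property forces construction
            except self.TRANSIENT as e:
                LOG.warning('placement unavailable, continuing: %s', e)
            except (ks_exc.MissingAuthPlugin, ks_exc.Unauthorized):
                raise  # fatal misconfiguration

        @property
        def report_client(self):
            return report_client_singleton()

    Manager()  # probes placement once, at service startup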
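Note on the two generate_request_groups_from_pci_requests() call sites: with PCI devices now tracked in placement, each InstancePCIRequest must be turned into a request group appended to request_spec.requested_resources before scheduling. The sketch below shows the idea only; the dataclasses and the dict-shaped group are illustrative stand-ins, not the real fields of nova's InstancePCIRequest or RequestGroup objects:

    from dataclasses import dataclass, field

    @dataclass
    class PCIRequest:                 # stand-in for InstancePCIRequest
        count: int
        resource_class: str           # e.g. a CUSTOM_PCI_<vendor>_<product> class

    @dataclass
    class RequestSpec:                # stand-in for nova's RequestSpec
        pci_requests: list
        requested_resources: list = field(default_factory=list)

        def generate_request_groups_from_pci_requests(self):
            for req in self.pci_requests:
                # One group per PCI request: ask placement for `count`
                # units of the device's resource class, alongside any
                # groups the ports or flavor already contributed.
                self.requested_resources.append(
                    {'resources': {req.resource_class: req.count}})

    spec = RequestSpec(pci_requests=[PCIRequest(2, 'CUSTOM_PCI_8086_1563')])
    spec.generate_request_groups_from_pci_requests()
    print(spec.requested_resources)
    # -> [{'resources': {'CUSTOM_PCI_8086_1563': 2}}]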
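Note on the version bump from 1.23 to 1.25: rebuild_instance gains the reimage_boot_volume and target_state parameters, so the compute_task RPC interface needs two new minor versions. On the caller side, oslo.messaging clients conventionally drop kwargs the remote service is too old to accept; the sketch below assumes, without confirmation from this diff alone, that 1.24 added reimage_boot_volume and 1.25 added target_state:

    # Client-side compatibility shim in the style nova's rpcapi modules
    # use after a version bump: new kwargs are dropped when the remote
    # conductor only speaks an older version. The kwarg-to-version
    # mapping is an assumption for illustration; see
    # nova/conductor/rpcapi.py for the authoritative negotiation.
    def rebuild_instance(client, ctxt, **kwargs):
        version = '1.25'
        if not client.can_send_version(version):
            kwargs.pop('target_state')         # assumed 1.25 addition
            version = '1.24'
        if not client.can_send_version(version):
            kwargs.pop('reimage_boot_volume')  # assumed 1.24 addition
            version = '1.23'
        cctxt = client.prepare(version=version)
        cctxt.cast(ctxt, 'rebuild_instance', **kwargs)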
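Note on the quota change: the legacy check_num_instances_quota now runs alongside unified-limits enforcement via placement_limits.enforce_num_instances_and_flavor, and the except clause catches oslo.limit's ProjectOverLimit together with nova's TooManyInstances. A sketch of how oslo.limit enforcement behaves, using its public Enforcer API; the resource name 'servers' and the hardcoded usage are illustrative, and a real run needs keystone endpoint settings in the [oslo_limit] config section:

    from oslo_limit import exception as limit_exceptions
    from oslo_limit import limit

    def count_usage(project_id, resource_names):
        # Callback oslo.limit invokes to learn current usage; nova's
        # real callback counts instances/cores/ram (partly via
        # placement). Hardcoded here purely for illustration.
        return {name: 3 for name in resource_names}

    enforcer = limit.Enforcer(count_usage)
    try:
        # "Would one more server push this project over its registered
        # limit?" Raises ProjectOverLimit if so.
        enforcer.enforce('my-project-id', {'servers': 1})
    except limit_exceptions.ProjectOverLimit:
        # The conductor's equivalent path cleans up build artifacts and
        # re-raises via save_and_reraise_exception().
        pass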
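Note on utils.pass_context: under eventlet monkey-patching, threading.local is green-local, so a bare fetch_pool.spawn_n would run wrap_cache_images without the caller's oslo.context request context (losing the request-id logging fields, among other things). A sketch of the assumed shape of such a wrapper, capturing the current context and re-installing it inside the spawned greenthread:

    import eventlet
    eventlet.monkey_patch()

    from oslo_context import context as common_context

    def pass_context(runner, fn, *args, **kwargs):
        # Assumed shape of the helper: capture the caller's context now,
        # re-install it in the child greenthread before running fn.
        captured = common_context.get_current()

        def wrapped(*a, **kw):
            if captured is not None:
                captured.update_store()  # put it back in this thread's store
            return fn(*a, **kw)

        return runner(wrapped, *args, **kwargs)

    # Usage mirroring the diff: spawn into a pool without losing context.
    common_context.RequestContext(request_id='req-example')  # sets the store
    pool = eventlet.GreenPool(size=2)
    pass_context(pool.spawn_n,
                 lambda: print(common_context.get_current().request_id))
    pool.waitall()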