summaryrefslogtreecommitdiff
path: root/nova/conductor
diff options
context:
space:
mode:
authorSean Mooney <work@seanmooney.info>2020-03-26 23:01:35 +0000
committerBalazs Gibizer <balazs.gibizer@est.tech>2020-09-01 08:41:45 +0000
commit1356ef5b571f80d9fc9a0284034e853cb9c97233 (patch)
treeb976ff43bfb3a769225d271b685110ce22c7da4e /nova/conductor
parent75b5535e3448335cb26972b5ca37afbd24dba01d (diff)
downloadnova-1356ef5b571f80d9fc9a0284034e853cb9c97233.tar.gz
Cyborg evacuate support
This change extends the conductor manager to append the cyborg resource request to the request spec when performing an evacuate. This change passes the ARQs to spawn during rebuild and evacuate. On evacuate the existing ARQs will be deleted and new ARQs will be created and bound, during rebuild the existing ARQs are reused. This change extends the rebuild_instance compute rpcapi function to carry the arq_uuids. This eliminates the need to lookup the uuids associated with the arqs assinged to the instance by quering cyborg. Co-Authored-By: Wenping Song <songwenping@inspur.com> Co-Authored-By: Brin Zhang <zhangbailin@inspur.com> Implements: blueprint cyborg-rebuild-and-evacuate Change-Id: I147bf4d95e6d86ff1f967a8ce37260730f21d236
Diffstat (limited to 'nova/conductor')
-rw-r--r--nova/conductor/manager.py73
1 files changed, 54 insertions, 19 deletions
diff --git a/nova/conductor/manager.py b/nova/conductor/manager.py
index 1a3be3e84f..aa79f8324e 100644
--- a/nova/conductor/manager.py
+++ b/nova/conductor/manager.py
@@ -1147,14 +1147,21 @@ class ComputeTaskManager(base.Base):
# is not forced to be the original host
request_spec.reset_forced_destinations()
- port_res_req = (
+ external_resources = []
+ external_resources += (
self.network_api.get_requested_resource_for_instance(
context, instance.uuid))
- # NOTE(gibi): When cyborg or other module wants to handle
- # similar non-nova resources then here we have to collect
- # all the external resource requests in a single list and
+ extra_specs = request_spec.flavor.extra_specs
+ device_profile = extra_specs.get('accel:device_profile')
+ external_resources.extend(
+ cyborg.get_device_profile_request_groups(
+ context, device_profile)
+ if device_profile else [])
+ # NOTE(gibi): When other modules want to handle similar
+ # non-nova resources then here we have to collect all
+ # the external resource requests in a single list and
# add them to the RequestSpec.
- request_spec.requested_resources = port_res_req
+ request_spec.requested_resources = external_resources
try:
# if this is a rebuild of instance on the same host with
@@ -1219,21 +1226,49 @@ class ComputeTaskManager(base.Base):
instance.availability_zone = (
availability_zones.get_host_availability_zone(
context, host))
+ try:
+ accel_uuids = self._rebuild_cyborg_arq(
+ context, instance, host, request_spec, evacuate)
+ except exception.AcceleratorRequestBindingFailed as exc:
+ cyclient = cyborg.get_client(context)
+ cyclient.delete_arqs_by_uuid(exc.arqs)
+ LOG.exception('Failed to rebuild. Reason: %s', exc)
+ raise exc
+
+ self.compute_rpcapi.rebuild_instance(
+ context,
+ instance=instance,
+ new_pass=new_pass,
+ injected_files=injected_files,
+ image_ref=image_ref,
+ orig_image_ref=orig_image_ref,
+ orig_sys_metadata=orig_sys_metadata,
+ bdms=bdms,
+ recreate=evacuate,
+ on_shared_storage=on_shared_storage,
+ preserve_ephemeral=preserve_ephemeral,
+ migration=migration,
+ host=host,
+ node=node,
+ limits=limits,
+ request_spec=request_spec,
+ accel_uuids=accel_uuids)
+
+ def _rebuild_cyborg_arq(
+ self, context, instance, host, request_spec, evacuate):
+ dp_name = instance.flavor.extra_specs.get('accel:device_profile')
+ if not dp_name:
+ return []
- self.compute_rpcapi.rebuild_instance(context,
- instance=instance,
- new_pass=new_pass,
- injected_files=injected_files,
- image_ref=image_ref,
- orig_image_ref=orig_image_ref,
- orig_sys_metadata=orig_sys_metadata,
- bdms=bdms,
- recreate=evacuate,
- on_shared_storage=on_shared_storage,
- preserve_ephemeral=preserve_ephemeral,
- migration=migration,
- host=host, node=node, limits=limits,
- request_spec=request_spec)
+ cyclient = cyborg.get_client(context)
+ if not evacuate:
+ return cyclient.get_arq_uuids_for_instance(instance)
+
+ cyclient.delete_arqs_for_instance(instance.uuid)
+ resource_provider_mapping = request_spec.get_request_group_mapping()
+ return self._create_and_bind_arqs(
+ context, instance.uuid, instance.flavor.extra_specs,
+ host, resource_provider_mapping)
def _validate_image_traits_for_rebuild(self, context, instance, image_ref):
"""Validates that the traits specified in the image can be satisfied